
    Uh                        d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl	m
Z
 d dlmZmZmZ ddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlm Z m!Z!m"Z" ddl#m$Z$  e"jJ                  e&      Z'd Z( G d de
jR                        Z*e
jV                  e*dZ, G d de
jR                        Z- G d de
jR                        Z. G d de
jR                        Z/ G d de
jR                        Z0 G d de
jR                        Z1 G d de
jR                        Z2 G d de
jR                        Z3 G d  d!e
jR                        Z4 G d" d#e
jR                        Z5 G d$ d%e
jR                        Z6 G d& d'e
jR                        Z7 G d( d)e
jR                        Z8 G d* d+e
jR                        Z9 G d, d-e
jR                        Z: G d. d/e
jR                        Z; G d0 d1e
jR                        Z< G d2 d3e
jR                        Z= G d4 d5e
jR                        Z>e! G d6 d7e             Z?e G d8 d9e              Z@e! G d: d;e?             ZA e!d<=       G d> d?e?             ZBe! G d@ dAe?             ZC G dB dCe
jR                        ZD e!dD=       G dE dFe?             ZE e!dG=       G dH dIe?             ZFe! G dJ dKe?             ZGe! G dL dMe?             ZHe! G dN dOe?             ZIg dPZJy)Q    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringlogging   )MobileBertConfigc           	         	 ddl }ddl}ddl}t        j                  j                  |      }t        j                  d|        |j                  j                  |      }g }g }	|D ]^  \  }
}t        j                  d|
 d|        |j                  j                  ||
      }|j                  |
       |	j                  |       ` t        ||	      D ]  \  }
}|
j                  dd      }
|
j                  d	d
      }
|
j                  dd      }
|
j                  dd      }
|
j!                  d      }
t#        d |
D              r(t        j                  ddj%                  |
              | }|
D ]  }|j'                  d|      r|j!                  d|      }n|g}|d   dk(  s|d   dk(  rt)        |d      }nW|d   dk(  s|d   dk(  rt)        |d      }n:|d   dk(  rt)        |d      }n%|d   dk(  rt)        |d      }n	 t)        ||d         }t-        |      dk\  st/        |d         }||   } dd d k(  rt)        |d      }n|dk(  r|j1                  |      }	 |j2                  |j2                  k(  s"J d!|j2                   d"|j2                   d#       	 t        j                  d$|
        t9        j:                  |      |_         | S # t        $ r t        j                  d        w xY w# t*        $ r+ t        j                  ddj%                  |
              Y w xY w# t4        $ r1}|xj6                  |j2                  |j2                  fz  c_         d}~ww xY w)%z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape 	ffn_layerffnFakeLayerNorm	LayerNormextra_output_weightszdense/kernelbert
mobilebert/c              3   $   K   | ]  }|d v  
 yw))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     /var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/mobilebert/modeling_mobilebert.py	<genexpr>z0load_tf_weights_in_mobilebert.<locals>.<genexpr>V   s      
 nn
s   z	Skipping z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabiasoutput_weightssquad
classifier   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipreplacesplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr<   nptftf_path	init_varsnamesarraysnamerU   arraypointerm_namescope_namesnumes                     r/   load_tf_weights_in_mobilebertrk   5   sh   
 ggoo01G
KK8	BC''0IEF  e(l5'BC&&w5Te	 5&) 1/e||K/||O[9||2NC||FL1zz#  

 
 KK)CHHTN#345 	'F||,f5 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62Q#33!'84Q7*!'<8%g{1~>G ;1$+a.)!#,+	', #$<=(gx0GxLL'E	==EKK/  /@[Y/ 	078''.c1/d LI  Q	
 	b & KK)CHHTN+; <=  	FFw}}ekk22F	s5   K K6 ;L- K360L*)L*-	M'6,M""M'c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )NoNormc                     t         |           t        j                  t	        j
                  |            | _        t        j                  t	        j                  |            | _        y N)	super__init__r   	ParameterrX   zerosr6   onesr3   )self	feat_sizeeps	__class__s      r/   rq   zNoNorm.__init__   s@    LLY!78	ll5::i#89    input_tensorreturnc                 :    || j                   z  | j                  z   S ro   )r3   r6   )ru   rz   s     r/   forwardzNoNorm.forward   s    dkk)DII55ry   ro   __name__
__module____qualname__rq   rX   Tensorr}   __classcell__rx   s   @r/   rm   rm      s#    :
6ELL 6U\\ 6ry   rm   )
layer_normno_normc                        e Zd ZdZ fdZ	 	 	 	 d	deej                     deej                     deej                     deej                     dej                  f
dZ
 xZS )
MobileBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 X   t         |           |j                  | _        |j                  | _        |j                  | _        t        j                  |j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        | j                  rdnd}| j                  |z  }t        j                  ||j                        | _        t!        |j"                     |j                        | _        t        j&                  |j(                        | _        | j-                  dt/        j0                  |j                        j3                  d      d       y )N)padding_idxr   r   position_ids)r   F)
persistent)rp   rq   trigram_inputembedding_sizehidden_sizer   	Embedding
vocab_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsLinearembedding_transformationNORM2FNnormalization_typer!   Dropouthidden_dropout_probdropoutregister_bufferrX   arangeexpand)ru   r\   embed_dim_multiplierembedded_input_sizerx   s       r/   rq   zMobileBertEmbeddings.__init__   sF   #11$33!--!||F,=,=v?T?Tbhbubuv#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"$($6$6qA"114HH(*		2EvGYGY(Z% !:!:;F<N<NOzz&"<"<= 	ELL)G)GHOOPWXej 	 	
ry   	input_idstoken_type_idsr   inputs_embedsr{   c           
      $   ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|:t        j                  |t        j                  | j                  j
                        }|| j                  |      }| j                  rpt        j                  t        j                  j                  |d d dd f   g dd      |t        j                  j                  |d d d df   g dd      gd	      }| j                  s| j                  | j                  k7  r| j                  |      }| j                  |      }| j!                  |      }||z   |z   }	| j#                  |	      }	| j%                  |	      }	|	S )
Nr   r   dtypedevice)r   r   r   r   r   r           )value)r   r   r   r   r   r   r:   dim)sizer   rX   rs   longr   r   r   catr   
functionalpadr   r   r   r   r   r!   r   )
ru   r   r   r   r   input_shape
seq_lengthr   r   
embeddingss
             r/   r}   zMobileBertEmbeddings.forward   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M "IIMM%%mAqrE&:<NVY%Z!MM%%mAssF&;=OWZ%[
 M !4!48H8H!H 99-HM #66|D $ : :> J"%88;PP
^^J/
\\*-
ry   )NNNN)r   r   r   __doc__rq   r   rX   
LongTensorFloatTensorr   r}   r   r   s   @r/   r   r      s~    Q
0 155937590E,,-0 !!1!120 u//0	0
   1 120 
0ry   r   c                        e Zd Z fdZd Z	 	 	 ddej                  dej                  dej                  deej                     deej                     dee	   d	e
ej                     fd
Z xZS )MobileBertSelfAttentionc                 `   t         |           |j                  | _        t        |j                  |j                  z        | _        | j                  | j
                  z  | _        t        j                  |j                  | j                        | _	        t        j                  |j                  | j                        | _
        t        j                  |j                  r|j                  n|j                  | j                        | _        t        j                  |j                        | _        y ro   )rp   rq   num_attention_headsrS   true_hidden_sizeattention_head_sizeall_head_sizer   r   querykeyuse_bottleneck_attentionr   r   r   attention_probs_dropout_probr   ru   r\   rx   s     r/   rq   z MobileBertSelfAttention.__init__   s    #)#=#= #&v'>'>A[A['[#\ !558P8PPYYv668J8JK
99V44d6H6HIYY'-'F'FF##FL^L^`d`r`r

 zz&"E"EFry   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  dddd      S )Nr   r   r:   r   r   )r   r   r   viewpermute)ru   xnew_x_shapes      r/   transpose_for_scoresz,MobileBertSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$ry   query_tensor
key_tensorvalue_tensorattention_mask	head_maskoutput_attentionsr{   c                    | j                  |      }| j                  |      }| j                  |      }	| j                  |      }
| j                  |      }| j                  |	      }t	        j
                  |
|j                  dd            }|t        j                  | j                        z  }|||z   }t        j                  j                  |d      }| j                  |      }|||z  }t	        j
                  ||      }|j                  dddd      j                         }|j!                         d d | j"                  fz   }|j%                  |      }|r||f}|S |f}|S )Nr   r   r   r:   r   r   )r   r   r   r   rX   matmulrT   mathsqrtr   r   r   softmaxr   r   
contiguousr   r   r   )ru   r   r   r   r   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                     r/   r}   zMobileBertSelfAttention.forward   sm    !JJ|4((:. JJ|4//0AB--o>	//0AB !<<Y5H5HR5PQ+dii8P8P.QQ%/.@--//0@b/I ,,7 -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]ry   NNN)r   r   r   rq   r   rX   r   r   r   boolr   r}   r   r   s   @r/   r   r      s    G% 7;15,0$ll$ LL$ ll	$
 !!2!23$ E--.$ $D>$ 
u||	$ry   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MobileBertSelfOutputc                 j   t         |           |j                  | _        t        j                  |j
                  |j
                        | _        t        |j                     |j
                  |j                        | _
        | j                  s%t        j                  |j                        | _        y y Nrw   )rp   rq   use_bottleneckr   r   r   denser   r   layer_norm_epsr!   r   r   r   r   s     r/   rq   zMobileBertSelfOutput.__init__  s    $33YYv668O8OP
 !:!:;F<S<SY_YnYno""::f&@&@ADL #ry   hidden_statesresidual_tensorr{   c                     | j                  |      }| j                  s| j                  |      }| j                  ||z         }|S ro   )r   r   r   r!   ru   r   r   layer_outputss       r/   r}   zMobileBertSelfOutput.forward  s@    

=1"" LL7M}'FGry   r~   r   s   @r/   r   r     s2    BU\\ ELL UZUaUa ry   r   c                        e Zd Z fdZd Z	 	 	 ddej                  dej                  dej                  dej                  deej                     deej                     d	ee	   d
e
ej                     fdZ xZS )MobileBertAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y ro   )rp   rq   r   ru   r   outputsetpruned_headsr   s     r/   rq   zMobileBertAttention.__init__(  s0    +F3	*62Ery   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )rR   r   ru   r   r   r   r   r   r   r   r   r   r   union)ru   headsindexs      r/   prune_headszMobileBertAttention.prune_heads.  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:ry   r   r   r   layer_inputr   r   r   r{   c                 n    | j                  ||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )Nr   r   )ru   r   )ru   r   r   r   r   r   r   r   self_outputsattention_outputr   s              r/   r}   zMobileBertAttention.forward@  sT     yy
  ;;|AD#%QR(88ry   r   )r   r   r   rq   r   rX   r   r   r   r   r   r}   r   r   s   @r/   r   r   '  s    ";0 7;15,0ll LL ll	
 \\ !!2!23 E--. $D> 
u||	ry   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y ro   )rp   rq   r   r   r   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr   s     r/   rq   zMobileBertIntermediate.__init__Z  s]    YYv668P8PQ
f''-'-f.?.?'@D$'-'8'8D$ry   r   r{   c                 J    | j                  |      }| j                  |      }|S ro   )r   r
  ru   r   s     r/   r}   zMobileBertIntermediate.forwardb  s&    

=100?ry   r~   r   s   @r/   r  r  Y  s#    9U\\ ell ry   r  c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )OutputBottleneckc                 .   t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        t        j                  |j                        | _        y r   )rp   rq   r   r   r   r   r   r   r   r   r!   r   r   r   r   s     r/   rq   zOutputBottleneck.__init__i  sh    YYv668J8JK
 !:!:;F<N<NTZTiTijzz&"<"<=ry   r   r   r{   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S ro   )r   r   r!   r   s       r/   r}   zOutputBottleneck.forwardo  s7    

=1]3}'FGry   r~   r   s   @r/   r  r  h  s1    >U\\ ELL UZUaUa ry   r  c                        e Zd Z fdZdej
                  dej
                  dej
                  dej
                  fdZ xZS )MobileBertOutputc                 r   t         |           |j                  | _        t        j                  |j
                  |j                        | _        t        |j                     |j                        | _
        | j                  s%t        j                  |j                        | _        y t        |      | _        y ro   )rp   rq   r   r   r   r  r   r   r   r   r!   r   r   r   r  
bottleneckr   s     r/   rq   zMobileBertOutput.__init__w  s    $33YYv779P9PQ
 !:!:;F<S<ST""::f&@&@ADL.v6DOry   intermediate_statesresidual_tensor_1residual_tensor_2r{   c                     | j                  |      }| j                  s'| j                  |      }| j                  ||z         }|S | j                  ||z         }| j	                  ||      }|S ro   )r   r   r   r!   r  )ru   r  r  r  layer_outputs        r/   r}   zMobileBertOutput.forward  ss     zz"56""<<5L>>,9J*JKL   >>,9J*JKL??<9JKLry   r~   r   s   @r/   r  r  v  s?    7
#(<<
DILL
ejeqeq
	
ry   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BottleneckLayerc                     t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        y r   )rp   rq   r   r   r   intra_bottleneck_sizer   r   r   r   r!   r   s     r/   rq   zBottleneckLayer.__init__  sR    YYv1163O3OP
 !:!:;F<X<X^d^s^stry   r   r{   c                 J    | j                  |      }| j                  |      }|S ro   r   r!   )ru   r   r   s      r/   r}   zBottleneckLayer.forward  s$    jj/nn[1ry   r~   r   s   @r/   r  r    s$    u
U\\ ell ry   r  c                   \     e Zd Z fdZdej
                  deej
                     fdZ xZS )
Bottleneckc                     t         |           |j                  | _        |j                  | _        t	        |      | _        | j                  rt	        |      | _        y y ro   )rp   rq   key_query_shared_bottleneckr   r  input	attentionr   s     r/   rq   zBottleneck.__init__  sP    +1+M+M((.(G(G%$V,
++,V4DN ,ry   r   r{   c                     | j                  |      }| j                  r|fdz  S | j                  r| j                  |      }||||fS ||||fS )N   )r$  r   r#  r%  )ru   r   bottlenecked_hidden_statesshared_attention_inputs       r/   r}   zBottleneck.forward  sc    " &*ZZ%>"((.0144--%)^^M%B"*,BMSmnn!=-A[\\ry   	r   r   r   rq   rX   r   r   r}   r   r   s   @r/   r!  r!    s+    5]U\\ ]eELL6I ]ry   r!  c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )	FFNOutputc                     t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        y r   )rp   rq   r   r   r  r   r   r   r   r   r!   r   s     r/   rq   zFFNOutput.__init__  sR    YYv779P9PQ
 !:!:;F<S<SY_YnYnory   r   r   r{   c                 P    | j                  |      }| j                  ||z         }|S ro   r  r   s       r/   r}   zFFNOutput.forward  s)    

=1}'FGry   r~   r   s   @r/   r,  r,    s2    p
U\\ ELL UZUaUa ry   r,  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )FFNLayerc                 b    t         |           t        |      | _        t	        |      | _        y ro   )rp   rq   r  intermediater,  r   r   s     r/   rq   zFFNLayer.__init__  s'    26:'ry   r   r{   c                 L    | j                  |      }| j                  ||      }|S ro   )r2  r   )ru   r   intermediate_outputr   s       r/   r}   zFFNLayer.forward  s*    "//>$7Gry   r~   r   s   @r/   r0  r0    s#    (
U\\ ell ry   r0  c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	MobileBertLayerc                    t         |           |j                  | _        |j                  | _        t	        |      | _        t        |      | _        t        |      | _	        | j                  rt        |      | _        |j                  dkD  rHt        j                  t        |j                  dz
        D cg c]  }t        |       c}      | _        y y c c}w Nr   )rp   rq   r   num_feedforward_networksr   r%  r  r2  r  r   r!  r  r   
ModuleListranger0  r   ru   r\   _rx   s      r/   rq   zMobileBertLayer.__init__  s    $33(.(G(G%,V426:&v.(0DO**Q.}}fFeFehiFi@j%k1hv&6%klDH /%ks   6Cr   r   r   r   r{   c           	         | j                   r| j                  |      \  }}}}n|gdz  \  }}}}| j                  |||||||      }	|	d   }
|
f}|	dd  }| j                  dk7  r+t	        | j
                        D ]  \  }} ||
      }
||
fz  } | j                  |
      }| j                  ||
|      }|f|z   t        j                  d      |||||
|fz   |z   }|S )Nr'  )r   r   r   i  )
r   r  r%  r9  	enumerater   r2  r   rX   tensor)ru   r   r   r   r   r   r   r   r   self_attention_outputsr  sr   i
ffn_moduler4  r  s                    r/   r}   zMobileBertLayer.forward  s>    BF//R_B`?L*lKCP/TUBU?L*lK!%/ "0 "
 2!4(,((A-!*488!4 ):#-.>#? &(() #//0@A{{#68H-XO T" #
  	 ry   r   )r   r   r   rq   rX   r   r   r   r   r   r}   r   r   s   @r/   r6  r6    sp    m  7;15,0.||. !!2!23. E--.	.
 $D>. 
u||	.ry   r6  c                        e Zd Z fdZ	 	 	 	 	 d
dej
                  deej                     deej                     dee   dee   dee   de	e
ef   fd	Z xZS )MobileBertEncoderc                     t         |           t        j                  t	        |j
                        D cg c]  }t        |       c}      | _        y c c}w ro   )rp   rq   r   r:  r;  num_hidden_layersr6  layerr<  s      r/   rq   zMobileBertEncoder.__init__  s<    ]]U6KcKcEd#eOF$;#ef
#es   Ar   r   r   r   output_hidden_statesreturn_dictr{   c                     |rdnd }|rdnd }t        | j                        D ],  \  }	}
|r||fz   } |
||||	   |      }|d   }|s$||d   fz   }. |r||fz   }|st        d |||fD              S t        |||      S )Nr,   r   r   c              3   &   K   | ]	  }||  y wro   r,   )r-   vs     r/   r0   z,MobileBertEncoder.forward.<locals>.<genexpr>9  s     hqZ[Zghs   )last_hidden_stater   
attentions)r?  rI  tupler   )ru   r   r   r   r   rJ  rK  all_hidden_statesall_attentionsrC  layer_moduler   s               r/   r}   zMobileBertEncoder.forward  s     #7BD0d(4 	FOA|#$58H$H!(!!	M *!,M !/=3C2E!E	F    1]4D Dh]4E~$Vhhh+;LYg
 	
ry   )NNFFT)r   r   r   rq   rX   r   r   r   r   r   r   r   r}   r   r   s   @r/   rF  rF    s    g 7;15,1/4&*"
||"
 !!2!23"
 E--.	"

 $D>"
 'tn"
 d^"
 
uo%	&"
ry   rF  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertPoolerc                     t         |           |j                  | _        | j                  r0t	        j
                  |j                  |j                        | _        y y ro   )rp   rq   classifier_activationdo_activater   r   r   r   r   s     r/   rq   zMobileBertPooler.__init__@  sH    !776#5#5v7I7IJDJ ry   r   r{   c                     |d d df   }| j                   s|S | j                  |      }t        j                  |      }|S )Nr   )rY  r   rX   tanh)ru   r   first_token_tensorpooled_outputs       r/   r}   zMobileBertPooler.forwardF  sE     +1a40%% JJ'9:M!JJ}5M  ry   r~   r   s   @r/   rV  rV  ?  s$    K	!U\\ 	!ell 	!ry   rV  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )!MobileBertPredictionHeadTransformc                 Z   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        d   |j                  |j                        | _        y )Nr   r   )rp   rq   r   r   r   r   r  r  r	  r   transform_act_fnr   r   r!   r   s     r/   rq   z*MobileBertPredictionHeadTransform.__init__S  s|    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D! .v/A/AvG\G\]ry   r   r{   c                 l    | j                  |      }| j                  |      }| j                  |      }|S ro   )r   ra  r!   r  s     r/   r}   z)MobileBertPredictionHeadTransform.forward\  s4    

=1--m<}5ry   r~   r   s   @r/   r_  r_  R  s$    ^U\\ ell ry   r_  c                   ^     e Zd Z fdZddZdej                  dej                  fdZ xZS )MobileBertLMPredictionHeadc                    t         |           t        |      | _        t	        j
                  |j                  |j                  |j                  z
  d      | _	        t	        j
                  |j                  |j                  d      | _
        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)r6   )rp   rq   r_  	transformr   r   r   r   r   r   decoderrr   rX   rs   r6   r   s     r/   rq   z#MobileBertLMPredictionHead.__init__d  s    :6B YYv00&2D2DvG\G\2\chi
yy!6!68I8IPUVLLV->->!?@	 IIry   r{   c                 :    | j                   | j                  _         y ro   )r6   rg  ru   s    r/   _tie_weightsz'MobileBertLMPredictionHead._tie_weightso  s     IIry   r   c                    | j                  |      }|j                  t        j                  | j                  j
                  j                         | j                  j
                  gd            }|| j                  j                  z  }|S )Nr   r   )	rf  r   rX   r   rg  r3   tr   r6   r  s     r/   r}   z"MobileBertLMPredictionHead.forwardr  sk    }5%,,UYY8K8K8M8M8OQUQ[Q[QbQb7cij-kl***ry   )r{   N)	r   r   r   rq   rj  rX   r   r}   r   r   s   @r/   rd  rd  c  s(    	&&U\\ ell ry   rd  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertOnlyMLMHeadc                 B    t         |           t        |      | _        y ro   )rp   rq   rd  predictionsr   s     r/   rq   zMobileBertOnlyMLMHead.__init__z  s    5f=ry   sequence_outputr{   c                 (    | j                  |      }|S ro   )rp  )ru   rq  prediction_scoress      r/   r}   zMobileBertOnlyMLMHead.forward~  s     ,,_=  ry   r~   r   s   @r/   rn  rn  y  s#    >!u|| ! !ry   rn  c                   t     e Zd Z fdZdej
                  dej
                  deej
                     fdZ xZS )MobileBertPreTrainingHeadsc                     t         |           t        |      | _        t	        j
                  |j                  d      | _        y Nr:   )rp   rq   rd  rp  r   r   r   seq_relationshipr   s     r/   rq   z#MobileBertPreTrainingHeads.__init__  s4    5f= "		&*<*<a @ry   rq  r]  r{   c                 N    | j                  |      }| j                  |      }||fS ro   )rp  rx  )ru   rq  r]  rs  seq_relationship_scores        r/   r}   z"MobileBertPreTrainingHeads.forward  s0     ,,_=!%!6!6}!E "888ry   r*  r   s   @r/   ru  ru    s8    A
9u|| 9ELL 9UZ[`[g[gUh 9ry   ru  c                       e Zd ZeZeZdZd Zy)MobileBertPreTrainedModelr$   c                 x   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                  t        f      rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r%|j                  j                  j                          yy)zInitialize the weightsr   )meanstdNg      ?)r  r   r   r3   rZ   normal_r\   initializer_ranger6   zero_r   r   r!   rm   fill_rd  )ru   modules     r/   _init_weightsz'MobileBertPreTrainedModel._init_weights  s,   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .v 67KK""$MM$$S) :;KK""$ <ry   N)	r   r   r   r   config_classrk   load_tf_weightsbase_model_prefixr  r,   ry   r/   r|  r|    s    #L3O$%ry   r|  c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)MobileBertForPreTrainingOutputab  
    Output type of [`MobileBertForPreTraining`].

    Args:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            Total loss as the sum of the masked language modeling loss and the next sequence prediction
            (classification) loss.
        prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossprediction_logitsseq_relationship_logitsr   rP  )r   r   r   r   r  r   rX   r   __annotations__r  r  r   r   rP  r,   ry   r/   r  r    s~    2 )-D(5$$
%,59x 1 129;?Xe&7&78?8<M8E%"3"345<59Ju00129ry   r  c                   <    e Zd ZdZd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     d	e	e
j                     d
e	e
j                     de	e
j                     de	e   de	e   de	e   deeef   fd       Z xZS )MobileBertModelz.
    https://arxiv.org/pdf/2004.02984.pdf
    c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rp   rq   r\   r   r   rF  encoderrV  pooler	post_init)ru   r\   add_pooling_layerrx   s      r/   rq   zMobileBertModel.__init__  sN    
 	 .v6(02C&v. 	ry   c                 .    | j                   j                  S ro   r   r   ri  s    r/   get_input_embeddingsz$MobileBertModel.get_input_embeddings  s    ...ry   c                 &    || j                   _        y ro   r  )ru   r   s     r/   set_input_embeddingsz$MobileBertModel.set_input_embeddings  s    */'ry   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  rI  r%  r   )ru   heads_to_prunerI  r   s       r/   _prune_headszMobileBertModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cry   r   r   r   r   r   r   rJ  r   rK  r{   c
                 l   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      ||j                  n|j                  }|t        j                  |
|      }|&t        j                  |
t        j                  |      }| j                  ||
      }| j                  || j                   j                        }| j                  ||||      }| j!                  ||||||	      }|d   }| j"                  | j#                  |      nd }|	s
||f|d	d  z   S t%        |||j&                  |j(                  
      S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)r   r   )r   r   r   r   )r   r   r   rJ  rK  r   r   )rO  pooler_outputr   rP  )r\   r   rJ  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rX   rt   rs   r   get_extended_attention_maskget_head_maskrH  r   r  r  r   r   rP  )ru   r   r   r   r   r   r   rJ  r   rK  r   r   extended_attention_maskembedding_outputencoder_outputsrq  r]  s                    r/   r}   zMobileBertModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN 150P0PQ_al0m &&y$++2O2OP	??l>iv + 
 ,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
ry   )T)	NNNNNNNNN)r   r   r   r   rq   r  r  r  r   r   rX   r   r   r   r   r   r   r}   r   r   s   @r/   r  r    s   /0C  156:59371559/3,0&*D
E,,-D
 !!2!23D
 !!1!12	D

 u//0D
 E--.D
   1 12D
 'tnD
 $D>D
 d^D
 
u00	1D
 D
ry   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    )custom_introc                       e Zd ZddgZ fdZd Zd Zddee   de	j                  f fdZe	 	 	 	 	 	 	 	 	 	 	 dd	eej                     d
eej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deeef   fd       Z xZS )MobileBertForPreTrainingcls.predictions.decoder.weightcls.predictions.decoder.biasc                     t         |   |       t        |      | _        t	        |      | _        | j                          y ro   )rp   rq   r  r$   ru  clsr  r   s     r/   rq   z!MobileBertForPreTraining.__init__>  s4     )&1-f5 	ry   c                 B    | j                   j                  j                  S ro   r  rp  rg  ri  s    r/   get_output_embeddingsz.MobileBertForPreTraining.get_output_embeddingsF      xx##+++ry   c                     || j                   j                  _        |j                  | j                   j                  _        y ro   r  rp  rg  r6   ru   new_embeddingss     r/   set_output_embeddingsz.MobileBertForPreTraining.set_output_embeddingsI  ,    '5$$2$7$7!ry   new_num_tokensr{   c                     | j                  | j                  j                  j                  |d      | j                  j                  _        t        |   |      S NT)r  
transposed)r  _get_resized_lm_headr  rp  r   rp   resize_token_embeddingsru   r  rx   s     r/   r  z0MobileBertForPreTraining.resize_token_embeddingsM  sR    %)%>%>HH  &&~RV &? &
" w.n.MMry   r   r   r   r   r   r   labelsnext_sentence_labelr   rJ  rK  c                 
   ||n| j                   j                  }| j                  |||||||	|
|	      }|dd \  }}| j                  ||      \  }}d}|u|st	               } ||j                  d| j                   j                        |j                  d            } ||j                  dd      |j                  d            }||z   }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, MobileBertForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")

        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
        >>> # Batch size 1
        >>> outputs = model(input_ids)

        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```Nr   r   r   r   r   r   rJ  rK  r:   r   )r  r  r  r   rP  )
r\   r  r$   r  r	   r   r   r  r   rP  )ru   r   r   r   r   r   r   r  r  r   rJ  rK  r   rq  r]  rs  rz  
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   s                         r/   r}   z MobileBertForPreTraining.forwardU  sG   V &1%<k$++B]B]//))%'/!5# " 

 *1!&48HH_m4\11
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J')?@712;NF/9/EZMF*Q6Q-/$:!//))
 	
ry   ro   NNNNNNNNNNN)r   r   r   _tied_weights_keysrq   r  r  r   rS   r   r   r  r   rX   r   r   r   r   r  r}   r   r   s   @r/   r  r  5  s}    ;<Z[,8Nhsm Nr|| N  156:59371559-1:>9=<@37K
E,,-K
 !!2!23K
 !!1!12	K

 u//0K
 E--.K
   1 12K
 ))*K
 &e&6&67K
 $E$5$56K
 'u'8'89K
 e//0K
 
u44	5K
 K
ry   r  c                       e Zd ZddgZ fdZd Zd Zddee   de	j                  f fdZe	 	 	 	 	 	 	 	 	 	 dd	eej                     d
eej                     deej                     deej                     deej                     deej                     deej                     dee   dee   dee   deeef   fd       Z xZS )MobileBertForMaskedLMr  r  c                     t         |   |       t        |d      | _        t	        |      | _        || _        | j                          y NF)r  )rp   rq   r  r$   rn  r  r\   r  r   s     r/   rq   zMobileBertForMaskedLM.__init__  s=     )&EJ(0 	ry   c                 B    | j                   j                  j                  S ro   r  ri  s    r/   r  z+MobileBertForMaskedLM.get_output_embeddings  r  ry   c                     || j                   j                  _        |j                  | j                   j                  _        y ro   r  r  s     r/   r  z+MobileBertForMaskedLM.set_output_embeddings  r  ry   r  r{   c                     | j                  | j                  j                  j                  |d      | j                  j                  _        t        |   |      S r  r  r  s     r/   r  z-MobileBertForMaskedLM.resize_token_embeddings  sR    %)%>%>HH  &&~RV &? &
" w.n.MMry   r   r   r   r   r   r   r  r   rJ  rK  c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr  r   r   r:   r  logitsr   rP  )
r\   r  r$   r  r	   r   r   r   r   rP  )ru   r   r   r   r   r   r   r  r   rJ  rK  r   rq  rs  r  r  r   s                    r/   r}   zMobileBertForMaskedLM.forward  s    ( &1%<k$++B]B]//))%'/!5# " 

 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
ry   ro   
NNNNNNNNNN)r   r   r   r  rq   r  r  r   rS   r   r   r  r   rX   r   r   r   r   r   r   r}   r   r   s   @r/   r  r    s?   :<Z[,8Nhsm Nr|| N  156:59371559-1,0/3&*2
E,,-2
 !!2!232
 !!1!12	2

 u//02
 E--.2
   1 122
 ))*2
 $D>2
 'tn2
 d^2
 
un$	%2
 2
ry   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertOnlyNSPHeadc                 l    t         |           t        j                  |j                  d      | _        y rw  )rp   rq   r   r   r   rx  r   s     r/   rq   zMobileBertOnlyNSPHead.__init__  s'     "		&*<*<a @ry   r]  r{   c                 (    | j                  |      }|S ro   )rx  )ru   r]  rz  s      r/   r}   zMobileBertOnlyNSPHead.forward  s    !%!6!6}!E%%ry   r~   r   s   @r/   r  r    s$    A&U\\ &ell &ry   r  zZ
    MobileBert Model with a `next sentence prediction (classification)` head on top.
    c                   D    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee	   d
ee	   dee	   de
eef   fd       Z xZS )#MobileBertForNextSentencePredictionc                     t         |   |       t        |      | _        t	        |      | _        | j                          y ro   )rp   rq   r  r$   r  r  r  r   s     r/   rq   z,MobileBertForNextSentencePrediction.__init__  s4     )&1(0 	ry   r   r   r   r   r   r   r  r   rJ  rK  r{   c                    d|v r+t        j                  dt               |j                  d      }|
|
n| j                  j
                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}|2t               } ||j                  dd      |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )	a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`.

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, MobileBertForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = MobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")

        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```r  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.Nr  r   r   r:   r  )warningswarnFutureWarningpopr\   r  r$   r  r	   r   r   r   rP  )ru   r   r   r   r   r   r   r  r   rJ  rK  kwargsr   r]  rz  r  r  r   s                     r/   r}   z+MobileBertForNextSentencePrediction.forward  s   R !F*MM%
 ZZ 56F%0%<k$++B]B]//))%'/!5# " 

  
!%-!8!')H!)*@*E*Eb!*LfkkZ\o!^,.<F7I7U')F2a[aa*#)!//))	
 	
ry   r  )r   r   r   rq   r   r   rX   r   r   r   r   r   r   r}   r   r   s   @r/   r  r    s     156:59371559-1,0/3&*O
E,,-O
 !!2!23O
 !!1!12	O

 u//0O
 E--.O
   1 12O
 ))*O
 $D>O
 'tnO
 d^O
 
u11	2O
 O
ry   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS )#MobileBertForSequenceClassificationc                 n   t         |   |       |j                  | _        || _        t	        |      | _        |j                  |j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y ro   )rp   rq   
num_labelsr\   r  r$   classifier_dropoutr   r   r   r   r   r   r9   r  ru   r\   r  rx   s      r/   rq   z,MobileBertForSequenceClassification.__init__i  s      ++)&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	ry   r   r   r   r   r   r   r  r   rJ  rK  r{   c                 @   |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }|
s|f|dd z   }||f|z   S |S t!        |||j"                  |j$                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   
regressionsingle_label_classificationmulti_label_classificationr   r:   r  )r\   r  r$   r   r9   problem_typer  r   rX   r   rS   r
   squeezer	   r   r   r   r   rP  )ru   r   r   r   r   r   r   r  r   rJ  rK  r   r]  r  r  r  r   s                    r/   r}   z+MobileBertForSequenceClassification.forwardx  s   ( &1%<k$++B]B]//))%'/!5# " 

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
ry   r  )r   r   r   rq   r   r   rX   r   r   r   r   r   r}   r   r   s   @r/   r  r  a  s     -11515/3,004)-,0/3&*E
ELL)E
 !.E
 !.	E

 u||,E
 ELL)E
  -E
 &E
 $D>E
 'tnE
 d^E
 
uU\\"$<<	=E
 E
ry   r  c                   ~    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee   dee   dee   de	e
ej                     ef   fd       Z xZS )MobileBertForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r  )
rp   rq   r  r  r$   r   r   r   
qa_outputsr  r   s     r/   rq   z'MobileBertForQuestionAnswering.__init__  sU      ++)&EJ))F$6$68I8IJ 	ry   r   r   r   r   r   r   start_positionsend_positionsr   rJ  rK  r{   c                 (   ||n| j                   j                  }| j                  |||||||	|
|	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|s||f|dd  z   }||f|z   S |S t        ||||j                  |j                        S )	Nr  r   r   r   r   )ignore_indexr:   )r  start_logits
end_logitsr   rP  )r\   r  r$   r  rL   r  r   rR   r   clampr	   r   r   rP  )ru   r   r   r   r   r   r   r  r  r   rJ  rK  r   rq  r  r  r  r  ignored_indexr  
start_lossend_lossr   s                          r/   r}   z&MobileBertForQuestionAnswering.forward  s    &1%<k$++B]B]//))%'/!5# " 

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
ry   r  )r   r   r   rq   r   r   rX   r   r   r   r   r   r}   r   r   s   @r/   r  r    s     -11515/3,0042604,0/3&*>
ELL)>
 !.>
 !.	>

 u||,>
 ELL)>
  ->
 "%,,/>
  ->
 $D>>
 'tn>
 d^>
 
uU\\"$@@	A>
 >
ry   r  c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS )MobileBertForMultipleChoicec                 *   t         |   |       t        |      | _        |j                  |j                  n|j
                  }t        j                  |      | _        t        j                  |j                  d      | _        | j                          y r8  )rp   rq   r  r$   r  r   r   r   r   r   r   r9   r  r  s      r/   rq   z$MobileBertForMultipleChoice.__init__  su     )&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$6: 	ry   r   r   r   r   r   r   r  r   rJ  rK  r{   c                 L   |
|
n| j                   j                  }
||j                  d   n|j                  d   }|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||||	|
	      }|d   }| j                  |      }| j                  |      }|j                  d|      }d}|t               } |||      }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   r   r  r:   r  )r\   r  rU   r   r   r$   r   r9   r	   r   r   rP  )ru   r   r   r   r   r   r   r  r   rJ  rK  num_choicesr   r]  r  reshaped_logitsr  r  r   s                      r/   r}   z#MobileBertForMultipleChoice.forward   s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 //))%'/!5# " 

  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
ry   r  )r   r   r   rq   r   r   rX   r   r   r   r   r   r}   r   r   s   @r/   r  r    s     -11515/3,004)-,0/3&*X
ELL)X
 !.X
 !.	X

 u||,X
 ELL)X
  -X
 &X
 $D>X
 'tnX
 d^X
 
uU\\"$==	>X
 X
ry   r  c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS ) MobileBertForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y r  )rp   rq   r  r  r$   r  r   r   r   r   r   r   r9   r  r  s      r/   rq   z)MobileBertForTokenClassification.__init__  s      ++)&EJ)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	ry   r   r   r   r   r   r   r  r   rJ  rK  r{   c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r:   r  )r\   r  r$   r   r9   r	   r   r  r   r   rP  )ru   r   r   r   r   r   r   r  r   rJ  rK  r   rq  r  r  r  r   s                    r/   r}   z(MobileBertForTokenClassification.forward  s    $ &1%<k$++B]B]//))%'/!5# " 

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
ry   r  )r   r   r   rq   r   r   rX   r   r   r   r   r   r}   r   r   s   @r/   r  r  |  s     -11515/3,004)-,0/3&*2
ELL)2
 !.2
 !.	2

 u||,2
 ELL)2
  -2
 &2
 $D>2
 'tn2
 d^2
 
uU\\"$99	:2
 2
ry   r  )r  r  r  r  r  r  r  r6  r  r|  rk   )Kr   rB   r  dataclassesr   typingr   r   r   rX   r   torch.nnr   r	   r
   activationsr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   configuration_mobilebertr   
get_loggerr   r@   rk   Modulerm   r!   r   r   r   r   r   r  r  r  r  r!  r,  r0  r6  rF  rV  r_  rd  rn  ru  r|  r  r  r  r  r  r  r  r  r  r  __all__r,   ry   r/   <module>r     s  .  	  ! ) )   A A !	 	 	 . Q 9 9 6 
		H	%K\6RYY 6 &
9I299 IX7bii 7t299 "/")) /dRYY ryy ryy 0	bii 	!] !]H			 		ryy 	<bii <~'
		 '
T!ryy !&		 " ,!BII !	9 	9 % % %0 :[ : :B g
/ g
 g
T f
8 f
f
R M
5 M
 M
`&BII & 
Z
*C Z

Z
z V
*C V
V
r J
%> J
 J
Z g
"; g
 g
T B
'@ B
 B
Jry   