
    Uh`                        d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ  ej:                  e      Z G d dej@                        Z! G d dej@                        Z" G d dejF                        Z$ G d dej@                        Z% G d dej@                        Z& G d dej@                        Z' G d dej@                        Z( G d dej@                        Z) G d dej@                        Z* G d d ej@                        Z+ G d! d"ej@                        Z, G d# d$ej@                        Z-e G d% d&e             Z.e G d' d(e.             Z/e G d) d*e.             Z0 ed+,       G d- d.e.             Z1e G d/ d0e.             Z2e G d1 d2e.             Z3e G d3 d4e.             Z4g d5Z5y)6zPyTorch SqueezeBert model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)auto_docstringlogging   )SqueezeBertConfigc                   *     e Zd ZdZ fdZddZ xZS )SqueezeBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 |   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j                  |j                        | _        t        j                  |j                         | _        | j%                  dt'        j(                  |j                        j+                  d      d       y )N)padding_idxepsposition_ids)r   F)
persistent)super__init__r   	Embedding
vocab_sizeembedding_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormhidden_sizelayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     /var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/squeezebert/modeling_squeezebert.pyr"   zSqueezeBertEmbeddings.__init__0   s    !||F,=,=v?T?Tbhbubuv#%<<0N0NPVPePe#f %'\\&2H2H&J_J_%`" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
    c                    ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|:t        j                  |t        j                  | j                  j
                        }|| j                  |      }| j                  |      }| j                  |      }||z   |z   }	| j                  |	      }	| j                  |	      }	|	S )Nr   r   dtypedevice)sizer   r3   zeroslongr?   r'   r)   r+   r,   r1   )
r7   	input_idstoken_type_idsr   inputs_embedsinput_shape
seq_lengthr)   r+   
embeddingss
             r:   forwardzSqueezeBertEmbeddings.forward@   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M"66|D $ : :> J"%88;PP
^^J/
\\*-
r;   )NNNN__name__
__module____qualname____doc__r"   rI   __classcell__r9   s   @r:   r   r   -   s    Q
 r;   r   c                   (     e Zd ZdZ fdZd Z xZS )MatMulWrapperz
    Wrapper for torch.matmul(). This makes flop-counting easier to implement. Note that if you directly call
    torch.matmul() in your code, the flop counter will typically ignore the flops of the matmul.
    c                 "    t         |           y N)r!   r"   )r7   r9   s    r:   r"   zMatMulWrapper.__init___   s    r;   c                 .    t        j                  ||      S )a0  

        :param inputs: two torch tensors :return: matmul of these tensors

        Here are the typical dimensions found in BERT (the B is optional) mat1.shape: [B, <optional extra dims>, M, K]
        mat2.shape: [B, <optional extra dims>, K, N] output shape: [B, <optional extra dims>, M, N]
        )r3   matmul)r7   mat1mat2s      r:   rI   zMatMulWrapper.forwardb   s     ||D$''r;   rJ   rP   s   @r:   rR   rR   Y   s    
(r;   rR   c                       e Zd ZdZddZd Zy)SqueezeBertLayerNormz
    This is a nn.LayerNorm subclass that accepts NCW data layout and performs normalization in the C dimension.

    N = batch C = channels W = sequence length
    c                 H    t         j                  j                  | ||       y )N)normalized_shaper   )r   r,   r"   )r7   r-   r   s      r:   r"   zSqueezeBertLayerNorm.__init__t   s    
d[cJr;   c                     |j                  ddd      }t        j                  j                  | |      }|j                  ddd      S )Nr      r   )permuter   r,   rI   )r7   xs     r:   rI   zSqueezeBertLayerNorm.forwardw   s=    IIaALL  q)yyAq!!r;   N)g-q=)rK   rL   rM   rN   r"   rI    r;   r:   rZ   rZ   m   s    K"r;   rZ   c                   (     e Zd ZdZ fdZd Z xZS )ConvDropoutLayerNormz8
    ConvDropoutLayerNorm: Conv, Dropout, LayerNorm
    c                     t         |           t        j                  ||d|      | _        t        |      | _        t        j                  |      | _        y Nr   in_channelsout_channelskernel_sizegroups)	r!   r"   r   Conv1dconv1drZ   	layernormr/   r1   )r7   cincoutrj   dropout_probr9   s        r:   r"   zConvDropoutLayerNorm.__init__   sB    iiCdPQZ`a-d3zz,/r;   c                 v    | j                  |      }| j                  |      }||z   }| j                  |      }|S rT   )rl   r1   rm   )r7   hidden_statesinput_tensorr`   s       r:   rI   zConvDropoutLayerNorm.forward   s:    KK&LLONN1r;   rJ   rP   s   @r:   rc   rc   }   s    0r;   rc   c                   (     e Zd ZdZ fdZd Z xZS )ConvActivationz*
    ConvActivation: Conv, Activation
    c                 z    t         |           t        j                  ||d|      | _        t
        |   | _        y re   )r!   r"   r   rk   rl   r   act)r7   rn   ro   rj   rw   r9   s        r:   r"   zConvActivation.__init__   s1    iiCdPQZ`a#;r;   c                 F    | j                  |      }| j                  |      S rT   )rl   rw   )r7   r`   outputs      r:   rI   zConvActivation.forward   s    Qxxr;   rJ   rP   s   @r:   ru   ru      s    
 r;   ru   c                   8     e Zd Zd fd	Zd Zd Zd Zd Z xZS )SqueezeBertSelfAttentionc                    t         |           ||j                  z  dk7  rt        d| d|j                   d      |j                  | _        t	        ||j                  z        | _        | j                  | j
                  z  | _        t        j                  ||d|      | _	        t        j                  ||d|      | _
        t        j                  ||d|      | _        t        j                  |j                        | _        t        j                  d      | _        t#               | _        t#               | _        y	)
z
        config = used for some things; ignored for others (work in progress...) cin = input channels = output channels
        groups = number of groups to use in conv1d layers
        r   zcin (z6) is not a multiple of the number of attention heads ()r   rf   r   dimN)r!   r"   num_attention_heads
ValueErrorintattention_head_sizeall_head_sizer   rk   querykeyvaluer/   attention_probs_dropout_probr1   SoftmaxsoftmaxrR   	matmul_qk
matmul_qkv)r7   r8   rn   q_groupsk_groupsv_groupsr9   s         r:   r"   z!SqueezeBertSelfAttention.__init__   s
   
 	+++q0uRSYSmSmRnnop  $*#=#= #&sV-G-G'G#H !558P8PPYY3SaX`a
993AV^_YY3SaX`a
zz&"E"EFzzb)&'/r;   c                     |j                         d   | j                  | j                  |j                         d   f} |j                  | }|j	                  dddd      S )z
        - input: [N, C, W]
        - output: [N, C1, W, C2] where C1 is the head index, and C2 is one head's contents
        r   r   r   r
   r^   )r@   r   r   viewr_   r7   r`   new_x_shapes      r:   transpose_for_scoresz-SqueezeBertSelfAttention.transpose_for_scores   s]    
 vvx{D$<$<d>V>VXYX^X^X`acXdeAFFK yyAq!$$r;   c                     |j                         d   | j                  | j                  |j                         d   f} |j                  | }|S )z
        - input: [N, C, W]
        - output: [N, C1, C2, W] where C1 is the head index, and C2 is one head's contents
        r   r   )r@   r   r   r   r   s      r:   transpose_key_for_scoresz1SqueezeBertSelfAttention.transpose_key_for_scores   sM    
 vvx{D$<$<d>V>VXYX^X^X`acXdeAFFK r;   c                     |j                  dddd      j                         }|j                         d   | j                  |j                         d   f} |j                  | }|S )zE
        - input: [N, C1, W, C2]
        - output: [N, C, W]
        r   r   r
   r^   )r_   
contiguousr@   r   r   r   s      r:   transpose_outputz)SqueezeBertSelfAttention.transpose_output   s\    
 IIaAq!,,.vvx{D$6$6DAFFK r;   c                    | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }	| j                  ||      }
|
t        j                  | j                        z  }
|
|z   }
| j                  |
      }| j                  |      }| j                  ||	      }| j                  |      }d|i}|r|
|d<   |S )z
        expects hidden_states in [N, C, W] data layout.

        The attention_mask data layout is [N, W], and it does not need to be transposed.
        context_layerattention_score)r   r   r   r   r   r   mathsqrtr   r   r1   r   r   )r7   rr   attention_maskoutput_attentionsmixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerr   attention_probsr   results                 r:   rI   z SqueezeBertSelfAttention.forward   s     !JJ}5((=1 JJ}5//0AB11/B	//0AB ..i@)DIId6N6N,OO)N: ,,7 ,,7E--m<!=1(7F$%r;   )r   r   r   )	rK   rL   rM   r"   r   r   r   rI   rO   rP   s   @r:   r{   r{      s    *0%!r;   r{   c                   $     e Zd Z fdZd Z xZS )SqueezeBertModulec                    t         |           |j                  }|j                  }|j                  }|j                  }t	        |||j
                  |j                  |j                        | _        t        |||j                  |j                        | _        t        |||j                  |j                        | _        t        |||j"                  |j                        | _        y)a  
        - hidden_size = input chans = output chans for Q, K, V (they are all the same ... for now) = output chans for
          the module
        - intermediate_size = output chans for intermediate layer
        - groups = number of groups for all layers in the BertModule. (eventually we could change the interface to
          allow different groups for different layers)
        )r8   rn   r   r   r   )rn   ro   rj   rp   )rn   ro   rj   rw   N)r!   r"   r-   intermediate_sizer{   r   r   r   	attentionrc   post_attention_groupsr0   post_attentionru   intermediate_groups
hidden_actintermediateoutput_groupsry   )r7   r8   c0c1c2c3r9   s         r:   r"   zSqueezeBertModule.__init__   s     	%%1rFOOfoo`f`o`o
 3F$@$@vOiOi
 +r6C]C]cictctu*F$8$8vGaGa
r;   c                     | j                  |||      }|d   }| j                  ||      }| j                  |      }| j                  ||      }d|i}	|r|d   |	d<   |	S )Nr   feature_mapr   )r   r   r   ry   )
r7   rr   r   r   attattention_outputpost_attention_outputintermediate_outputlayer_outputoutput_dicts
             r:   rI   zSqueezeBertModule.forward  s|    nn]N<MN/ $ 3 34Dm T"//0EF{{#68MN$l3-01B-CK)*r;   rK   rL   rM   r"   rI   rO   rP   s   @r:   r   r      s    
4r;   r   c                   0     e Zd Z fdZ	 	 	 	 	 ddZ xZS )SqueezeBertEncoderc                     t         |           j                  j                  k(  sJ d       t	        j
                  fdt        j                        D              | _        y )NzIf you want embedding_size != intermediate hidden_size, please insert a Conv1d layer to adjust the number of channels before the first SqueezeBertModule.c              3   4   K   | ]  }t                y wrT   )r   ).0_r8   s     r:   	<genexpr>z.SqueezeBertEncoder.__init__.<locals>.<genexpr>.  s     #g!$5f$=#gs   )	r!   r"   r%   r-   r   
ModuleListrangenum_hidden_layerslayersr6   s    `r:   r"   zSqueezeBertEncoder.__init__%  sW    $$(:(:: 	
2	
: mm#guVMeMeGf#ggr;   c                    |d}n"|j                  d       t        |      k(  rd}nd}|du sJ d       |j                  ddd      }|rdnd }|rdnd }	| j                  D ]T  }
|r,|j                  ddd      }||fz  }|j                  ddd      }|
j	                  |||      }|d   }|sL|	|d	   fz  }	V |j                  ddd      }|r||fz  }|st        d
 |||	fD              S t        |||	      S )NTFzAhead_mask is not yet supported in the SqueezeBert implementation.r   r^   r   ra   r   r   c              3   &   K   | ]	  }||  y wrT   ra   )r   vs     r:   r   z-SqueezeBertEncoder.forward.<locals>.<genexpr>[  s     hqZ[Zghs   )last_hidden_staterr   
attentions)countlenr_   r   rI   tupler   )r7   rr   r   	head_maskr   output_hidden_statesreturn_dicthead_mask_is_all_noneall_hidden_statesall_attentionslayerr   s               r:   rI   zSqueezeBertEncoder.forward0  sH    $(!__T"c)n4$(!$)!$,q.qq, &--aA6"6BD0d[[ 	EE# - 5 5aA >!m%55! - 5 5aA > ==HYZL(7M <0A#B"DD	E &--aA6-!11h]4E~$Vhhh+;LYg
 	
r;   )NNFFTr   rP   s   @r:   r   r   $  s    	h ".
r;   r   c                   $     e Zd Z fdZd Z xZS )SqueezeBertPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rT   )r!   r"   r   Linearr-   denseTanh
activationr6   s     r:   r"   zSqueezeBertPooler.__init__b  s9    YYv1163E3EF
'')r;   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r   )r7   rr   first_token_tensorpooled_outputs       r:   rI   zSqueezeBertPooler.forwardg  s6     +1a40

#566r;   r   rP   s   @r:   r   r   a  s    $
r;   r   c                   $     e Zd Z fdZd Z xZS )"SqueezeBertPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y )Nr   )r!   r"   r   r   r-   r   
isinstancer   strr   transform_act_fnr,   r.   r6   s     r:   r"   z+SqueezeBertPredictionHeadTransform.__init__q  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr;   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rT   )r   r   r,   r7   rr   s     r:   rI   z*SqueezeBertPredictionHeadTransform.forwardz  s4    

=1--m<}5r;   r   rP   s   @r:   r   r   p  s    Ur;   r   c                   ,     e Zd Z fdZddZd Z xZS )SqueezeBertLMPredictionHeadc                 H   t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)bias)r!   r"   r   	transformr   r   r-   r$   decoder	Parameterr3   rA   r   r6   s     r:   r"   z$SqueezeBertLMPredictionHead.__init__  sm    ;FC yy!3!3V5F5FUSLLV->->!?@	 !IIr;   c                 :    | j                   | j                  _         y rT   )r   r   r7   s    r:   _tie_weightsz(SqueezeBertLMPredictionHead._tie_weights  s     IIr;   c                 J    | j                  |      }| j                  |      }|S rT   )r   r   r   s     r:   rI   z#SqueezeBertLMPredictionHead.forward  s$    }5]3r;   )returnN)rK   rL   rM   r"   r   rI   rO   rP   s   @r:   r   r     s    &&r;   r   c                   $     e Zd Z fdZd Z xZS )SqueezeBertOnlyMLMHeadc                 B    t         |           t        |      | _        y rT   )r!   r"   r   predictionsr6   s     r:   r"   zSqueezeBertOnlyMLMHead.__init__  s    6v>r;   c                 (    | j                  |      }|S rT   )r   )r7   sequence_outputprediction_scoress      r:   rI   zSqueezeBertOnlyMLMHead.forward  s     ,,_=  r;   r   rP   s   @r:   r   r     s    ?!r;   r   c                       e Zd ZeZdZd Zy)SqueezeBertPreTrainedModeltransformerc                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                        rz|j                  j
                  j                  d| j                  j                         |j                  2|j                  j
                  |j                     j                          yyt        |t        j                        rJ|j                  j
                  j                          |j                  j
                  j                  d       yt        |t              r%|j                  j
                  j                          yy)zInitialize the weightsg        )meanstdNg      ?)r   r   r   rk   weightdatanormal_r8   initializer_ranger   zero_r#   r   r,   fill_r   )r7   modules     r:   _init_weightsz(SqueezeBertPreTrainedModel._init_weights  s0   fryy"))45 MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) ;<KK""$ =r;   N)rK   rL   rM   r   config_classbase_model_prefixr  ra   r;   r:   r  r    s    $L%%r;   r  c                   6    e Zd Z fdZd Zd Zd Ze	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee   dee   dee   deeef   fd       Z xZS )SqueezeBertModelc                     t         |   |       t        |      | _        t	        |      | _        t        |      | _        | j                          y rT   )	r!   r"   r   rH   r   encoderr   pooler	post_initr6   s     r:   r"   zSqueezeBertModel.__init__  s@     /7)&1'/ 	r;   c                 .    | j                   j                  S rT   rH   r'   r   s    r:   get_input_embeddingsz%SqueezeBertModel.get_input_embeddings  s    ...r;   c                 &    || j                   _        y rT   r  r7   new_embeddingss     r:   set_input_embeddingsz%SqueezeBertModel.set_input_embeddings  s    *8'r;   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r   r   prune_heads)r7   heads_to_pruner   headss       r:   _prune_headszSqueezeBertModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr;   rC   r   rD   r   r   rE   r   r   r   r   c
                 P   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      ||j                  n|j                  }|t        j                  |
|      }|&t        j                  |
t        j                  |      }| j                  ||
      }| j                  || j                   j                        }| j                  ||||      }| j!                  ||||||	      }|d   }| j#                  |      }|	s
||f|d	d  z   S t%        |||j&                  |j(                  
      S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)r?   r=   )rC   r   rD   rE   )rr   r   r   r   r   r   r   r   )r   pooler_outputrr   r   )r8   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskr@   r?   r3   onesrA   rB   get_extended_attention_maskget_head_maskr   rH   r  r  r   rr   r   )r7   rC   r   rD   r   r   rE   r   r   r   rF   r?   extended_attention_maskembedding_outputencoder_outputsr   r   s                    r:   rI   zSqueezeBertModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN"&"B"B>S^"_ &&y$++2O2OP	??l>iv + 
 ,,*2/!5# ' 
 *!,O4#]3oab6III)-')77&11	
 	
r;   )	NNNNNNNNN)rK   rL   rM   r"   r  r  r"  r   r   r3   TensorFloatTensorboolr   r   r   rI   rO   rP   s   @r:   r  r    s   /9C  -11515/3,059,0/3&*A
ELL)A
 !.A
 !.	A

 u||,A
 ELL)A
   1 12A
 $D>A
 'tnA
 d^A
 
u00	1A
 A
r;   r  c                   X    e Zd ZddgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee	j                     dee	j                     dee   dee   dee   deeef   fd       Z xZS )SqueezeBertForMaskedLMzcls.predictions.decoder.weightzcls.predictions.decoder.biasc                     t         |   |       t        |      | _        t	        |      | _        | j                          y rT   )r!   r"   r  r  r   clsr  r6   s     r:   r"   zSqueezeBertForMaskedLM.__init__  s5     +F3)&1 	r;   c                 B    | j                   j                  j                  S rT   )r3  r   r   r   s    r:   get_output_embeddingsz,SqueezeBertForMaskedLM.get_output_embeddings&  s    xx##+++r;   c                     || j                   j                  _        |j                  | j                   j                  _        y rT   )r3  r   r   r   r  s     r:   set_output_embeddingsz,SqueezeBertForMaskedLM.set_output_embeddings)  s,    '5$$2$7$7!r;   rC   r   rD   r   r   rE   labelsr   r   r   r   c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr   rD   r   r   rE   r   r   r   r   r   r^   losslogitsrr   r   )
r8   r%  r  r3  r   r   r$   r   rr   r   )r7   rC   r   rD   r   r   rE   r8  r   r   r   outputsr   r   masked_lm_lossloss_fctry   s                    r:   rI   zSqueezeBertForMaskedLM.forward-  s    ( &1%<k$++B]B]""))%'/!5# # 

 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r;   
NNNNNNNNNN)rK   rL   rM   _tied_weights_keysr"   r5  r7  r   r   r3   r-  r/  r   r   r   rI   rO   rP   s   @r:   r1  r1    s   :<Z[,8  -11515/3,004)-,0/3&*2
ELL)2
 !.2
 !.	2

 u||,2
 ELL)2
  -2
 &2
 $D>2
 'tn2
 d^2
 
un$	%2
 2
r;   r1  z
    SqueezeBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    )custom_introc                   D    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ef   fd       Z xZS )$SqueezeBertForSequenceClassificationc                 N   t         |   |       |j                  | _        || _        t	        |      | _        t        j                  |j                        | _	        t        j                  |j                  | j                  j                        | _        | j                          y rT   )r!   r"   
num_labelsr8   r  r  r   r/   r0   r1   r   r-   
classifierr  r6   s     r:   r"   z-SqueezeBertForSequenceClassification.__init__j  su      +++F3zz&"<"<=))F$6$68N8NO 	r;   rC   r   rD   r   r   rE   r8  r   r   r   r   c                 @   |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }|
s|f|dd z   }||f|z   S |S t!        |||j"                  |j$                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr:  r   
regressionsingle_label_classificationmulti_label_classificationr   r^   r;  )r8   r%  r  r1   rH  problem_typerG  r>   r3   rB   r   r	   squeezer   r   r   r   rr   r   )r7   rC   r   rD   r   r   rE   r8  r   r   r   r>  r   r=  r<  r@  ry   s                    r:   rI   z,SqueezeBertForSequenceClassification.forwardv  s   ( &1%<k$++B]B]""))%'/!5# # 

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r;   rA  )rK   rL   rM   r"   r   r   r3   r-  r/  r   r   r   rI   rO   rP   s   @r:   rE  rE  c  s   
  -11515/3,004)-,0/3&*F
ELL)F
 !.F
 !.	F

 u||,F
 ELL)F
  -F
 &F
 $D>F
 'tnF
 d^F
 
u..	/F
 F
r;   rE  c                   D    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ef   fd       Z xZS )SqueezeBertForMultipleChoicec                     t         |   |       t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                  d      | _
        | j                          y )Nr   )r!   r"   r  r  r   r/   r0   r1   r   r-   rH  r  r6   s     r:   r"   z%SqueezeBertForMultipleChoice.__init__  sW     +F3zz&"<"<=))F$6$6: 	r;   rC   r   rD   r   r   rE   r8  r   r   r   r   c                 L   |
|
n| j                   j                  }
||j                  d   n|j                  d   }|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||||	|
	      }|d   }| j                  |      }| j                  |      }|j                  d|      }d}|t               } |||      }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where *num_choices* is the size of the second dimension of the input tensors. (see
            *input_ids* above)
        Nr   r   r:  r^   r;  )r8   r%  shaper   r@   r  r1   rH  r   r   rr   r   )r7   rC   r   rD   r   r   rE   r8  r   r   r   num_choicesr>  r   r=  reshaped_logitsr<  r@  ry   s                      r:   rI   z$SqueezeBertForMultipleChoice.forward  s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 ""))%'/!5# # 

  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r;   rA  )rK   rL   rM   r"   r   r   r3   r-  r/  r   r   r   rI   rO   rP   s   @r:   rP  rP    s     -11515/3,004)-,0/3&*X
ELL)X
 !.X
 !.	X

 u||,X
 ELL)X
  -X
 &X
 $D>X
 'tnX
 d^X
 
u//	0X
 X
r;   rP  c                   D    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ef   fd       Z xZS )!SqueezeBertForTokenClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y rT   )r!   r"   rG  r  r  r   r/   r0   r1   r   r-   rH  r  r6   s     r:   r"   z*SqueezeBertForTokenClassification.__init__*  sj      +++F3zz&"<"<=))F$6$68I8IJ 	r;   rC   r   rD   r   r   rE   r8  r   r   r   r   c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr:  r   r   r^   r;  )r8   r%  r  r1   rH  r   r   rG  r   rr   r   )r7   rC   r   rD   r   r   rE   r8  r   r   r   r>  r   r=  r<  r@  ry   s                    r:   rI   z)SqueezeBertForTokenClassification.forward5  s    $ &1%<k$++B]B]""))%'/!5# # 

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r;   rA  )rK   rL   rM   r"   r   r   r3   r-  r/  r   r   r   rI   rO   rP   s   @r:   rX  rX  (  s    	  -11515/3,004)-,0/3&*2
ELL)2
 !.2
 !.	2

 u||,2
 ELL)2
  -2
 &2
 $D>2
 'tn2
 d^2
 
u++	,2
 2
r;   rX  c                   d    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee   dee   dee   de	e
ef   fd       Z xZS )SqueezeBertForQuestionAnsweringc                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y rT   )
r!   r"   rG  r  r  r   r   r-   
qa_outputsr  r6   s     r:   r"   z(SqueezeBertForQuestionAnswering.__init__m  sT      +++F3))F$6$68I8IJ 	r;   rC   r   rD   r   r   rE   start_positionsend_positionsr   r   r   r   c                 (   ||n| j                   j                  }| j                  |||||||	|
|	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|s||f|dd  z   }||f|z   S |S t        ||||j                  |j                        S )	Nr:  r   r   r   r~   )ignore_indexr^   )r<  start_logits
end_logitsrr   r   )r8   r%  r  r^  splitrN  r   r   r@   clampr   r   rr   r   )r7   rC   r   rD   r   r   rE   r_  r`  r   r   r   r>  r   r=  rc  rd  
total_lossignored_indexr@  
start_lossend_lossry   s                          r:   rI   z'SqueezeBertForQuestionAnswering.forwardw  s    &1%<k$++B]B]""))%'/!5# # 

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r;   )NNNNNNNNNNN)rK   rL   rM   r"   r   r   r3   r-  r/  r   r   r   rI   rO   rP   s   @r:   r\  r\  k  s     -11515/3,0042604,0/3&*>
ELL)>
 !.>
 !.	>

 u||,>
 ELL)>
  ->
 "%,,/>
  ->
 $D>>
 'tn>
 d^>
 
u22	3>
 >
r;   r\  )r1  rP  r\  rE  rX  r  r   r  )6rN   r   typingr   r   r   r3   r   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   utilsr   r   configuration_squeezebertr   
get_loggerrK   loggerModuler   rR   r,   rZ   rc   ru   r{   r   r   r   r   r   r   r  r  r1  rE  rP  rX  r\  __all__ra   r;   r:   <module>rv     s   !  ) )   A A !   . 9 
		H	%)BII )X(BII (("2<< " 299 ( RYY  Wryy Wt'		 'T:
 :
z		  "")) .!RYY ! % % %. [
1 [
 [
| F
7 F
 F
R T
+E T
T
n d
#= d
 d
N ?
(B ?
 ?
D J
&@ J
 J
Z	r;   