
    Uh                     r   d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlm Z   ejB                  e"      Z#e G d de             Z$ G d dejJ                        Z& G d dejJ                        Z' G d dejJ                        Z( G d dejJ                        Z) G d dejJ                        Z* G d dejJ                        Z+ G d dejJ                        Z, G d dejJ                        Z-e G d  d!e$             Z. G d" d#e$      Z/ G d$ d%ejJ                        Z0 ed&'       G d( d)e$             Z1e G d* d+e$             Z2e G d, d-e$             Z3 G d. d/ejJ                        Z4e G d0 d1e$             Z5d2 Z6g d3Z7y)4zPyTorch MPNet model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )MPNetConfigc                       e Zd ZeZdZd Zy)MPNetPreTrainedModelmpnetc                 l   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r%|j                  j                  j                          yy)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   Linearweightdatanormal_configinitializer_rangebiaszero_	Embeddingpadding_idx	LayerNormfill_MPNetLMHead)selfmodules     z/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/mpnet/modeling_mpnet.py_init_weightsz"MPNetPreTrainedModel._init_weights1   s&   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S),KK""$ -    N)__name__
__module____qualname__r   config_classbase_model_prefixr2    r3   r1   r   r   ,   s    L%r3   r   c                   ,     e Zd Z fdZddZd Z xZS )MPNetEmbeddingsc                 D   t         |           d| _        t        j                  |j
                  |j                  | j                        | _        t        j                  |j                  |j                  | j                        | _	        t        j                  |j                  |j                        | _
        t        j                  |j                        | _        | j                  dt!        j"                  |j                        j%                  d      d       y )Nr   )r+   epsposition_ids)r   F)
persistent)super__init__r+   r   r*   
vocab_sizehidden_sizeword_embeddingsmax_position_embeddingsposition_embeddingsr,   layer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandr/   r&   	__class__s     r1   rC   zMPNetEmbeddings.__init__E   s    !||F,=,=v?Q?Q_c_o_op#%<<**F,>,>DL\L\$
  f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
r3   c                 x   |*|t        || j                        }n| j                  |      }||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|| j                  |      }| j                  |      }||z   }| j                  |      }| j                  |      }|S )Nr@   r   )	"create_position_ids_from_input_idsr+   &create_position_ids_from_inputs_embedssizer?   rF   rH   r,   rL   )	r/   	input_idsr?   inputs_embedskwargsinput_shape
seq_lengthrH   
embeddingss	            r1   forwardzMPNetEmbeddings.forwardS   s    $A)TM]M]^#JJ=Y #..*K',,.s3K ^
,,Q^<L  00;M"66|D"%88
^^J/
\\*-
r3   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr@   r   )dtypedevicer   )rV   rN   rO   r+   longr`   	unsqueezerP   )r/   rX   rZ   sequence_lengthr?   s        r1   rU   z6MPNetEmbeddings.create_position_ids_from_inputs_embedsm   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r3   )NNN)r4   r5   r6   rC   r]   rU   __classcell__rR   s   @r1   r;   r;   D   s    
4=r3   r;   c                   4     e Zd Z fdZd Z	 	 	 	 ddZ xZS )MPNetSelfAttentionc                 *   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                         | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())rB   rC   rE   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   r"   qkvorJ   attention_probs_dropout_probrL   rQ   s     r1   rC   zMPNetSelfAttention.__init__   s:    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP6--t/A/AB6--t/A/AB6--t/A/AB6--v/A/ABzz&"E"EFr3   c                     |j                         d d | j                  | j                  fz   } |j                  | }|j	                  dddd      S )Nr@   r      r   r
   )rV   rk   ro   viewpermute)r/   xnew_x_shapes      r1   transpose_for_scoresz'MPNetSelfAttention.transpose_for_scores   sN    ffhsmt'?'?AYAY&ZZAFFK yyAq!$$r3   c                    | j                  |      }| j                  |      }| j                  |      }	| j                  |      }| j                  |      }| j                  |	      }	t	        j
                  ||j                  dd            }
|
t        j                  | j                        z  }
||
|z  }
||
|z   }
t        j                  j                  |
d      }| j                  |      }|||z  }t	        j
                  ||	      }|j                  dddd      j                         }|j!                         d d | j"                  fz   } |j$                  | }| j'                  |      }|r||f}|S |f}|S )Nr@   dimr   rw   r   r
   )rq   rr   rs   r|   rN   matmul	transposemathsqrtro   r   
functionalsoftmaxrL   ry   
contiguousrV   rp   rx   rt   )r/   hidden_statesattention_mask	head_maskposition_biasoutput_attentionsrY   rq   rr   rs   attention_scoresattention_probscnew_c_shapert   outputss                   r1   r]   zMPNetSelfAttention.forward   s~    FF=!FF=!FF=!%%a(%%a(%%a( !<<1;;r2+>?+dii8P8P.QQ $-%/.@ --//0@b/I,,7 -	9OLL!,IIaAq!,,.ffhsmt'9'9&;;AFFK FF1I*;1o& CDr3   NNNF)r4   r5   r6   rC   r|   r]   rd   re   s   @r1   rg   rg      s!    G&% -r3   rg   c                   4     e Zd Z fdZd Z	 	 	 	 ddZ xZS )MPNetAttentionc                    t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t	        j                  |j                        | _
        t               | _        y Nr=   )rB   rC   rg   attnr   r,   rE   rI   rJ   rK   rL   setpruned_headsrQ   s     r1   rC   zMPNetAttention.__init__   sX    &v.	f&8&8f>S>STzz&"<"<=Er3   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _
        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )lenr   r   rk   ro   r   r   rq   rr   rs   rt   rp   union)r/   headsindexs      r1   prune_headszMPNetAttention.prune_heads   s   u:?749900$))2O2OQUQbQb
u )e<		(e<		(e<		(eC		(,		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r3   c                     | j                  |||||      }| j                  | j                  |d         |z         }|f|dd  z   }	|	S )N)r   r   r   )r   r,   rL   )
r/   r   r   r   r   r   rY   self_outputsattention_outputr   s
             r1   r]   zMPNetAttention.forward   sb     yy/ ! 
  >>$,,|A*G-*WX#%QR(88r3   r   )r4   r5   r6   rC   r   r]   rd   re   s   @r1   r   r      s     ";& r3   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MPNetIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y N)rB   rC   r   r"   rE   intermediate_sizedenser!   
hidden_actstrr   intermediate_act_fnrQ   s     r1   rC   zMPNetIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r3   r   returnc                 J    | j                  |      }| j                  |      }|S r   )r   r   )r/   r   s     r1   r]   zMPNetIntermediate.forward   s&    

=100?r3   r4   r5   r6   rC   rN   Tensorr]   rd   re   s   @r1   r   r      s#    9U\\ ell r3   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MPNetOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )rB   rC   r   r"   r   rE   r   r,   rI   rJ   rK   rL   rQ   s     r1   rC   zMPNetOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r3   r   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   )r   rL   r,   )r/   r   r   s      r1   r]   zMPNetOutput.forward  s7    

=1]3}|'CDr3   r   re   s   @r1   r   r     s1    >U\\  RWR^R^ r3   r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )
MPNetLayerc                     t         |           t        |      | _        t	        |      | _        t        |      | _        y r   )rB   rC   r   	attentionr   intermediater   outputrQ   s     r1   rC   zMPNetLayer.__init__  s3    '/-f5!&)r3   c                     | j                  |||||      }|d   }|dd  }	| j                  |      }
| j                  |
|      }|f|	z   }	|	S )N)r   r   r   r   )r   r   r   )r/   r   r   r   r   r   rY   self_attention_outputsr   r   intermediate_outputlayer_outputs               r1   r]   zMPNetLayer.forward  su     "&'/ "0 "
 2!4(,"//0@A{{#68HI/G+r3   r   )r4   r5   r6   rC   r]   rd   re   s   @r1   r   r     s    * r3   r   c                        e Zd Z fdZ	 	 	 	 	 ddej
                  deej
                     deej
                     dededefdZdd	Z	e
dd
       Z xZS )MPNetEncoderc                 B   t         |           || _        |j                  | _        t        j                  t        |j                        D cg c]  }t        |       c}      | _
        t        j                  |j                  | j                        | _        y c c}w r   )rB   rC   r&   rk   n_headsr   
ModuleListrangenum_hidden_layersr   layerr*   relative_attention_num_bucketsrelative_attention_bias)r/   r&   _rR   s      r1   rC   zMPNetEncoder.__init__6  ss    11]]fF^F^@_#`1Jv$6#`a
')||F4Y4Y[_[g[g'h$ $as   Br   r   r   r   output_hidden_statesreturn_dictc                 $   | j                  |      }|rdnd }	|rdnd }
t        | j                        D ]/  \  }}|r|	|fz   }	 |||||   |fd|i|}|d   }|s'|
|d   fz   }
1 |r|	|fz   }	|st        d ||	|
fD              S t	        ||	|
      S )Nr9   r   r   r   c              3   &   K   | ]	  }||  y wr   r9   ).0rs   s     r1   	<genexpr>z'MPNetEncoder.forward.<locals>.<genexpr>`  s     hqZ[Zghs   )last_hidden_stater   
attentions)compute_position_bias	enumerater   tupler   )r/   r   r   r   r   r   r   rY   r   all_hidden_statesall_attentionsilayer_modulelayer_outputss                 r1   r]   zMPNetEncoder.forward=  s     22=A"6BD0d(4 	FOA|#$58H$H!(!	
 #4 M *!,M !/=3C2E!E	F$   1]4D Dh]4E~$Vhhh++%
 	
r3   c                 d   |j                  d      |j                  d      |j                  d      }}}||d d d d d f   }|d d d d d f   }nXt        j                  |t        j                        d d d f   }t        j                  |t        j                        d d d f   }||z
  }	| j	                  |	|      }
|
j                  |j                        }
| j                  |
      }|j                  g d      j                  d      }|j                  |d||f      j                         }|S )Nr   r   )r_   )num_buckets)rw   r   r   r@   )rV   rN   rO   ra   relative_position_buckettor`   r   ry   rb   rP   r   )r/   rz   r?   r   bszqlenklencontext_positionmemory_positionrelative_position	rp_bucketvaluess               r1   r   z"MPNetEncoder.compute_position_biasg  s   &&)QVVAYq	4T#+Aq$J7*1dA:6O$||D

CAtGL#ll4uzzB47KO+.>>112CQ\1]	LL*	--i8	*44Q7Rt45@@Br3   c                     d}|  }|dz  }||dk  j                  t        j                        |z  z  }t        j                  |      }|dz  }||k  }|t        j                  |j                         |z        t        j                  ||z        z  ||z
  z  j                  t        j                        z   }t        j                  |t        j                  ||dz
              }|t        j                  |||      z  }|S )Nr   rw   r   )
r   rN   ra   abslogfloatr   min	full_likewhere)r   r   max_distanceretn	max_exactis_smallval_if_larges           r1   r   z%MPNetEncoder.relative_position_buckety  s    Azz%**%33IIaL1$	y= IIaggi)+,txxy8P/QQU`clUlm
"UZZ. yyu|[[\_/]^u{{8Q55
r3   )NNFFF)N    )r      )r4   r5   r6   rC   rN   r   r   boolr]   r   staticmethodr   rd   re   s   @r1   r   r   5  s    i 26,0"'%*!(
||(
 !.(
 ELL)	(

  (
 #(
 (
T$  r3   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MPNetPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )rB   rC   r   r"   rE   r   Tanh
activationrQ   s     r1   rC   zMPNetPooler.__init__  s9    YYv1163E3EF
'')r3   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S Nr   )r   r   )r/   r   first_token_tensorpooled_outputs       r1   r]   zMPNetPooler.forward  s6     +1a40

#566r3   r   re   s   @r1   r   r     s#    $
U\\ ell r3   r   c                   2    e Zd Zd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee   dee   dee   deee	j                     ef   fd       Z xZS )
MPNetModelc                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rB   rC   r&   r;   r\   r   encoderr   pooler	post_init)r/   r&   add_pooling_layerrR   s      r1   rC   zMPNetModel.__init__  sM    
 	 )&1#F+->k&)D 	r3   c                 .    | j                   j                  S r   r\   rF   r/   s    r1   get_input_embeddingszMPNetModel.get_input_embeddings  s    ...r3   c                 &    || j                   _        y r   r  )r/   values     r1   set_input_embeddingszMPNetModel.set_input_embeddings  s    */'r3   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r   r   r   )r/   heads_to_pruner   r   s       r1   _prune_headszMPNetModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr3   rW   r   r?   r   rX   r   r   r   r   c	                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      ||j                  n|j                  }|t        j                  |
|      }| j                  ||
      }| j                  || j                   j                        }| j                  |||      }| j                  ||||||      }|d   }| j                  | j                  |      nd }|s
||f|dd  z   S t!        |||j"                  |j$                  	      S )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer@   z5You have to specify either input_ids or inputs_embeds)r`   )rW   r?   rX   )r   r   r   r   r   r   r   )r   pooler_outputr   r   )r&   r   r   use_return_dictrm   %warn_if_padding_and_no_attention_maskrV   r`   rN   onesget_extended_attention_maskget_head_maskr   r\   r  r  r   r   r   )r/   rW   r   r?   r   rX   r   r   r   rY   rZ   r`   extended_attention_maskembedding_outputencoder_outputssequence_outputr   s                    r1   r]   zMPNetModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN040P0PQ_al0m&&y$++2O2OP	??Y\iv?w,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r3   )T)NNNNNNNN)r4   r5   r6   rC   r
  r  r  r   r   rN   
LongTensorFloatTensorr   r   r   r   r   r]   rd   re   s   @r1   r  r    s    /0C  156:371559,0/3&*7
E,,-7
 !!2!237
 u//0	7

 E--.7
   1 127
 $D>7
 'tn7
 d^7
 
uU\\"$>>	?7
 7
r3   r  c                   P    e Zd ZdgZ fdZd Zd Ze	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee   dee   dee   deee	j                     ef   fd       Z xZS )MPNetForMaskedLMzlm_head.decoderc                     t         |   |       t        |d      | _        t	        |      | _        | j                          y NF)r  )rB   rC   r  r   r.   lm_headr  rQ   s     r1   rC   zMPNetForMaskedLM.__init__  s6     %@
"6* 	r3   c                 .    | j                   j                  S r   )r#  decoderr	  s    r1   get_output_embeddingsz&MPNetForMaskedLM.get_output_embeddings  s    ||###r3   c                 \    || j                   _        |j                  | j                   _        y r   )r#  r%  r(   )r/   new_embeddingss     r1   set_output_embeddingsz&MPNetForMaskedLM.set_output_embeddings  s     -*//r3   rW   r   r?   r   rX   labelsr   r   r   r   c
           
         |	|	n| j                   j                  }	| j                  ||||||||	      }
|
d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|	s|f|
dd z   }||f|z   S |S t        |||
j                  |
j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr   r?   r   rX   r   r   r   r   r@   rw   losslogitsr   r   )
r&   r  r   r#  r   rx   rD   r   r   r   )r/   rW   r   r?   r   rX   r*  r   r   r   r   r  prediction_scoresmasked_lm_lossloss_fctr   s                   r1   r]   zMPNetForMaskedLM.forward  s    & &1%<k$++B]B]**)%'/!5#  	
 "!* LL9')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r3   	NNNNNNNNN)r4   r5   r6   _tied_weights_keysrC   r&  r)  r   r   rN   r  r  r   r   r   r   r   r]   rd   re   s   @r1   r   r     s   +,$0  156:371559-1,0/3&*0
E,,-0
 !!2!230
 u//0	0

 E--.0
   1 120
 ))*0
 $D>0
 'tn0
 d^0
 
uU\\"N2	30
 0
r3   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )r.   z5MPNet Head for masked and permuted language modeling.c                    t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                  |j                  d      | _
        t        j                  t        j                  |j                              | _        | j                  | j                  _        y )Nr=   F)r(   )rB   rC   r   r"   rE   r   r,   rI   
layer_normrD   r%  	ParameterrN   zerosr(   rQ   s     r1   rC   zMPNetLMHead.__init__B  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FUSLLV->->!?@	 !IIr3   c                 :    | j                   | j                  _         y r   )r(   r%  r	  s    r1   _tie_weightszMPNetLMHead._tie_weightsM  s     IIr3   c                     | j                  |      }t        |      }| j                  |      }| j                  |      }|S r   )r   r   r7  r%  r/   featuresrY   rz   s       r1   r]   zMPNetLMHead.forwardP  s;    JJx GOOA LLOr3   )r4   r5   r6   __doc__rC   r;  r]   rd   re   s   @r1   r.   r.   ?  s    ?	&&r3   r.   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   >    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     dee	   d	ee	   d
ee	   de
eej                     ef   fd       Z xZS )MPNetForSequenceClassificationc                     t         |   |       |j                  | _        t        |d      | _        t        |      | _        | j                          y r"  )rB   rC   
num_labelsr  r   MPNetClassificationHead
classifierr  rQ   s     r1   rC   z'MPNetForSequenceClassification.__init__b  sC      ++%@
1&9 	r3   rW   r   r?   r   rX   r*  r   r   r   r   c
           
         |	|	n| j                   j                  }	| j                  ||||||||	      }
|
d   }| j                  |      }d}|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }| j
                  dk(  r& ||j                         |j                               }n |||      }n| j                   j                  dk(  r=t               } ||j                  d| j
                        |j                  d            }n,| j                   j                  dk(  rt               } |||      }|	s|f|
d	d z   }||f|z   S |S t        |||
j                   |
j"                  
      S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr,  r   r   
regressionsingle_label_classificationmulti_label_classificationr@   rw   r-  )r&   r  r   rF  problem_typerD  r_   rN   ra   rn   r	   squeezer   rx   r   r   r   r   r/   rW   r   r?   r   rX   r*  r   r   r   r   r  r/  r.  r2  r   s                   r1   r]   z&MPNetForSequenceClassification.forwardl  s   ( &1%<k$++B]B]**)%'/!5#  	
 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r3   r3  )r4   r5   r6   rC   r   r   rN   r  r  r   r   r   r   r   r]   rd   re   s   @r1   rB  rB  [  s	     156:371559-1,0/3&*A
E,,-A
 !!2!23A
 u//0	A

 E--.A
   1 12A
 ))*A
 $D>A
 'tnA
 d^A
 
uU\\"$<<	=A
 A
r3   rB  c                   >    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     dee	   d	ee	   d
ee	   de
eej                     ef   fd       Z xZS )MPNetForMultipleChoicec                     t         |   |       t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                  d      | _
        | j                          y )Nr   )rB   rC   r  r   r   rJ   rK   rL   r"   rE   rF  r  rQ   s     r1   rC   zMPNetForMultipleChoice.__init__  sV     '
zz&"<"<=))F$6$6: 	r3   rW   r   r?   r   rX   r*  r   r   r   r   c
           
          |	|	n| j                   j                  }	||j                  d   n|j                  d   }
|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||||	      }|d   }| j                  |      }| j                  |      }|j                  d|
      }d}|t               } |||      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r@   r~   )r?   r   r   rX   r   r   r   rw   r-  )r&   r  shaperx   rV   r   rL   rF  r   r   r   r   )r/   rW   r   r?   r   rX   r*  r   r   r   num_choicesflat_input_idsflat_position_idsflat_attention_maskflat_inputs_embedsr   r   r/  reshaped_logitsr.  r2  r   s                         r1   r]   zMPNetForMultipleChoice.forward  s   H &1%<k$++B]B],5,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 ***.,/!5#  	
  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r3   r3  )r4   r5   r6   rC   r   r   rN   r  r  r   r   r   r   r   r]   rd   re   s   @r1   rO  rO    s	     156:371559-1,0/3&*M
E,,-M
 !!2!23M
 u//0	M

 E--.M
   1 12M
 ))*M
 $D>M
 'tnM
 d^M
 
uU\\"$==	>M
 M
r3   rO  c                   >    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     dee	   d	ee	   d
ee	   de
eej                     ef   fd       Z xZS )MPNetForTokenClassificationc                 0   t         |   |       |j                  | _        t        |d      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r"  )rB   rC   rD  r  r   r   rJ   rK   rL   r"   rE   rF  r  rQ   s     r1   rC   z$MPNetForTokenClassification.__init__  sk      ++%@
zz&"<"<=))F$6$68I8IJ 	r3   rW   r   r?   r   rX   r*  r   r   r   r   c
           
         |	|	n| j                   j                  }	| j                  ||||||||	      }
|
d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }|	s|f|
dd z   }||f|z   S |S t        |||
j                  |
j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr,  r   r@   rw   r-  )r&   r  r   rL   rF  r   rx   rD  r   r   r   rM  s                   r1   r]   z#MPNetForTokenClassification.forward  s    $ &1%<k$++B]B]**)%'/!5#  	
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r3   r3  )r4   r5   r6   rC   r   r   rN   r  r  r   r   r   r   r   r]   rd   re   s   @r1   rZ  rZ    s    	  156:371559-1,0/3&*1
E,,-1
 !!2!231
 u//0	1

 E--.1
   1 121
 ))*1
 $D>1
 'tn1
 d^1
 
uU\\"$99	:1
 1
r3   rZ  c                   (     e Zd ZdZ fdZd Z xZS )rE  z-Head for sentence-level classification tasks.c                 &   t         |           t        j                  |j                  |j                        | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _
        y r   )rB   rC   r   r"   rE   r   rJ   rK   rL   rD  out_projrQ   s     r1   rC   z MPNetClassificationHead.__init__S  s`    YYv1163E3EF
zz&"<"<=		&"4"4f6G6GHr3   c                     |d d dd d f   }| j                  |      }| j                  |      }t        j                  |      }| j                  |      }| j	                  |      }|S r   )rL   r   rN   tanhr_  r=  s       r1   r]   zMPNetClassificationHead.forwardY  sY    Q1WLLOJJqMJJqMLLOMM!r3   )r4   r5   r6   r?  rC   r]   rd   re   s   @r1   rE  rE  P  s    7Ir3   rE  c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee	   d
ee	   dee	   de
eej                     ef   fd       Z xZS )MPNetForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r"  )
rB   rC   rD  r  r   r   r"   rE   
qa_outputsr  rQ   s     r1   rC   z"MPNetForQuestionAnswering.__init__e  sU      ++%@
))F$6$68I8IJ 	r3   rW   r   r?   r   rX   start_positionsend_positionsr   r   r   r   c           
      &   |
|
n| j                   j                  }
| j                  |||||||	|
      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|
s||f|dd  z   }||f|z   S |S t        ||||j                  |j                        S )	Nr,  r   r   r@   r   )ignore_indexrw   )r.  start_logits
end_logitsr   r   )r&   r  r   re  splitrL  r   r   rV   clampr   r   r   r   )r/   rW   r   r?   r   rX   rf  rg  r   r   r   r   r  r/  rj  rk  
total_lossignored_indexr2  
start_lossend_lossr   s                         r1   r]   z!MPNetForQuestionAnswering.forwardo  s    &1%<k$++B]B]**)%'/!5#  	
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r3   )
NNNNNNNNNN)r4   r5   r6   rC   r   r   rN   r  r  r   r   r   r   r   r]   rd   re   s   @r1   rc  rc  c  s     156:3715596:48,0/3&*<
E,,-<
 !!2!23<
 u//0	<

 E--.<
   1 12<
 "%"2"23<
   0 01<
 $D><
 'tn<
 d^<
 
uU\\"$@@	A<
 <
r3   rc  c                     | j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    r   r   )nern   rN   cumsumtype_asra   )rW   r+   maskincremental_indicess       r1   rT   rT     sP     <<$((*D,,t3;;DADH##%33r3   )r   rO  rc  rB  rZ  r   r  r   )8r?  r   typingr   r   r   rN   r   torch.nnr   r   r	   activationsr   r   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_mpnetr   
get_loggerr4   loggerr   Moduler;   rg   r   r   r   r   r   r   r  r   r.   rB  rO  rZ  rE  rc  rT   __all__r9   r3   r1   <module>r     s      ) )   A A '   . Q , , 
		H	% %? % %.8=bii 8=vF FR+RYY +^		  "))  @V299 Vt"))  V
% V
 V
rD
+ D
N")) 8 M
%9 M
M
` Y
1 Y
 Y
x >
"6 >
 >
Bbii & H
 4 H
 H
V4	r3   