
    Uhe              	          d Z ddlZddlZddlmZmZmZmZ ddlZddl	m
c mZ ddlZddlm
Z
 ddlmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZmZ ddlm Z   ejB                  e"      Z#d,dejH                  de%de&dejH                  fdZ' G d de
jP                        Z) G d de
jP                        Z* G d de
jP                        Z+ G d de
jP                        Z, G d de
jP                        Z- G d de
jP                        Z. G d d e
jP                        Z/ G d! d"e
jP                        Z0e G d# d$e             Z1e G d% d&e1             Z2 ed'(       G d) d*e1             Z3g d+Z4y)-zPyTorch PVT model.    N)IterableOptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	PvtConfiginput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          v/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr(   )   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
PvtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r'   r/   zPvtDropPath.__init__A   s    "r)   hidden_statesc                 D    t        || j                  | j                        S r-   )r(   r   r   r0   r2   s     r'   forwardzPvtDropPath.forwardE   s    FFr)   c                 8    dj                  | j                        S )Nzp={})formatr   )r0   s    r'   
extra_reprzPvtDropPath.extra_reprH   s    }}T^^,,r)   r-   )__name__
__module____qualname____doc__r   floatr/   r    Tensorr5   strr8   __classcell__r1   s   @r'   r+   r+   >   sG    b#(5/ #T #GU\\ Gell G-C -r)   r+   c                        e Zd ZdZ	 ddedeeee   f   deeee   f   dedededef fd	Z	d
e
j                  dedede
j                  fdZde
j                  dee
j                  eef   fdZ xZS )PvtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    config
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                    t         	|           || _        t        |t        j
                  j                        r|n||f}t        |t        j
                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _	        || _
        t        j                  t        j                  d|r|dz   n||            | _        |r*t        j                  t        j                   dd|            nd | _        t        j$                  ||||      | _        t        j(                  ||j*                        | _        t        j.                  |j0                        | _        y )Nr   r   kernel_sizerG   eps)p)r.   r/   rD   
isinstancecollectionsabcr   rE   rF   rH   num_patchesr   	Parameterr    randnposition_embeddingszerosrJ   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r0   rD   rE   rF   rG   rH   rI   rJ   rT   r1   s
            r'   r/   zPvtPatchEmbeddings.__init__S   s0    	#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&#%<<KKi;?[+V$
  JSekk!Q&DEX\))L+6Zde,,{8M8MNzzF$>$>?r)   
embeddingsheightwidthr   c                    ||z  }t         j                  j                         s<|| j                  j                  | j                  j                  z  k(  r| j
                  S |j                  d||d      j                  dddd      }t        j                  |||fd      }|j                  dd||z        j                  ddd      }|S )Nr   r   r      bilinear)sizemode)
r    jit
is_tracingrD   rE   rW   reshapepermuteFinterpolate)r0   ra   rb   rc   rT   interpolated_embeddingss         r'   interpolate_pos_encodingz+PvtPatchEmbeddings.interpolate_pos_encodingo   s    un yy##%+9O9ORVR]R]RhRh9h*h+++''65"=EEaAqQ
"#--
&%Wa"b"9"A"A!RRW"X"`"`abdegh"i&&r)   pixel_valuesc                    |j                   \  }}}}|| j                  k7  rt        d      | j                  |      }|j                   ^ }}}|j	                  d      j                  dd      }| j                  |      }| j                  | j                  j                  |dd      }	t        j                  |	|fd      }| j                  | j                  d d dd f   ||      }
t        j                  | j                  d d d df   |
fd      }
n| j                  | j                  ||      }
| j                  ||
z         }|||fS )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rf   r   re   dim)r   rH   
ValueErrorrZ   flatten	transposer]   rJ   expandr    catrq   rW   r`   )r0   rr   
batch_sizerH   rb   rc   patch_embed_ra   rJ   rW   s              r'   r5   zPvtPatchEmbeddings.forwardz   sM   2>2D2D/
L&%4,,,w  ool3'--FE!))!,66q!<__[1
>>%--j"bAIIz#:BJ"&"?"?@X@XYZ\]\^Y^@_agin"o"'))T-E-Ea!e-LNa,bhi"j"&"?"?@X@XZ`bg"h\\*/B"BC
65((r)   F)r9   r:   r;   r<   r   r   intr   boolr/   r    r>   rq   r   r5   r@   rA   s   @r'   rC   rC   L   s      @@ #x},-@ #x},-	@
 @ @ @ @8	'5<< 	' 	'UX 	']b]i]i 	')ELL )U5<<c;Q5R )r)   rC   c                   `     e Zd Zdedef fdZdej                  dej                  fdZ xZ	S )PvtSelfOutputrD   rI   c                     t         |           t        j                  ||      | _        t        j
                  |j                        | _        y r-   )r.   r/   r   Lineardenser^   r_   r`   )r0   rD   rI   r1   s      r'   r/   zPvtSelfOutput.__init__   s6    YY{K8
zz&"<"<=r)   r2   r   c                 J    | j                  |      }| j                  |      }|S r-   )r   r`   r4   s     r'   r5   zPvtSelfOutput.forward   s$    

=1]3r)   )
r9   r:   r;   r   r   r/   r    r>   r5   r@   rA   s   @r'   r   r      s1    >y >s >
U\\ ell r)   r   c                        e Zd ZdZdedededef fdZdedej                  fd	Z
	 ddej                  d
edededeej                     f
dZ xZS )PvtEfficientSelfAttentionzpEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://arxiv.org/abs/2102.12122).rD   rI   num_attention_headssequences_reduction_ratioc                    t         |           || _        || _        | j                  | j                  z  dk7  r&t	        d| j                   d| j                   d      t        | j                  | j                  z        | _        | j                  | j                  z  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  |j                        | _        || _        |dkD  rEt        j$                  ||||      | _        t        j(                  ||j*                        | _        y y )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rL   rN   )r.   r/   rI   r   rv   r   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluer^   attention_probs_dropout_probr`   r   rY   sequence_reductionr[   r\   r]   r0   rD   rI   r   r   r1   s        r'   r/   z"PvtEfficientSelfAttention.__init__   sr    	&#6 d666!;#D$4$4#5 622316 
 $'t'7'7$:R:R'R#S !558P8PPYYt//1C1C&//Z
99T--t/A/AXYYt//1C1C&//Z
zz&"E"EF)B&$q(&(ii[6OXq'D# !ll;F<Q<QRDO	 )r)   r2   r   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  dddd      S )Nre   r   rf   r   r   )rh   r   r   viewrm   )r0   r2   	new_shapes      r'   transpose_for_scoresz.PvtEfficientSelfAttention.transpose_for_scores   sT    !&&("-1I1I4KcKc0dd	%**95$$Q1a00r)   rb   rc   output_attentionsc                    | j                  | j                  |            }| j                  dkD  r{|j                  \  }}}|j	                  ddd      j                  ||||      }| j                  |      }|j                  ||d      j	                  ddd      }| j                  |      }| j                  | j                  |            }	| j                  | j                  |            }
t        j                  ||	j                  dd            }|t        j                  | j                        z  }t         j"                  j%                  |d      }| j'                  |      }t        j                  ||
      }|j	                  dddd      j)                         }|j+                         d d | j,                  fz   }|j/                  |      }|r||f}|S |f}|S )Nr   r   rf   re   rt   r   )r   r   r   r   rm   rl   r   r]   r   r   r    matmulrx   mathsqrtr   r   
functionalsoftmaxr`   
contiguousrh   r   r   )r0   r2   rb   rc   r   query_layerr{   seq_lenrH   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r'   r5   z!PvtEfficientSelfAttention.forward   s    //

=0IJ))A-0=0C0C-J)11!Q:BB:|]cejkM 33MBM)11*lBOWWXY[\^_`M OOM:M--dhh}.EF	//

=0IJ !<<Y5H5HR5PQ+dii8P8P.QQ --//0@b/I ,,7_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]r)   r~   )r9   r:   r;   r<   r   r   r=   r/   r    r>   r   r   r   r5   r@   rA   s   @r'   r   r      s    zSS.1SHKShmS:1# 1%,, 1 #(*||* * 	*
  * 
u||	*r)   r   c                        e Zd Zdedededef fdZd Z	 ddej                  ded	ed
e
deej                     f
dZ xZS )PvtAttentionrD   rI   r   r   c                     t         |           t        ||||      | _        t	        ||      | _        t               | _        y )N)rI   r   r   )rI   )r.   r/   r   r0   r   r&   setpruned_headsr   s        r'   r/   zPvtAttention.__init__   sB     	-# 3&?	
	 $FDEr)   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   rt   )lenr   r0   r   r   r   r   r   r   r   r&   r   r   union)r0   headsindexs      r'   prune_headszPvtAttention.prune_heads   s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r)   r2   rb   rc   r   r   c                 h    | j                  ||||      }| j                  |d         }|f|dd  z   }|S )Nr   r   )r0   r&   )r0   r2   rb   rc   r   self_outputsattention_outputr   s           r'   r5   zPvtAttention.forward  sE     yy?PQ;;|A7#%QR(88r)   r~   )r9   r:   r;   r   r   r=   r/   r   r    r>   r   r   r5   r@   rA   s   @r'   r   r      sn    "".1"HK"hm";& _d"\\36?BW[	u||	r)   r   c            
       z     e Zd Z	 	 d	dededee   dee   f fdZdej                  dej                  fdZ	 xZ
S )
PvtFFNrD   in_featureshidden_featuresout_featuresc                 j   t         |           ||n|}t        j                  ||      | _        t        |j                  t              rt        |j                     | _	        n|j                  | _	        t        j                  ||      | _
        t        j                  |j                        | _        y r-   )r.   r/   r   r   dense1rQ   
hidden_actr?   r   intermediate_act_fndense2r^   r_   r`   )r0   rD   r   r   r   r1   s        r'   r/   zPvtFFN.__init__  s     	'3'?|[ii_=f''-'-f.?.?'@D$'-'8'8D$ii>zz&"<"<=r)   r2   r   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r-   )r   r   r`   r   r4   s     r'   r5   zPvtFFN.forward*  sP    M200?]3M2]3r)   )NN)r9   r:   r;   r   r   r   r/   r    r>   r5   r@   rA   s   @r'   r   r     sY    
 *.&*>> > "#	>
 sm>"U\\ ell r)   r   c                   f     e Zd Zdedededededef fdZddej                  d	ed
ede	fdZ
 xZS )PvtLayerrD   rI   r   r(   r   	mlp_ratioc                 v   t         |           t        j                  ||j                        | _        t        ||||      | _        |dkD  rt        |      nt        j                         | _
        t        j                  ||j                        | _        t        ||z        }t        |||      | _        y )NrN   )rD   rI   r   r   r   )rD   r   r   )r.   r/   r   r[   r\   layer_norm_1r   	attentionr+   Identityr(   layer_norm_2r   r   mlp)	r0   rD   rI   r   r(   r   r   mlp_hidden_sizer1   s	           r'   r/   zPvtLayer.__init__4  s     	LL&:O:OP%# 3&?	
 4=s?Y/LL&:O:OPkI56[Rabr)   r2   rb   rc   r   c                    | j                  | j                  |      |||      }|d   }|dd  }| j                  |      }||z   }| j                  | j	                  |            }| j                  |      }||z   }	|	f|z   }|S )N)r2   rb   rc   r   r   r   )r   r   r(   r   r   )
r0   r2   rb   rc   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r'   r5   zPvtLayer.forwardJ  s    !%++M:/	 "0 "
 2!4(,>>*:;(=8XXd//>?
^^J/
$z1/G+r)   r~   )r9   r:   r;   r   r   r=   r/   r    r>   r   r5   r@   rA   s   @r'   r   r   3  so    cc c !	c
 c $)c c,U\\ 3 s _c r)   r   c                   x     e Zd Zdef fdZ	 	 	 d	dej                  dee   dee   dee   de	e
ef   f
dZ xZS )

PvtEncoderrD   c                    t         	|           || _        t        j                  d|j
                  t        |j                        d      j                         }g }t        |j                        D ]  }|j                  t        ||dk(  r|j                  n| j                  j                  d|dz   z  z  |j                  |   |j                  |   |dk(  r|j                   n|j"                  |dz
     |j"                  |   ||j                  dz
  k(                t%        j&                  |      | _        g }d}t        |j                        D ]  }g }|dk7  r||j                  |dz
     z  }t        |j                  |         D ]\  }|j                  t+        ||j"                  |   |j,                  |   |||z      |j.                  |   |j0                  |                ^ |j                  t%        j&                  |              t%        j&                  |      | _        t%        j4                  |j"                  d   |j6                  	      | _        y )
Nr   cpu)r   rf   r   )rD   rE   rF   rG   rH   rI   rJ   )rD   rI   r   r(   r   r   re   rN   )r.   r/   rD   r    linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrC   rE   patch_sizesstridesrH   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockr[   r\   r]   )
r0   rD   drop_path_decaysra   iblockscurlayersjr1   s
            r'   r/   zPvtEncoder.__init__b  s-    !>>!V-B-BCDV_delln 
v001 	A"!45Fv00@V@V[\abefaf[g@h%11!4!>>!,89Q!4!4FDWDWXY\]X]D^ & 3 3A 66#<#<q#@@
	 !#j 9 v001 	1AFAvv}}QU++6==+, 
%$*$7$7$:,2,F,Fq,I"237";282R2RST2U"("3"3A"6	
 MM"--/0!	1$ ]]6*
 ,,v':':2'>FDYDYZr)   rr   r   output_hidden_statesreturn_dictr   c                 2   |rdnd }|rdnd }|j                   d   }t        | j                        }|}	t        t	        | j
                  | j                              D ]|  \  }
\  }} ||	      \  }	}}|D ]&  } ||	|||      }|d   }	|r	||d   fz   }|s!||	fz   }( |
|dz
  k7  sI|	j                  |||d      j                  dddd      j                         }	~ | j                  |	      }	|r||	fz   }|st        d |	||fD              S t        |	||      S )	N r   r   re   r   rf   c              3   &   K   | ]	  }||  y wr-   r   ).0vs     r'   	<genexpr>z%PvtEncoder.forward.<locals>.<genexpr>  s     mq_`_lms   last_hidden_stater2   
attentions)r   r   r   	enumeratezipr   rl   rm   r   r]   tupler   )r0   rr   r   r   r   all_hidden_statesall_self_attentionsr{   
num_blocksr2   idxembedding_layerblock_layerrb   rc   r   layer_outputss                    r'   r5   zPvtEncoder.forward  si    #7BD$5b4!''*
_
$3<SAVAVX\XbXb=c3d 	v/C//;+:=+I(M65$ M %mVUDU V -a 0$*=qAQ@S*S''(9]<L(L%M j1n$ - 5 5j&%QS T \ \]^`acdfg h s s u	v 6 1]4D Dm]4EGZ$[mmm++*
 	
r)   )FFT)r9   r:   r;   r   r/   r    FloatTensorr   r   r   r   r   r5   r@   rA   s   @r'   r   r   a  sn    0[y 0[j -2/4&*#
''#
 $D>#
 'tn	#

 d^#
 
uo%	&#
r)   r   c                   t    e Zd ZeZdZdZg Zdee	j                  e	j                  e	j                  f   ddfdZy)PvtPreTrainedModelpvtrr   moduler   Nc                    t        |t        j                        rt        j                  j	                  |j
                  j                  d| j                  j                        |j
                  _        |j                  %|j                  j                  j                          yyt        |t        j                        rJ|j                  j                  j                          |j
                  j                  j                  d       yt        |t              rt        j                  j	                  |j                  j                  d| j                  j                        |j                  _        |j                  Zt        j                  j	                  |j                  j                  d| j                  j                        |j                  _        yyy)zInitialize the weightsr   )meanstdNg      ?)rQ   r   r   inittrunc_normal_weightdatarD   initializer_ranger   zero_r[   fill_rC   rW   rJ   )r0   r  s     r'   _init_weightsz PvtPreTrainedModel._init_weights  sS   fbii( "$!6!6v}}7I7IPSY]YdYdYvYv!6!wFMM{{&  &&( '-KK""$MM$$S) 23.0gg.C.C**//KK11 /D /F&&+
 +(*(=(=$$))55 )> )  % , 4r)   )r9   r:   r;   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r   r   rY   r[   r  r   r)   r'   r  r    sD    L$OE"))RYY*L$M RV r)   r  c                        e Zd Zdef fdZd Ze	 	 	 d
dej                  de	e
   de	e
   de	e
   deeef   f
d	       Z xZS )PvtModelrD   c                 r    t         |   |       || _        t        |      | _        | j                          y r-   )r.   r/   rD   r   encoder	post_initr0   rD   r1   s     r'   r/   zPvtModel.__init__  s1      "&) 	r)   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )r0   heads_to_pruner  r   s       r'   _prune_headszPvtModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr)   rr   r   r   r   r   c                 ,   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  ||||      }|d   }|s	|f|dd  z   S t        ||j                  |j                        S )Nrr   r   r   r   r   r   r   )rD   r   r   use_return_dictr  r   r2   r   )r0   rr   r   r   r   encoder_outputssequence_outputs          r'   r5   zPvtModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B],,%/!5#	 ' 
 *!,#%(;;;-)77&11
 	
r)   )NNN)r9   r:   r;   r   r/   r!  r   r    r  r   r   r   r   r   r5   r@   rA   s   @r'   r  r    s    y C  -1/3&*
''
 $D>
 'tn	

 d^
 
uo%	&
 
r)   r  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                        e Zd Zdeddf fdZe	 	 	 	 ddeej                     deej                     dee	   dee	   d	ee	   de
eef   fd
       Z xZS )PvtForImageClassificationrD   r   Nc                 0   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _	        | j                          y )Nr   re   )r.   r/   
num_labelsr  r  r   r   r   r   
classifierr  r  s     r'   r/   z"PvtForImageClassification.__init__  sy      ++F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r)   rr   labelsr   r   r   c                 (   ||n| j                   j                  }| j                  ||||      }|d   }| j                  |dddddf         }d}	|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }
| j
                  dk(  r& |
|j                         |j                               }	n |
||      }	n| j                   j                  dk(  r=t               }
 |
|j                  d| j
                        |j                  d            }	n,| j                   j                  dk(  rt               }
 |
||      }	|s|f|dd z   }|	|	f|z   S |S t        |	||j                   |j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr#  r   r   
regressionsingle_label_classificationmulti_label_classificationre   )losslogitsr2   r   )rD   r$  r  r,  problem_typer+  r   r    longr   r
   squeezer	   r   r   r   r2   r   )r0   rr   r-  r   r   r   r   r&  r3  r2  loss_fctr&   s               r'   r5   z!PvtForImageClassification.forward#  s    &1%<k$++B]B]((%/!5#	  
 "!*Aq!9:{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r)   )NNNN)r9   r:   r;   r   r/   r   r   r    r>   r   r   r   r   r5   r@   rA   s   @r'   r)  r)    s    y T   *.,0/3&*;
u||,;
 &;
 $D>	;

 'tn;
 d^;
 
u++	,;
 ;
r)   r)  )r)  r  r  )r   F)5r<   rR   r   typingr   r   r   r   r    torch.nn.functionalr   r   rn   torch.utils.checkpointtorch.nnr   r	   r
   activationsr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_pvtr   
get_loggerr9   loggerr>   r=   r   r(   Moduler+   rC   r   r   r   r   r   r   r  r  r)  __all__r   r)   r'   <module>rF     sr  "    3 3      A A ! F - Q , ( 
		H	%U\\ e T V[VbVb *-")) -A) A)H	BII 	O		 Od'299 'TRYY 6+ryy +\V
 V
r   > 0
! 0
 0
f K
 2 K
K
\ Jr)   