
    Uh:]                        d dl Z d dlmZmZmZ d dlZd dlmZ d dlmc m	Z
 ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ d	d
lmZmZmZmZmZmZmZmZmZ ddl m!Z!  ejD                  e#      Z$ G d de      Z% G d de      Z& G d dejN                        Z( G d de      Z) G d dejN                        Z* G d dejN                        Z+ G d dejN                        Z, G d dejN                        Z- G d dejN                        Z. G d d ee      Z/eZ0 G d! d"e      Z1 G d# d$e      Z2 G d% d&e      Z3 G d' d(e      Z4 G d) d*e      Z5g d+Z6y),    N)OptionalTupleUnion   )is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutputWav2Vec2BaseModelOutput)PreTrainedModel)logging   )	Wav2Vec2FeatureProjectionWav2Vec2FeedForward#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PositionalConvEmbeddingWav2Vec2PreTrainedModel   )WavLMConfigc                       e Zd Zy)WavLMPositionalConvEmbeddingN__name__
__module____qualname__     y/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/wavlm/modular_wavlm.pyr   r          r    r   c                       e Zd Zy)WavLMFeatureProjectionNr   r   r    r!   r$   r$   "   r"   r    r$   c                       e Zd ZdZ	 	 	 	 ddedededededef fdZ	 	 	 	 dd	ej                  d
e
ej                     de
ej                     dedeej                  e
ej                     e
eej                        f   f
dZd	ej                  d
eej                  ej                   f   dej                  dedej                  ej                  ff
dZdededej                  fdZdej                  dej                  fdZ xZS )WavLMAttentionz=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropoutnum_bucketsmax_distancehas_relative_position_biasc                    t         |           || _        || _        || _        ||z  | _        | j
                  |z  | j                  k7  rt        d| j                   d| d      | j
                  dz  | _        t        j                  ||      | _
        t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        || _        || _        t        j                   t#        j$                  d| j                  dd            | _        t        j                  | j
                  d      | _        |r0t        j*                  | j                  | j                        | _        y y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      r      )super__init__r'   r(   r)   head_dim
ValueErrorscalingnnLineark_projv_projq_projout_projr*   r+   	Parametertorchonesgru_rel_pos_constgru_rel_pos_linear	Embeddingrel_attn_embed)selfr'   r(   r)   r*   r+   r,   	__class__s          r!   r0   zWavLMAttention.__init__)   s7    	""!Y.MMI%$..8MdnnM]$YKr3  }}d*ii	95ii	95ii	95		)Y7&(!#ejjDNNAq.Q!R"$))DMM1"=%"$,,t/?/?"PD &r    hidden_statesattention_maskposition_biasoutput_attentionsreturnc                     |j                         \  }}}|S| j                  ||      }|j                  d      j                  |ddd      j	                  || j
                  z  ||      }|j	                  |j                  dd | j
                  dfz         }	|	j                  dddd      }	| j                  |	      }
|
j	                  |	j                  dd dz         j                  d      }
t        j                  |
      j                  dd      \  }}||| j                  z  d	z
  z  d
z   }|j	                  || j
                  z  dd      |z  }|j	                  d||f      }| j                  ||||      \  }}|||fS )z'Attention layer with relative attentionNr   r   r   r   )r      dim      ?g       @)sizecompute_bias	unsqueezerepeatviewr(   shapepermuter>   sumr;   sigmoidchunkr=   torch_multi_head_self_attention)rA   rC   rD   rE   rF   indexbsztgt_len_gated_hidden_statesrelative_position_projgate_agate_bgate_outputgated_position_biasattn_outputattn_weightss                    r!   forwardzWavLMAttention.forwardM   s    (,,.Wa   --gw?M''*11#q!Q?DDS4>>EY[bdkl  ,001D1DSb1IT^^]_L`1`a199!Q1E "&!8!89L!M!7!<!<=P=V=VWZXZ=[^d=d!e!i!ijl!m '=>DDQBDO)?)? ?# EFL *..sT^^/CRKm[166GW7MN$($H$H>+>@Q%
!\ L-77r    rb   c                 X   |j                  dd      x}x}}||j                  d      nd}dx}	}
d}t        j                  |||| j                  | j
                  t        j                  dg      t        j                  | j                  j                  | j                  j                  | j                  j                  f      |	|
|| j                  | j                  j                  | j                  j                  | j                   |||d| j                  j                  | j                  j                  | j                  j                        \  }}|j                  dd      }|C|dddf   j#                  |j$                  dd | j
                  fz   |j$                  dd z         }||fS )zCsimple wrapper around torch's multi_head_attention_forward functionr   r   NFT)use_separate_proj_weightq_proj_weightk_proj_weightv_proj_weight)	transposeneFmulti_head_attention_forwardr'   r(   r;   emptycatr8   biasr6   r7   r)   r9   weighttrainingbroadcast_torS   )rA   rC   rD   rb   rF   querykeyvaluekey_padding_maskbias_kbias_vadd_zero_attnrc   rd   s                 r!   rX   z.WavLMAttention.torch_multi_head_self_attentionv   s    ,55a;;;e3A3M>,,Q/SW  %&$B$BNNNNKKIIt{{'')9)94;;;K;KLMLLMM  MMMM%)++,,++,,++,,+%
!\2 "++Aq1# (40==""2A&$..)::\=O=OPQPR=SSL L((r    query_length
key_lengthc                    t        j                  |t         j                        d d d f   }t        j                  |t         j                        d d d f   }||z
  }| j                  |      }|j	                  | j
                  j                  j                        }| j                  |      }|j                  g d      }|S )N)dtype)r   r   r   )	r;   arangelong_relative_positions_buckettor@   rr   devicerT   )rA   r|   r}   context_positionmemory_positionrelative_positionrelative_position_bucketvaluess           r!   rO   zWavLMAttention.compute_bias   s     <<EJJG4P,,zDT1WM+.>>#'#B#BCT#U #;#>#>t?R?R?Y?Y?`?`#a $$%=>	*r    relative_positionsc                 $   | j                   dz  }|dkD  j                  t        j                        |z  }t        j                  |      }|dz  }||k  }t        j
                  |j                         |z        }|t        j
                  | j                  |z        z  }|||z
  z  }||z   j                  t        j                        }t        j                  |t        j                  ||dz
              }|t        j                  |||      z  }|S )Nr   r   r   )r*   r   r;   r   abslogfloatmathr+   min	full_likewhere)rA   r   r*   relative_buckets	max_exactis_smallrelative_positions_if_largerelative_position_if_larges           r!   r   z)WavLMAttention._relative_positions_bucket   s   &&!+.266uzzB[P"YY'9:1$	%	1&+ii0B0H0H0JY0V&W#&ADHHTM^M^ajMjDk&k#&A[S\E\&]#&/2M&M%Q%QRWR\R\%]"%*YY&8RT_bcTc(d&
" 	EKK2DF`aar    )        i@  i   TNNFr   )r   r   r   __doc__intr   boolr0   r;   Tensorr   r   re   FloatTensorr   
LongTensor
BoolTensorrX   rO   r   __classcell__rB   s   @r!   r&   r&   &   s   G +/"Q"Q "Q 	"Q
 "Q "Q %)"QN 2604"''8||'8 !.'8  -	'8
  '8 
u||Xell3XeELL>Q5RR	S'8R5)((5) e..0@0@@A5) #..	5)
  5) 

U..	/5)n # %BSBS  U=N=N  SXSdSd  r    r&   c                       e Zd Zy)WavLMFeedForwardNr   r   r    r!   r   r      r"   r    r   c                   2     e Zd Zddedef fdZddZ xZS )WavLMEncoderLayerconfigr,   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y N)r'   r(   r)   r*   r+   r,   epsr/   r0   r&   hidden_sizenum_attention_headsattention_dropoutr*   max_bucket_distance	attentionr4   Dropouthidden_dropoutr)   	LayerNormlayer_norm_eps
layer_normr   feed_forwardfinal_layer_normrA   r   r,   rB   s      r!   r0   zWavLMEncoderLayer.__init__       '((00,,**33'A
 zz&"7"78,,v'9'9v?T?TU,V4 "V-?-?VEZEZ [r    c                     |}| j                  |||||      \  }}}| j                  |      }||z   }| j                  |      }|| j                  |      z   }| j	                  |      }||f}|r||fz  }|S )NrD   rE   rF   rY   )r   r)   r   r   r   )	rA   rC   rD   rE   rF   rY   attn_residualrd   outputss	            r!   re   zWavLMEncoderLayer.forward   s    %59^^)'/ 6D 6
2|] ]3%56%(9(9-(HH--m< -0&Gr    Tr   r   r   r   r   r   r0   re   r   r   s   @r!   r   r      s    \{ \ \r    r   c                   2     e Zd Zddedef fdZddZ xZS ) WavLMEncoderLayerStableLayerNormr   r,   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y r   r   r   s      r!   r0   z)WavLMEncoderLayerStableLayerNorm.__init__   r   r    c                     |}| j                  |      }| j                  ||||      \  }}}| j                  |      }||z   }|| j                  | j	                  |            z   }||f}|r||fz  }|S )N)rD   rE   rF   )r   r   r)   r   r   )rA   rC   rD   rE   rF   r   rd   r   s           r!   re   z(WavLMEncoderLayerStableLayerNorm.forward	  s    %659^^)'/	 6D 6
2|] ]3%5%(9(9$:O:OP]:^(__ -0&Gr    r   )NNFr   r   s   @r!   r   r      s    \{ \ \r    r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w Nr   r   )r,   F)r/   r0   r   r   pos_conv_embedr4   r   r   r   r   r   r   r)   
ModuleListrangenum_hidden_layersr   layersgradient_checkpointingrA   r   irB   s      r!   r0   zWavLMEncoder.__init__  s    :6B,,v'9'9v?T?TUzz&"7"78mmUZ[a[s[sUtuPQv16Ku
 ',# v   !Cc                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }| j                  |      }t               xs t        |       }
d }t        | j                        D ]  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
rM| j                  r,| j                  r | j!                  |j"                  ||||      }n ||||||      }|d d \  }}|rd}|s|d   fz   } |r||fz   }|st%        d |||fD              S t'        |||	      S )
Nr   rI   r   r   r   r   NNNc              3   &   K   | ]	  }||  y wNr   .0vs     r!   	<genexpr>z'WavLMEncoder.forward.<locals>.<genexpr>i       mq_`_lm   last_hidden_staterC   
attentions)rP   rQ   rS   r   r   r)   r   r   	enumerater   r;   randrs   r   	layerdropr   _gradient_checkpointing_func__call__tupler	   rA   rC   rD   rF   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusrE   r   layerdropout_probabilityskip_the_layerlayer_outputss                    r!   re   zWavLMEncoder.forward*  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;6]302R6LT6R!$++. !	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![..4==$($E$E%&%)%M %*%'5&3*;%M 0=Ra/@,} 2 &9]1=M<O&O#C!	PF   1]4D Dm]4EGZ$[mmm++*
 	
r    NFFTr   r   r   r0   re   r   r   s   @r!   r   r     s    	, "D
r    r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderStableLayerNormc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w r   )r/   r0   r   r   r   r4   r   r   r   r   r   r   r)   r   r   r   r   r   r   r   s      r!   r0   z$WavLMEncoderStableLayerNorm.__init__r  s    :6B,,v'9'9v?T?TUzz&"7"78mm v778 1UVZ[U[]
 ',#r   c                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }t               xs t        |       }
d }t        | j                        D ]  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
rL| j                  r,| j                  r | j                  |j                   ||||      }n |||||      }|d d \  }}|rd}|s|d   fz   } | j#                  |      }|r||fz   }|st%        d |||fD              S t'        |||	      S )
Nr   rI   r   r   r   )rD   rF   rE   r   c              3   &   K   | ]	  }||  y wr   r   r   s     r!   r   z6WavLMEncoderStableLayerNorm.forward.<locals>.<genexpr>  r   r   r   )rP   rQ   rS   r   r)   r   r   r   r   r;   r   rs   r   r   r   r   r   r   r   r	   r   s                    r!   re   z#WavLMEncoderStableLayerNorm.forward  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;]302R6LT6R!$++.  	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![ ..4==$($E$E%&%)%M %*%'5*;&3	%M 0=Ra/@,} 2 &9]1=M<O&O#A 	PD 6 1]4D Dm]4EGZ$[mmm+;LYl
 	
r    r   r   r   s   @r!   r   r   q  s    ," "B
r    r   c                   8     e Zd ZdZ fdZed        Zd Z xZS )WavLMGumbelVectorQuantizerz
    Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
    GUMBEL-SOFTMAX](https://arxiv.org/pdf/1611.01144.pdf) for more information.
    c                 0   t         |           |j                  | _        |j                  | _        |j                  | j                  z  dk7  r&t        d|j                   d| j                   d      t        j                  t        j                  d| j                  | j
                  z  |j                  | j                  z              | _        t        j                  |j                  d   | j                  | j
                  z        | _        d| _        y )Nr   z`config.codevector_dim z5 must be divisible by `config.num_codevector_groups` z for concatenation.r   rI   r   )r/   r0   num_codevector_groups
num_groupsnum_codevectors_per_groupnum_varscodevector_dimr2   r4   r:   r;   r   codevectorsr5   conv_dimweight_projtemperature)rA   r   rB   s     r!   r0   z#WavLMGumbelVectorQuantizer.__init__  s     6688  4??2a7)&*?*?)@ A66:oo5F G%%  <<a4==!@&BWBW[_[j[jBjk
 99V__R%8$//DMM:YZ r    c           	          | j                  d      }t        j                  t        j                  |t        j                  |dz         z  d             j                         }|S )Nr   rK   gHz>rI   )meanr;   exprU   r   )probsmarginal_probs
perplexitys      r!   _compute_perplexityz.WavLMGumbelVectorQuantizer._compute_perplexity  sR    *YY		.599^VZEZ;[*[ac ddeiik
r    c                    |j                   \  }}}| j                  |      }|j                  ||z  | j                  z  d      }| j                  rt
        j                  j                  |j                         | j                  d      }|j                  |      }t        j                  |j                  ||z  | j                  d      j                         d      }| j                  |      }n}|j                  d      } |j                  |j                    j!                  d|j                  dd      d      }|j                  ||z  | j                  d      }| j                  |      }|j                  ||z  d      }|j#                  d      | j$                  z  }	|	j                  ||z  | j                  | j&                  d      }
|
j)                  d      j                  ||d      }
|
|fS )NrI   T)tauhardrK   r   rM   )rS   r  rR   r   rs   r4   
functionalgumbel_softmaxr   r  type_asr;   softmaxr
  argmax	new_zerosscatter_rP   r   r   rU   )rA   rC   
batch_sizesequence_lengthr   codevector_probscodevector_soft_distr	  codevector_idxcodevectors_per_groupr   s              r!   re   z"WavLMGumbelVectorQuantizer.forward  s   3@3F3F0
O[ ((7%**:+G$//+Y[]^==!}};;M<O<O<QW[WgWgnr;s/77F $)=="":#?RTU[[]ce$  112FGJ +11b19N6}668K8KLUUN''A.   044Z/5QSWSbSbdfg112BCJ+00o1MrR 0 : :2 >AQAQ Q+00o1Mt`d`m`moqr!oob)..z?BOJ&&r    )	r   r   r   r   r0   staticmethodr
  re   r   r   s   @r!   r   r     s&    
*  
"'r    r   c                   <    e Zd ZeZdZdZdZdZdZ	d Z
d Zd Zd Zy	)
WavLMPreTrainedModelwavlminput_valuesTFc           
      z   t        |t              r|j                  j                  j                  j                  dd       |j                  j                  j                  j                          t        j                  j                  |j                         yt        |t              rt        j                  j                  |j                  j                  ddt        j                  d|j                  j                   d   |j                  j"                  z  z        z         t        j                  j%                  |j                  j                  d       yt        |t&              rt        j                  d|j(                  j*                  z        }t        j                  j                  |j(                  j                  | |       t        j                  j                  |j(                  j                  | |       yt        |t        j,                        rm|j                  j                  j                  d| j.                  j0                         |j                  %|j                  j                  j                          yyt        |t        j2                  t        j4                  f      rJ|j                  j                  j                          |j                  j                  j7                  d       yt        |t        j8                        rt        j                  j;                  |j                         |j                  jt        j                  |j<                  |j"                  |j                   d   z  z        }t        j                  j                  |j                  | |       yyy)	zInitialize the weightsr   r   )r  stdr   r   )abNrM   )
isinstancer   r  rr   datanormal_rq   zero_r4   inituniform_r   r   convr   sqrtkernel_sizein_channels	constant_r$   
projectionin_featuresr5   r   initializer_ranger   	GroupNormfill_Conv1dkaiming_normal_groups)rA   moduleks      r!   _init_weightsz"WavLMPreTrainedModel._init_weights  s    f89%%**222C##((..0GGV//0 <=GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2 67		!f//;;;<AGGV..55!qAGGV..33rQ?		*MM&&CT[[5R5R&S{{&  &&( 'r|| <=KK""$MM$$S)		*GG##FMM2{{&IIfmmv/A/AFDVDVWXDY/YZ[  a 8 ' +r    c                     t        d      NzNot needed for WavLMAttributeErrorrA   s    r!   _get_adaptersz"WavLMPreTrainedModel._get_adapters4      344r    c                     t        d      r<  r=  r?  s    r!   init_adapter_layersz(WavLMPreTrainedModel.init_adapter_layers7  rA  r    c                     t        d      r<  r=  r?  s    r!   load_adapterz!WavLMPreTrainedModel.load_adapter:  rA  r    N)r   r   r   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdpar:  r@  rC  rE  r   r    r!   r  r    s8    L$O&*#"N9B555r    r  c                       e Zd Zy)
WavLMModelNr   r   r    r!   rM  rM  A  r"   r    rM  c                       e Zd Zy)WavLMForCTCNr   r   r    r!   rO  rO  E  r"   r    rO  c                       e Zd Zy)WavLMForSequenceClassificationNr   r   r    r!   rQ  rQ  I  r"   r    rQ  c                       e Zd Zy) WavLMForAudioFrameClassificationNr   r   r    r!   rS  rS  M  r"   r    rS  c                       e Zd Zy)WavLMForXVectorNr   r   r    r!   rU  rU  Q  r"   r    rU  )rS  rO  rQ  rU  rM  r  )7r   typingr   r   r   r;   torch.nnr4   torch.nn.functionalr  rm   integrations.deepspeedr   integrations.fsdpr   modeling_outputsr	   r
   modeling_utilsr   utilsr   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   configuration_wavlmr   
get_loggerr   loggerr   r$   Moduler&   r   r   r   r   r   r   r  WavLMBaseModelOutputrM  rO  rQ  rS  rU  __all__r   r    r!   <module>re     sD    ) )     @ 7 H - 
 
 
 - 
		H	%	#B 		6 	c RYY c L	* 	&		 &R"ryy "JP
299 P
fQ
")) Q
hC' C'L05?,C 05f / 	 		. 		%F 		'J 		( 	r    