
    Uh4,                        d dl mZmZmZ d dlZd dlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ d	d
lmZmZmZmZmZmZmZ ddlmZ dZ G d dej8                        Z G d de      Z G d de      Z G d dej8                        Z  G d de      Z! G d de      Z"e G d de             Z# G d dee#      Z$ G d de      Z% G d d e      Z&g d!Z'y)"    )OptionalTupleUnionN   )ACT2FN)is_deepspeed_zero3_enabled)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2EncoderWav2Vec2EncoderStableLayerNormWav2Vec2FeatureEncoderWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ModelWav2Vec2SamePadLayer   )HubertConfigc                   $     e Zd Z fdZd Z xZS )HubertPositionalConvEmbeddingc                    t         |           t        j                  |j                  |j                  |j
                  |j
                  dz  |j                        | _        d | _        |j                  r&t        j                  |j                        | _        nt        j                  j                  }t        t        j                  j                  d      r$t        j                  j                  j                  }t               r(dd l}|j"                  j%                  | j                  j&                  d      5   || j                  dd      | _        d d d        t        | j                  d      rU| j                  j                  j&                  j(                  }| j                  j                  j&                  j*                  }n,| j                  j,                  }| j                  j.                  }|j"                  j1                  | |       |j"                  j1                  | |       n || j                  dd      | _        t3        |j
                        | _        t6        |j8                     | _        y # 1 sw Y   'xY w)	Nr   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr   hasattrr"   r   	deepspeedzeroGatheredParametersr   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r0   r5   r6   	__class__s         {/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/hubert/modular_hubert.pyr$   z&HubertPositionalConvEmbedding.__init__   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@ hh77CC)+ ^^66tyy7G7GWX6Y M +DIIH! LDIM499&89#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@M Ms   ?I??J	c                     |j                  dd      }| j                  | j                  |      }| j                  |      }| j                  |      }| j	                  |      }|j                  dd      }|S )Nr   r   )	transposer+   r*   r   r:   r;   hidden_statess     r>   forwardz%HubertPositionalConvEmbedding.forward@   sn    %//15??& OOM:M		-0]36%//15    __name__
__module____qualname__r$   rC   __classcell__r=   s   @r>   r   r      s    #AJ	rD   r   c                       e Zd Zy)r8   NrF   rG   rH    rD   r>   r8   r8   L       rD   r8   c                       e Zd Zy)HubertFeatureEncoderNrL   rM   rD   r>   rP   rP   P   rN   rD   rP   c                   $     e Zd Z fdZd Z xZS )HubertFeatureProjectionc                 n   t         |           |j                  | _        | j                  r3t        j                  |j
                  d   |j                        | _        t        j                  |j
                  d   |j                        | _
        t        j                  |j                        | _        y )N)eps)r#   r$   feat_proj_layer_normr%   	LayerNormconv_dimlayer_norm_eps
layer_normLinearr'   
projectionDropoutfeat_proj_dropoutdropoutr;   r<   r=   s     r>   r$   z HubertFeatureProjection.__init__U   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rD   c                     | j                   r| j                  |      }| j                  |      }| j                  |      }|S )N)rV   rZ   r\   r_   rA   s     r>   rC   zHubertFeatureProjection.forward]   s;    $$ OOM:M6]3rD   rE   rJ   s   @r>   rR   rR   T   s    <rD   rR   c                       e Zd Zy)HubertEncoderNrL   rM   rD   r>   rc   rc   f   rN   rD   rc   c                       e Zd Zy)HubertEncoderStableLayerNormNrL   rM   rD   r>   re   re   j   rN   rD   re   c                   x    e Zd ZeZdZdZdZdZdZ	d Z
deej                  ef   fdZdedej                  fd	Zy
)HubertPreTrainedModelhubertinput_valuesTc                 z   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                  t        j                  t        j                  f      rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t        j                        r_t               rddl}t#        |d      r|t#        |d      rp|j$                  j'                  |j(                  |j*                  gd      5  t        j,                  j/                  |j                  j                         ddd       n|j$                  j'                  |j                  d      5  t        j,                  j/                  |j                  j                         ddd       n3t        j,                  j/                  |j                  j                         |j                  %|j                  j                  j                          yyt        |t0              r2t#        |d	      r%|j2                  j                  j5                          yyt        |t6              rMt#        |d
      r@|j8                  j                  j                  d| j                  j:                  dz   z         yyy# 1 sw Y   xY w# 1 sw Y   xY w)zInitialize the weights        )meanstdNg      ?r   r6   r5   r   masked_spec_embedlayer_weightsr   )
isinstancer%   r[   r   datanormal_r<   initializer_rangebiaszero_rW   	GroupNormr-   fill_r&   r   r0   r/   r1   r2   r6   r5   initkaiming_normal_HubertModelrn   uniform_HubertForSequenceClassificationro   num_hidden_layers)r;   moduler0   s      r>   _init_weightsz#HubertPreTrainedModel._init_weightsw   sP   fbii( MM&&CT[[5R5R&S{{&  &&( 'r||R^^ LMKK""$MM$$S)		*)+ 6:.76:3N"::FOOV__;]mn:o D//0B0BCD D #::6==XY:Z D//0B0BCD D ''(:(:;{{&  &&( ',v23((--668 4 ?@v/$$))//t{{7T7TWX7X0YZ 0 AD DD Ds   ?4L%#4L1%L.1L:input_lengthsc                     d }t        | j                  j                  | j                  j                        D ]  \  }} ||||      } |S )zH
        Computes the output length of the convolutional layers
        c                 >    t        j                  | |z
  |d      dz   S )Nfloor)rounding_moder   )torchdiv)input_lengthr   strides      r>   _conv_out_lengthzPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length   s"     99\K7wWZ[[[rD   )zipr<   conv_kernelconv_stride)r;   r   r   r   r   s        r>    _get_feat_extract_output_lengthsz6HubertPreTrainedModel._get_feat_extract_output_lengths   sQ    
	\
 $'t{{'>'>@W@W#X 	QK,]KPM	Q rD   feature_vector_lengthattention_maskc                    | j                  |j                  d            j                  t        j                        }|j
                  d   }t        j                  ||f|j                  |j                        }d|t        j                  |j
                  d   |j                        |dz
  f<   |j                  dg      j                  d      j                  dg      j                         }|S )NrT   r   )dtypedevicer   )r   )r   sumtor   longshapezerosr   r   arangeflipcumsumbool)r;   r   r   output_lengths
batch_sizes        r>   "_get_feature_vector_attention_maskz8HubertPreTrainedModel._get_feature_vector_attention_mask   s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
./~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrD   N)rF   rG   rH   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_2_supports_sdpar   r   r   
LongTensorintr   r   rM   rD   r>   rg   rg   n   sa    L $O&*#!N[BeEDTDTVYDY>Z 
 
]b]m]m 
rD   rg   c                        e Zd Zdef fdZd Zd Z	 	 	 	 	 ddeej                     deej                     deej                     dee   d	ee   d
ee   deeef   fdZ xZS )rz   r<   c                    t         |   |       || _        t        |      | _        t        |      | _        |j                  dkD  s|j                  dkD  rEt        j                  t        j                  |j                        j                               | _        |j                   rt#        |      | _        nt'        |      | _        | j)                          | `y )Nrk   )r#   r$   r<   rP   feature_extractorrR   feature_projectionmask_time_probmask_feature_probr%   	Parameterr   Tensorr'   r{   rn   do_stable_layer_normre   encoderrc   	post_initadapterr`   s     r>   r$   zHubertModel.__init__   s     !5f!="9&"A  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	LrD   c                     t        d      NzNot needed for HubertAttributeErrorr;   s    r>   freeze_feature_extractorz$HubertModel.freeze_feature_extractor       455rD   c                     t        d      r   r   r   s    r>   freeze_feature_encoderz"HubertModel.freeze_feature_encoder   r   rD   ri   r   mask_time_indicesoutput_attentionsoutput_hidden_statesreturn_dictreturnc                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  |      }|j                  dd      }|| j                  |j                  d   |      }| j                  |      }| j                  ||      }| j                  |||||      }	|	d   }|s	|f|	dd z   S t        ||	j                  |	j                        S )an  
        mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
            masked extracted features in *config.proj_codevector_dim* space.

        Example:

        ```python
        >>> from transformers import AutoProcessor, HubertModel
        >>> from datasets import load_dataset
        >>> import soundfile as sf

        >>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
        >>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


        >>> def map_to_array(batch):
        ...     speech, _ = sf.read(batch["file"])
        ...     batch["speech"] = speech
        ...     return batch


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
        >>> hidden_states = model(input_values).last_hidden_state
        ```Nr   r   )r   )r   r   r   r   r   )last_hidden_staterB   
attentions)r<   r   r   use_return_dictr   r@   r   r   r   _mask_hidden_statesr   r	   rB   r   )
r;   ri   r   r   r   r   r   extract_featuresrB   encoder_outputss
             r>   rC   zHubertModel.forward   s,   J 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;%!DDEUE[E[\]E^`noN//0@A00Rc0d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rD   )NNNNN)rF   rG   rH   r   r$   r   r   r   r   r   FloatTensorr   r   r   r	   rC   rI   rJ   s   @r>   rz   rz      s    | &66 269=,0/3&*F
u||,F
 !.F
 $E$5$56	F

 $D>F
 'tnF
 d^F
 
uo%	&F
rD   rz   c                       e Zd Zy)HubertForCTCNrL   rM   rD   r>   r   r     rN   rD   r   c                       e Zd Zy)r|   NrL   rM   rD   r>   r|   r|     rN   rD   r|   )r   r|   rz   rg   )(typingr   r   r   r   torch.nnr%   activationsr   integrations.deepspeedr   modeling_outputsr	   modeling_utilsr
   r.   r   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   configuration_hubertr   _HIDDEN_STATES_START_POSITIONModuler   r8   rP   rR   rc   re   rg   rz   r   r|   __all__rM   rD   r>   <module>r      s    ) )   ! @ / - #   / !" /BII /d	- 		1 	bii $	O 		#A 	 BO B BJ`
-!6 `
F	> 		&G 	 frD   