
    Uh                        d dl mZmZmZ d dlZd dlZd dlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZmZmZmZmZmZ d
dlmZ ddlmZ  ej@                  e!      Z" G d de      Z# G d de      Z$ G d de      Z% G d de      Z& G d de      Z' G d de      Z( G d de      Z) G d de      Z* G d d e      Z+g d!Z,y)"    )CallableOptionalTupleN)nn   )Cache)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     y/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/qwen2/modular_qwen2.pyr!   zQwen2MLP.__init__    ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r!   __classcell__r*   s   @r+   r   r      s    Y Yr,   r   c                   2    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	ee   d
eej                  e	ej                     e	eej                        f   fdZ xZS )Qwen2Attentionr)   	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        y )NTr   F)r    r!   r   r"   r#   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projr(   r)   r4   r*   s      r+   r!   zQwen2Attention.__init__(   s    +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejkr,   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}d }| j                  j                  rPt        | j                  dd       9| j                  | j                  j                  k\  r| j                  j                  }t        }| j                  j                   dk7  r^| j                  j                   dk(  r(|j#                  dd	      rt$        j'                  d
       nt(        | j                  j                      } || |	|
||f| j*                  sdn| j,                  | j.                  |d|\  }} |j0                  g |d j3                         }| j5                  |      }||fS )Nr   r   )sincosrB   sliding_windoweagersdpaoutput_attentionsFz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.g        )dropoutscalingrI   )shaper7   r8   view	transposer:   r;   r   updater4   r)   use_sliding_windowgetattrmax_window_layersrI   r   _attn_implementationgetloggerwarning_oncer
   trainingattention_dropoutrN   reshape
contiguousr<   )r(   r>   r?   r@   rA   rB   rC   input_shapehidden_shapequery_states
key_statesvalue_statesrH   rG   cache_kwargsrI   attention_interfaceattn_outputattn_weightss                      r+   forwardzQwen2Attention.forward/   s/    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$JKK**%5t<H$++"?"??![[77N(?;;++w6{{//69fjjI\^c>d##L
 '>dkk>^>^&_#$7
%
  $}}C$2H2HLL)
%
 
%
!\ *k));;;;FFHkk+.L((r,   )NN)r-   r.   r/   r   intr!   torchTensorr   r   r   
LongTensorr   r	   rg   r0   r1   s   @r+   r3   r3   '   s    l{ ls l +/598)||8) #5<<#=>8) !.	8)
 !8) !!1!128) -.8) 
u||Xell3XeELL>Q5RR	S8)r,   r3   c                   (     e Zd Zdedef fdZ xZS )Qwen2DecoderLayerr)   r4   c                     t         |           t        ||      | _        t	        |      | _        |j                  r4|j                  dk7  r$t        j                  d|j                   d       y y y )N)r)   r4   flash_attention_2z=Sliding Window Attention is enabled but not implemented for `z)`; unexpected results may be encountered.)
r    r!   r3   	self_attnr   mlprS   rV   rX   rY   r=   s      r+   r!   zQwen2DecoderLayer.__init__k   sp    'vKF#$$)D)DH[)[OPVPkPkOl m9 9 *\$r,   )r-   r.   r/   r   rh   r!   r0   r1   s   @r+   rm   rm   j   s    { s  r,   rm   c                       e Zd Zy)Qwen2PreTrainedModelNr-   r.   r/    r,   r+   rs   rs   v       r,   rs   c                       e Zd Zy)
Qwen2ModelNrt   ru   r,   r+   rx   rx   z   rv   r,   rx   c                       e Zd Zy)Qwen2ForCausalLMNrt   ru   r,   r+   rz   rz   ~   rv   r,   rz   c                       e Zd Zy)Qwen2ForSequenceClassificationNrt   ru   r,   r+   r|   r|      rv   r,   r|   c                       e Zd Zy)Qwen2ForTokenClassificationNrt   ru   r,   r+   r~   r~      rv   r,   r~   c                       e Zd Zy)Qwen2ForQuestionAnsweringNrt   ru   r,   r+   r   r      rv   r,   r   )rs   rx   rz   r|   r~   r   )-typingr   r   r   ri   torch.utils.checkpointr   cache_utilsr   modeling_flash_attention_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   mistral.modeling_mistralr   configuration_qwen2r   
get_loggerr-   rX   r   r3   rm   rs   rx   rz   r|   r~   r   __all__ru   r,   r+   <module>r      s    , ,      B 5 &    4 , 
		H	%Yx Y@)^ @)F	) 		/ 		 		' 		%C 		"= 		 9 	r,   