
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import LossKwargs, logging
from ..glm.modeling_glm import (
    GlmAttention,
    GlmForCausalLM,
    GlmForSequenceClassification,
    GlmForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm4 import Glm4Config
from .modeling_glm4 import Glm4RMSNorm


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/GLM-4-9B-Chat-0414"


class Glm4MLP(Phi3MLP):
    pass


class Glm4DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Glm4Config, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size

        self.self_attn = Glm4Attention(config=config, layer_idx=layer_idx)

        self.mlp = Glm4MLP(config)
        self.input_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_self_attn_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_mlp_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = self.post_self_attn_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = self.post_mlp_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)
        if output_attentions:
            outputs += (self_attn_weights,)

        return outputs


class Glm4Attention(GlmAttention):
    pass


class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...


class Glm4ForCausalLM(GlmForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[KwargsForCausalLM],
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-Chat-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-Chat-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Glm4ForSequenceClassification(GlmForSequenceClassification):
    pass


class Glm4ForTokenClassification(GlmForTokenClassification):
    pass


__all__ = [
    "Glm4PreTrainedModel",
    "Glm4Model",
    "Glm4ForCausalLM",
    "Glm4ForSequenceClassification",
    "Glm4ForTokenClassification",
]