
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import LossKwargs, logging
from ..glm.modeling_glm import (
    GlmAttention,
    GlmForCausalLM,
    GlmForSequenceClassification,
    GlmForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm4 import Glm4Config
from .modeling_glm4 import Glm4RMSNorm


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/GLM-4-9B-Chat-0414"


class Glm4MLP(Phi3MLP):
    pass


class Glm4DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Glm4Config, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size

        self.self_attn = Glm4Attention(config=config, layer_idx=layer_idx)

        self.mlp = Glm4MLP(config)
        self.input_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_self_attn_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_mlp_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = self.post_self_attn_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = self.post_mlp_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)
        if output_attentions:
            outputs += (self_attn_weights,)

        return outputs


class Glm4Attention(GlmAttention):
    pass


class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...


class Glm4ForCausalLM(GlmForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[KwargsForCausalLM],
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-Chat-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-Chat-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Glm4ForSequenceClassification(GlmForSequenceClassification):
    pass


class Glm4ForTokenClassification(GlmForTokenClassification):
    pass


__all__ = [
    "Glm4PreTrainedModel",
    "Glm4Model",
    "Glm4ForCausalLM",
    "Glm4ForSequenceClassification",
    "Glm4ForTokenClassification",
]