Ë
    ´ãUhD  ã                   óX  — d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ  ej<                  e«      Z  G d„ de«      Z! G d„ de«      Z" G d„ de«      Z# G d„ de«      Z$ G d„ de«      Z% G d„ de«      Z&g d¢Z'y)zPyTorch BitNet model.é    )ÚCallableÚOptionalÚTupleNé   )ÚCache)ÚFlashAttentionKwargs)ÚCausalLMOutputWithPast)ÚALL_ATTENTION_FUNCTIONS)ÚUnpack)Úloggingé   )ÚGemmaMLP)ÚLlamaAttentionÚLlamaDecoderLayerÚLlamaForCausalLMÚ
LlamaModelÚLlamaRMSNormÚapply_rotary_pos_embÚeager_attention_forwardé   )ÚBitNetConfigc                   ó   — e Zd Zy)ÚBitNetRMSNormN©Ú__name__Ú
__module__Ú__qualname__© ó    ú{/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/bitnet/modular_bitnet.pyr   r   *   ó   „ Ør   r   c                   ó*   ‡ — e Zd Zdefˆ fd„Zd„ Zˆ xZS )Ú	BitNetMLPÚconfigc                 óp   •— t         ‰|   |«       t        |j                  |j                  ¬«      | _        y ©N)Úeps)ÚsuperÚ__init__r   Úintermediate_sizeÚrms_norm_epsÚffn_sub_norm)Úselfr$   Ú	__class__s     €r    r)   zBitNetMLP.__init__/   s+   ø€ Ü‰Ñ˜Ô Ü)¨&×*BÑ*BÈ×H[ÑH[Ô\ˆÕr   c           	      ó¦   — | j                  | j                  | j                  | j                  |«      «      | j	                  |«      z  «      «      }|S )N)Ú	down_projr,   Úact_fnÚ	gate_projÚup_proj)r-   Úxr0   s      r    ÚforwardzBitNetMLP.forward3   sF   € Ø—N‘N 4×#4Ñ#4°T·[±[ÀÇÁÐPQÓARÓ5SÐVZ×VbÑVbÐcdÓVeÑ5eÓ#fÓgˆ	ØÐr   )r   r   r   r   r)   r5   Ú__classcell__©r.   s   @r    r#   r#   .   s   ø„ ð]˜|õ ]ör   r#   c                   ó2  ‡ — e Zd Zdedefˆ fd„Z	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	ee   d
eej                  e	ej                     e	eej                        f   fd„Zˆ xZS )ÚBitNetAttentionr$   Ú	layer_idxc                 ór   •— t         ‰|   ||«       t        |j                  |j                  ¬«      | _        y r&   )r(   r)   r   Úhidden_sizer+   Úattn_sub_norm)r-   r$   r:   r.   s      €r    r)   zBitNetAttention.__init__9   s-   ø€ Ü‰Ñ˜ Ô+Ü*¨6×+=Ñ+=À6×CVÑCVÔWˆÕr   Úhidden_statesÚposition_embeddingsÚattention_maskÚpast_key_valueÚcache_positionÚkwargsÚreturnc                 óØ  — |j                   d d }g |¢d‘| j                  ‘­}| j                  |«      j                  |«      j	                  dd«      }	| j                  |«      j                  |«      j	                  dd«      }
| j                  |«      j                  |«      j	                  dd«      }|\  }}t        |	|
||«      \  }	}
|'|||dœ}|j                  |
|| j                  |«      \  }
}t        }| j                  j                  dk7  r^| j                  j                  dk(  r(|j                  dd«      rt        j                  d	«       nt         | j                  j                     } || |	|
||f| j"                  sd
n| j$                  | j&                  dœ|¤Ž\  }} |j(                  g |¢d‘­Ž j+                  «       }| j-                  |«      }| j/                  |«      }||fS )Néÿÿÿÿr   r   )ÚsinÚcosrB   ÚeagerÚsdpaÚoutput_attentionsFzã`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.g        )ÚdropoutÚscaling)ÚshapeÚhead_dimÚq_projÚviewÚ	transposeÚk_projÚv_projr   Úupdater:   r   r$   Ú_attn_implementationÚgetÚloggerÚwarning_oncer
   ÚtrainingÚattention_dropoutrM   ÚreshapeÚ
contiguousr=   Úo_proj)r-   r>   r?   r@   rA   rB   rC   Úinput_shapeÚhidden_shapeÚquery_statesÚ
key_statesÚvalue_statesrH   rG   Úcache_kwargsÚattention_interfaceÚattn_outputÚattn_weightss                     r    r5   zBitNetAttention.forward=   sñ  € ð $×)Ñ)¨#¨2Ð.ˆØ8˜Ð8 bÐ8¨$¯-©-Ñ8ˆà—{‘{ =Ó1×6Ñ6°|ÓD×NÑNÈqÐRSÓTˆØ—[‘[ Ó/×4Ñ4°\ÓB×LÑLÈQÐPQÓRˆ
Ø—{‘{ =Ó1×6Ñ6°|ÓD×NÑNÈqÐRSÓTˆà&‰ˆˆSÜ#7¸ÀjÐRUÐWZÓ#[Ñ ˆjàÐ%à#&¨sÀnÑUˆLØ'5×'<Ñ'<¸ZÈÐW[×WeÑWeÐgsÓ'tÑ$ˆJ˜ä(?Ðà;‰;×+Ñ+¨wÒ6Ø{‰{×/Ñ/°6Ò9¸f¿j¹jÐI\Ð^cÔ>dÜ×#Ñ#ðLõô
 '>¸d¿k¹k×>^Ñ>^Ñ&_Ð#á$7ØØØØØð	%
ð  $Ÿ}š}‘C°$×2HÑ2HØ—L‘Lñ	%
ð ñ	%
Ñ!ˆ\ð *k×)Ñ)Ð;¨;Ð;¸Ò;×FÑFÓHˆØ×(Ñ(¨Ó5ˆØ—k‘k +Ó.ˆØ˜LÐ(Ð(r   )NN)r   r   r   r   Úintr)   ÚtorchÚTensorr   r   r   Ú
LongTensorr   r   r5   r6   r7   s   @r    r9   r9   8   sÅ   ø„ ðX˜|ð X¸õ Xð +/Ø59ñ1)à—|‘|ð1)ð # 5§<¡<°·±Ð#=Ñ>ð1)ð ! §¡Ñ.ð	1)ð
 ! ™ð1)ð ! ×!1Ñ!1Ñ2ð1)ð Ð-Ñ.ð1)ð 
ˆu|‰|˜X e§l¡lÑ3°X¸eÀEÇLÁLÑ>QÑ5RÐRÑ	S÷1)r   r9   c                   ó   — e Zd Zy)ÚBitNetDecoderLayerNr   r   r   r    rm   rm   q   r!   r   rm   c                   ó   — e Zd Zy)ÚBitNetModelNr   r   r   r    ro   ro   u   r!   r   ro   c                   ó2   ‡ — e Zd ZdgZdZdZdefˆ fd„Zˆ xZS )ÚBitNetForCausalLMzlm_head.weightNrD   c                 ó"   •— t        ‰|   di |¤ŽS )a$  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```r   )r(   r5   )r-   Úsuper_kwargsr.   s     €r    r5   zBitNetForCausalLM.forward~   s   ø€ ô4 ‰w‰Ñ. Ñ.Ð.r   )	r   r   r   Ú_tied_weights_keysÚ_tp_planÚ_pp_planr	   r5   r6   r7   s   @r    rq   rq   y   s*   ø„ Ø*Ð+ÐØ€HØ€Hð/ð 
 ÷/ñ /r   rq   )rq   ro   ÚBitNetPreTrainedModel)(Ú__doc__Útypingr   r   r   ri   Úcache_utilsr   Úmodeling_flash_attention_utilsr   Úmodeling_outputsr	   Úmodeling_utilsr
   Úprocessing_utilsr   Úutilsr   Úgemma.modeling_gemmar   Úllama.modeling_llamar   r   r   r   r   r   r   Úconfiguration_bitnetr   Ú
get_loggerr   rX   r   r#   r9   rm   ro   rq   Ú__all__r   r   r    ú<module>r…      s£   ðñ ç ,Ñ ,ã å  Ý BÝ 6Ý 5Ý &Ý Ý +÷÷ ñ õ /ð 
ˆ×	Ñ	˜HÓ	%€ô	Lô 	ôô ô6)nô 6)ôr	Ð*ô 	ô	*ô 	ô/Ð(ô /òDr   