
    UhD                     X   d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ  ej<                  e      Z  G d de      Z! G d de      Z" G d de      Z# G d de      Z$ G d de      Z% G d de      Z&g dZ'y)zPyTorch BitNet model.    )CallableOptionalTupleN   )Cache)FlashAttentionKwargs)CausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)logging   )GemmaMLP)LlamaAttentionLlamaDecoderLayerLlamaForCausalLM
LlamaModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )BitNetConfigc                       e Zd Zy)BitNetRMSNormN__name__
__module____qualname__     {/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/bitnet/modular_bitnet.pyr   r   *       r   r   c                   *     e Zd Zdef fdZd Z xZS )	BitNetMLPconfigc                 p    t         |   |       t        |j                  |j                        | _        y N)eps)super__init__r   intermediate_sizerms_norm_epsffn_sub_norm)selfr$   	__class__s     r    r)   zBitNetMLP.__init__/   s+     )&*B*BH[H[\r   c           	          | j                  | j                  | j                  | j                  |            | j	                  |      z              }|S )N)	down_projr,   act_fn	gate_projup_proj)r-   xr0   s      r    forwardzBitNetMLP.forward3   sF    NN4#4#4T[[PQAR5SVZVbVbcdVe5e#fg	r   )r   r   r   r   r)   r5   __classcell__r.   s   @r    r#   r#   .   s    ]| ]r   r#   c                   2    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	ee   d
eej                  e	ej                     e	eej                        f   fdZ xZS )BitNetAttentionr$   	layer_idxc                 r    t         |   ||       t        |j                  |j                        | _        y r&   )r(   r)   r   hidden_sizer+   attn_sub_norm)r-   r$   r:   r.   s      r    r)   zBitNetAttention.__init__9   s-    +*6+=+=6CVCVWr   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  r^| j                  j                  dk(  r(|j                  dd      rt        j                  d	       nt         | j                  j                     } || |	|
||f| j"                  sd
n| j$                  | j&                  d|\  }} |j(                  g |d j+                         }| j-                  |      }| j/                  |      }||fS )Nr   r   )sincosrB   eagersdpaoutput_attentionsFz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.g        )dropoutscaling)shapehead_dimq_projview	transposek_projv_projr   updater:   r   r$   _attn_implementationgetloggerwarning_oncer
   trainingattention_dropoutrM   reshape
contiguousr=   o_proj)r-   r>   r?   r@   rA   rB   rC   input_shapehidden_shapequery_states
key_statesvalue_statesrH   rG   cache_kwargsattention_interfaceattn_outputattn_weightss                     r    r5   zBitNetAttention.forward=   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6{{//69fjjI\^c>d##L
 '>dkk>^>^&_#$7	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFH((5kk+.L((r   )NN)r   r   r   r   intr)   torchTensorr   r   r   
LongTensorr   r   r5   r6   r7   s   @r    r9   r9   8   s    X| X X +/591)||1) #5<<#=>1) !.	1)
 !1) !!1!121) -.1) 
u||Xell3XeELL>Q5RR	S1)r   r9   c                       e Zd Zy)BitNetDecoderLayerNr   r   r   r    rm   rm   q   r!   r   rm   c                       e Zd Zy)BitNetModelNr   r   r   r    ro   ro   u   r!   r   ro   c                   2     e Zd ZdgZdZdZdef fdZ xZS )BitNetForCausalLMzlm_head.weightNrD   c                 "    t        |   di |S )a$  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, BitNetForCausalLM

        >>> model = BitNetForCausalLM.from_pretrained("microsoft/bitnet-b1.58-2B-4T")
        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/bitnet-b1.58-2B-4T")

        >>> prompt = f'<|begin_of_text|>User: Hey, are you conscious? Can you talk to me?<|eot_id|>Assistant: '
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=100)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "User: Hey, are you conscious? Can you talk to me?Assistant: No, I'm not conscious. I'm an artificial intelligence designed to assist with information and tasks. How can I help you today?"
        ```r   )r(   r5   )r-   super_kwargsr.   s     r    r5   zBitNetForCausalLM.forward~   s    4 w...r   )	r   r   r   _tied_weights_keys_tp_plan_pp_planr	   r5   r6   r7   s   @r    rq   rq   y   s*    *+HH/ 
 / /r   rq   )rq   ro   BitNetPreTrainedModel)(__doc__typingr   r   r   ri   cache_utilsr   modeling_flash_attention_utilsr   modeling_outputsr	   modeling_utilsr
   processing_utilsr   utilsr   gemma.modeling_gemmar   llama.modeling_llamar   r   r   r   r   r   r   configuration_bitnetr   
get_loggerr   rX   r   r#   r9   rm   ro   rq   __all__r   r   r    <module>r      s     , ,    B 6 5 &  +   / 
		H	%	L 	 6)n 6)r	* 		* 	/( /Dr   