
    ~UhY                     
   d dl Z d dlmZmZmZmZmZ d dlZd dlZd dlm	Z	m
Z
mZmZ d dlmc mZ d dlZd dlmZ d dlmZ d dlmZmZ  edd	      Z G d
 dej                  j4                        Z G d dej                  j4                        Z G d dej                  j8                        Zded   fdZ G d dej>                        Z  G d de       Z! G d de       Z" G d dej                  j8                        Z#d Z$ G d dej4                        Z% G d dej4                        Z& G d  d!e&      Z' G d" d#e&      Z( G d$ d%ej>                        Z) G d& d'ej>                        Z* G d( d)ej>                        Z+y)*    N)AnyOptionalTypeVarUnionoverload)Tensordevicedtypenn)
QuantState)GlobalOptimManager)*INVERSE_LINEAR_8BIT_WEIGHTS_FORMAT_MAPPINGOutlierTracerTztorch.nn.Module)boundc                        e Zd ZdZ	 	 	 	 	 	 	 	 ddededee   dee   deded	ed
ee   ddf fdZ	ddZ
	 ddZdedefdZ xZS )StableEmbeddinga  
    Custom embedding layer designed to improve stability during training for NLP tasks by using 32-bit optimizer states. It is designed to reduce gradient variations that can result from quantization. This embedding layer is initialized with Xavier uniform initialization followed by layer normalization.

    Example:

    ```
    # Initialize StableEmbedding layer with vocabulary size 1000, embedding dimension 300
    embedding_layer = StableEmbedding(num_embeddings=1000, embedding_dim=300)

    # Reset embedding parameters
    embedding_layer.reset_parameters()

    # Perform a forward pass with input tensor
    input_tensor = torch.tensor([1, 2, 3])
    output_embedding = embedding_layer(input_tensor)
    ```

    Attributes:
        norm (`torch.nn.LayerNorm`): Layer normalization applied after the embedding.

    Methods:
        reset_parameters(): Reset embedding parameters using Xavier uniform initialization.
        forward(input: Tensor) -> Tensor: Forward pass through the stable embedding layer.
    Nnum_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparse_weightreturnc                     t         |   |||||||||	|

       t        j                  j	                  ||	      | _        t        j                         j                  | dddi       ya  
        Args:
            num_embeddings (`int`):
                The number of unique embeddings (vocabulary size).
            embedding_dim (`int`):
                The dimensionality of the embedding.
            padding_idx (`Optional[int]`):
                Pads the output with zeros at the given index.
            max_norm (`Optional[float]`):
                Renormalizes embeddings to have a maximum L2 norm.
            norm_type (`float`, defaults to `2.0`):
                The p-norm to compute for the `max_norm` option.
            scale_grad_by_freq (`bool`, defaults to `False`):
                Scale gradient by frequency during backpropagation.
            sparse (`bool`, defaults to `False`):
                Computes dense gradients. Set to `True` to compute sparse gradients instead.
            _weight (`Optional[Tensor]`):
                Pretrained embeddings.
        r	   weight
optim_bits    N)	super__init__torchr   	LayerNormnormr   get_instanceregister_module_override)selfr   r   r   r   r   r   r   r   r	   r
   	__class__s              i/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/bitsandbytes/nn/modules.pyr$   zStableEmbedding.__init__2   sp    @ 		
 HH&&}V&D	'')BB4T`bdSef    c                     t         j                  j                  j                  | j                         | j                          y Nr%   r   initxavier_uniform_r    _fill_padding_idx_with_zeror*   s    r,   reset_parametersz StableEmbedding.reset_parametersa   (    %%dkk2((*r-   c                     | j                   Ft        j                         5  | j                  | j                      j	                  d       d d d        y y # 1 sw Y   y xY wNr   r   r%   no_gradr    fill_r4   s    r,   r3   z+StableEmbedding._fill_padding_idx_with_zerol   S    ' 7D,,-33A67 7 (7 7   )AAinputc           	      ^   t        j                  || j                  | j                  | j                  | j
                  | j                  | j                        }|j                  t        j                               }| j                  |      j                  | j                  j                        S r/   )F	embeddingr    r   r   r   r   r   tor%   get_default_dtyper'   r
   r*   r>   embs      r,   forwardzStableEmbedding.forwardq   s}    kkKKMMNN##KK
 ffU,,./yy~  !2!233r-   )NN       @FFNNNr   N)__name__
__module____qualname____doc__intr   floatboolr   r$   r5   r3   rF   __classcell__r+   s   @r,   r   r      s    : &*$(#($(-g-g -g c]	-g
 5/-g -g !-g -g &!-g 
-g^+7
4V 4 4r-   r   c                        e Zd ZdZ	 	 	 	 	 	 	 ddededee   dee   deded	ed
ee   dee	   ddf fdZ
ddZ	 ddZdedefdZ xZS )	EmbeddingzS
    Embedding class to store and retrieve word embeddings from their indices.
    Nr   r   r   r   r   r   r   r   r	   r   c
                     t         
|   |||||||||		       t        j                         j	                  | dddi       yr   )r#   r$   r   r(   r)   )r*   r   r   r   r   r   r   r   r   r	   r+   s             r,   r$   zEmbedding.__init__   sV    > 	 	 
	
 	'')BB4T`bdSefr-   c                     t         j                  j                  j                  | j                         | j                          y r/   r0   r4   s    r,   r5   zEmbedding.reset_parameters   r6   r-   c                     | j                   Ft        j                         5  | j                  | j                      j	                  d       d d d        y y # 1 sw Y   y xY wr8   r9   r4   s    r,   r3   z%Embedding._fill_padding_idx_with_zero   r<   r=   r>   c           	          t        j                  || j                  | j                  | j                  | j
                  | j                  | j                        }|S r/   )r@   rA   r    r   r   r   r   r   rD   s      r,   rF   zEmbedding.forward   sH    kkKKMMNN##KK
 
r-   )NNrG   FFNNrH   )rI   rJ   rK   rL   rM   r   rN   rO   r   r	   r$   r5   r3   rF   rP   rQ   s   @r,   rS   rS      s     &*$(#($(#'*g*g *g c]	*g
 5/*g *g !*g *g &!*g  *g 
*gX+7
V  r-   rS   c                       e Zd Zddddddej                  ddf	deej                     dee   ded	e	d
e
dej                  ded   de	dd fdZd Zd Zd Zd Ze	 	 	 d&dej                  dee
ef   de	ded   dd f
d       Zed'd       Zd Zd Zd(deeeee
f      de	fdZd(deeeee
f      de	fdZe	 	 	 d)dedeeeef      d eeee
f      de	def
d!       Zed*ded eee
f   de	defd"       Zed*ded#ede	defd$       Z fd%Z xZS )+
Params4bitNF@   Tfp4dataquant_state	blocksizecompress_statistics
quant_typequant_storagemodule
Linear4bitbnb_quantizedr   c
                     |t        j                  d      }t         j                  j                  | ||      }
||
_        ||
_        ||
_        ||
_        ||
_        |	|
_	        ||
_
        ||
_        |
S r8   )r%   emptyr   _make_subclassr^   r_   r`   r]   ra   rd   r\   rb   )clsr\   requires_gradr]   r^   r_   r`   ra   rb   rd   r*   s              r,   __new__zParams4bit.__new__   so     <;;q>D||**3mD"#6 $&**	r-   c                 v    | j                   j                         }| j                  |d<   | j                  |d<   |S )Nr\   ri   )__dict__copyr\   ri   r*   states     r,   __getstate__zParams4bit.__getstate__   s6    ""$		f!%!3!3or-   c                     |d   | _         |d   | _        |d   | _        |d   | _        |d   | _        |d   | _        |d   | _        |d   | _        |d	   | _        y )
Nri   r^   r_   r`   r]   r\   ra   rd   rb   )	ri   r^   r_   r`   r]   r\   ra   rd   rb   rn   s     r,   __setstate__zParams4bit.__setstate__   sr    "?3{+#()>#? - /&M	"?3"?3Hor-   c                    t        |       j                  t        |             }| j                         }|j                  |       t	        j
                  |d         |_        t	        j
                  |d         |_        |S )Nr]   r\   )typerj   rp   rr   rm   deepcopyr]   r\   )r*   memonew_instancero   s       r,   __deepcopy__zParams4bit.__deepcopy__   sg    Dz))$t*5!!#!!%(#'==}1E#F  MM%-8r-   c                     t        |       j                  t        |             }| j                         }|j                  |       |S r/   )rt   rj   rp   rr   )r*   rw   ro   s      r,   __copy__zParams4bit.__copy__  s<    Dz))$t*5!!#!!%(r-   quantized_statsri   c                    t         j                  j                  | |j                  |            }||_        t        j                  ||      |_        |j                  j                  |_        |j                  j                  |_
        |j                  j                  |_        d|_        |j                  |_        ||_        |j                  |j                  |j                  _        |S )N)qs_dictr	   T)r%   r   rg   rB   ri   r   	from_dictr]   r^   nestedr_   r`   rd   r
   ra   rb   )rh   r\   r{   ri   r	   rb   kwargsr*   s           r,   from_prequantizedzParams4bit.from_prequantized  s     ||**3@*%//PVW))33#'#3#3#:#: **55!!ZZ;;"&*&6&6DKK#r-   c                     |i }t         j                  j                         5   ||i |cd d d        S # 1 sw Y   y xY wr/   )r%   _CDisableTorchFunctionSubclass)rh   functypesargsr   s        r,   __torch_function__zParams4bit.__torch_function__%  s>    >FXX224 	)((	) 	) 	)s   5>c                 Z   | j                   j                         j                  |      }t        j                  j                  || j                  | j                  | j                  | j                        \  }}|| _         || _
        | j                  || j                  _
        d| _        | S )N)r^   r_   r`   ra   T)r\   
contiguousrB   bnb
functionalquantize_4bitr^   r_   r`   ra   r]   rb   rd   )r*   r	   ww_4bitr]   s        r,   	_quantizezParams4bit._quantize,  s    II  "%%f-!nn::nn $ 8 8,, ; 
 	&;;"&1DKK#!r-   c                 &    | j                  d      S Ncpur   rB   r4   s    r,   r   zParams4bit.cpu<      wwew$$r-   r	   non_blockingc                 <    | j                  |d|      S ||      S Ncudar	   r   r   r*   r	   r   s      r,   r   zParams4bit.cuda?  '    wwfQ]w^^FQ]w^^r-   c                 <    | j                  |d|      S ||      S Nxpur   r   r   s      r,   r   zParams4bit.xpuB  '    wwv~eP\w]]6P\w]]r-   r*   r
   c                      y r/    r*   r	   r
   r   s       r,   rB   zParams4bit.toE       r-   c                      y r/   r   r*   r
   r   s      r,   rB   zParams4bit.toM      NQr-   tensorc                      y r/   r   r*   r   r   s      r,   rB   zParams4bit.toP      DGr-   c           	         t        j                  j                  j                  |i |\  }}}}|,|j                  dk7  r| j
                  s| j                  |      S | j                  | j                  j                  |       t        t        | !  |||      | j                  | j                  | j                  | j                  | j                  | j                        }|S )Nmetar	   r
   r   )ri   r]   r^   r_   r`   ra   )r%   r   _nn	_parse_tort   rd   r   r]   rB   rY   r#   ri   r^   r_   r`   ra   	r*   r   r   r	   r
   r   convert_to_format	new_paramr+   s	           r,   rB   zParams4bit.toS  s    9>9O9OQU9`Y_9`6|%6&++"7@R@R>>&))+  ##F+"
&L
Q"00 ,,..$($<$<??"00I r-   )Fr   N)r   NNF....) rI   rJ   rK   r%   uint8r   r   r   rM   rO   strr
   rj   rp   rr   rx   rz   classmethoddictr   r   r   r   r   r   r	   r   r   r   r   rB   rP   rQ   s   @r,   rY   rY      s<    (,,0$(%*[[)-#u||$ j)	
  "  {{ &  
4	& 
 $)-ll c3h 	 & 
 2 ) ) %_8E#vs*:$;< _SW _^(5fc)9#:; ^RV ^  03-0 	sF{+, eSj)* 	
 
  QQ5,QDQ1Q QGGFG$GG G r-   rY   rb   )Embedding4bitrc   c                 l   t        | j                  dd       y t        | dd       t        j                  d       | j                  j                  d   dk(  sJ t        | j                  t              s't        | j                  | j                  d      | _        | j                  | j                  _        y )Nr]   zhFP4 quantization state not initialized. Please call .cuda() or .to(device) on the LinearFP4 layer first.   T)ra   rd   )	getattrr    warningswarnshape
isinstancerY   ra   r]   )rb   s    r,   'fix_4bit_weight_quant_state_from_moduler   i  s    v}}mT2>v}d+3v	
 ==q!Q&&&fmmZ0"6==@T@Tdhi & 2 2FMMr-   c                   t     e Zd ZdZddddej
                  df fd	Zd Z fdZdej                  fd	Z
 xZS )
rc   a  
    This class is the base module for the 4-bit quantization algorithm presented in [QLoRA](https://arxiv.org/abs/2305.14314).
    QLoRA 4-bit linear layers uses blockwise k-bit quantization under the hood, with the possibility of selecting various
    compute datatypes such as FP4 and NF4.

    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
    the Linear4bit module, then call `quantized_module.to("cuda")` to quantize the fp16 / bf16 weights.

    Example:

    ```python
    import torch
    import torch.nn as nn

    import bitsandbytes as bnb
    from bnb.nn import Linear4bit

    fp16_model = nn.Sequential(
        nn.Linear(64, 64),
        nn.Linear(64, 64)
    )

    quantized_model = nn.Sequential(
        Linear4bit(64, 64),
        Linear4bit(64, 64)
    )

    quantized_model.load_state_dict(fp16_model.state_dict())
    quantized_model = quantized_model.to(0) # Quantization happens here
    ```
    TNr[   c	                     t         	|   ||||       t        | j                  j                  d||||       | _        || _        d| _        d| _        || _        y)aw  
        Initialize Linear4bit class.

        Args:
            input_features (`str`):
                Number of input features of the linear layer.
            output_features (`str`):
                Number of output features of the linear layer.
            bias (`bool`, defaults to `True`):
                Whether the linear class uses the bias term as well.
        Fri   r_   r`   ra   rb   N)	r#   r$   rY   r    r\   compute_dtypecompute_type_is_setr]   ra   )
r*   input_featuresoutput_featuresbiasr   r_   r`   ra   r	   r+   s
            r,   r$   zLinear4bit.__init__  sb    , 	$G KK 3!'
 +#( *r-   c                 d   |j                   t        j                  t        j                  fv r|j                   | _        y |j                   t        j
                  k(  r| j                  t        j                  k(  rL|j                         |j                  d   k(  r,t        j                  d       t        j                  dd       | j                  t        j                  k(  rN|j                         |j                  d   k7  r-t        j                  d       t        j                  dd       y y y y )NzInput type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference.ignorez.*inference.)messagezInput type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.z.*inference or training)r
   r%   float32bfloat16r   float16numelr   r   r   filterwarnings)r*   xs     r,   set_compute_typezLinear4bit.set_compute_type  s    77u}}enn55 "#DWW%!!U]]2	QWWR[8P  Y ''.I!!U]]2	QWWR[8P k '':ST	 9Q2 &r-   c                 
   t         |   |||       t        | j                  dd      Z| j                  j                  j                  d      j                         D ]"  \  }}|r|n|j                         ||dz   |z   <   $ yy)zc
        save weight and bias,
        then fill state_dict with components of quant_state
        r]   NT)packedzweight.)r#   _save_to_state_dictr   r    r]   as_dictitemsdetach)r*   destinationprefix	keep_varskvr+   s         r,   r   zLinear4bit._save_to_state_dict  s    
 	#KC4;;t4@//77t7DJJL U1;Da!((*FY.23U Ar-   r   c                    t        |        | j                  a| j                  j                  |j                  k7  r>| j                  j                  j	                  |j                        | j                  _        | j
                  s| j                  |       d| _        |j                  }| j                  |j	                  | j                        }| j                  d n$| j                  j	                  | j                        }t        j                  || j                  j                         || j                  j                        j	                  |      S )NT)r   r]   )r   r   r
   r\   rB   r   r   r   r   matmul_4bitr    tr]   )r*   r   	inp_dtyper   s       r,   rF   zLinear4bit.forward  s    /5 99 TYY__%?!YY^^..qww7DIIN''!!!$'+D$GG	)T''(Ayy(tdiill4;M;M.Nq$++--/$++JaJabeefoppr-   )rI   rJ   rK   rL   r%   r   r$   r   r   r   rF   rP   rQ   s   @r,   rc   rc   z  sE    H  kk#+JU(	Uq qr-   rc   c                   B     e Zd ZdZdddej
                  df fd	Z xZS )	LinearFP4z'
    Implements the FP4 data type.
    TNc           
      2    t         |   |||||d||       y)Q  
        Args:
            input_features (`str`):
                Number of input features of the linear layer.
            output_features (`str`):
                Number of output features of the linear layer.
            bias (`bool`, defaults to `True`):
                Whether the linear class uses the bias term as well.
        r[   Nr#   r$   	r*   r   r   r   r   r_   ra   r	   r+   s	           r,   r$   zLinearFP4.__init__  *    & 			
r-   rI   rJ   rK   rL   r%   r   r$   rP   rQ   s   @r,   r   r     s'      kk
 
r-   r   c                   B     e Zd ZdZdddej
                  df fd	Z xZS )	LinearNF4a"  Implements the NF4 data type.

    Constructs a quantization data type where each bin has equal area under a standard normal distribution N(0, 1) that
    is normalized into the range [-1, 1].

    For more information read the paper: QLoRA: Efficient Finetuning of Quantized LLMs (https://arxiv.org/abs/2305.14314)

    Implementation of the NF4 data type in bitsandbytes can be found in the `create_normal_map` function in
    the `functional.py` file: https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236.
    TNc           
      2    t         |   |||||d||       y)r   nf4Nr   r   s	           r,   r$   zLinearNF4.__init__#  r   r-   r   rQ   s   @r,   r   r     s'    	  kk
 
r-   r   c                       e Zd Z	 	 	 	 	 ddeej
                     deej
                     deej
                     fdZ fdZd Zddee	e
eef      defd	Zddee	e
eef      defd
Zd Ze	 	 	 ddedee	e
ef      dee	eef      dedef
d       Zeddede	eef   dedefd       Zeddedededefd       Z fdZ xZS )
Int8Paramsr\   CBSCBc                     |t        j                  d      }t         j                  j                  | ||      }||_        ||_        ||_        |S r8   )r%   rf   r   rg   r   r   has_fp16_weights)rh   r\   ri   r   r   r   objs          r,   rj   zInt8Params.__new__C  sI     <;;q>Dll))#t]C/
r-   c                     | j                   rt        | 	  |      S | j                  j	                         j                  |t
        j                        }t        j                  j                  |      \  }}}|| _        || _
        || _        | S )Nr	   r
   )r   r#   rB   r\   r   r%   r   r   r   int8_vectorwise_quantr   r   )r*   r	   Br   r   _r+   s         r,   r   zInt8Params._quantizeS  sw      7:f%% II  "%%V5==%I^^99!<
C	r-   c                 &    | j                  d      S r   r   r4   s    r,   r   zInt8Params.cpu`  r   r-   r	   r   c                 <    | j                  |d|      S ||      S r   r   r   s      r,   r   zInt8Params.cudac  r   r-   c                 <    | j                  |d|      S ||      S r   r   r   s      r,   r   zInt8Params.xpuf  r   r-   c                 4   t        |       j                  t        |       t        j                  | j                  |      | j
                  | j                  t        j                  | j                  |      t        j                  | j                  |            }|S )N)r\   ri   r   r   r   )	rt   rj   rm   ru   r\   ri   r   r   r   )r*   rv   rw   s      r,   rx   zInt8Params.__deepcopy__i  sr    Dz))Jtyy$/,,!22}}TWWd+dhh- * 
 r-   r*   r
   r   c                      y r/   r   r   s       r,   rB   zInt8Params.tou  r   r-   c                      y r/   r   r   s      r,   rB   zInt8Params.to}  r   r-   r   c                      y r/   r   r   s      r,   rB   zInt8Params.to  r   r-   c                    t        j                  j                  j                  |i |\  }}}}|C|j                  dk7  r4| j
                  j                  j                  dk(  r| j                  |      S t        t        | )  |||      | j                  | j                        }| j                  |_        | j                  |_        |S )Nr   r   r   )ri   r   )r%   r   r   r   rt   r\   r	   r   r   r#   rB   ri   r   r   r   r   s	           r,   rB   zInt8Params.to  s    9>9O9OQU9`Y_9`6|%6&++"7DII<L<L<Q<QUZ<Z>>&))"
&L
Q"00!%!6!6I
  77IL HHIMr-   )NTFNNr   r   r   )rI   rJ   rK   r   r%   r   rj   r   r   r   rM   r	   r   rO   r   r   rx   r   r   r
   rB   rP   rQ   s   @r,   r   r   B  sq    (,%)&*u||$
 U\\" ell# %_8E#vs*:$;< _SW _^(5fc)9#:; ^RV ^
  03-0 	sF{+, eSj)* 	
 
  QQ5,QDQ1Q QGGFG$GG G r-   r   c                 d   | j                  | d      }|y | j                  | dd      }t        |t        j                        r|j                         }t        |t              r|t        vrt        d|       t        |t              r|t        v r	t        |   }|dk7  rt        d|       y )Nr    weight_formatrowz'Expected supported weight format - got z+Only 'row' weight format is supported, got )	getpopr   r%   r   itemrM   r   
ValueError)	
state_dictr   local_metadatastrictmissing_keysunexpected_keys
error_msgsr    r  s	            r,   maybe_rearrange_weightr    s    ^^vhf-.F~NNfX]#;UCM-.%**, -%-?i*iB=/RSS	M3	'M=g,gB=QF}oVWW r-   c                   :     e Zd ZdZd fd	Zd ZdedefdZ xZS )Embedding8bita  
    This class implements [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm for embedding layer

    Quantization API is similar to Linear8bitLt:
    ```python
    import torch
    import torch.nn as nn

    from bitsandbytes.nn import Embedding8bit

    fp16_module = nn.Embedding(128, 64)
    int8_module = Embedding8bit(128, 64)

    int8_module.load_state_dict(fp16_module.state_dict())

    int8_module = int8_module.to(0) # Quantization happens here
    ```
    c                     t         |   ||||       | j                  j                  j                  | _        t        | j                  j                  dd      | _        y )Nr   Fr   ri   )r#   r$   r    r\   r
   r   )r*   r   r   r	   r
   r+   s        r,   r$   zEmbedding8bit.__init__  sK    vUS[[%%++
 !1!1EY^_r-   c                     t        d      )Nz.Saving Embedding8bit module is not implementedNotImplementedErrorr*   r   r   r   s       r,   r   z!Embedding8bit._save_to_state_dict      !"RSSr-   r>   r   c                    t        | j                  d      st        d      | j                  j                  }| j                  j                  }|j
                  | j                  | j                  fk(  sJ |j
                  | j                  fk(  sJ t        j                  ||      }t        j                  ||j                  | j                  d            }||dz  z  }|j                  | j                        S )Nr   zKEmbedding layer is not quantized. Please call .cuda() or .to(device) first.r   g     _@)hasattrr    RuntimeErrorr\   r   r   r   r   r@   rA   viewrB   r
   )r*   r>   rows	row_statscompressed_outputcompressed_output_statsoutputs          r,   rF   zEmbedding8bit.forward  s    t{{E*lmm{{KKOO	zzd1143E3EFFFF4#6#6"8888KKt4"#++eY^^DDWDWYZ5["\"&=&EFyy$$r-   )NN)	rI   rJ   rK   rL   r$   r   r   rF   rP   rQ   s   @r,   r  r    s'    &`T%V % %r-   r  c                   b     e Zd ZdZddej
                  df fd	ZdefdZd Z	dedefd	Z
 xZS )
r   a3  
    This is the base class similar to Linear4bit. It implements the 4-bit quantization algorithm presented in
    [QLoRA](https://arxiv.org/abs/2305.14314) for embeddings.

    Quantization API is similar to Linear4bit:
    ```python
    import torch
    import torch.nn as nn

    from bitsandbytes.nn import Embedding4bit

    fp16_module = nn.Embedding(128, 64)
    quantized_module = Embedding4bit(128, 64)

    quantized_module.load_state_dict(fp16_module.state_dict())

    quantized_module = quantized_module.to(0) # Quantization happens here
    ```
    Nr[   c                 @   t         |   ||||       | j                  j                  j                  | _        t        | j                  j                  dd |||       | _        | j                  j                  }||z  dk7  rt        j                  d| d| d       y y )Nr   Fr   r   zEmbedding size z  is not divisible by block size z#. This will lead to slow inference.)	r#   r$   r    r\   r
   rY   r^   r   r   )	r*   r   r   r
   r`   ra   r	   r^   r+   s	           r,   r$   zEmbedding4bit.__init__  s     	vUS[[%%++
 KK $!'
 KK))	9$)MM!-0PQZP[ \4 4 *r-   r>   c                    | j                   | j                  j                  j                  z  dk(  sJ | j                  j                  j                  t        j                        j                  | j                  | j                   z  dz  d      }t        j                  j                  j                  |j                  | j                  | j                   dz        |      j                  dd      }|j                  |j                         | j                   z  dz  dfk(  sJ | j                   | j                  j                  z  }| j                  j                  j                  }|j                  | j                  |z  fk(  sJ t        j                  j                  j                  |j                  | j                  |      |      j                  d      }|j                  |j                         |z  fk(  sJ t        j                   | j                  j                        }||_        t        j"                  g |j                  | j                         |_        t$        j                  j'                  ||      }|j                  g |j                  | j                   k(  sJ |j)                  | j*                        S )Nr      r   r    r>   r   )r   r    r]   r^   r\   r   r%   r   r   r   r   rA   r   r   absmaxrm   ru   Sizer   dequantize_4bitrB   r
   )	r*   r>   w_4bit_uint8output_4bitblocks_per_embr+  output_absmaxoutput_quant_stater%  s	            r,    _forward_with_partial_dequantizez.Embedding4bit._forward_with_partial_dequantize
  sI   !!DKK$;$;$E$EEJJJ{{'',,U[[9>>t?R?RUYUgUg?gkl?lnophh))33$$T%8%8$:L:LPQ:QR 4 
 $r1+ 	   U[[]T5G5G%G1%La$PPPP++t{{/D/DD((//|| 3 3n DFFFF++55;;t22NC 6 
 $
 	 ""u{{}~'E&GGGG!]]4;;+B+BC$1!#(::.P.PT=O=O.P#Q //=OP||AAd.@.@AAAAyy$$r-   c                     t        d      )Nz.Saving Embedding4bit module is not implementedr  r  s       r,   r   z!Embedding4bit._save_to_state_dict+  r  r-   r   c                    t        |        | j                  | j                  j                  j                  z  dk(  r| j                  |      S t        j                  j                  | j                  j                  | j                  j                        }t        j                  j                  j                  ||      j                  | j                        S )Nr   r*  )r   r   r    r]   r^   r3  r   r   r-  r\   r%   r   rA   rB   r
   )r*   r>   dequantized_weights      r,   rF   zEmbedding4bit.forward.  s    /5 7 7 A AAQF88?? ^^;;DKK<L<LdkkNeNefxx"",,% - 
 "TZZ.	r-   )rI   rJ   rK   rL   r%   r   r$   r   r3  r   rF   rP   rQ   s   @r,   r   r     sE    0 kk:%f %BTV  r-   r   c                   :     e Zd Zdej                  df fd	Z xZS )EmbeddingFP4Nc                 0    t         |   |||d||       y )Nr[   r
   r`   ra   r	   r   r*   r   r   r
   ra   r	   r+   s         r,   r$   zEmbeddingFP4.__init__=  )     	' 	 	
r-   rI   rJ   rK   r%   r   r$   rP   rQ   s   @r,   r8  r8  <      
 kk
 
r-   r8  c                   :     e Zd Zdej                  df fd	Z xZS )EmbeddingNF4Nc                 0    t         |   |||d||       y )Nr   r:  r   r;  s         r,   r$   zEmbeddingNF4.__init__P  r<  r-   r=  rQ   s   @r,   r@  r@  O  r>  r-   r@  c                   r     e Zd ZdZ	 	 	 	 	 d
dedef fdZ fdZ fdZd Zde	j                  fd	Z xZS )Linear8bitLtaZ  
    This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.
    To read more about it, have a look at the paper.

    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
    the Linear8bitLt module, then call `int8_module.to("cuda")` to quantize the fp16 weights.

    Example:

    ```python
    import torch
    import torch.nn as nn

    import bitsandbytes as bnb
    from bnb.nn import Linear8bitLt

    fp16_model = nn.Sequential(
        nn.Linear(64, 64),
        nn.Linear(64, 64)
    )

    int8_model = nn.Sequential(
        Linear8bitLt(64, 64, has_fp16_weights=False),
        Linear8bitLt(64, 64, has_fp16_weights=False)
    )

    int8_model.load_state_dict(fp16_model.state_dict())
    int8_model = int8_model.to(0) # Quantization happens here
    ```
    r   r   c                 V   t         |   ||||       t        j                         | _        || _        || j                  _        || j                  _        |dkD  r|sd| j                  _        t        | j                  j                  ||      | _
        | j                  t               y)ay  
        Initialize Linear8bitLt class.

        Args:
            input_features (`int`):
                Number of input features of the linear layer.
            output_features (`int`):
                Number of output features of the linear layer.
            bias (`bool`, defaults to `True`):
                Whether the linear class uses the bias term as well.
                Tr  N)r#   r$   r   MatmulLtStatero   index	thresholdr   use_poolr   r    r\   "_register_load_state_dict_pre_hookr  )	r*   r   r   r   r   rH  rG  r	   r+   s	           r,   r$   zLinear8bitLt.__init__  s    * 	$G&&(

(

&6

#s?#3"&DJJ !1!1DTdtu//0FGr-   c                    t         	|   |||       d}t        | j                  |      }t        | j                  |      }|| z   }|dz   }| j                  j
                  s|@|r|n|j                         ||<   t        j                  dt        j                        ||<   y |@|r|n|j                         ||<   t        j                  dt        j                        ||<   y y y )Nr   r  r   )r
   )
r#   r   r   r    ro   r   r   r%   r   r   )
r*   r   r   r   scb_nameparam_from_weightparam_from_statekey_nameformat_namer+   s
            r,   r   z Linear8bitLt._save_to_state_dict  s    #KC  $DKK:"4::x8xj) .zz** ,=F(9L]LdLdLfH%+0<<+MK(!-<E(8K[KbKbKdH%+0<<+MK( .	 +r-   c           	         t         |   |||||||       t        |      }|D ]  }	|	t        |      d  }
|
dk(  s| j                  j
                  t        d      ||	   }| j                  j
                  j                  |       | j                  j
                  %| j                  j
                  | j                  _        |j                  |	        y )Nr   zLoading a quantized checkpoint into non-quantized Linear8bitLt is not supported. Please call module.cuda() before module.load_state_dict())
r#   _load_from_state_dictlistlenr    r   r  copy_ro   remove)r*   r  r   r  r  r  r  r  unexpected_copykey
input_nameinput_paramr+   s               r,   rR  z"Linear8bitLt._load_from_state_dict  s     	%	
 /" 	,CS[]+JU";;??*&c 
 )o%%k2::>>-%)[[__DJJN&&s+!	,r-   c                     | j                   j                  | j                  _        | j                   j                  | j                  _        d | j                   _        d | j                   _        y r/   r    r   ro   r   r4   s    r,   init_8bit_statezLinear8bitLt.init_8bit_state  >    



r-   r   c                 r   | j                   | j                  _        | j                  j                  | j                          | j                  a| j                  j                  |j                  k7  r>| j                  j                  j                  |j                        | j                  _        t        j                  || j                  | j                  | j                        }| j                  j                  s;| j                  j                  %| j                  j                  | j                  _        |S N)r   ro   )trainingro   is_trainingr    r   r]  r   r
   r\   rB   r   matmulr   r*   r   outs      r,   rF   zLinear8bitLt.forward  s    !%

;;>>%  " 99 TYY__%?!YY^^..qww7DIINjjDKKdiitzzJzz**tzz}}/H#zz}}DKK
r-   )TTrE  NN)rI   rJ   rK   rL   rM   r$   r   rR  r]  r%   r   rF   rP   rQ   s   @r,   rC  rC  b  sV    F  H H  HDN0%,N r-   rC  c                   2     e Zd Zd fd	Zd Zd Zd Z xZS )OutlierAwareLinearc                 F    t         |   ||||       d | _        d| _        y r   )r#   r$   outlier_dimis_quantized)r*   r   r   r   r	   r+   s        r,   r$   zOutlierAwareLinear.__init__  s&    $G!r-   c                     t        d      )NzJPlease override the `forward_with_outliers(self, x, outlier_idx)` functionr  )r*   r   outlier_idxs      r,   forward_with_outliersz(OutlierAwareLinear.forward_with_outliers   s    !"noor-   c                     t        d      )NzEPlease override the `quantize_weights(self, w, outlier_idx)` functionr  )r*   r   rl  s      r,   quantize_weightz"OutlierAwareLinear.quantize_weight  s    !"ijjr-   c                 |   | j                   Qt        j                         }|j                         st	        d       |j                  | j                        }|| _         | j                  sS| j                  | j                  | j                         }| j                  j                  j                  |       d| _        y y )NzTPlease use OutlierTracer.initialize(model) before using the OutlierAwareLinear layerT)ri  r   r(   is_initializedprintget_outliersr    rj  ro  r\   rU  )r*   r   tracerrl  r   s        r,   rF   zOutlierAwareLinear.forward  s    #"//1F((*lm --dkk:K*D  $$T[[$2B2BCAKK""1% $D !r-   )TN)rI   rJ   rK   r$   rm  ro  rF   rP   rQ   s   @r,   rg  rg    s    "
pk%r-   rg  c                   8     e Zd Z	 	 	 	 	 	 d fd	Zd Zd Z xZS )SwitchBackLinearBnbc	                 N   t         	|   ||||       t        j                         | _        || _        || j                  _        || j                  _        || j                  _        |dkD  r|sd| j                  _	        t        | j                  j                  ||      | _        y )NrE  Tr  )r#   r$   r   rF  ro   rG  rH  r   memory_efficient_backwardrI  r   r    r\   )
r*   r   r   r   r   rx  rH  rG  r	   r+   s
            r,   r$   zSwitchBackLinearBnb.__init__  s     	$G&&(

(

&6

#/H

,s?#3"&DJJ !1!1DTdtur-   c                     | j                   j                  | j                  _        | j                   j                  | j                  _        d | j                   _        d | j                   _        y r/   r\  r4   s    r,   r]  z#SwitchBackLinearBnb.init_8bit_state-  r^  r-   c                 2   | j                   | j                  _        | j                  j                  | j                          t        j                  |j                         | j                  j                         d | j                        | j                  z   }y r`  )
ra  ro   rb  r    r   r]  r   matmul_mixedhalfr   rd  s      r,   rF   zSwitchBackLinearBnb.forward3  se    !%

;;>>%  "qvvx)9)9);$djjY\`\e\eer-   )TTFrE  NN)rI   rJ   rK   r$   r]  rF   rP   rQ   s   @r,   rv  rv    s(    
 "'v.fr-   rv  ),rm   typingr   r   r   r   r   r   r%   r   r	   r
   r   torch.nn.functionalr   r@   bitsandbytesr   bitsandbytes.functionalr   bitsandbytes.optimr   bitsandbytes.utilsr   r   r   rS   r   	ParameterrY   r   Linearrc   r   r   r   r  r  r   r8  r@  rC  rg  rv  r   r-   r,   <module>r     s[  
  : :   + +    . 1
 C()g4ehh(( g4TL"" L^U## Up3E:W4X 3"vq vqr!

 !
H(

 (
VO## OdX*,%BLL ,%^aBLL aH
= 
&
= 
&U299 Up% %6$f")) $fr-   