
    ~Uh!7                     |   d dl mZ d dlmZ d dlmZmZ d dlZd dlmZ d dl	Z	d dl
mZ d dlmZ 	  G d d      Z ed	e
      dee	j"                  ge	j"                  f   deeef   fd       Z ee	j,                  d      re	j,                  j.                  Zne	j2                  j.                  Z ed	e
      de	j"                  de	j4                  de	j"                  fd       Ze G d d             Z G d de	j:                  j<                        Z G d de	j:                  j<                        Z 	 	 	 	 d!de	j"                  de	j"                  dee	j"                     dee   dee	j"                     f
dZ!	 	 d"de	j"                  de	j"                  dejD                  dee	j"                     dee	j"                     f
d Z#y)#    )	dataclass)prod)CallableOptionalN)warn)
deprecatedc                   8    e Zd ZdZd Zd Zed        Zd Zd Z	y)GlobalOutlierPoolerNc                     t        d      )NzCall get_instance() instead)RuntimeErrorselfs    r/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/bitsandbytes/autograd/_functions.py__init__zGlobalOutlierPooler.__init__   s    899    c                 0    t               | _        d | _        y N)setoutliers	model_dimr   s    r   
initializezGlobalOutlierPooler.initialize   s    r   c                     | j                   0| j                  |       | _         | j                   j                          | j                   S r   )	_instance__new__r   )clss    r   get_instancez GlobalOutlierPooler.get_instance!   s6    == KK,CMMM$$&}}r   c                     | j                   || _         || j                   k7  ry | j                  j                  |j                                y r   )r   r   updatetolist)r   outlier_idxfeature_dims      r   add_outliersz GlobalOutlierPooler.add_outliers(   s=    >>!(DN$..([//12r   c                     t        j                  t        | j                              j	                  t         j
                        S r   )torchTensorlistr   toint64r   s    r   get_current_outlier_idxz+GlobalOutlierPooler.get_current_outlier_idx0   s)    ||D/033EKK@@r   )
__name__
__module____qualname__r   r   r   classmethodr   r"   r)    r   r   r
   r
      s0    I:  3Ar   r
   zDThis function is deprecated and will be removed in a future release.)categorytransform_tile	tile_sizec                 j   |\  }}d||z  cxk  rdk  sJ  J t        j                  ||z  t         j                        j                  ||      }t        j                  |      }t        d      D ]  }t        j                  |d|z  d      dz  }|dz
  j                  t         j                        j                         }t        j                  |j                         dz   |k(        sJ d	        | |      }	|	j                  |j                        dz   }
||
d|z  z  z  }||z  d|z  k  s |S  |S )
a  
    Compute a permutation of indices that invert the specified (tiled) matrix transformation

    :param transform_tile: a function that applies forward transform to a tensor of shape [dim1, dim2]
    :param tile_size: higher-level tile dimensions, i.e. (8, 32) for Turing and (32, 32) for Ampere
    :note: we assume that tile_transform applies to a cpu-based int8 tensor of shape tile_size
    :example: transform_tile function for the turing layout (bitsandbytes.functional as F)
    :returns: indices
    r   l            dtype      trunc)rounding_mode   zint overflow)r$   aranger(   view
zeros_likerangedivr'   int8
contiguousallintr4   )r0   r1   d1d2tile_indicespermuted_tile_indicesiith_dim_indicessample_tile_ipermuted_tile_iith_permuted_indicess              r   get_inverse_transform_indicesrL   4   s7   " FBrBw<<Ru{{;@@RHL!,,\:1X 	))L#q&PSVV(3.225::>IIKyy**,s2oEFVVF(7.11,2D2DEK!5a!@@7S!V  	 ! r   is_compilingpermuted_tensorrE   returnc                    | j                   |j                   c\  }}\  }}||z  ||z  cxk(  rdk(  sJ d        J d       | j                  d|j                               j                         }t	        j
                  |      }|||j                         <   |j                  ||||z  ||z        }|j                  dddd      }|j                  ||      j                         S )a  
    Undo a tiled permutation such as turing or ampere layout

    :param permuted_tensor: torch tensor in a permuted layout
    :param tile_indices: reverse transformation indices, from get_inverse_transform_indices
    :return: contiguous row-major tensor
    r   z+tensor must contain a whole number of tiles         )	shapereshapenumeltr$   
empty_likeflattenpermuter@   )rN   rE   rowscols	tile_rows	tile_colstensoroutputss           r   undo_layoutrb   ^   s     ,;+@+@,BTBT(LT4(9i)ti/414c6cc4c6cc4$$R););)=>@@BFv&G&,GL  "#ooiDI4EtyGXYGooaAq)G??4&1133r   c                      e Zd ZU dZeej                     ed<   dZe	ed<   dZ
eej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed	<   dZeej                     ed
<   dZeej                     ed<   dZeej                     ed<   dZee   ed<   dZdZdZeej                     ed<   dZdZdZdZd Zed        Zy)MatmulLtStateN_tile_indicesFforce_no_igemmltCBCxBSBSCBCxBtSBtCBtsubBoutlier_pool        idxTrowc                 f    d | _         d | _        d | _        d | _        d | _        d | _        d | _        y r   )rg   rh   ri   rj   rk   rl   rm   r   s    r   reset_gradszMatmulLtState.reset_grads   s3    	r   c                     t        d      )Nz$tile_indices is no longer supported.)
ValueErrorr   s    r   rE   zMatmulLtState.tile_indices   s    ?@@r   )r*   r+   r,   re   r   r$   r%   __annotations__rf   boolrg   rh   ri   rj   rk   rl   rm   rn   ro   r
   has_accumulated_gradients	thresholdrq   is_traininghas_fp16_weightsuse_poolformatBrt   propertyrE   r.   r   r   rd   rd   t   s   ,0M8ELL)0"d"!%B%"&C%,,	&!%B%"&C%,,	&#'D(5<<
 '"&C%,,	&"&C%,,	&#'D(5<<
 '26L(./6 %I"&C%,,	&KHG A Ar   rd   c                   F   e Zd Ze	 	 	 ddej
                  j                  j                  dej                  dej                  de	ej                     de	ej                     de	e
   fd       Zedej
                  j                  j                  d	ej                  fd
       Zy)MatMul8bitLtNctxABoutbiasstatec           	      	   |xs
 t               }d| _        t        |j                        dk(  rd| _        || _        || _        || _        |j                  d   |j                  d   k(  rIt        j                  |j                  d d |j                  dd  z   |j                  |j                        S t        j                  |j                  d d |j                  d d z   |j                  |j                        S |j                  }|j                  t        j                  k7  r-t               s#t        j                  d|j                   d       t        |j                        d	k(  r|j!                  d|j                  d         }| j"                  d   rEt%        j&                  |j)                  t        j                        |j*                  
      \  }}}	}
}nFt%        j,                  |j)                  t        j                        |j*                  
      \  }}	}d x}}
d}|j.                  s|j0                  t3        |dd       d u}|j5                          xr! |j                  d   |j7                  d      k(  }|r|j9                         }|j:                  r|r|j0                  |j<                  P|j?                          t%        j,                  |j)                  t        j                              \  |_        |_        }|j*                  dkD  rN||_         t        jB                  jD                  jG                  |||j0                  |	|j<                  ||      \  }}nYt        jB                  jD                  jH                  jK                  ||j0                  |	|j<                  ||j                        }d }|| _&        || _'        |j                  | _(        |d n|j                  | _)        tU        | j"                  d d       r|||f| _+        |
|j@                  f| _,        n"g d| _+        d| _,        | j[                  d d        g |d d |j0                  j                  d   }t        |      d	k(  r|j!                  |      S |S )NFr   TrQ   rT   r4   devicez'MatMul8bitLt: inputs will be cast from z to float16 during quantizationrR   )rz   gradrp   )r   r4   rS   NNNNN).rd   is_emptyr   rU   r   r   r   r$   emptyr4   r   float16_is_compilingwarningsr   lenrV   needs_input_gradFint8_double_quantr'   rz   int8_vectorwise_quantr|   rg   getattris_contiguousstrider@   r{   rj   rt   rq   opsbitsandbytesint8_mixed_scaled_mmint8_scaled_mmdefaultr   
grad_shapedtype_A
dtype_biasanytensorstensor_statessave_for_backward)r   r   r   r   r   r   input_shapeCACAtSCASCAtoutlier_colshas_gradis_transposed_outputsubAoutput_shapes                     r   forwardzMatMul8bitLt.forward   s    ( =ACLCECECHwwr{aggaj({{1773B<!''!"+#=QWWUVU]U]^^{{1773B<!''"1+#=QWWUVU]U]^^gg 77emm#MOMMCAGG9Lklmqww<1		"aggbk*A "/0/B/B144CVbgbqbq/r,BS$ %&$;$;ADD<O[`[j[j$k!B\C$!!UXX%5q&$/t;H ! 11OaggajAHHQK6OMLLN!!(uxx7G599K\!!# *+)@)@emmAT)U&%)Q ??S $EI !9911FF		LFD YY++::BBEHHc5994qww C F D 	$gg!%4::s##BQ'(a.CK!%uyy 1C,CK ,C!!$-=Sb)=588>>!+<={q >>,//r   grad_outputc                    | j                   rn| j                  d nt        j                  | j                        }t        j                  | j                        t        j                  | j
                        d |d fS | j                  \  }}}}}| j                  \  }}}	| j                  \  }
}| j                  }d x}x}}|r|j                  d| j                        }t        |j                        dk(  r-|j                  d|j                  d         j                         }|rt!        j"                  |j%                  t        j&                              \  }}}}}t        j(                  j*                  j,                  j/                  |j1                         j                         |j1                         ||
t        j&                        }|j2                  dkD  rH|F|j5                         dkD  r3|d d |fxx   t        j6                  |j1                         |      z  cc<   |r|j8                  |j8                  j%                  | j:                  d      j=                  |j>                  jA                  d      jC                  d	            }t        j6                  |j%                  | j:                        |      jE                  | jF                        }ntI        d
      ||d |d fS )Nr   r3   rR   rQ   rp   T)copyrT   g@ ?z)State must contain CB matrix for backward)%r   r   r$   r<   r   r   r   r   r   r   sumr   r   rU   rV   r@   r   r   r'   r   r   r   r   r   rX   rz   rW   matmulrg   r   mul_rj   	unsqueezemulr;   r   	Exception)r   r   	bias_grad	req_gradA	req_gradBr   req_gradBiasr   r   r   r   rq   r   grad_Agrad_B	grad_biasCgradSCgradtrg   s                      r   backwardzMatMul8bitLt.backward  sK   << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ363G3G0	9aq{{T1%%	c"yy&***)#@I {  !Q&%--b+2C2CB2GHSSUK&'&9&9+..:W&X#E1a!YY++::BB	$$&mm C F $)9djjlQ>Nq#v%,,{}}"EExx#XX[[4[8==eii>Q>QRS>T>X>XYd>efknnS[[&A2FKKCNN[ KLLvtY44r   r   )r*   r+   r,   staticmethodr$   autogradfunctionFunctionCtxr%   r   rd   r   r   r.   r   r   r   r      s    
 '+'+)-`^^$$00`<<` <<` ell#	`
 u||$` &` `D (5enn--99 (5 (5 (5r   r   c                   N    e Zd Zeddeej                     fd       Zed        Zy)
MatMul4BitNquant_statec                 X   d| _         t        |j                        dk(  rd| _         || _        || _        || _        |j                  }|j                  d   |d   k(  r?t        j                  |j                  d d |dd  z   |j                  |j                        S t        j                  |j                  d d |d d z   |j                  |j                        S t        j                  j                  j                  |t        j                  ||      j                  |j                        j!                         |      }|| _        |j                  |j                  |d n|j                  c| _        | _        | _        t+        | j,                  d d       rd |f| _        |S d| _        |S )	NFr   TrQ   rT   r   rS   r   )r   r   rU   r   r   r   r$   r   r4   r   nn
functionallinearr   dequantize_4bitr'   rX   r   r   dtype_Br   r   r   r   )r   r   r   r   r   r   B_shaper   s           r   r   zMatMul4Bit.forward1  sn    =ACLCECECH!''Gwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ $$++Aq/@/@K/P/S/STUT[T[/\/^/^/`bfg  	3477AGGT\T_c_i_i0S[#.s##BQ'()CK  'CKr   c                 J   | j                   rn| j                  d nt        j                  | j                        }t        j                  | j                        t        j                  | j
                        d |d fS | j                  \  }}}}}| j                  \  }}d\  }}}	|r|j                  d| j                        }	|r[t        j                  |t        j                  || j                        j                  |j                        j!                               }||d |	d fS )Nr   r   r3   )r   r   r$   r<   r   r   r   r   r   r   r   r   r   r   r'   r4   rX   )
r   r   r   r   r   r   r   r   r   r   s
             r   r   zMatMul4Bit.backwardO  s    << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ+.+?+?(	1aq{{1$4!	#@I \\+q/@/@CII/N/Q/QR]RcRc/d/f/f/hiFvtY44r   r   )	r*   r+   r,   r   r   r   
QuantStater   r   r.   r   r   r   r   -  s:     Xall=S  : 5 5r   r   r   r   r   r   r   c                 h    |xs
 t               }|dkD  r||_        t        j                  | ||||      S )Nrp   )rd   rz   r   apply)r   r   r   r   rz   r   s         r   r   r   f  s7     $]_E3#aCu55r   r   c                    |J | j                         | j                  d   k(  r| j                  dk(  r| j                  d   |j                  z  dk7  r>t	        d|j                   d| j                          t
        j                  | ||||      S t        j                  | |j                         ||      }|||z  }|S t
        j                  | ||||      S )NrQ   Fr   z4Some matrices hidden dimension is not a multiple of z^ and efficient inference kernels are not supported for these (slow). Matrix input size found: )r   )
rW   rU   requires_grad	blocksizer   r   r   r   	gemv_4bitrX   )r   r   r   r   r   s        r   matmul_4bitr   t  s     """wwyAGGBKAOOu$<772;...!3F{G\G\F]  ^|  }~  }D  }D  |E  F ##Aq#t[AA++a;?CtJ1c4==r   )NNrp   Nr   )$dataclassesr   mathr   typingr   r   r   r   r$   typing_extensionsr   bitsandbytes.functionalr   r   r
   FutureWarningr%   tuplerB   rL   hasattrcompilerrM   r   _dynamo
LongTensorrb   rd   r   Functionr   r   r   r   r   r.   r   r   <module>r      s   !  %    ( #A A: J!ell^U\\9:!S#X!	!@ 5>>>*NN//MMM..M J4 4U=M=M 4RWR^R^ 4	4$ %A %A %APM55>>** M5`65(( 65x #'%)#'6||6||6 
%,,	6 M"	6 5<<
 6$ #'#'>||>||> > 
%,,		>
 5<<
 >r   