
    Uh                     B   d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
c mZ ddlZddlm
Z
 ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+m,Z,  e'       rddl-m.Z. ddl/m0Z0  e)jb                  e2      Z3 G d de
jh                        Z5 e"jl                  e5        G d de
jh                        Z7 G d de7      Z8 G d de7      Z9d Z:dOdZ; G d d e
jh                        Z< G d! d"e
jz                        Z>d#ej~                  d$e@d%ej~                  fd&ZA	 dPd'e
jh                  d(ej~                  d)ej~                  d*ej~                  d+eej~                     d,eBd-eBfd.ZC G d/ d0e
jh                        ZD G d1 d2e
jh                        ZE G d3 d4e
jh                        ZF G d5 d6e
jh                        ZG G d7 d8e
jh                        ZH G d9 d:e
jh                        ZI G d; d<e
jh                        ZJ G d= d>e
jh                        ZK G d? d@      ZLe% G dA dBe             ZM e%dCD       G dE dFeM             ZNe% G dG dHeM             ZO G dI dJee$      ZP e%dKD       G dL dMeMe             ZQg dNZRy)QzPyTorch Chameleon model.    )cached_property)CallableOptionalTupleUnionN)nn   )ACT2FN)CacheDynamicCache)GenerationMixin)AttentionMaskConverter)FlashAttentionKwargs)BaseModelOutputWithPastCausalLMOutputWithPast)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ALL_LAYERNORM_LAYERS)
LossKwargsauto_docstringcan_return_tupleis_torch_flex_attn_availableis_torchdynamo_compilinglogging   )ChameleonConfigChameleonVQVAEConfig)	BlockMask)make_flex_block_causal_maskc                   ,     e Zd Zd fd	Zd Zd Z xZS )ChameleonRMSNormc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z?
        ChameleonRMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      /var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/chameleon/modeling_chameleon.pyr%   zChameleonRMSNorm.__init__8   s1     	ll5::k#:; #    c                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )N   T)keepdim)	dtypetor'   float32powmeanrsqrtr*   r)   )r+   hidden_statesinput_dtypevariances       r/   forwardzChameleonRMSNorm.forward@   sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r0   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tupler)   shaper*   r+   s    r/   
extra_reprzChameleonRMSNorm.extra_reprG   s*    ))*+6$2G2G1HIIr0   )ư>)__name__
__module____qualname__r%   r>   rC   __classcell__r.   s   @r/   r"   r"   7   s    $;Jr0   r"   c                   N     e Zd Zd fd	Z ej
                         d        Z xZS )ChameleonRotaryEmbeddingc                 j   t         |           || _        || _        || _        || _        d| j
                  t        j                  d| j                  dt        j                        j                  |t        j                        | j                  z  z  z  }| j                  d|d       || _        y )	N      ?r   r2   r5   devicer5   inv_freqF
persistent)r$   r%   scaling_factordimmax_position_embeddingsbaser'   arangeint64r6   floatregister_buffermax_seq_len_cached)r+   rU   rV   rW   rP   rT   rQ   r.   s          r/   r%   z!ChameleonRotaryEmbedding.__init__Q   s    ,'>$	IIQ!5;;?BB&X]XcXcBdgkgogooq
 	ZeD"9r0   c                    | j                   d d d d f   j                         j                  |j                  d   dd      }|d d d d d f   j                         }|j                  j
                  }t        |t              r|dk7  r|nd}t        j                  |d      5  |j                         |j                         z  j                  dd      }t        j                  ||fd	      }|j                         }|j                         }	d d d        j                  |j                  
      	j                  |j                  
      fS # 1 sw Y   AxY w)Nr   r3   r   mpscpuF)device_typeenabledr2   rU   rN   )rQ   rZ   expandrA   rP   type
isinstancestrr'   autocast	transposecatcossinr6   r5   )
r+   xposition_idsinv_freq_expandedposition_ids_expandedr`   freqsembrj   rk   s
             r/   r>   z ChameleonRotaryEmbedding.forward_   s%    !MM$4-8>>@GGHZHZ[\H]_acde ,QaZ 8 > > @ hhmm%/S%AkUZFZk`e^^UC 	&,,.1F1L1L1NNYYZ[]^_E))UEN3C'')C'')C		
 vvAGGv$cff177f&;;;	 	s   !A+EE)i   i'  NrM   )rE   rF   rG   r%   r'   no_gradr>   rH   rI   s   @r/   rK   rK   P   s$    : U]]_< <r0   rK   c                   "     e Zd ZdZ fdZ xZS )%ChameleonLinearScalingRotaryEmbeddingz_ChameleonRotaryEmbedding extended with linear scaling. Credits to the Reddit user /u/kaiokendevc                 l    |j                         | j                  z  }t        |   ||      \  }}||fS N)rZ   rT   r$   r>   )r+   rl   rm   rj   rk   r.   s        r/   r>   z-ChameleonLinearScalingRotaryEmbedding.forwards   s9    #))+d.A.AA7?1l3SCxr0   rE   rF   rG   __doc__r>   rH   rI   s   @r/   rt   rt   p   s    i r0   rt   c                   "     e Zd ZdZ fdZ xZS ))ChameleonDynamicNTKScalingRotaryEmbeddingzqChameleonRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozillac                 .   t        j                  |      dz   }|| j                  kD  r| j                  | j                  |z  | j                  z  | j                  dz
  z
  | j
                  | j
                  dz
  z  z  z  }d|t        j                  d| j
                  dt         j                        j                  |j                  t         j                        | j
                  z  z  z  }| j                  d|d	       t        | 5  ||      \  }}||fS )
Nr   r2   rM   r   rN   rO   rQ   FrR   )r'   maxrV   rW   rT   rU   rX   rY   r6   rP   rZ   r[   r$   r>   )	r+   rl   rm   seq_lenrW   rQ   rj   rk   r.   s	           r/   r>   z1ChameleonDynamicNTKScalingRotaryEmbedding.forward}   s    ))L)A-T11199$$w.1M1MMRVReRehiRij((dhhl+ - -D LLDHHau{{CFFahh^c^i^iFjmqmumuuwH   X% H7?1l3SCxr0   rw   rI   s   @r/   rz   rz   z   s    { r0   rz   c                     | dd| j                   d   dz  f   }| d| j                   d   dz  df   }t        j                  | |fd      S )z*Rotates half the hidden dims of the input..Nr3   r2   rb   )rA   r'   ri   )rl   x1x2s      r/   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r0   c                     |j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezer   )qkrj   rk   rm   unsqueeze_dimq_embedk_embeds           r/   apply_rotary_pos_embr      sY    ( --
&C
--
&C3w;q>C/0G3w;q>C/0GGr0   c                   $     e Zd Z fdZd Z xZS )ChameleonMLPc                    t         |           || _        |j                  | _        |j                  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _	        t        j                  | j                  | j                  |j                        | _
        t        |j                     | _        y )Nbias)r$   r%   configr,   intermediate_sizer   Linearmlp_bias	gate_projup_proj	down_projr
   
hidden_actact_fnr+   r   r.   s     r/   r%   zChameleonMLP.__init__   s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r0   c                     | j                  | j                  | j                  |            | j                  |      z        }|S rv   )r   r   r   r   )r+   rl   r   s      r/   r>   zChameleonMLP.forward   s6    NN4;;t~~a/@#ADLLQRO#ST	r0   rE   rF   rG   r%   r>   rH   rI   s   @r/   r   r      s    0r0   r   c                   (     e Zd ZdZ fdZd Z xZS )ChameleonLayerNorma  
    LayerNorm but computes stats only over the last dim because Chameleon applies gamma and beta
    from each shard separately to each head, instead of reducing. We can apply each head's own
    gamma/beta by repeat-interleaving weights from each shard, but the stats have to be computed
    in the last dimension. This module applies gamma/beta manually to fulfill this requirement.
    c                 B    t        |   |g|i | |d   f| _        y )Nr3   )r$   r%   normalized_shape)r+   r,   argskwargsr.   s       r/   r%   zChameleonLayerNorm.__init__   s)    6t6v6!,R 2r0   c                     t        j                  || j                  d d d      }|| j                  z  | j                  z   }|S )Ngh㈵>r-   )F
layer_normr   r)   r   r+   r;   s     r/   r>   zChameleonLayerNorm.forward   s=    ]D4I4I4QU[_`%3dii?r0   )rE   rF   rG   rx   r%   r>   rH   rI   s   @r/   r   r      s    3r0   r   r;   n_repreturnc                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)rA   rc   reshape)r;   r   batchnum_key_value_headsslenhead_dims         r/   	repeat_kvr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr0   modulequerykeyvalueattention_maskscalingdropoutc                 T   t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
|#|d d d d d d d |j
                  d   f   }|
|z   }
t        j                  j                  |
dt        j                        j                  |j                        }
t        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr2   r	   r3   )rU   r5   )ptrainingr   )r   num_key_value_groupsr'   matmulrh   rA   r   
functionalsoftmaxr7   r6   r5   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r/   eager_attention_forwardr      s    3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!$Q1.D
0@0@0D.D%DE#k1==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r0   c                   2    e Zd ZdZddedee   f fdZd Z	 	 	 	 	 	 dde	j                  dee	j                     dee	j                     d	ee   d
ededee	j                     dee	j                  ee	j                     eee	j                        f   fdZ xZS )ChameleonAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr   	layer_idxc                    t         |           || _        || _        |-t        j                  d| j                  j                   d       |j                  | _        |j                  | _	        |j                  | _        | j                  | j                  z  | _        |j                  | _        | j                  | j                  z  | _        |j                  | _        |j                   | _        d| _        |j$                  | _        | j                  dz  | _        | j                  | j                  z  | j                  k7  r&t)        d| j                   d| j                   d      t+        j,                  | j                  | j                  | j                  z  |j.                        | _        t+        j,                  | j                  | j                  | j                  z  |j.                        | _        t+        j,                  | j                  | j                  | j                  z  |j.                        | _        t+        j,                  | j                  | j                  |j.                        | _        t9        | j                  | j                  f      | _        t9        | j                  | j                  f      | _        | j?                          y )	NzInstantiating z without passing a `layer_idx` is not recommended and will lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` when creating this class.T      z?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).r   ) r$   r%   r   r   loggerwarning_oncer.   rE   attention_dropoutr,   num_attention_heads	num_headsr   r   r   rV   
rope_theta	is_causalmodel_parallel_sizer   
ValueErrorr   r   attention_biasq_projk_projv_projo_projr   q_normk_norm
_init_roper+   r   r   r.   s      r/   r%   zChameleonAttention.__init__  s0   " !8!8 9 :, , "(!9!9!--33((DNN:#)#=#= $(NNd6N6N$N!'-'E'E$ ++#)#=#= }}d*MMDNN*t/?/??QRVRbRbQc$T^^$4B8 
 ii 0 0$..4==2PW]WlWlmii 0 0$2J2JT]]2Zagavavwii 0 0$2J2JT]]2Zagavavwii 0 0$2B2BI^I^_($..$--)HI($*B*BDMM)RSr0   c                    | j                   j                  2t        | j                  | j                  | j
                        | _        y | j                   j                  d   }| j                   j                  d   }|dk(  r3t        | j                  | j                  || j
                        | _        y |dk(  r3t        | j                  | j                  || j
                        | _        y t        d|       )N)rV   rW   rd   factorlinear)rV   rT   rW   dynamiczUnknown RoPE scaling type )
r   rope_scalingrK   r   rV   r   
rotary_embrt   rz   r   )r+   scaling_typerT   s      r/   r   zChameleonAttention._init_rope(  s    ;;##+6(,(D(D__DO  ;;33F;L![[55h?Nx'"GMM,0,H,H#1	# *"KMM,0,H,H#1	# !#=l^!LMMr0   r;   r   rm   past_key_valueoutput_attentions	use_cachecache_positionr   c                    |j                         \  }	}
}| j                  |      }| j                  |      }| j                  |      }|j	                  d| j
                  | j                        }| j                  |      }|j	                  d| j                  | j                        }| j                  |      }|j	                  |	|
| j
                  | j                        j                  dd      }|j	                  |	|
| j                  | j                        j                  dd      }|j                  |	|
| j                  | j                        j                  dd      }| j                  ||      \  }}t        ||||      \  }}|'|||d}|j                  ||| j                  |      \  }}t         }| j"                  j$                  dk7  rN| j"                  j$                  dk(  r|rt&        j)                  d       nt*        | j"                  j$                     } || ||||f| j,                  sdn| j.                  | j0                  d	|\  }}|j	                  |	|
d      j3                         }| j5                  |      }|sd }|||fS )
Nr3   r   r2   )rk   rj   r   eagersdpaz`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.        )r   r   )sizer   r   r   r   r   r   r   r   r   rh   viewr   r   updater   r   r   _attn_implementationr   r   r   r   r   r   r   r   )r+   r;   r   rm   r   r   r   r   r   bszq_len_query_statesr   r   rj   rk   cache_kwargsattention_interfacer   r   s                        r/   r>   zChameleonAttention.forwardC  sd    &**,UA{{=1[[/
{{=1#++BN{{<0''D,D,DdmmT
[[,
#++CV``abdef''UD4L4Ldmm\ffghjkl
#((eT5M5Mt}}]gghiklm??<>S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6{{//69>O##L
 '>dkk>^>^&_#$7	%
  $}}C$2H2HLL	%
 	%
!\ "))#ub9DDFkk+. LL.88r0   rv   NNNFFN)rE   rF   rG   rx   r   r   intr%   r   r'   Tensor
LongTensorr   boolr   r>   rH   rI   s   @r/   r   r      s    G# #8C= #NN< 2637*."'59?9||?9 !.?9 u//0	?9
 !?9  ?9 ?9 !!1!12?9 
u||Xell3XeELL>Q5RR	S?9r0   r   c                   (    e Zd Zdedef fdZ	 	 	 	 	 	 ddej                  deej                     deej                     dee
   dee   d	ee   d
eej                     deej                  eeej                  ej                  f      f   fdZ xZS )ChameleonDecoderLayerr   r   c                     t         |           |j                  | _        t        ||      | _        t        |      | _        t        |j                  |j                        | _	        t        |j                  |j                        | _
        y N)r   r   r   r$   r%   r,   r   	self_attnr   mlpr"   rms_norm_epsinput_layernormpost_attention_layernormr   s      r/   r%   zChameleonDecoderLayer.__init__  m    !--+6YO'/0B0BH[H[\(89K9KQWQdQd(e%r0   r;   r   rm   r   r   r   r   r   c                     |}	| j                  |      } | j                  d|||||||d|\  }}
}|	|z   }|}	| j                  |      }| j                  |      }|	|z   }|f}|r||
fz  }|r||fz  }|S )a  
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`torch.FloatTensor`, *optional*):
                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
                query_sequence_length, key_sequence_length)` if default attention is used.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
            cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence
            kwargs (`dict`, *optional*):
                Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
                into the model
        r;   r   rm   r   r   r   r    )r  r   r  r   r+   r;   r   rm   r   r   r   r   r   residualself_attn_weightspresent_key_valueoutputss                r/   r>   zChameleonDecoderLayer.forward  s    < !,,]; ?Mdnn 	?
')%)/)	?
 	?
;(*; !=0 !55mD/ =0 ")++G)++Gr0   r   rE   rF   rG   r   r   r%   r'   r   r   r   r   r   r   FloatTensorr>   rH   rI   s   @r/   r   r     s    f f3 f 2637*.,1$)59=||= !.= u//0	=
 != $D>= D>= !!1!12= 
u  (51B1BEDUDU1U+V"WW	X=r0   r   c                   (    e Zd Zdedef fdZ	 	 	 	 	 	 ddej                  deej                     deej                     dee
   dee   d	ee   d
eej                     deej                  eeej                  ej                  f      f   fdZ xZS )ChameleonSwinDecoderLayerr   r   c                     t         |           |j                  | _        t        ||      | _        t        |      | _        t        |j                  |j                        | _	        t        |j                  |j                        | _
        y r   r   r   s      r/   r%   z"ChameleonSwinDecoderLayer.__init__  r  r0   r;   r   rm   r   r   r   r   r   c                     |}	 | j                   d|||||||d|\  }}
}| j                  |      }|	|z   }|}	| j                  |      }| j                  |      }|	|z   }|f}|r||
fz  }|r||fz  }|S )a-  
        Args:
            hidden_states (`torch.FloatTensor`):
                input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`torch.FloatTensor`, *optional*):
                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
                query_sequence_length, key_sequence_length)` if default attention is used.
            position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
                Indices of positions of each input sequence tokens in the position embeddings
            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            use_cache (`bool`, *optional*):
                If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
                (see `past_key_values`).
            cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence.
        r  r  )r   r  r   r  r  s                r/   r>   z!ChameleonSwinDecoderLayer.forward  s    > ! ?Mdnn 	?
')%)/)	?
 	?
;(*; ,,]; =0 /55mD =0 ")++G)++Gr0   r   r  rI   s   @r/   r  r    s    f f3 f 2637*.,1$)59;||; !.; u//0	;
 !; $D>; D>; !!1!12; 
u  (51B1BEDUDU1U+V"WW	X;r0   r  c                   B     e Zd ZdZ fdZdej                  fdZ xZS )ChameleonVQVAEVectorQuantizera  
    A module for vector quantization using learned embedding vectors.

    This module implements the quantization process similar to te one described in
    the VQ-VAE (Vector Quantized Variational AutoEncoder) paper. It quantizes continuous
    input vectors into discrete codebook vectors, which are learned during training.
    Current implementation improves over previous ones by avoiding costly matrix multiplications
    and allowing for post-hoc remapping of indices.
    c                     t         |           |j                  | _        |j                  | _        t        |dd      | _        t        j                  | j                  | j                        | _	        y )Nbetag      ?)
r$   r%   num_embeddings	embed_dimembedding_dimgetattrr  r   	Embedding	embeddingr   s     r/   r%   z&ChameleonVQVAEVectorQuantizer.__init__%  sX    $33#--FFD1	d&9&94;M;MNr0   hidden_statec           
      L   |j                  dddd      j                         }|j                  d| j                        }t	        j
                  |dz  dd      t	        j
                  | j                  j                  dz  d      z   dt	        j                  d	|| j                  j                  j                  dd            z  z
  }t	        j                  |d      }| j                  |      j                  |j                        }t	        j                  |j                         |z
  dz        | j                  t	        j                  ||j                         z
  dz        z  z   }|||z
  j                         z   }|j                  dddd      j                         }|||fS )
Nr   r2   r	   r   r3   T)rU   r4   rb   z	bd,dn->bn)permuter   r   r  r'   sumr  r)   einsumrh   argminrA   r9   detachr  )r+   r  hidden_state_flattened	distancesmin_encoding_indiceshidden_state_quantlosss          r/   r>   z%ChameleonVQVAEVectorQuantizer.forward-  s   #++Aq!Q7BBD!-!2!22t7I7I!J II,a/QEii--q0a89%,,{,BDNNDYDYDcDcdeghDijjk 	  %||I1=!^^,@AFF|GYGYZ zz-446E!KLtyy[`[e[e,"5"5"77A=\
 P
 

 *-?,-N,V,V,XX 0771aCNNP!4)===r0   )	rE   rF   rG   rx   r%   r'   r   r>   rH   rI   s   @r/   r  r    s    O>ELL >r0   r  c                   $     e Zd Z fdZd Z xZS )#ChameleonVQVAEEncoderConvDownsamplec                 `    t         |           t        j                  ||ddd      | _        y )Nr	   r2   r   kernel_sizestridepadding)r$   r%   r   Conv2dconvr+   in_channelsr.   s     r/   r%   z,ChameleonVQVAEEncoderConvDownsample.__init__J  s'    IIk;AaYZ[	r0   c                 Z    t        j                  |ddd      }| j                  |      }|S )N)r   r   r   r   constantr   )padmoder   )r   r6  r1  r   s     r/   r>   z+ChameleonVQVAEEncoderConvDownsample.forwardN  s+    mJVWX		-0r0   r   rI   s   @r/   r*  r*  I  s    \r0   r*  c                   *     e Zd Z	 	 d fd	Zd Z xZS ) ChameleonVQVAEEncoderResnetBlockc                    t         |           || _        ||n|| _        || _        t
        j                  j                  d|dd      | _        t
        j                  j                  ||ddd      | _
        t
        j                  j                  d|dd      | _        t
        j                  j                  |j                        | _        t
        j                  j                  ||ddd      | _        | j                  | j                  k7  r`| j                  r*t
        j                  j                  ||ddd      | _        y t
        j                  j                  ||ddd      | _        y y )	N    rD   T
num_groupsnum_channelsr-   affiner	   r   r,  r   )r$   r%   r3  out_channelsuse_conv_shortcutr'   r   	GroupNormnorm1r0  conv1norm2Dropoutr   conv2conv_shortcutnin_shortcut)r+   r   r3  r@  rH  r.   s        r/   r%   z)ChameleonVQVAEEncoderResnetBlock.__init__V  s1    	&+7+?K\!.XX''2KUYbf'g
XX__[,AVWab_c
XX''2LVZcg'h
xx''7XX__\<QWXbc_d
t000%%%*XX__[,\]fgqr_%s"$)HHOOK[\efpqO$r!	 1r0   c                    |}| j                  |      }|t        j                  |      z  }| j                  |      }| j	                  |      }|t        j                  |      z  }| j                  |      }| j                  |      }| j                  | j                  k7  r3| j                  r| j                  |      }||z   S | j                  |      }||z   S rv   )rC  r'   sigmoidrD  rE  r   rG  r3  r@  rA  rH  rI  )r+   r;   r	  s      r/   r>   z(ChameleonVQVAEEncoderResnetBlock.forwardm  s     

=1}55

=1

=1}55]3

=1t000%%--h7 -''  ,,X6-''r0   )NFr   rI   s   @r/   r9  r9  U  s    
 s.(r0   r9  c                   $     e Zd Z fdZd Z xZS )ChameleonVQVAEEncoderAttnBlockc                    t         |           || _        t        j                  j                  d|dd      | _        t        j                  j                  ||ddd      | _        t        j                  j                  ||ddd      | _	        t        j                  j                  ||ddd      | _
        t        j                  j                  ||ddd      | _        y )Nr;  rD   Tr<  r   r   r,  )r$   r%   r3  r'   r   rB  normr0  r   r   vproj_outr2  s     r/   r%   z'ChameleonVQVAEEncoderAttnBlock.__init__  s    &HH&&";TXae&f	kqQR\]^kqQR\]^kqQR\]^[aXYcder0   c                 t   |}| j                  |      }| j                  |      }| j                  |      }| j                  |      }|j                  \  }}}}	|j                  ||||	z        j                  ddd      }|j                  ||||	z        }t        j                  ||      }
|
t        |      dz  z  }
t        j                  |
d      }
|j                  ||||	z        }|
j                  ddd      }
t        j                  ||
      j                  ||||	      }| j                  |      }||z   S )Nr   r2   r   r   rb   )rO  r   r   rP  rA   r   r  r'   bmmr   r   r   rQ  )r+   r;   r	  r   r   r   
batch_sizechannelsheightwidthr   r   s               r/   r>   z&ChameleonVQVAEEncoderAttnBlock.forward  s5    		-0vvm,VVM*
vvm, /;.@.@+
Hfe#++J&5.QYYZ[]^`ab''
HfunM
yyz:#s8}'>?yy15 $++J&5.Q#++Aq!4iil;CCJPXZ`bghmmK0+%%r0   r   rI   s   @r/   rM  rM    s    f&r0   rM  c                   >     e Zd Z fdZdej
                  fdZ xZS )ChameleonVQVAEEncoderc           	         t         |           t        |j                        | _        |j
                  | _        |j                  }|j                  }|j                  }|j                  }|j                  }|j                  }t        j                  j                  ||ddd      | _        |}dt        |      z   }	|	| _        t        j"                         | _        t'        | j                        D ]  }
t        j"                         }t        j"                         }||	|
   z  }|||
   z  }t'        | j
                        D ]g  }|j)                  t+        |||             |}|j,                  /||j,                  v s>|j.                  dk(  sN|j)                  t1        |             i t        j2                         }||_        ||_        |
| j                  dz
  k7  rt9        |      |_        |dz  }| j$                  j)                  |       ! t        j2                         | _        t+        ||      | j<                  _        |j.                  dk(  rt1        |      nt        j@                         | j<                  _!        t+        |||      | j<                  _"        t        j                  jG                  d|d	d
      | _$        t        j                  j                  ||rd|z  n|ddd      | _%        y )Nr	   r   r,  )r   )r   r3  r@  vanillar2   r;  rD   Tr<  )&r$   r%   lenchannel_multipliernum_resolutionsnum_res_blocksbase_channels
resolutionr3  double_latentlatent_channelsr'   r   r0  conv_inr@   in_channel_multiplier
ModuleListdownrangeappendr9  attn_resolutions	attn_typerM  Moduleblockattnr*  
downsamplemidblock_1Identityattn_1block_2rB  norm_outconv_out)r+   r   r`  ra  r3  rb  rc  r]  curr_resre  i_levelrm  rn  block_in	block_outi_blockrg  r.   s                    r/   r%   zChameleonVQVAEEncoder.__init__  s   "6#<#<=$33,,&&
((,, 00#66xx{MqYZdef $u-?'@ @%:"MMO	T112 	#GMMOE==?D$'<W'EEH%(:7(CCI !4!45 J4%$,%. %++7 F$;$;;((I5KK >x HIJ  99;DDJDI$..22"Eh"O#q=IIT"7	#: 99;; !

 GMFVFVZcFc8Bikititiv; !
 **bxUYbf*g#0Ao ( 
r0   pixel_valuesc                 2   | j                  |      g}t        | j                        D ]  }t        | j                        D ]  } | j                  |   j
                  |   |d         }t        | j                  |   j                        dkD  r" | j                  |   j                  |   |      }|j                  |        || j                  dz
  k7  s|j                  | j                  |   j                  |d                 |d   }| j                  j                  |      }| j                  j                  |      }| j                  j                  |      }| j                  |      }|t        j                   |      z  }| j#                  |      }|S )Nr3   r   r   )rd  rh  r^  r_  rg  rm  r\  rn  ri  ro  rp  rq  rs  rt  ru  r'   rK  rv  )r+   r|  r;   rx  r{  r  last_hidden_states          r/   r>   zChameleonVQVAEEncoder.forward  s   l34T112 		WG !4!45 3@tyy177@!"%  tyy)../!3#C499W#5#:#:7#CL#QL$$\23 $..22$$TYYw%7%B%B=QSCT%UV		W *"- HH,,->? HHOO,=> HH,,->? !MM*;<U]]+<== MM*;<  r0   )rE   rF   rG   r%   r'   r   r>   rH   rI   s   @r/   rY  rY    s    C
J!E$4$4 !r0   rY  c                       e Zd ZdZd Zed        Zed        Zed        Zed        Z	ed        Z
ed        Zd	ej                  d
ej                  fdZy)ChameleonImageVocabularyMappingzM
    A class for mapping discrete image tokens from VQGAN to BPE tokens.
    c                 >    || _         |j                  d      | _        y )Nz<image>)	vocab_mapgetimage_token_id)r+   r  s     r/   r%   z(ChameleonImageVocabularyMapping.__init__
  s    "'mmI6r0   c                 j    | j                   j                         D ci c]  \  }}||
 c}}S c c}}w rv   )r  itemsr+   r   rP  s      r/   val2namez(ChameleonImageVocabularyMapping.val2name  s+    !%!5!5!78A1888   /c           	          t        | j                  j                         D cg c]  \  }}|j                  d      s| c}}      S c c}}w )NIMGIMG)sortedr  r  
startswith)r+   namevals      r/   image_tokensz,ChameleonImageVocabularyMapping.image_tokens  s8    DNN,@,@,B`ytSdooV^F_s`aa`s
   A	
A	
c           
         t        d      D ci c]#  }t        t        d      |z         t        |      % c}dt        dt        ffd}| j                  D ci c]!  }|t         || j                  |               # c}S c c}w c c}w )N
   Aold_namer   c                 P    dj                  fd| t        d      d D              S )N c              3   B   K   | ]  }j                  ||        y wrv   )r  ).0cimg_tkn_chr_mappings     r/   	<genexpr>zIChameleonImageVocabularyMapping.bpe2img.<locals>.remap.<locals>.<genexpr>  s     _Q.221a8_s   r  r3   )joinr\  )r  r  s    r/   remapz6ChameleonImageVocabularyMapping.bpe2img.<locals>.remap  s$    77_(3x=[]B^___r0   )rh  chrordrf   r  r   r  )r+   ir  tokr  s       @r/   bpe2imgz'ChameleonImageVocabularyMapping.bpe2img  s    BG)LQs3s8a<0#a&8L	`C 	`C 	` @D?P?PQSt}}S1233QQ M
 Rs   (B&Bc                 j    | j                   j                         D ci c]  \  }}||
 c}}S c c}}w rv   )r  r  r  s      r/   img2bpez'ChameleonImageVocabularyMapping.img2bpe  s+    !%!3!3!56A1666r  c                     t        j                  t        | j                  j	                                     t        j                  t        | j                  j                                     fS rv   )r'   tensorr  r  keysvaluesrB   s    r/   bpe2img_search_tensorsz6ChameleonImageVocabularyMapping.bpe2img_search_tensors#  sC    ||F4<<#4#4#678%,,vdllNaNaNcGd:eeer0   c                     t        j                  t        | j                  j	                               dz   t         j
                        }| j                  j                         D ]
  \  }}|||<    |S )Nr   rN   )r'   zerosr|   r  r  r   r  )r+   mappingr   rP  s       r/   img2bpe_mapping_tensorz6ChameleonImageVocabularyMapping.img2bpe_mapping_tensor'  s[    ++c$,,"3"3"56:%))LLL&&( 	DAqGAJ	r0   	img_batchr   c                 x    |j                   }| j                  |j                  d         }|j                  |      S )Nr_   )rP   r  r6   )r+   r  rP   
img_tokenss       r/   convert_img2bpez/ChameleonImageVocabularyMapping.convert_img2bpe.  s5    !!00e1DE
}}V$$r0   N)rE   rF   rG   rx   r%   r   r  r  r  r  r  r  r'   r   r  r  r0   r/   r  r    s    7 9 9 b b R R 7 7 f f  % %%,, %r0   r  c                   J    e Zd ZeZdZdZddgZddgZdZ	dZ
dZdZdZdZdZd Zy	)
ChameleonPreTrainedModelmodelTr   r  past_key_valuesr   Fc                    | j                   j                  }t        |t        j                  t        j
                  f      rY|j                  j                  j                  d|       |j                  %|j                  j                  j                          y y t        |t        j                  t        j                  f      rJ|j                  j                  j                          |j                  j                  j                  d       y t        |t              r&|j                  j                  j                  d       y t        |t        j                        rf|j                  j                  j                  d|       |j                   2|j                  j                  |j                      j                          y y y )Nr   )r9   stdrM   )r   initializer_rangere   r   r   r0  r)   datanormal_r   zero_rB  	LayerNormfill_r"   r  padding_idx)r+   r   r  s      r/   _init_weightsz&ChameleonPreTrainedModel._init_weightsC  s4   kk++fryy"))45MM&&CS&9{{&  &&( 'r|| <=KK""$MM$$S) 01MM$$S)-MM&&CS&9!!-""6#5#56<<> . .r0   N)rE   rF   rG   r   config_classbase_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_2_supports_sdpa_supports_quantized_cache_supports_cache_class_supports_static_cache!_supports_param_buffer_assignment_supports_attention_backendr  r  r0   r/   r  r  4  sX    "L&*#02MN#4m"D!N $ !(-%"&?r0   r  aK  
    The VQ-VAE model used in Chameleon for encoding/decoding images into discrete tokens.
    This model follows the "Make-a-scene: Scene-based text-to-image generation with human priors" paper from
    [ Oran Gafni, Adam Polyak, Oron Ashual, Shelly Sheynin, Devi Parikh, and Yaniv Taigman](https://arxiv.org/abs/2203.13131).
    )custom_introc                   N     e Zd ZeZdgZdef fdZdej                  fdZ	 xZ
S )ChameleonVQVAEr  r   c                 l   t         |   |       t        |      | _        t	        |      | _        t        j                  j                  |j                  |j                  d      | _        t        j                  j                  |j                  |j                  d      | _        | j                          y Nr   )r$   r%   rY  encoderr  quantizer'   r   r0  rc  r  
quant_convpost_quant_convevalr   s     r/   r%   zChameleonVQVAE.__init__`  s|     ,V45f=((//&*@*@&BRBRTUV$xxv/?/?AWAWYZ[		r0   r|  c                 z    | j                  |      }| j                  |      }| j                  |      \  }}}|||fS rv   )r  r  r  )r+   r|  r;   quantemb_lossindicess         r/   encodezChameleonVQVAE.encodei  s@    \26#'==#? xh''r0   )rE   rF   rG   r   r  r  r%   r'   r   r  rH   rI   s   @r/   r  r  U  s2     (L893 (5#3#3 (r0   r  c                   d    e Zd Zdef fdZd Zd Zdej                  fdZ	dej                  fdZ
e	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     d	eej                     d
eej                     dee   deej                     dee   dee   dee   dee   deej                     dee   deeef   fd       Z	 dd	eej                  df   dej                  dej                  dedef
dZed	ej                  dededej4                  dej                  defd       Z xZS )ChameleonModelr   c           	         t         |   |       |j                  | _        |j                  | _        t        j                  |j                  |j                  | j                        | _        t        |j                        | _        | j                  j                  st        nt        }t        j                   t#        |j$                        D cg c]  } |||       c}      | _        t)        |j                  |j*                        | _        t.        j1                  |j2                        | _        d| _        | j9                          y c c}w )Nr   F)r$   r%   pad_token_idr  
vocab_sizer   r  r,   embed_tokensr  vocabulary_mapvocabulary_mappingr   	swin_normr   r  rf  rh  num_hidden_layerslayersr"   r  rO  r  _from_config	vq_configvqmodelgradient_checkpointing	post_init)r+   r   decoder_layerr   r.   s       r/   r%   zChameleonModel.__init__r  s     !.. ++LL):):F<N<NPTP`P`a"A&BWBW"X59[[5J5J-Pimm?DVE]E]?^_)]69-_
 %V%7%7V=P=PQ	%2263C3CD&+# 	 `s   Ec                     | j                   S rv   r  rB   s    r/   get_input_embeddingsz#ChameleonModel.get_input_embeddings  s       r0   c                     || _         y rv   r  r+   r   s     r/   set_input_embeddingsz#ChameleonModel.set_input_embeddings  s
    !r0   r|  c                 N    t         j                  d       | j                  |      S )Nz`model.get_image_tokens()` is deprecated and will be removed in v4.58. To obtain discrete token use `model.get_image_features()`)r   warningget_image_featues)r+   r|  s     r/   get_image_tokenszChameleonModel.get_image_tokens  s'     O	
 %%l33r0   c                     |j                   d   }| j                  j                  |      \  }}}| j                  j	                  |      }|j                  |d      }|S )as  
        Tokenizes images into discrete tokens with VQGAN module. Converts
        obtained image tokens into BPE tokens and wraps with "boi" and "eoi"
        special tokens.

        Args:
            pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, image_size, image_size)):
                The tensors corresponding to the input images.
        r   r3   )rA   r  r  r  r  r   )r+   r|  rT  r   
image_toksbpe_tokss         r/   get_image_featuresz!ChameleonModel.get_image_features  sZ     "''*
<<..|<1j**:::F==R0r0   	input_idsr   rm   r  inputs_embedsr   r   output_hidden_statesreturn_dictr   r   r   c                 V   ||n| j                   j                  }|	|	n| j                   j                  }	||n| j                   j                  }|
|
n| j                   j                  }
| j
                  r%| j                  r|rt        j                  d       d}|d u |d uz  rt        d      ||t        d      || j                  |      }|| j                  j                  k(  }t               s{||   j                         |j                         k7  rW|| j                  j                  k(  j                         }|j                   d   |j                   d   z  }t        d| d|       |j#                  |j$                  |j&                        }|j)                  ||      }|| j+                  |      }|r*|(t,        j.                  j1                         s
t3               }|F||j5                         nd}t-        j6                  |||j                   d   z   |j$                  	      }||j9                  d      }| j;                  |||||      }|}|	rd
nd }|rd
nd }d }| j<                  D ]q  }|	r||fz  }| j
                  r/| j                  r#| j?                  |j@                  |||||||      }n ||f||||||d|}|d   }|r	||rdnd   }|si||d   fz  }s | jC                  |      }|	r||fz  }d }|r|}|
stE        d ||||fD              S tG        ||||      S )NzX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fz:You must specify exactly one of input_ids or inputs_embedszdYou cannot specify both pixel_values and inputs_embeds at the same time, and must specify either oner   r   z6Image features and image tokens do not match: tokens: z, features rP   r  )r   rm   r   r   r   r   r2   c              3   &   K   | ]	  }||  y wrv   r  )r  rP  s     r/   r  z)ChameleonModel.forward.<locals>.<genexpr>  s     tqfgfsts   )r~  r  r;   
attentions)$r   r   r  r   use_return_dictr  r   r   r   r   r  r  r  r   numelr   rA   r6   rP   r5   masked_scatterr  r'   jit
is_tracingr   get_seq_lengthrX   r   _update_causal_maskr  _gradient_checkpointing_func__call__rO  r@   r   )r+   r  r|  r   rm   r  r  r   r   r  r  r   r   r  special_image_maskn_image_tokens_in_textn_image_featurespast_seen_tokensr   r;   all_hidden_statesall_self_attnsnext_decoder_cacher  layer_outputs
next_caches                             r/   r>   zChameleonModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	%0%<k$++B]B]&&4==Yj I-t";<YZZ#(Av  #22<@L!*d.E.E.T.T!T+-)<N2O2U2U2W[g[m[m[o2o*3t7N7N7]7]*])b)b)d&#/#5#5a#8<;M;Ma;P#P  LMcLddo  qA  pB  C  (??9+;+;Y__ML!001C\RI  --i8M 09M9M9O*nO!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L..M>?L]

 & #7BD0d!![[ !	6M#!m%55!**t}} $ A A!**! #%"	! !.!	!#.!-#2&7'#1	! 	! *!,M%28I1q%Q" =#3"55C!	6F 		-0  -!11
+Jt]J@QSa$bttt&+&+%	
 	
r0   r   input_tensorc           	         | j                   j                  dk(  r||dk(  j                         r|S y | j                   j                  dk(  r't        |t        j
                        rt        |      }|S ||j                         nd}||j                  nd}| j                   j                  dk(  r(|s&|s$t        j                  |||| j                        ry |j                  }|j                  d   }	|r|j                         }
n1t        |t        j
                        r|j                  d	   n||	z   dz   }
| j                  ||	|
|||j                  d   
      }| j                   j                  dk(  rQ|O|j                   j"                  dv r7|s5t	        j$                  |      j&                  }t        j(                  ||      }|S )Nflash_attention_2r   flex_attentionr   Fr   )r  past_key_values_lengthis_trainingr   r3   )sequence_lengthtarget_lengthr5   r   rT  )cudaxpunpu)r   r   anyre   r'   r   r    r  is_compileabler   _ignore_causal_mask_sdpar   r5   rA   get_max_cache_shape5_prepare_4d_causal_attention_mask_with_cache_positionrP   rd   finfomin_unmask_unattended)r+   r   r  r   r  r   r  using_compilable_cacher5   r  r  r   	min_dtypes                r/   r  z"ChameleonModel._update_causal_mask%  s    ;;++/BB)~/D.I.I.K%%;;++/??.%,,7!<^!L!!
 @O?Z?99;`aCRC^!?!?di ;;++v5>T]n%>>*'7 MM	 ""&,,Q/!+??AM nell; $$R(%7!;  PP+')#))!, Q 
 KK,,6*%%**.DD%
 E*..I0CCKQZ[Kr0   r  r  r5   rT  c                    | | j                         dk(  r| }|S t        j                  |      j                  }t        j                  ||f|||j
                        }|dk7  rt        j                  |d      }|t        j                  ||j
                        |j                  dd      kD  z  }|ddddddf   j                  |ddd      }| |j                         }| j                  d   }	|ddddddd|	f   | ddddddf   j                  |j
                        z   }
|
dk(  }
|ddddddd|	f   j                  |
|      |ddddddd|	f<   |S )	aM  
        Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
        `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

        Args:
            attention_mask (`torch.Tensor`):
                A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
                `(batch_size, 1, query_length, key_value_length)`.
            sequence_length (`int`):
                The sequence length being processed.
            target_length (`int`):
                The target length: when generating with static cache, the mask should be as long as the static cache,
                to account for the 0 padding, the part of the cache that is not filled yet.
            dtype (`torch.dtype`):
                The dtype to use for the 4D attention mask.
            cache_position (`torch.Tensor`):
                Indices depicting the position of the input sequence tokens in the sequence.
            batch_size (`torch.Tensor`):
                Batch size.
        N   )
fill_valuer5   rP   r   )diagonalr  r3   r   )rU   r'   r$  r%  fullrP   triurX   r   rc   clonerA   r6   masked_fill)r   r  r  r5   r   rT  r   r   r(  mask_lengthpadding_masks              r/   r#  zDChameleonModel._prepare_4d_causal_attention_mask_with_cache_positioni  s   > %.*<*<*>!*C(K* ' E*..I** -0Ye\j\q\qK !##jjqA5<<n>S>STWeWmWmnprsWtttK%dD!Q&67>>z1bRTUK))//1,2226*1aL[L+@ANSTVZ\`bcScDdDgDg&&E    ,q05@Aq,;,AV5W5c5c )6Aq!\k\12 r0   )NNNNNNNNNNN)F)rE   rF   rG   r   r%   r  r  r'   r  r  r  r   r   r   r   r   r   r   r   r   r   r   r>   r  staticmethodr   r5   r#  rH   rI   s   @r/   r  r  p  s    $!"4U->-> 4u/@/@    15481537+/59$(,0/3&*59A
E,,-A
 u001A
 !.	A

 u//0A
 "%A
   1 12A
 D>A
 $D>A
 'tnA
 d^A
 !!1!12A
 -.A
 
u--	.A
 A
T #(BellK78B llB 	B
 B  BH 444 4 {{	4
 4 4 4r0   r  c                       e Zd Zy)KwargsForCausalLMN)rE   rF   rG   r  r0   r/   r5  r5    s    r0   r5  zb
    Chameleon Model with a head on top used for outputting logits for next token prediction.
    c            !           e Zd ZdgZ fdZd Zd Zd Zd Zd Z	d Z
ee	 	 	 	 	 	 	 	 	 	 	 	 dd	eej                     d
eej                      deej"                     deej                     dee   deej                      deej                     dee   dee   dee   dee   deej                     dee   deeef   fd              Z	 	 	 	 	 	 	 d fd	Z xZS )!ChameleonForConditionalGenerationzlm_head.weightc                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  |j                  d      | _        | j                          y )NFr   )
r$   r%   r  r  r  r   r   r,   lm_headr  r   s     r/   r%   z*ChameleonForConditionalGeneration.__init__  sU     #F+
 ++yy!3!3V5F5FUS 	r0   c                 .    | j                   j                  S rv   r  r  rB   s    r/   r  z6ChameleonForConditionalGeneration.get_input_embeddings  s    zz&&&r0   c                 &    || j                   _        y rv   r;  r  s     r/   r  z6ChameleonForConditionalGeneration.set_input_embeddings  s    "'

r0   c                     | j                   S rv   r9  rB   s    r/   get_output_embeddingsz7ChameleonForConditionalGeneration.get_output_embeddings  s    ||r0   c                     || _         y rv   r>  )r+   new_embeddingss     r/   set_output_embeddingsz7ChameleonForConditionalGeneration.set_output_embeddings  s	    %r0   c                     || _         y rv   r  )r+   decoders     r/   set_decoderz-ChameleonForConditionalGeneration.set_decoder  s	    
r0   c                     | j                   S rv   rD  rB   s    r/   get_decoderz-ChameleonForConditionalGeneration.get_decoder  s    zzr0   r  r|  r   rm   r  r  labelsr   r   r  r  r   r   r   c                 d   |	|	n| j                   j                  }	|
|
n| j                   j                  }
||n| j                   j                  } | j                  d||||||||	|
||d|}|d   }| j                  |      }| j                  j                  j                  }t        j                  |j                        j                  |dddd|f<   d}|* | j                  d||| j                   j                  d|}t        |||j                  |j                   |j"                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import ChameleonProcessor, ChameleonForConditionalGeneration
        >>> import torch
        >>> import requests
        >>> from PIL import Image

        >>> model = ChameleonForConditionalGeneration.from_pretrained("facebook/chameleon-7b", torch_dtype=torch.bfloat16)
        >>> processor = ChameleonProcessor.from_pretrained("facebook/chameleon-7b")

        >>> prompt = "I used to know a lot about constellations when I was younger, but as I grew older, I forgot most of what I knew. These are the only two constellations that I really remember now.<image><image>I would like for you to tell me about 3 more constellations and give me a little bit of history about the constellation."
        >>> image = Image.open(requests.get("https://nineplanets.org/wp-content/uploads/2020/12/the-big-dipper-1.jpg", stream=True).raw)
        >>> image_2 = Image.open(requests.get("https://www.kxan.com/wp-content/uploads/sites/40/2020/10/ORION.jpg", stream=True).raw)

        >>> inputs = processor(images=[image, image_2], text=prompt, return_tensors="pt").to(model.device, torch.bfloat16)

        >>> generated_ids = model.generate(**inputs, max_new_tokens=100, do_sample=False)
        >>> processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        ```N)r  r|  r   rm   r  r  r   r   r  r  r   r   )logitsrI  r  )r(  rK  r  r;   r  r  )r   r   r  r  r  r9  r  r  r'   r$  r5   r%  loss_functionr  r   r  r;   r  )r+   r  r|  r   rm   r  r  rI  r   r   r  r  r   r   r  r;   rK  r  r(  s                      r/   r>   z)ChameleonForConditionalGeneration.forward  sK   X 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] $** 
%)%+'/!5#)
 
  
m, zz44AA%*[[%>%B%Bq!\!"%4%%pVFt{{OeOepiopD%#33!//))
 	
r0   c	                 R    t        |   |f|||||||d|	}
|d   dk7  rd |
d<   |
S )N)r|  r  r   r  r   rm   r   r   r|  )r$   prepare_inputs_for_generation)r+   r  r|  r  r   r  r   rm   r   r   model_inputsr.   s              r/   rN  z?ChameleonForConditionalGeneration.prepare_inputs_for_generation  s\     w<

%+)')%

 

 !! ,0L(r0   )NNNNNNNNNNNN)NNNNNNT)rE   rF   rG   _tied_weights_keysr%   r  r  r?  rB  rF  rH  r   r   r   r'   r   r  r   r   r   r   r5  r   r   r   r>   rN  rH   rI   s   @r/   r7  r7    s    ++'(&  15481537+/59-1$(,0/3&*59Q
E,,-Q
 u001Q
 !.	Q

 u//0Q
 "%Q
   1 12Q
 ))*Q
 D>Q
 $D>Q
 'tnQ
 d^Q
 !!1!12Q
 *+Q
 
u,,	-Q
  Q
l  r0   r7  )r7  r  r  r  r  )r   )Srx   	functoolsr   typingr   r   r   r   r'   torch.nn.functionalr   r   r   torch.utils.checkpointactivationsr
   cache_utilsr   r   
generationr   modeling_attn_mask_utilsr   modeling_flash_attention_utilsr   modeling_outputsr   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   r   r   configuration_chameleonr   r   !torch.nn.attention.flex_attentionr   integrations.flex_attentionr    
get_loggerrE   r   rl  r"   ri  rK   rt   rz   r   r   r   r  r   r   r   r   rZ   r   r   r   r  r  r*  r9  rM  rY  r  r  r  r  r5  r7  __all__r  r0   r/   <module>rd     s    % 3 3      ! . ) > B O F & 1  K  !;J 
		H	%Jryy J(    , -
<ryy <@,D 0H *(8299 " &	UU\\ 	U# 	U%,, 	U( %II%<<% 
% <<	%
 U\\*% % %4D9 D9PHBII HVF		 FR,>BII ,>^	")) 	)(ryy )(X &RYY  &F^!BII ^!B,% ,%^ ? ? ?@ (- ((( n- n nb	 ?,j > 
R(@/ R
Rj pr0   