
    Uh                           d dl mZmZmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZ ddlmZ dd	lmZ  G d
 de
d      Z G d de      ZdgZy)    )DictListOptionalUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)
TensorType   )AutoTokenizerc                   6    e Zd Zddidddej                  dZy)AriaProcessorKwargspaddingF  )max_image_sizesplit_image)text_kwargsimages_kwargsreturn_tensorsN)__name__
__module____qualname__r   PYTORCH	_defaults     z/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/aria/processing_aria.pyr   r      s-     u
 " 
 %,,	Ir!   r   F)totalc                        e Zd ZdZddgZddgZdZdZ	 	 	 	 ddee	e
f   dee
   deeeeef   ef      f fdZ	 	 	 dd	eeeee   ee   f   d
ee   dee   defdZd Zd Zed        Z xZS )AriaProcessora  
    AriaProcessor is a processor for the Aria model which wraps the Aria image preprocessor and the LLama slow tokenizer.

    Args:
        image_processor (`AriaImageProcessor`, *optional*):
            The AriaImageProcessor to use for image preprocessing.
        tokenizer (`PreTrainedTokenizerBase`, *optional*):
            An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
        chat_template (`str`, *optional*):
            A Jinja template which will be used to convert lists of messages in a chat into a tokenizable string.
        size_conversion (`Dict`, *optional*):
            A dictionary indicating size conversions for images.
    image_processor	tokenizerchat_templatesize_conversionAriaImageProcessorr   c                 *   |ddd}|j                         D ci c]  \  }}t        |      | c}}| _        |j                  | _        |j                  | _        ||j
                  |j                  |_        t        | !  |||       y c c}}w )N      )i  r   )r(   )	itemsintr)   image_tokenimage_token_id	pad_token	unk_tokensuper__init__)selfr&   r'   r(   r)   kv	__class__s          r"   r5   zAriaProcessor.__init__@   s     "$'c2O6E6K6K6MNdaA	N$00'66 Y%8%8%@"+"5"5I)=Q  Os   Btextimageskwargsreturnc                     | j                   t        fd| j                  j                  i|}t	        |t
              r|g}n.t	        |t              st	        |d   t
              st        d      | | j                  |fi |d   }| j                  |j                  j                  d      }g }	|j                  d      |z  }
|D ]P  }|j                  | j                  j                  | j                  j                  |
z        }|	j                  |       R ni }|}	|d   j                  d	d      } | j                  |	fi |d   }| j!                  |	|d
g       t#        i |||      S )a  
        Main method to prepare for the model one or several sequences(s) and image(s).

        Args:
            text (`TextInput`, `PreTokenizedInput`, `List[TextInput]`, `List[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.


        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
            `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
            `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
            - **pixel_mask** -- Pixel mask to be fed to a model. Returned when `images` is not `None`.
        tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsNr   r   	num_cropsr   r   image)
modalities)datatensor_type)_merge_kwargsr   r'   init_kwargs
isinstancestrlist
ValueErrorr&   r)   pixel_valuesshapepopreplacer0   append_check_special_mm_tokensr   )r6   r:   r;   audiovideosr<   output_kwargsimage_inputstokens_per_imageprompt_stringsr@   sampler   text_inputss                 r"   __call__zAriaProcessor.__call__R   s   < +**
"&.."<"<
 
 dC 6DD$'
47C0H`aa/4//0L
  $33L4M4M4S4STU4VWN$((58HHI .(B(BDNND^D^ajDjk%%f-.
 L!N&}599:JDQ$dnn^T}]7ST%%nkwi%X!@K!@<!@n]]r!   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r'   batch_decoder6   argsr<   s      r"   r[   zAriaProcessor.batch_decode   s     
 +t~~**D;F;;r!   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r'   decoder\   s      r"   r_   zAriaProcessor.decode   s     
 %t~~$$d5f55r!   c                     | j                   j                  }| j                  j                  }|D cg c]
  }|dk7  s	| }}t        t        j                  ||z               S c c}w )Nr@   )r'   model_input_namesr&   rI   dictfromkeys)r6   tokenizer_input_namesimage_processor_input_namesnames       r"   ra   zAriaProcessor.model_input_names   se     $ @ @&*&:&:&L&L# 9T&kW[_jWjt&k#&kDMM"7:U"UVWW 'ls
   
A#A#)NNNN)NNN)r   r   r   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr   r   rH   r   r   floatr/   r5   r   r   r   r	   r   r   r   rY   r[   r_   propertyra   __classcell__)r9   s   @r"   r%   r%   ,   s    $[1J#%67L0%O /3'+BFR +,R  }	R
 "$uUCZ'8#'=">?R* (,>^I0$y/4HYCZZ[>^ $>^ ,->^ 
>^@<6 X Xr!   r%   N)typingr   r   r   r   image_processing_utilsr   image_utilsr	   processing_utilsr
   r   r   tokenization_utilsr   r   utilsr   autor   r   r%   __all__r    r!   r"   <module>rw      sL   * / . 2 % H H >   
*% 
|XN |X~ 
r!   