
    Uhb                        d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
Z	ddl	mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZ ddlmZ  ej@                  e!      Z"e G d de             Z# G d dejH                        Z% G d dejH                        Z& G d dejH                        Z' G d dejH                        Z( G d dejH                        Z) G d dejH                        Z* G d dejH                        Z+ G d d ejH                        Z, G d! d"ejH                        Z- G d# d$ejH                        Z. G d% d&ejH                        Z/ G d' d(ejH                        Z0 G d) d*ejH                        Z1e G d+ d,e             Z2e G d- d.e2             Z3e G d/ d0e2             Z4 ed12       G d3 d4e2             Z5 ed52       G d6 d7e2             Z6g d8Z7y)9zPyTorch Bros model.    N)	dataclass)ListOptionalTupleUnion)nn)CrossEntropyLoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringlogging   )
BrosConfigc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)BrosSpadeOutputa  
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided) :
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r        x/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/bros/modeling_bros.pyr   r   )   s~    . )-D(5$$
%,8<(5#4#45<;?Xe&7&78?8<M8E%"3"345<59Ju00129r'   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding1Dc                     t         t        |           |j                  | _        ddt	        j
                  d| j                  d      | j                  z  z  z  }| j                  d|       y )Nr   i'          g       @inv_freq)superr*   __init__dim_bbox_sinusoid_emb_1dr#   arangeregister_buffer)selfconfigr-   	__class__s      r(   r/   z"BrosPositionalEmbedding1D.__init__L   sa    '79(.(G(G%ell3(E(EsKdNkNkkl
 	Z2r'   pos_seqreturnc                    |j                         }|\  }}}|j                  |||d      | j                  j                  ddd| j                  dz        z  }t	        j
                  |j                         |j                         gd      }|S )Nr      dim)sizeviewr-   r0   r#   catsincos)r3   r6   seq_sizeb1b2b3sinusoid_inppos_embs           r(   forwardz!BrosPositionalEmbedding1D.forwardV   s    <<>
B||BB2T]]5G5G1aQUQnQnrsQs5tt))\--/1A1A1CD"Mr'   r   r    r!   r/   r#   TensorrH   __classcell__r5   s   @r(   r*   r*   I   s#    3u||  r'   r*   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding2Dc                     t         t        |           |j                  | _        t	        |      | _        t	        |      | _        y N)r.   rN   r/   dim_bboxr*   	x_pos_emb	y_pos_embr3   r4   r5   s     r(   r/   z"BrosPositionalEmbedding2D.__init___   s4    '7926:26:r'   bboxr7   c                    g }t        | j                        D ]U  }|dz  dk(  r&|j                  | j                  |d|f                1|j                  | j	                  |d|f                W t        j                  |d      }|S )Nr9   r   .r:   r;   )rangerQ   appendrR   rS   r#   r?   )r3   rU   stackibbox_pos_embs        r(   rH   z!BrosPositionalEmbedding2D.forwardf   s|    t}}% 	;A1uzT^^DaL9:T^^DaL9:		;
 yyB/r'   rI   rL   s   @r(   rN   rN   ^   s#    ;ELL U\\ r'   rN   c                   >     e Zd Z fdZdej
                  fdZ xZS )BrosBboxEmbeddingsc                     t         t        |           t        |      | _        t        j                  |j                  |j                  d      | _	        y )NF)bias)
r.   r]   r/   rN   bbox_sinusoid_embr   Lineardim_bbox_sinusoid_emb_2ddim_bbox_projectionbbox_projectionrT   s     r(   r/   zBrosBboxEmbeddings.__init__r   s@     $02!:6!B!yy)H)H&JdJdkpqr'   rU   c                     |j                  dd      }|d d d d d d d f   |d d d d d d d f   z
  }| j                  |      }| j                  |      }|S )Nr   r   )	transposer`   rd   )r3   rU   bbox_tbbox_posr[   s        r(   rH   zBrosBboxEmbeddings.forwardw   s\    1%$1a-(6!T1a-+@@--h7++L9r'   rI   rL   s   @r(   r]   r]   q   s    r
ELL r'   r]   c                        e Zd ZdZ fdZ	 	 	 	 	 d
deej                     deej                     deej                     deej                     dedej                  fd	Z	 xZ
S )BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 d   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d             | j'                  dt)        j.                  | j0                  j3                         t(        j4                  | j0                  j6                        d	
       y )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   r:   token_type_idsdtypedeviceF)
persistent)r.   r/   r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrro   r2   r#   r1   expandzerosrq   r=   longru   rT   s     r(   r/   zBrosTextEmbeddings.__init__   s8   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$^U\\&:X:X-Y-`-`ah-ijKK!!&&(jj((//
  	 	
r'   	input_idsrr   rq   inputs_embedspast_key_values_lengthr7   c                 Z   ||j                         }n|j                         d d }|d   }|| j                  d d |||z   f   }|st        | d      r-| j                  d d d |f   }|j	                  |d   |      }	|	}n:t        j                  |t
        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j                  dk(  r| j                  |      }||z  }| j                  |      }| j                  |      }|S )Nr:   r   rr   r   rs   rp   )r=   rq   hasattrrr   r   r#   r   r   ru   r{   r   ro   r}   r   r   )r3   r   rr   rq   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr}   s                r(   rH   zBrosTextEmbeddings.forward   sF     #..*K',,.s3K ^
,,Q0FVlIl0l-lmL!t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r'   )NNNNr   )r   r    r!   r"   r/   r   r#   rJ   intrH   rK   rL   s   @r(   rj   rj      s    Q
4 -115/304&'$ELL)$ !.$ u||,	$
  -$ !$$ 
$r'   rj   c                   b    e Zd Z fdZdej
                  fdZ	 	 	 	 	 	 ddej
                  dej
                  deej
                     deej
                     deej
                     d	eej
                     d
eeeej                           deej
                     deej
                     fdZ
 xZS )BrosSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()ro   rp   relative_keyrelative_key_queryr9   r   )r.   r/   ry   num_attention_headsr   
ValueErrorr   attention_head_sizeall_head_sizer   ra   querykeyvaluer   attention_probs_dropout_probr   r   ro   r|   rw   distance_embedding
is_decoderrT   s     r(   r/   zBrosSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'.v7PR\']$''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++r'   xc                     |j                         d d | j                  | j                  fz   } |j                  | }|j	                  dddd      S )Nr:   r   r9   r   r
   )r=   r   r   r>   permute)r3   r   new_x_shapes      r(   transpose_for_scoresz&BrosSelfAttention.transpose_for_scores   sV    ffhsm$$$$'
 
 AFFK yyAq!$$r'   r   r[   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr7   c	                    | j                  |      }	|d u}
|
r||d   }|d   }|}n |
rC| j                  | j                  |            }| j                  | j                  |            }|}n|y| j                  | j                  |            }| j                  | j                  |            }t	        j
                  |d   |gd      }t	        j
                  |d   |gd      }n@| j                  | j                  |            }| j                  | j                  |            }| j                  |	      }| j                  r||f}t	        j                  ||j                  dd            }| j                  dk(  s| j                  dk(  rF|j                         d   }t	        j                  |t        j                  |j                  	      j                  dd      }t	        j                  |t        j                  |j                  	      j                  dd      }||z
  }| j                  || j                   z   dz
        }|j#                  |j$                  
      }| j                  dk(  rt	        j&                  d||      }||z   }nE| j                  dk(  r6t	        j&                  d||      }t	        j&                  d||      }||z   |z   }|j(                  \  }}}}|j                  ||||      }|j+                  g d      }t	        j&                  d||f      }||z   }|t-        j.                  | j0                        z  }|||z   } t3        j4                  d      |      }| j7                  |      }|||z  }t	        j                  ||      }|j+                  dddd      j9                         }|j                         d d | j:                  fz   } |j                  | }|r||fn|f}| j                  r||fz   }|S )Nr   r   r9   r;   r:   r   r   rs   )rt   zbhld,lrd->bhlrzbhrd,lrd->bhlr)r9   r   r   r
   zbnid,bijd->bnijr
   )r   r   r   r   r#   r?   r   matmulrf   ro   r=   r1   r   ru   r>   r   r|   tort   einsumshaper   mathsqrtr   r   Softmaxr   
contiguousr   )r3   r   r[   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scoresr   position_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_key
batch_sizen_headd_headbbox_pos_scoresattention_probscontext_layernew_context_layer_shapeoutputss                                  r(   rH   zBrosSelfAttention.forward   s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB?? (5N !<<Y5H5HR5PQ''>9T=Y=Y]q=q&++-a0J"\\*EJJ}OcOcdiijlnopN"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s  2=1B1B.
FJ#((ZVT#++L9,,'8;:UV+o=+dii8P8P.QQ%/.@ -"**,-=> ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CD6G=/2mM]?? 11Gr'   NNNNNF)r   r    r!   r/   r#   rJ   r   r   r   r$   rH   rK   rL   s   @r(   r   r      s    ,0%ell % 26,08<9=DH49f||f llf !.	f
 ELL)f  (5f !) 6f !uU->->'?!@Af $ELL1f 
u||	fr'   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BrosSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nrm   )r.   r/   r   ra   ry   denser   r   r   r   r   rT   s     r(   r/   zBrosSelfOutput.__init__N  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r'   r   input_tensorr7   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rP   r   r   r   r3   r   r   s      r(   rH   zBrosSelfOutput.forwardT  7    

=1]3}|'CDr'   rI   rL   s   @r(   r   r   M  1    >U\\  RWR^R^ r'   r   c                   4    e Zd Z fdZd Z	 	 	 	 	 	 ddej                  dej                  deej                     deej                     deej                     deej                     d	eeeej                           d
ee
   deej                     fdZ xZS )BrosAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y rP   )r.   r/   r   r3   r   outputsetpruned_headsrT   s     r(   r/   zBrosAttention.__init__\  s0    %f-	$V,Er'   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r;   )lenr   r3   r   r   r   r   r   r   r   r   r   r   union)r3   headsindexs      r(   prune_headszBrosAttention.prune_headsb  s   u:?7II))II))	
u -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r'   r   r[   r   r   r   r   r   r   r7   c	           
      t    | j                  ||||||||      }	| j                  |	d   |      }
|
f|	dd  z   }|S )Nr   r[   r   r   r   r   r   r   r   r   )r3   r   )r3   r   r[   r   r   r   r   r   r   self_outputsattention_outputr   s               r(   rH   zBrosAttention.forwardw  s_     yy'%)"7#9)/ ! 	
  ;;|AF#%QR(88r'   r   )r   r    r!   r/   r   r#   rJ   r   r   r$   boolrH   rK   rL   s   @r(   r   r   [  s    ";2 26,08<9=DH,1|| ll !.	
 ELL)  (5 !) 6 !uU->->'?!@A $D> 
u||	r'   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rP   )r.   r/   r   ra   ry   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrT   s     r(   r/   zBrosIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r'   r   r7   c                 J    | j                  |      }| j                  |      }|S rP   )r   r   )r3   r   s     r(   rH   zBrosIntermediate.forward  s&    

=100?r'   rI   rL   s   @r(   r   r     s#    9U\\ ell r'   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
BrosOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r.   r/   r   ra   r   ry   r   r   r   r   r   r   rT   s     r(   r/   zBrosOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r'   r   r   r7   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rP   r   r   s      r(   rH   zBrosOutput.forward  r   r'   rI   rL   s   @r(   r   r     r   r'   r   c                   4    e Zd Z fdZ	 	 	 	 	 	 ddej
                  dej
                  deej                     deej                     deej                     deej                     deeeej                           d	ee	   d
eej
                     fdZ
d Z xZS )	BrosLayerc                 b   t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r*| j                  st        |  d      t	        |      | _	        t        |      | _        t        |      | _        y )Nr   z> should be used as a decoder model if cross attention is added)r.   r/   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rT   s     r(   r/   zBrosLayer.__init__  s    '-'E'E$&v. ++#)#=#= ##??4&(f ghh"/"7D,V4 (r'   r   r[   r   r   r   r   r   r   r7   c	           	         ||d d nd }	| j                  ||||||	      }
|
d   }| j                  r|
dd }|
d   }n|
dd  }d }| j                  rT|Rt        | d      rt        d|  d      ||d	d  nd }| j	                  |||||||      }|d   }||dd z   }|d   }|z   }t        | j                  | j                  | j                  |      }|f|z   }| j                  r|fz   }|S )
Nr9   )r[   r   r   r   r   r   r   r:   r  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r   r   r   r  r   feed_forward_chunkr   r   )r3   r   r[   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr   r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                     r(   rH   zBrosLayer.forward  s    :H9S>"1#5Y] !%%)/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@t-.=dV  Dd  e 
 @N?Yrs(;_c%&*&9&9 %&)!'#  7q9 7" ==G ,C2+F( 14P P0##((	
  /G+ ??!2 44Gr'   c                 L    | j                  |      }| j                  ||      }|S rP   )r  r   )r3   r   intermediate_outputr  s       r(   r  zBrosLayer.feed_forward_chunk  s,    "//0@A{{#68HIr'   r   )r   r    r!   r/   r#   rJ   r   r$   r   r   rH   r  rK   rL   s   @r(   r   r     s    )$ 7;15=A>BDH,1C||C llC !!2!23	C
 E--.C  ((9(9:C !)):): ;C !uU->->'?!@AC $D>C 
u||	CJr'   r   c                   \    e Zd Z fdZ	 	 	 	 	 	 	 	 	 ddej
                  dej
                  deej                     deej                     deej                     deej                     deeeej                           d	ee	   d
ee	   dee	   dee	   de
eej
                     ef   fdZ xZS )BrosEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        y c c}w rP   )	r.   r/   r4   r   
ModuleListrW   num_hidden_layersr   layer)r3   r4   _r5   s      r(   r/   zBrosEncoder.__init__
  sC    ]]uVE]E]?^#_!If$5#_`
#_s   Ar   r[   r   r   r   r   past_key_values	use_cacher   output_hidden_statesreturn_dictr7   c                    |
rdnd }|	rdnd }|	r| j                   j                  rdnd }|rdnd }t        | j                        D ]  \  }}|
r||fz   }|||   nd }|||   nd }t	        | j                   dd      rH| j
                  r<|rt        j                  d       d}| j                  |j                  |||||||	      }n |||||||||	      }|d   }|r	||d   fz  }|	s||d   fz   }| j                   j                  s||d	   fz   } |
r||fz   }|st        d
 |||||fD              S t        |||||      S )Nr&   gradient_checkpointingFzh`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting `use_cache=False`...r   r   r:   r   r9   c              3   $   K   | ]  }|| 
 y wrP   r&   ).0vs     r(   	<genexpr>z&BrosEncoder.forward.<locals>.<genexpr>R  s      
 = 
s   )last_hidden_stater  r   r   cross_attentions)r4   r   	enumerater  r   trainingloggerwarning_gradient_checkpointing_func__call__tupler   )r3   r   r[   r   r   r   r   r  r  r   r  r  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherZ   layer_modulelayer_head_maskr   layer_outputss                        r(   rH   zBrosEncoder.forward  s    #7BD$5b4%64;;;Z;Zr`d#,R$(4 *	VOA|#$58H$H!.7.CilO3B3N_Q/TXNt{{$<eDNN/ !&I $ A A ))! "#)*%	! !-"/!-#1-*?+A#1&7	! *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(U*	VX   1]4D D 
 "&%'(
 
 
 9+.+*1
 	
r'   )	NNNNNNFFT)r   r    r!   r/   r#   rJ   r   r$   r   r   r   r   rH   rK   rL   s   @r(   r  r  	  s   a 7;15=A>BEI$(,1/4&*T
||T
 llT
 !!2!23	T

 E--.T
  ((9(9:T
 !)):): ;T
 "%e.?.?(@"ABT
 D>T
 $D>T
 'tnT
 d^T
 
uU\\"$MM	NT
r'   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
BrosPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rP   )r.   r/   r   ra   ry   r   Tanh
activationrT   s     r(   r/   zBrosPooler.__init__h  s9    YYv1163E3EF
'')r'   r   r7   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r3  )r3   r   first_token_tensorpooled_outputs       r(   rH   zBrosPooler.forwardm  s6     +1a40

#566r'   rI   rL   s   @r(   r0  r0  g  s#    $
U\\ ell r'   r0  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosRelationExtractorc                 R   t         |           |j                  | _        |j                  | _        |j                  | _        |j                  | _        t        j                  | j                        | _	        t        j                  | j                  | j                  | j
                  z        | _        t        j                  | j                  | j                  | j
                  z        | _        t        j                  t        j                  d| j                              | _        y )Nr   )r.   r/   n_relationsry   backbone_hidden_sizehead_hidden_sizeclassifier_dropout_probr   r   dropra   r   r   	Parameterr#   r   
dummy_noderT   s     r(   r/   zBrosRelationExtractor.__init__w  s    !--$*$6$6! & 2 2'-'E'E$JJt;;<	YYt88$:J:JTMbMb:bc
99T668H8H4K`K`8`a,,u{{1d6O6O'PQr'   r   r   c           	         | j                  | j                  |            }| j                  j                  d      j	                  d|j                  d      d      }t        j                  ||gd      }| j                  | j                  |            }|j                  |j                  d      |j                  d      | j                  | j                        }|j                  |j                  d      |j                  d      | j                  | j                        }t        j                  |j                  dddd      |j                  dddd            }|S )Nr   r   axisr9   r
   )r   r>  r@  	unsqueezerepeatr=   r#   r?   r   r>   r:  r<  r   r   )r3   r   r   	dummy_vecrelation_scores        r(   rH   zBrosRelationExtractor.forward  s   jj;!78OO--a0779>>!;LaP	IIy)41=	HHTYYy12	!&&Q!1!1!!4d6F6FH]H]
 NN9>>!#4innQ6GIYIY[_[p[pq	1a+Y->->q!Q-J
 r'   rI   rL   s   @r(   r8  r8  v  s$    R5<< ELL r'   r8  c                       e Zd ZeZdZd Zy)BrosPreTrainedModelbrosc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsr,   )meanstdNg      ?)r   r   ra   weightdatanormal_r4   initializer_ranger_   zero_rw   rl   r   fill_)r3   modules     r(   _init_weightsz!BrosPreTrainedModel._init_weights  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .r'   N)r   r    r!   r   config_classbase_model_prefixrU  r&   r'   r(   rI  rI    s    L*r'   rI  c            "           e Zd Zd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee	j                     dee	j                     dee	j                     deee	j                        dee   dee   dee   dee   deee	j                     ef   fd       Z xZS )	BrosModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        |rt        |      nd| _
        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r.   r/   r4   rj   r   r]   bbox_embeddingsr  encoderr0  poolerinit_weights)r3   r4   add_pooling_layerr5   s      r(   r/   zBrosModel.__init__  sZ    
 	 ,V41&9"6*,=j(4r'   c                 .    | j                   j                  S rP   r   r{   )r3   s    r(   get_input_embeddingszBrosModel.get_input_embeddings  s    ...r'   c                 &    || j                   _        y rP   ra  )r3   r   s     r(   set_input_embeddingszBrosModel.set_input_embeddings  s    */'r'   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr\  r  r   r   )r3   heads_to_pruner  r   s       r(   _prune_headszBrosModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr'   r   rU   r   rr   rq   r   r   r   r   r  r  r   r  r  r7   c                     ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j                   j                  r||n| j                   j
                  }nd}||t        d      ||j                         }n!||j                         dd }nt        d      |t        d      |\  }}||j                  n|j                  }|
|
d   d   j                  d   nd}|t        j                  ||	      }|pt        | j                  d
      r4| j                  j                  ddd|f   }|j                  ||      }|}n&t        j                   |t        j"                  |      }| j%                  |||      }| j                   j                  rE|C|j                         \  }}}||f}|	t        j                  ||	      }	| j'                  |	      }nd}| j)                  || j                   j*                        }| j                  |||||      }|j                  d   dk(  r|ddddg df   }|| j                   j,                  z  }| j/                  |      }| j1                  |||||||
||||      }|d   } | j2                  | j3                  |       nd}!|s
| |!f|dd z   S t5        | |!|j6                  |j8                  |j:                  |j<                        S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NFzDYou cannot specify both input_ids and inputs_embeds at the same timer:   z5You have to specify either input_ids or inputs_embedszYou have to specify bboxr   r9   )ru   rr   rs   )r   rq   rr   r   r      )r   r   r9   r   r9   r
   r   r
   )
r[   r   r   r   r   r  r  r   r  r  r   )r  pooler_outputr  r   r   r   )r4   r   r  use_return_dictr   r  r   r=   ru   r   r#   onesr   r   rr   r   r   r   get_extended_attention_maskinvert_attention_maskget_head_maskr  
bbox_scaler[  r\  r]  r   r  r   r   r   )"r3   r   rU   r   rr   rq   r   r   r   r   r  r  r   r  r  r   r   r   ru   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputscaled_bboxbbox_position_embeddingsencoder_outputssequence_outputr6  s"                                     r(   rH   zBrosModel.forward  sV   R 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B];;!!%.%:	@U@UII ]%>cdd"#..*K&',,.s3KTUU<788!,
J%.%:!!@T@T DSC^!3A!6!<!<Q!?de!"ZZFCN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_alnt0u ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y$++2O2OP	??%)'#9 + 
 ::b>Q1667DT[[333#'#7#7#D ,,12"7#B+/!5# ' 
 *!,8<8OO4UY#]3oab6III;-'+;;)77&11,==
 	
r'   )T)NNNNNNNNNNNNNN)r   r    r!   r/   rb  rd  rh  r   r   r#   rJ   r   r$   r   r   r   r   rH   rK   rL   s   @r(   rY  rY    s    /0C  -1'+1515/3,0048<9==A$(,0/3&*N
ELL)N
 u||$N
 !.	N

 !.N
 u||,N
 ELL)N
  -N
  (5N
 !) 6N
 "$u'8'8"9:N
 D>N
 $D>N
 'tnN
 d^N
  
uU\\"$PP	Q!N
 N
r'   rY  c                       e Zd ZdgZ fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd       Z xZS )BrosForTokenClassificationr]  c                 `   t         |   |       |j                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y Nclassifier_dropout)r.   r/   
num_labelsrY  rJ  r   r  r   r   r   r   ra   ry   
classifierr^  r3   r4   r  r5   s      r(   r/   z#BrosForTokenClassification.__init__d  s      ++f%	)09M)NF%%TZTnTn 	 zz"45))F$6$68I8IJr'   r   rU   r   bbox_first_token_maskrr   rq   r   r   labelsr   r  r  r7   c                 B   ||n| j                   j                  }| j                  ||||||||
||
      }|d   }| j                  |      }| j	                  |      }d}|	t               }|J|j                  d      } ||j                  d| j                        |   |	j                  d      |         }n2 ||j                  d| j                        |	j                  d            }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N)	rU   r   rr   rq   r   r   r   r  r  r   r:   r9   r   logitsr   r   )r4   rl  rJ  r   r  r	   r>   r  r   r   r   )r3   r   rU   r   r  rr   rq   r   r   r  r   r  r  r   r{  r  r   loss_fctr   s                      r(   rH   z"BrosForTokenClassification.forwardq  sF   X &1%<k$++B]B]))))%'/!5#  
 "!*,,71')H$0(=(B(B2(F%KKDOO45JKV[[Y[_]rMs  B @&++b/RY,F)-)9TGf$EvE$!//))	
 	
r'   NNNNNNNNNNNNr   r    r!   "_keys_to_ignore_on_load_unexpectedr/   r   r   r#   rJ   r   r   r   r   rH   rK   rL   s   @r(   r}  r}  `  sJ   *3&  -1'+158<15/3,004)-,0/3&*S
ELL)S
 u||$S
 !.	S

  (5S
 !.S
 u||,S
 ELL)S
  -S
 &S
 $D>S
 'tnS
 d^S
 
uU\\"$99	:S
 S
r'   r}  a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    )custom_introc                        e Zd ZdgZ fdZe	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd       Z xZS )!BrosSpadeEEForTokenClassificationr]  c           	      f   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  t        j                  |      t        j                  |j
                  |j
                        t        j                  |      t        j                  |j
                  |j                              | _        t#        |      | _        | j'                          y r  )r.   r/   r4   r  r:  ry   r;  rY  rJ  r   r  r   r   
Sequentialr   ra   initial_token_classifierr8  subsequent_token_classifierr^  r  s      r(   r/   z*BrosSpadeEEForTokenClassification.__init__  s      ++!--$*$6$6!f%	)09M)NF%%TZTnTn 	
 )+JJ)*IIf((&*<*<=JJ)*IIf((&*;*;<	)
% ,A+H(r'   r   rU   r   r  rr   rq   r   r   initial_token_labelssubsequent_token_labelsr   r  r  r7   c                    ||n| j                   j                  }| j                  ||||||||||
      }|d   }|j                  dd      j	                         }| j                  |      j                  dd      j	                         }| j                  ||      j                  d      }d|z
  }|j                  \  }}|j                  }t        j                  |t        j                  |dg      j                  |      gd      j                         }|j                  |dddddf   t        j                   |j"                        j$                        }t        j&                  ||dz         j                  |t        j                        }|j                  |dddddf   t        j                   |j"                        j$                        }|j)                  d      j                         }d}|	|
t+               }|	j)                  d      }	|;|j)                  d      } ||j)                  d| j,                        |   |	|         }n# ||j)                  d| j,                        |	      }|
j)                  d      }
 ||j)                  d|dz         |   |
|         }||z   }|s||f|dd z   }||f|z   S |S t/        ||||j0                  |j2                  	      S )
a>  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        initial_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the initial token classification.
        subsequent_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the subsequent token classification.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N
r   rU   r   rr   rq   r   r   r   r  r  r   r   rB  ru   rt   r:   r9   )r   r   r   r   r   )r4   rl  rJ  rf   r   r  r  squeezer   ru   r#   r?   r   r   r   masked_fillfinfort   mineyer>   r	   r  r   r   r   )r3   r   rU   r   r  rr   rq   r   r   r  r  r   r  r  r   last_hidden_statesr   r   inv_attention_maskr   max_seq_lengthru   invalid_token_maskself_token_masksubsequent_token_maskr   r  initial_token_losssubsequent_token_lossr   s                                 r(   rH   z)BrosSpadeEEForTokenClassification.forward  s   b &1%<k$++B]B]))))%'/!5#  
 %QZ/99!Q?JJL#<<=OPZZ[\^_`kkm"&"B"BCUWi"j"r"rst"u /%7%=%="
N#**"YY(:EKKUV<X<[<[\b<c'dklmrrt"9"E"Eq$z*EKK8O8U8U,V,Z,Z#
  ))NNQ4FGJJRX`e`j`jJk"9"E"ED!QJ'5L5R5R)S)W)W#
 !/ 3 3B 7 < < >+0G0S')H $8#<#<R#@ $0(=(B(B2(F%%-(--b$//BCXY()>?&"
 &..B.G.GDOO.\^r%s"&=&B&B2&F#$,',,R!1CDEZ['(=>%!
 &(==D*,CDwqr{RF)-)9TGf$EvE!5$;!//))
 	
r'   )NNNNNNNNNNNNN)r   r    r!   r  r/   r   r   r#   rJ   r   r   r   r   rH   rK   rL   s   @r(   r  r    sb    +4&2  -1'+158<15/3,0047;:>,0/3&*s
ELL)s
 u||$s
 !.	s

  (5s
 !.s
 u||,s
 ELL)s
  -s
 'u||4s
 "*%,,!7s
 $D>s
 'tns
 d^s
 
uU\\"O3	4s
 s
r'   r  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       e Zd ZdgZ fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd       Z xZS )!BrosSpadeELForTokenClassificationr]  c                 @   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                   t        |      | _        | j                          y r  )r.   r/   r4   r  r:  ry   r;  rY  rJ  r   r  r   r8  entity_linkerr^  rT   s     r(   r/   z*BrosSpadeELForTokenClassification.__init__m  s      ++!--$*$6$6!f%	&-f6J&K	"	"QWQkQk26:r'   r   rU   r   r  rr   rq   r   r   r  r   r  r  r7   c                 <   ||n| j                   j                  }| j                  ||||||||
||
      }|d   }|j                  dd      j	                         }| j                  ||      j                  d      }d}|	et               }|j                  \  }}|j                  }t        j                  ||dz         j                  |t        j                        }|j                  d      }t        j                  | t        j                   |dgt        j                  |      gd      }|j#                  |dddddf   t        j$                  |j&                        j(                        }|j#                  |dddddf   t        j$                  |j&                        j(                        } ||j                  d|dz         |   |	j                  d      |         }|s|f|d	d z   }||f|z   S |S t+        |||j,                  |j.                  
      S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```Nr  r   r   r  r:   rs   rB  r9   r  )r4   rl  rJ  rf   r   r  r  r	   r   ru   r#   r  r   r   r>   r?   r   r  r  rt   r  r   r   r   )r3   r   rU   r   r  rr   rq   r   r   r  r   r  r  r   r  r  r   r  r   r  ru   r  maskr   s                           r(   rH   z)BrosSpadeELForTokenClassification.forward{  s    V &1%<k$++B]B]))))%'/!5#  
 %QZ/99!Q?JJL##$68JKSSTUV')H)7)=)=&J#**F#ii8JKNNV\didndnNoO(--b1D$)II**KKQuzz&Q %! ''(=aqj(I5;;W]WcWcKdKhKhiF''a
(CU[[QWQ]Q]E^EbEbcFFKKNQ,>?Ev{{SUW[G\]DY,F)-)9TGf$EvE$!//))	
 	
r'   r  r  rL   s   @r(   r  r  d  sL    +4&  -1'+158<15/3,004)-,0/3&*]
ELL)]
 u||$]
 !.	]

  (5]
 !.]
 u||,]
 ELL)]
  -]
 &]
 $D>]
 'tn]
 d^]
 
uU\\"$99	:]
 ]
r'   r  )rI  rY  r}  r  r  )8r"   r   dataclassesr   typingr   r   r   r   r#   torch.utils.checkpointr   torch.nnr	   activationsr   modeling_outputsr   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   configuration_brosr   
get_loggerr   r#  r   Moduler*   rN   r]   rj   r   r   r   r   r   r   r  r0  r8  rI  rY  r}  r  r  __all__r&   r'   r(   <module>r     s     ! / /    % ! 
 . l l 9 9 * 
		H	% :k : :>		 *		 & ? ?DG		 GVRYY 3BII 3nryy  W		 WtZ
")) Z
| BII D */ * ** n
# n
 n
b d
!4 d
 d
N P
(; P
P
f o
(; o
o
dr'   