
    Uhs`                    `   d Z ddlZddlmZ ddlmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZ ddlmZ  ej4                  e      Z G d de	j:                        Z G d de	j:                        Z	 	 	 d\dej@                  de!dee"   de#de$f
dZ%	 	 d]dej@                  dee"e$f   dee"   de$fdZ& G d de	j:                        Z' G d de	j:                        Z( G d de	j:                        Z)e G d  d!e             Z* G d" d#e	j:                        Z+ G d$ d%e	j:                        Z, G d& d'e*      Z-e G d( d)e             Z.e G d* d+e             Z/e G d, d-e             Z0e G d. d/e             Z1e G d0 d1e             Z2e G d2 d3e             Z3d4ejh                  jj                  d5ej@                  d6ej@                  fd7Z6d^d8ej@                  d9eej@                     d6ej@                  fd:Z7 G d; d<e	j:                        Z8 G d= d>e	j:                        Z9 G d? d@e	j:                        Z: G dA dBe	j:                        Z;e G dC dDe*             Z< G dE dFe	j:                        Z= edGH       G dI dJe*             Z> G dK dLe	j:                        Z? edMH       G dN dOe*             Z@ edPH       G dQ dRe	j:                               ZA edSH       G dT dUe*             ZB G dV dWe	j:                        ZC edXH       G dY dZe*             ZDg d[ZEy)_zPyTorch PatchTST model.    N)	dataclass)OptionalTupleUnion)nn   )ACT2CLS)BaseModelOutput)PreTrainedModel)NegativeBinomialOutputNormalOutputStudentTOutput)ModelOutputauto_docstringlogging   )PatchTSTConfigc                       e Zd ZdZ	 	 	 	 	 ddededededededee   f fd	Z	d
e
j                  dedefdZ	 	 	 	 	 dde
j                  dee
j                     deee
j                        dee
j                     dee
j                     dedee
j                  ee
j                     eee
j                        f   fdZ xZS )PatchTSTAttentionz=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropout
is_decoderbias	is_causalconfigc                 
   t         |           || _        || _        || _        ||z  | _        || _        | j
                  |z  | j                  k7  rt        d| j                   d| d      | j
                  dz  | _        || _	        || _
        t        j                  |||      | _        t        j                  |||      | _        t        j                  |||      | _        t        j                  |||      | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ).g      ࿩r   )super__init__r   r   r   head_dimr   
ValueErrorscalingr   r   r   Lineark_projv_projq_projout_proj)	selfr   r   r   r   r   r   r   	__class__s	           /var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/transformers/models/patchtst/modeling_patchtst.pyr!   zPatchTSTAttention.__init__'   s     	""!Y.MMI%$..8MdnnM]$YKr3  }}d*$"ii	94@ii	94@ii	94@		)YTB    tensorseq_lenbszc                     |j                  ||| j                  | j                        j                  dd      j	                         S )Nr      )viewr   r"   	transpose
contiguous)r*   r.   r/   r0   s       r,   _shapezPatchTSTAttention._shapeF   s7    {{3GQQRSUVWbbddr-   hidden_stateskey_value_statespast_key_valueattention_masklayer_head_maskoutput_attentionsreturnc                 
   |du}|j                         \  }}	}
| j                  |      | j                  z  }|r0|.|d   j                  d   |j                  d   k(  r|d   }|d   }n
|rE| j	                  | j                  |      d|      }| j	                  | j                  |      d|      }n|}| j	                  | j                  |      d|      }| j	                  | j                  |      d|      }t        j                  |d   |gd      }t        j                  |d   |gd      }nD| j	                  | j                  |      d|      }| j	                  | j                  |      d|      }| j                  r||f}|| j                  z  d| j                  f} | j	                  ||	|      j                  | } |j                  | } |j                  | }|j                  d      }t        j                  ||j                  dd            }|j                         || j                  z  |	|fk7  r/t!        d|| j                  z  |	|f d|j                                |{|j                         |d|	|fk7  r#t!        d	|d|	|f d|j                                |j                  || j                  |	|      |z   }|j                  || j                  z  |	|      }t"        j$                  j'                  |d      }||j                         | j                  fk7  r*t!        d
| j                  f d|j                                |j                  dddd      |j                  || j                  |	|      z  }|j                  || j                  z  |	|      }|r?|j                  || j                  |	|      }|j                  || j                  z  |	|      }nd}t"        j$                  j)                  || j(                  | j*                        }t        j                  ||      }|j                         || j                  z  |	| j                  fk7  r9t!        d|| j                  z  |	| j                  f d|j                                |j                  || j                  |	| j                        }|j                  dd      }|j                  ||	| j,                        }| j/                  |      }|||fS )z#Input shape: Batch x Time x ChannelNr   r2   r   dimz$Attention weights should be of size z	, but is z!Attention mask should be of size z/Head mask for a single layer should be of size )ptrainingz `attn_output` should be of size )sizer(   r$   shaper6   r&   r'   torchcatr   r   r"   r3   reshapebmmr4   r#   r   
functionalsoftmaxr   rC   r   r)   )r*   r7   r8   r9   r:   r;   r<   is_cross_attentionr0   tgt_len_query_states
key_statesvalue_states
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                       r,   forwardzPatchTSTAttention.forwardI   s    .T9',,.Wa {{=1DLL@ *q!''*.>.D.DQ.GG (*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BJJ 99nQ&7%FANL T[[%?SIJ;;t{{='A2sKL?? ),7NDNN*B>
Ct{{<#>CCZP'Z''4
+|++Z8//!$yyz/C/CAq/IJ3#7'"JJ6dnn8LgW^7_6` a %%'(* 
 %""$a'(BB 7a'8R7SS\]k]p]p]r\st  (,,S$..'7SVddL',,S4>>-A7GTL}},,\r,B&##%$..):: Et~~FWEX Y',,./1  +//2q!<|?P?PQTVZVdVdfmov?wwL',,S4>>-A7GTL
 %1$5$5c4>>7T[$\!055cDNN6JGU\]L$(!]]**<4<<RVR_R_*`
ii
L9#"6!OO2C$..4H'SWS`S`3a2b c$$&') 
 "&&sDNNGT]]S!++Aq1 "))#wGmmK01>AAr-   )        FTFN)NNNNF)__name__
__module____qualname____doc__intfloatboolr   r   r!   rF   Tensorr6   r   rX   __classcell__r+   s   @r,   r   r   $   sM   G  +/CC C 	C
 C C C (C>eU\\ eC ec e 488<1526"'vB||vB #5<<0vB !u||!45	vB
 !.vB "%,,/vB  vB 
u||Xell3XeELL>Q5RR	SvBr-   r   c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTBatchNormzP
    Compute batch normalization over the sequence length (time) dimension.
    r   c                     t         |           t        j                  |j                  |j
                        | _        y )Neps)r    r!   r   BatchNorm1dd_modelnorm_eps	batchnormr*   r   r+   s     r,   r!   zPatchTSTBatchNorm.__init__   s(    FOOLr-   inputsc                 l    |j                  dd      }| j                  |      }|j                  dd      S )a  
        Parameters:
            inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
                input for Batch norm calculation
        Returns:
            `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
        r   r2   )r4   rl   )r*   rn   outputs      r,   rX   zPatchTSTBatchNorm.forward   s7     !!!Q''1%%r-   
rZ   r[   r\   r]   r   r!   rF   ra   rX   rb   rc   s   @r,   re   re      s&    M~ M
&ell 
&r-   re   rn   
mask_ratiounmasked_channel_indiceschannel_consistent_masking
mask_valuec                    |dk  s|dk\  rt        d| d      | j                  \  }}}}| j                  }	t        |d|z
  z        }
|r-t	        j
                  |d||	      }|j                  d|d      }nt	        j
                  ||||	      }t	        j                  ||||	      }d|ddddd|
f<   t	        j                  |d      }t	        j                  |d      }t	        j                  |d|	      }|j                  d      j                  ddd|      }|d|dd|ddddf<   | j                  |j                         |      }||d
   fS )a  random_masking: Mask the input considering the control variables.

    Args:
        inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
            The input tensor to mask.
        mask_ratio (`float`):
            Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
        unmasked_channel_indices (list, *optional*):
            Indices of channels that will not be masked.
        channel_consistent_masking (bool, *optional*, defaults to `False`):
            When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
            across channels.
        mask_value (int, *optional*, defaults to 0):
            Define the value of masked patches for pretraining.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
        n]
    r   r   zMask ratio z has to be between 0 and 1.deviceNr?   r@   )rA   index.r   )r#   rE   rx   r^   rF   randrepeatonesargsortgather	unsqueezemasked_fillr`   )rn   rr   rs   rt   ru   
batch_sizenum_channelssequence_lengthnum_featuresrx   len_keepnoisemaskids_shuffleids_restoreinputs_masks                   r,   random_maskingr      sQ   4 A~q;zl2MNOO>Dll;Jo|]]F?a*n56H!

:q/&IQa0 

:|_VT ::j,ODDAyy --2.K--4K<<"K8D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$r-   num_forecast_mask_patchesc                 P   t        |t              r|g}|D cg c]  }d }}| j                  \  }}}}	t        j                  |||| j
                        }
g }d}t        |      }t        ||      D ]H  \  }}|dk  s||k\  rt        d| d      t        ||z  |z        }|j                  |||g       ||z  }J t        |d       }||k  r|d   d   ||z
  z   |d   d<   n||kD  r|d	   d   ||z
  z   |d	   d<   d}|D ]  \  }}}||z   }d|
||d
d
| d
f<   |} t        j                  |
j                  d         }|
|   }
|
j                  d	      j                  ddd|	      }
|d|
d
d
|d
d
d
d
f<   | j                  |
j                         |      }||
d   fS c c}w )a  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
    If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

    Parameters:
        inputs (`torch.Tensor`):
            Input of shape `(bs, num_channels, num_patch, patch_length)`
        num_forecast_mask_patches (`list`):
            Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
        unmasked_channel_indices (`list`, *optional*):
            Indices of channels that are not masked.
        mask_value (`int`, *optional*, defaults to 0):
            Values in the masked patches will be filled by `mask_value`.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
        num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
    r   rw   r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                     | d   S )Nr2    )xs    r,   <lambda>z"forecast_masking.<locals>.<lambda>>  s
    !A$ r-   )keyr2   r?   Nrz   )
isinstancer^   rE   rF   zerosrx   sumzipr#   appendsortedrandpermr   r|   r   r`   )rn   r   rs   ru   rN   forecast_mask_ratiosr   r   r   r   r   t_listtotal_lengthtotal_ratiopatch_lengthratiotemp_lenbatch1	patch_lenbatch2permr   s                         r,   forecast_maskingr     s   0 +S1%>$?!'@A!AAA>Dll;Jo|;;z<WDFL*+K"#<>RS !e1 ?,\N:pq  zE)K78|UH56 ! F/Fj ay|zL'@Aq	!	
	"r
1
)BCr
1F"( 	1h("./VF]A	z{*+
 >>$**Q-(D:D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$O Bs   	F#c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTPatchifyz
    A class to patchify the time series sequence into different patches

    Returns:
        `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
    r   c                    t         |           |j                  | _        |j                  | _        |j
                  | _        | j                  | j                  k  r&t        d| j                   d| j                   d      t        | j                  | j                        | j                  z
  | j
                  z  dz   | _        | j                  | j
                  | j                  dz
  z  z   }| j                  |z
  | _	        y )NzSequence length (z+) has to be greater than the patch length ()r   )
r    r!   context_lengthr   r   patch_strider#   maxnum_patchessequence_start)r*   r   new_sequence_lengthr+   s      r,   r!   zPatchTSTPatchify.__init__^  s    %44"//"//4#4#44#D$8$8#99deievevdwwxy 
   4 4d6G6GH4K\K\\aeararruvv"//$2C2CtGWGWZ[G[2\\"225HHr-   past_valuesc                 :   |j                   d   }|| j                  k7  rt        d| d| j                   d      |dd| j                  dddf   }|j	                  d| j
                  | j                        }|j                  dd      j                         }|S )a!  
        Parameters:
            past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
                Input for patchification

        Returns:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
        zInput sequence length (z%) doesn't match model configuration (r   N)	dimensionrD   step)	rE   r   r#   r   unfoldr   r   r4   r5   )r*   r   r   rp   s       r,   rX   zPatchTSTPatchify.forwardo  s     &++B/d222)/)::_`d`t`t_uuwx  Q 3 3 5q89$2C2C$J[J[\!!"b)446r-   rq   rc   s   @r,   r   r   V  s&    I~ I"5<< r-   r   c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTMaskinga  
    Class to perform random or forecast masking.

    Parameters:
        config (`PatchTSTConfig`): model config
    Returns:
        x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
            Masked patched input
        mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
            Bool tensor indicating True on masked points
    r   c                 <   t         |           |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        | j                  t        | j                        | _        y y N)	r    r!   random_mask_ratiort   	mask_typer   rs   ru   r   rm   s     r,   r!   zPatchTSTMasking.__init__  s    !'!9!9*0*K*K')))/)I)I&(.(G(G% ++((4,243P3P,QD) 5r-   patch_inputc                 r   | j                   dk(  r<t        || j                  | j                  | j                  | j
                        \  }}nY| j                   dk(  r1t        || j                  | j                  | j
                        \  }}nt        d| j                    d      |j                         }||fS )a  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input

        Return:
            masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
                Masked patched input
            mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
                Bool tensor indicating True on masked points

        random)rn   rr   rs   rt   ru   forecast)rn   r   rs   ru   zInvalid mask type .)
r   r   r   rs   rt   ru   r   r   r#   r`   )r*   r   masked_inputr   s       r,   rX   zPatchTSTMasking.forward  s     >>X%!/"11)-)F)F+/+J+J??"L$ ^^z)!1"*.*H*H)-)F)F??	"L$ 1$..1ACDD yy{T!!r-   rq   rc   s   @r,   r   r     s&    
	R~ 	R!"5<< !"r-   r   c                   T     e Zd ZdZdef fdZddej                  dee	   fdZ
 xZS )PatchTSTEncoderLayerz 
    PatchTST encoder layer
    r   c           
         t         |           |j                  | _        t        |j                  |j
                  |j                        | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      | j                  r|j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      t        j*                  t        j,                  |j                  |j.                  |j0                        t3        |j4                            |j6                  dkD  rt        j                  |j6                        nt        j                         t        j,                  |j.                  |j                  |j0                              | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      |j>                  | _        y )N)r   r   r   r   rl   	layernormrg   z$ is not a supported norm layer type.r   ) r    r!   channel_attentionr   rj   num_attention_headsattention_dropout	self_attnpath_dropoutr   DropoutIdentitydropout_path1	norm_typere   norm_sublayer1	LayerNormrk   r#   dropout_path2norm_sublayer2
Sequentialr%   ffn_dimr   r	   activation_function
ff_dropoutffdropout_path3norm_sublayer3pre_normrm   s     r,   r!   zPatchTSTEncoderLayer.__init__  s   !'!9!9*nn00,,
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWW !!DJDWDWZ[D[F,?,?!@acalalanD;.&7&?#!![0&(ll6>>v&W# F$4$4#55Y!Z[[ --IIfnnfnn6;;GF../1-3->->-BBJJv(()IIfnnfnn6;;G	
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWWr-   hidden_stater<   c                    |j                   \  }}}}|j                  ||z  ||      }| j                  r;| j                  | j	                  |      |      \  }}}	|| j                  |      z   }n:| j                  ||      \  }}}	| j	                  || j                  |      z         }|j                  ||||      }| j                  r|j                  dd      j                         }|j                  ||z  ||      }| j                  r;| j                  | j                  |      |      \  }}
}	|| j                  |      z   }n:| j                  ||      \  }}
}	| j                  || j                  |      z         }|j                  ||||      }|j                  dd      j                         }|j                  ||z  ||      }| j                  r3|| j                  | j                  | j                  |                  z   }n2| j                  || j                  | j                  |            z         }|j                  ||||      }|f}|r|| j                  r|
fn|fz  }|S )a  
        Parameters:
            hidden_state (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`, *required*):
                Past values of the time series
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
        Return:
            `torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`

        )r7   r<   r2   r   )rE   r3   r   r   r   r   rH   r   r4   r5   r   r   r   r   r   )r*   r   r<   r   num_input_channelsr   rj   rW   rT   rN   channel_attn_weightsoutputss               r,   rX   zPatchTSTEncoderLayer.forward  s    DPCUCU@
& $((6H)H/[bc==+/>>"11,?Sd ,: ,(Kq ($*<*<[*IIL ,0>>*>O ,: ,(Kq  ..|d>P>PQ\>]/]^L $++J8JO]de !!'11!Q7BBDL',,Z/-IK]_fgL}}7;~~"&"5"5l"CWh 8F 8411  ,d.@.@.MM 8<~~".BS 8F 8411  $22<$BTBTU`Ba3ab (//
OM_ahiL'11!Q7BBDL $((6H)H/[bc== ($*<*<TWWTEXEXYeEf=g*hhL  ..|d>P>PQUQXQXYeQf>g/ghL $++J8JO]de/t?U?U&:;\h[jjGr-   r   )rZ   r[   r\   r]   r   r!   rF   ra   r   r`   rX   rb   rc   s   @r,   r   r     s3    /(~ /(bQELL QXd^ Qr-   r   c                   *    e Zd ZeZdZdZdZd ZddZ	y)PatchTSTPreTrainedModelmodelr   Fc                    t        |t              r| j                  j                  r+t        j
                  j                  |j                  d       | j                  j                  dk(  r-t        j
                  j                  |j                  dd       yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t               r^|j"                  j                  j                  j                          |j"                  j                  j                  j                  d       yt        |t        j$                  t        j&                  f      rm|j                  j                  j                  d| j                  j(                         |j                  %|j                  j                  j                          yyy)	z$
        Initialize weights
        g{Gz?)stdr   rY   g?)meanr         ?N)r   PatchTSTPositionalEncodingr   use_cls_tokenr   initnormal_	cls_tokenpositional_encoding_typeposition_encr   r   datazero_weightfill_re   rl   r%   Conv1dinit_std)r*   modules     r,   _init_weightsz%PatchTSTPreTrainedModel._init_weightsS  sW    f89{{(( 0 0d;{{33x? 3 3#3G @-KK""$MM$$S) 12!!&&,,.##((..s3BII 67MM&&CT[[5I5I&J{{&  &&( ' 8r-   c                 4    t        |t              r||_        y y r   )r   PatchTSTEncodergradient_checkpointing)r*   r   values      r,   _set_gradient_checkpointingz3PatchTSTPreTrainedModel._set_gradient_checkpointingi  s    f0,1F) 1r-   N)F)
rZ   r[   r\   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointingr   r   r   r-   r,   r   r   L  s"    !L#O&+#),2r-   r   c                   D     e Zd Zdef fdZdej                  fdZ xZS )PatchTSTEmbeddingr   c                    t         |           |j                  | _        |j                  | _        | j                  r0t	        j
                  |j                  |j                        | _        y t	        j                         | _        t        |j                        D ]E  }| j                  j                  t	        j
                  |j                  |j                               G y r   )r    r!   r   share_embeddingr   r%   r   rj   input_embedding
ModuleListranger   )r*   r   rN   r+   s      r,   r!   zPatchTSTEmbedding.__init__o  s    "(";";%55#%99V-@-@&..#QD #%==?D 6445 \$$++BIIf6I6I6>>,Z[\r-   r   c                 `   |j                   d   }|| j                  k7  rt        d| j                   d| d      | j                  r| j	                  |      }|S t        |      D cg c]$  } | j                  |   |dd|ddddf         & }}t        j                  |d      }|S c c}w )a%  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input for embedding
        return:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, d_model)`
        r   z&The defined number of input channels (zQ) in the config has to be the same as the number of channels in the batch input (r   Nr@   )rE   r   r#   r  r  r  rF   stack)r*   r   r   
embeddingsis        r,   rX   zPatchTSTEmbedding.forward{  s     )..q1!8!8889P9P8Q RTTfSgghj  --k:J  UZZlTmnq1$..q1+aAqj2IJnJnZQ7J os   ')B+	rZ   r[   r\   r   r!   rF   ra   rX   rb   rc   s   @r,   r  r  n  s!    
\~ 
\5<< r-   r  c                   ~     e Zd ZdZdedef fdZedededej                  fd       Z
dej                  fdZ xZS )	r   z'
    Class for positional encoding
    r   r   c                    t         |           |j                  | _        |j                  | _        |j                  r?t	        j
                  t        j                  ddd|j                              | _	        |dz  }| j                  ||      | _        |j                  dkD  r%t	        j                  |j                        | _        y t	        j                         | _        y )Nr   r   )r    r!   r   r   r   	ParameterrF   r   rj   r   _init_per   positional_dropoutr   r   r*   r   r   r+   s      r,   r!   z#PatchTSTPositionalEncoding.__init__  s    #11"(";";\\%++aAv~~*NODN1K MM&+> 6<5N5NQR5RBJJv001 	XZXcXcXe 	r-   r=   c                 $   | j                   dk(  r7t        j                  t        j                  || j
                        d      }|S | j                   dk(  r#t        j                  || j
                        }t        j                  d|      j                  d      }t        j                  t        j                  d| j
                  d      t        j                  d      | j
                  z   z        }t        j                  ||z        |d d dd df<   t        j                  ||z        |d d dd df<   ||j                         z
  }||j                         d	z  z  }t        j                  |d
      }|S t!        | j                    d      )Nr   Trequires_gradsincosr   r   r2   g     @
   FzN is not a valid positional encoder. Available types are 'random' and 'sincos'.)r   r   r  rF   randnrj   r   aranger   expmathlogsincosr   r   r#   )r   r   r   positiondiv_terms        r,   r  z#PatchTSTPositionalEncoding._init_pe  sd    **h6<<K(P`deL  ,,8 ;;{FNNCL||A{3==a@Hyya!CQXHY\b\j\jHjFk!klH$)IIh.A$BLADqD!$)IIh.A$BLADqD!',*;*;*==L'<+;+;+=+BCL<<EJL
  223  4B  C r-   r   c                 x   | j                   r| j                  || j                  dd d d f   z         }| j                  | j                  d dd d f   z   }|j	                  |j
                  d   | j                  dd      }t        j                  ||fd      }|S | j                  || j                  z         }|S )Nr   r   r?   r2   r@   )	r   r  r   r   expandrE   r   rF   rG   )r*   r   r   
cls_tokensr   s        r,   rX   z"PatchTSTPositionalEncoding.forward  s    11+@Q@QRSRTVWRW@X2XYK):):2A2q5)AAI"))+*;*;A*>@W@WY[]_`J 99j+%>AFL   22;ARAR3RSLr-   )rZ   r[   r\   r]   r   r^   r!   staticmethodr   r  r  rF   ra   rX   rb   rc   s   @r,   r   r     sX    
~ 
C 
  c bll  &5<< r-   r   c            	       j     e Zd ZdZdedef fdZ	 	 d
dej                  de	e
   de	e
   defd	Z xZS )r   z
    PatchTST Encoder
    r   r   c                 &   t         |   |       d| _        t        |      | _        t        ||      | _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        | j                          y c c}w )NF)r    r!   r   r  embedderr   positional_encoderr   r  r  num_hidden_layersr   layers	post_init)r*   r   r   r  r+   s       r,   r!   zPatchTSTEncoder.__init__  st     &+# *&1"<V["Qmm5QWQiQiKj$ka%9&%A$kl 	 %ls   Br   output_hidden_statesr<   r=   c                 J   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |      }|rdnd}|rdnd}| j
                  D ]%  }|r||fz   } |||      }|d   }|s||d   fz   }' t        |||      S )a  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Past values of the time series
            output_hidden_states (bool, optional): Indicates if hidden states should be outputted.
            output_attentions (bool, optional): Indicates if attentions should be outputted.

        return:
            `BaseModelOutput`
        Nr   )r   r<   r   r   )last_hidden_stater7   
attentions)r   r<   r,  r'  r(  r*  r
   )	r*   r   r,  r<   r   encoder_statesall_attentionsencoder_layerlayer_outputss	            r,   rX   zPatchTSTEncoder.forward  s      2C1N-TXT_T_TqTq$8$D $++JjJj 	
 mmK0..{;30d![[ 
	FM#!/</!A)|WhiM )+L !/=3C2E!E
	F ^hvwwr-   NN)rZ   r[   r\   r]   r   r^   r!   rF   ra   r   r`   r
   rX   rb   rc   s   @r,   r   r     s_    ~ C " 04,0	(x\\(x 'tn(x $D>	(x
 
(xr-   r   c                   6   e Zd ZU dZdZeej                     ed<   dZ	ee
ej                        ed<   dZee
ej                        ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed	<   y)
PatchTSTModelOutputa  
    Base class for model's outputs, with potential hidden states.

    Parameters:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, num_channels, height, width)`. Hidden-states of
            the model at the output of each layer plus the optional initial embedding outputs.
        mask: (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*)
            Bool masked tensor indicating which patches are masked
        loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
            Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
        scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
            Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
        patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
            Patched input to the Transformer
    Nr.  r7   r/  r   locscaler   )rZ   r[   r\   r]   r.  r   rF   FloatTensor__annotations__r7   r   r/  r   r7  r8  r   r   r-   r,   r6  r6    s    ( 6:x 1 1298<M8E%"3"345<59Ju00129(,D(5$$
%,'+C%##	$+)-E8E%%&-/3K%++,3r-   r6  c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   y)PatchTSTForPretrainingOutputa  
    Output type of [`PatchTSTForPretraining`].

    Parameters:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            MSE loss.
        prediction_outputs (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction outputs of the time series modeling heads.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossprediction_outputr7   r/  )rZ   r[   r\   r]   r=  r   rF   r9  r:  r>  r7   r   r/  r   r-   r,   r<  r<  '  sh    * )-D(5$$
%,59x 1 1298<M8E%"3"345<59Ju00129r-   r<  c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   y)PatchTSTForRegressionOutputa  
    Output type of [`PatchTSTForRegression`].

    Parameters:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            MSE loss.
        regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
            Regression outputs of the time series modeling heads.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nr=  regression_outputsr7   r/  )rZ   r[   r\   r]   r=  r   rF   r9  r:  rA  r7   r   r/  r   r-   r,   r@  r@  D  sh    * )-D(5$$
%,6:!2!23:8<M8E%"3"345<59Ju00129r-   r@  c                      e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   dZeej                     ed<   dZeej                     ed<   y)	PatchTSTForPredictionOutputaR  
    Output type of [`PatchTSTForPrediction`].

    Parameters:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            MSE loss.
        prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, -1)`):
            Prediction outputs of the time series modeling heads.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
            Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
        scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
            Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
    Nr=  prediction_outputsr7   r/  r7  r8  )rZ   r[   r\   r]   r=  r   rF   r9  r:  rD  r7   r   r/  r7  r8  r   r-   r,   rC  rC  a  s    2 )-D(5$$
%,6:!2!23:8<M8E%"3"345<59Ju00129'+C%##	$+)-E8E%%&-r-   rC  c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   y)PatchTSTForClassificationOutputaR  
    Output type of [`PatchTSTForClassification`].

    Parameters:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            Total loss as the sum of the masked language modeling loss and the next sequence prediction
            (classification) loss.
        prediction_logits (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
            Prediction scores of the PatchTST modeling head (scores before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nr=  prediction_logitsr7   r/  )rZ   r[   r\   r]   r=  r   rF   r9  r:  rG  r7   r   r/  r   r-   r,   rF  rF    sh    , )-D(5$$
%,59x 1 1298<M8E%"3"345<59Ju00129r-   rF  c                   :    e Zd ZU dZdZeej                     ed<   y)SamplePatchTSTOutputa!  
    Base class for time series model's predictions outputs that contains the sampled values from the chosen
    distribution.

    Parameters:
        sequences `(batch_size, num_samples, prediction_length, num_targets)`):
                Sampled values from the chosen distribution.
    N	sequences)	rZ   r[   r\   r]   rJ  r   rF   r9  r:  r   r-   r,   rI  rI    s     .2Ix))*1r-   rI  inputtargetr=   c                 &    | j                  |       S )zc
    Computes the negative log likelihood loss from input distribution with respect to target.
    )log_prob)rK  rL  s     r,   nllrO    s     NN6"""r-   input_tensorweightsc                 P   |t        j                  |dk7  | |z  t        j                  |             }t        j                  |r|j	                  |      n|j	                         d      }|r|j	                  |      |z  S |j	                         |z  S | j                  |      S )aj  
    Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
    meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

    Args:
        input_tensor (`torch.FloatTensor`):
            Input tensor, of which the average must be computed.
        weights (`torch.FloatTensor`, *optional*):
            Weights tensor, of the same shape as `input_tensor`.
        dim (`int`, *optional*):
            The dim along which to average `input_tensor`.

    Returns:
        `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
    r   r@   r   min)rF   where
zeros_likeclampr   r   )rP  rQ  rA   weighted_tensorsum_weightss        r,   weighted_averagerZ    s      ++glL74JEL\L\]iLjkkk#'++#+"67;;=VYZ03###,R]]]9L9L9NR]]]  S ))r-   c            	            e Zd ZdZdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ	 xZ
S )PatchTSTStdScalerz
    Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
    subtracting from the mean and dividing by the standard deviation.
    r   c                     t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  nd| _        t        |d      r|j                  | _        y d| _        y )Nscaling_dimr   keepdimTminimum_scalegh㈵>)r    r!   hasattrr^  rA   r_  r`  rm   s     r,   r!   zPatchTSTStdScaler.__init__  s[    )0)G6%%Q)0)Cv~~5<V_5UV11[_r-   r   observed_indicatorr=   c                    |j                  | j                  | j                        }|j                  d      }||z  j                  | j                  | j                        |z  }||z
  |z  dz  j                  | j                  | j                        |z  }t	        j
                  || j                  z         }||z
  |z  ||fS )C  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        r_  r   r2   )r   rA   r_  	clamp_minrF   sqrtr`  )r*   r   rb  denominatorr7  variancer8  s          r,   rX   zPatchTSTStdScaler.forward  s     ),,TXXt||,L!++C0((--dhh-MP[[Sj$661<AA$((TXT`T`Aadoo

8d&8&889s
e#S%//r-   rZ   r[   r\   r]   r   r!   rF   ra   r   rX   rb   rc   s   @r,   r\  r\    sS    
`~ `0LL06;ll0	u||U\\5<<7	80r-   r\  c            	            e Zd ZdZdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ	 xZ
S )PatchTSTMeanScalerz
    Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
    accordingly.
    r   c                 &   t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  nd| _        t        |d      r|j                  nd| _        t        |d      r|j                  | _        y d | _        y )Nr^  r   r_  Tr`  绽|=default_scale)r    r!   ra  r^  rA   r_  r`  ro  rm   s     r,   r!   zPatchTSTMeanScaler.__init__  su    )0)G6%%Q)0)Cv~~5<V_5UV11[`5<V_5UV11[_r-   r   rb  r=   c                    ||z  j                         j                  | j                  d      }|j                  | j                  d      }|t        j                  |d      z  }| j
                  Q|j                  d      }t        j                  |j                  d      d      }t        j                  ||z        }n"| j
                  t        j                  |      z  }t        j                  |dkD  ||      }t        j                  || j                        }||z  }	| j                  s|j                  | j                        }|	t        j                  |      |fS )rd  Tre  r   rS  r   r@   )absr   rA   rF   rW  ro  squeeze	ones_likerU  r`  r_  rV  )
r*   r   rb  ts_sumnum_observedr8  	batch_sumbatch_observationsro  scaled_datas
             r,   rX   zPatchTSTMeanScaler.forward  s.    ++00266txx6N)--dhh-E\q99 %

q
)I!&\-=-=a-@a!H!MM)6H*HIM ..1GGM L1,e]C Et'9'9:Ul||MMdhhM/EE,,U3U::r-   rj  rc   s   @r,   rl  rl    sS    
`~ `&;LL&;6;ll&;	u||U\\5<<7	8&;r-   rl  c            
            e Zd ZdZdef fdZ	 ddej                  deej                     de	ej                  ej                  ej                  f   fdZ
 xZS )	PatchTSTNOPScalerz|
    Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
    r   c                     t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  | _        y d| _        y )Nr^  r   r_  T)r    r!   ra  r^  rA   r_  rm   s     r,   r!   zPatchTSTNOPScaler.__init__2  s@    )0)G6%%Q)0)Cv~~r-   r   rb  r=   c                     t        j                  |d      j                  | j                  | j                        }t        j
                  |d      j                  | j                  | j                        }|||fS )a  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        Fr  )rA   r_  )rF   rs  r   rA   r_  rV  )r*   r   rb  r8  r7  s        r,   rX   zPatchTSTNOPScaler.forward7  si     E:??DHHVZVbVb?ct59>>488UYUaUa>bS%r-   r   )rZ   r[   r\   r]   r   r!   rF   ra   r   r   rX   rb   rc   s   @r,   rz  rz  -  s_    N~ N PT LL 6>u||6L 	u||U\\5<<7	8 r-   rz  c            	            e Zd Zdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ xZ	S )PatchTSTScalerr   c                     t         |           |j                  dk(  s|j                  du rt        |      | _        y |j                  dk(  rt        |      | _        y t        |      | _        y )Nr   Tr   )r    r!   r$   rl  scalerr\  rz  rm   s     r,   r!   zPatchTSTScaler.__init__I  sU    >>V#v~~'=,V4DK^^u$+F3DK+F3DKr-   r   rb  r=   c                 8    | j                  ||      \  }}}|||fS )a>  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Input for scaler calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, um_input_channels)`)
        )r  )r*   r   rb  r7  r8  s        r,   rX   zPatchTSTScaler.forwardR  s)      ;;t-?@c5S%r-   )
rZ   r[   r\   r   r!   rF   ra   r   rX   rb   rc   s   @r,   r~  r~  H  sL    4~ 4 LL 6;ll 	u||U\\5<<7	8 r-   r~  c                        e Zd Zdef fdZ	 	 	 	 	 ddej                  deej                     deej                     dee   dee   dee   d	e	e
ef   fd
Z xZS )PatchTSTModelr   c                 b   t         |   |       t        |      | _        t	        |      | _        |j                  | _        | j
                  j                  }| j                  rt        |      | _	        nt        j                         | _	        t        ||      | _        | j                          y )N)r   )r    r!   r~  r  r   
patchifierdo_mask_inputr   r   maskingr   r   r   encoderr+  r  s      r,   r!   zPatchTSTModel.__init__f  s     $V,*62#11oo11*62DL;;=DL&v;G 	r-   r   past_observed_maskfuture_valuesr,  r<   return_dictr=   c           	         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|t	        j
                  |      }| j                  ||      \  }}}	| j                  |      }
| j                  r| j                  |
      \  }}n| j                  |
      d}}| j                  |||      }|s>|j                  |j                  |j                  f}||||	|
fz   }t        d |D              S t        |j                  |j                  |j                  |||	|
      S )a  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            future_values (`torch.BoolTensor` of shape `(batch_size, prediction_length, num_input_channels)`, *optional*):
                Future target values associated with the `past_values`
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*):
                Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTModelOutput` or tuple of `torch.Tensor` (if `return_dict`=False or `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTModel

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> model = PatchTSTModel.from_pretrained("namctin/patchtst_etth1_pretrain")

        >>> # during training, one provides both past and future values
        >>> outputs = model(
        ...     past_values=batch["past_values"],
        ...     future_values=batch["future_values"],
        ... )

        >>> last_hidden_state = outputs.last_hidden_state
        ```N)r   r,  r<   c              3   &   K   | ]	  }||  y wr   r   ).0vs     r,   	<genexpr>z(PatchTSTModel.forward.<locals>.<genexpr>  s     =qq}=s   )r.  r7   r/  r   r7  r8  r   )r   use_return_dictr<   r,  rF   rs  r  r  r  r  r  r.  r7   r/  tupler6  )r*   r   r  r  r,  r<   r  scaled_past_valuesr7  r8  patched_valuesmasked_valuesr   encoder_outputr   s                  r,   rX   zPatchTSTModel.forwardx  sY   l &1%<k$++B]B]1B1N-TXT_T_TqTq$8$D $++JjJj 	 %!&!= *.[BT)U&C );<"&,,~">M4"&,,~">4M%<Pdu & 
 %779U9UWeWpWpqGsE> BBG=G===",>>(66%00&
 	
r-   NNNNN)rZ   r[   r\   r   r!   rF   ra   r   r`   r   r   r6  rX   rb   rc   s   @r,   r  r  d  s    ~ * 6:04/3,0&*Z
\\Z
 %U\\2Z
  -	Z

 'tnZ
 $D>Z
 d^Z
 
u))	*Z
r-   r  c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )PatchTSTMaskPretrainHeadz-
    Pretraining head for mask modelling
    r   c                 0   t         |           |j                  dkD  rt        j                  |j                        nt        j
                         | _        t        j                  |j                  |j                        | _
        |j                  | _        y Nr   )r    r!   head_dropoutr   r   r   r   r%   rj   r   linearr   rm   s     r,   r!   z!PatchTSTMaskPretrainHead.__init__  sh    :@:M:MPQ:Qrzz&"5"56WYWbWbWdii0C0CD#11r-   	embeddingr=   c                     | j                  | j                  |            }| j                  r|ddddddddf   }|S )a  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True

        Nr   )r  r   r   )r*   r  s     r,   rX   z PatchTSTMaskPretrainHead.forward  s>     KKY 78	!!QA+.Ir-   rq   rc   s   @r,   r  r    s/    2~ 2 %,, r-   r  z*
    The PatchTST for pretrain model.
    )custom_introc                        e Zd Zdef fdZ	 	 	 	 d
dej                  deej                     dee   dee   dee   de	e
ef   fd	Z xZS )PatchTSTForPretrainingr   c                     t         |   |       d|_        t        |      | _        t        |      | _        | j                          y )NT)r   )r    r!   r  r  r   r  headr+  rm   s     r,   r!   zPatchTSTForPretraining.__init__  s<     #"&1
,V4	 	r-   r   r  r,  r<   r  r=   c                    ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }t        j                  d      } |||j                        }	|	j                  d      |j                  z  j                         |j                  j                         dz   z  }
|j                  }|s|f|dd	 z   }|
|
f|z   }|S |}|S t        |
|||j                  
      S )a	  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*): Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTForPretrainingOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
            `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTConfig, PatchTSTForPretraining

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> # Config for random mask pretraining
        >>> config = PatchTSTConfig(
        ...     num_input_channels=7,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     mask_type='random',
        ...     random_mask_ratio=0.4,
        ...     use_cls_token=True,
        ... )
        >>> # Config for forecast mask pretraining
        >>> config = PatchTSTConfig(
        ...     num_input_channels=7,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     mask_type='forecast',
        ...     num_forecast_mask_patches=5,
        ...     use_cls_token=True,
        ... )
        >>> model = PatchTSTForPretraining(config)

        >>> # during training, one provides both past and future values
        >>> outputs = model(past_values=batch["past_values"])

        >>> loss = outputs.loss
        >>> loss.backward()
        ```Tr   r  r,  r<   r  none	reductionr?   r@   rn  r   )r=  r>  r7   r/  )r   r  r   r  r.  r   MSELossr   r   r   r   r7   r<  r/  )r*   r   r  r,  r<   r  model_outputx_hatr=  loss_valmasked_lossr0  r   s                r,   rX   zPatchTSTForPretraining.forward  s   J &1%<k$++B]B] zz#1!5/ " 
 		,889 zzF+|778}}},|/@/@@EEG<K\K\K`K`KbejKjk%33ha!33G2=2I{nw.GN PWGN+^`l`w`w
 	
r-   )NNNN)rZ   r[   r\   r   r!   rF   ra   r   r`   r   r   r<  rX   rb   rc   s   @r,   r  r    s    ~  6:/3,0&*a
\\a
 %U\\2a
 'tn	a

 $D>a
 d^a
 
u22	3a
r-   r  c                   D     e Zd Zdef fdZdej                  fdZ xZS )PatchTSTClassificationHeadr   c                    t         |           |j                  | _        |j                  | _        t	        j
                  d      | _        |j                  dkD  rt	        j                  |j                        nt	        j                         | _
        t	        j                  |j                  |j                  z  |j                        | _        y Nr   	start_dimr   )r    r!   r   pooling_typer   Flattenflattenr  r   r   r   r%   r   rj   num_targetsr  rm   s     r,   r!   z#PatchTSTClassificationHead.__init__f  s    #11"//zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWdii 9 9FNN JFL^L^_r-   r  c                 n   | j                   r|dddddddf   }ng| j                  dk(  r|j                  d      }nE| j                  dk(  r|j                  d      j                  }nt        d| j                   d      | j                  |      }| j                  | j                  |            }|S )	a[  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                     `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, num_targets)`

        Nr   r   r2   r@   r   pooling operator  is not implemented yet)	r   r  r   r   valuesr#   r  r  r   r*   r  pooled_embeddingrp   s       r,   rX   z"PatchTSTClassificationHead.forwardn  s     (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\<<(89T\\*:;<r-   r  rc   s   @r,   r  r  e  s!    `~ ` r-   r  z0
    The PatchTST for classification model.
    c                        e Zd Zdef fdZe	 	 	 	 	 ddej                  deej                     dee	   dee	   dee	   dee	   d	e
eef   fd
       Z xZS )PatchTSTForClassificationr   c                     t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        t        |      | _        | j                          y )N+Setting `do_mask_input` parameter to False.F)
r    r!   r  loggerwarningr  r   r  r  r+  rm   s     r,   r!   z"PatchTSTForClassification.__init__  sT      NNHI#(F "6*
.v6	 	r-   r   target_valuesr  r,  r<   r  r=   c                 R   ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }d}	|t        j                         }
 |
||      }	|s|f|dd z   }|	|	f|z   }|S |}|S t        |	||j                  |j                        S )ac  
        past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
            Input sequence to the model
        target_values (`torch.Tensor`, *optional*):
            Labels associates with the `past_values`
        past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:

            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Examples:

        ```python
        >>> from transformers import PatchTSTConfig, PatchTSTForClassification

        >>> # classification task with two input channel2 and 3 classes
        >>> config = PatchTSTConfig(
        ...     num_input_channels=2,
        ...     num_targets=3,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     use_cls_token=True,
        ... )
        >>> model = PatchTSTForClassification(config=config)

        >>> # during inference, one only provides past values
        >>> past_values = torch.randn(20, 512, 2)
        >>> outputs = model(past_values=past_values)
        >>> labels = outputs.prediction_logits
        ```NTr  r   r   )r=  rG  r7   r/  )
r   r  r   r  r.  r   CrossEntropyLossrF  r7   r/  )r*   r   r  r  r,  r<   r  r  y_hatr  r=  r   s               r,   rX   z!PatchTSTForClassification.forward  s    X &1%<k$++B]B]zz#1!5/ " 
 		,889$&&(DE=1Hha!33G/7/CxkG+GN JQGN.#&44#..	
 	
r-   r  )rZ   r[   r\   r   r!   r   rF   ra   r   r`   r   r  rF  rX   rb   rc   s   @r,   r  r    s    ~   15-1/3,0&*D
\\D
  -D
 %TN	D

 'tnD
 $D>D
 d^D
 
u55	6D
 D
r-   r  z,
    The PatchTST for regression Model.
    c                   J     e Zd Zddedef fdZdej                  fdZ xZ	S )PatchTSTPredictionHeadr   r   c                    t         |           |j                  | _        |j                  | _        |j                  | _        |j
                  | _        | j
                  s| j                  r|j                  }n|j                  |z  }| j                  sVt        j                         | _	        t        j                         | _
        t        j                         | _        t        | j                        D ]  }| j                  j                  t        j                  d             |:| j                  j                  t        j                  ||j                                n*| j                  j                  |j#                  |             | j                  j                  |j$                  dkD  rt        j&                  |j$                        nt        j(                                 yt        j                  d      | _        |&t        j                  ||j                         | _        n|j#                  |      | _        |j$                  dkD  rt        j&                  |j$                        nt        j(                         | _        y)a  
        num_patches (`int`):
            The number of patches in the input sequence.
        distribution_output (`DistributionOutput`, *optional*):
            The distribution output layer for probabilistic forecasting. If None, a linear output layer is used.
        r2   r  Nr   )r    r!   share_projectionr   r   r  rj   r   r  projectionsdropoutsflattensr  r   r  r%   prediction_lengthget_parameter_projectionr  r   r   r  
projectionr   )r*   r   r   distribution_outputr"   r  r+   s         r,   r!   zPatchTSTPredictionHead.__init__  s    	 & 7 7"(";";#11"// 2 2~~H~~3H$$!}}DMMODMMMODM4223 t$$RZZ!%<=&.$$++BIIh@X@X,YZ $$++,?,X,XYa,bc$$H[H[^_H_RZZ0C0C%Degepeperst ::2DL"*"$))Hf6N6N"O #6"N"Nx"X>D>Q>QTU>U2::f&9&9:[][f[f[hDLr-   r  c                    | j                   r|dddddddf   }nP| j                  dk(  r|j                  d      }n.| j                  dk(  r|j                  d      j                  }n|}| j
                  sg }t        | j                        D ]\  } | j                  |   |dd|ddf         } | j                  |   |      } | j                  |   |      }|j                  |       ^ t        j                  |d      }n3| j                  |      }| j                  |      }| j!                  |      }t#        |t$              rt%        d |D              }|S |j'                  dd      }|S )	aj  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                     `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, forecast_len, num_channels)`

        Nr   r   r2   r@   r   r   c              3   @   K   | ]  }|j                  d d        yw)r2   r   N)r4   )r  zs     r,   r  z1PatchTSTPredictionHead.forward.<locals>.<genexpr>E  s     =1;;q!,=s   )r   r  r   r   r  r  r  r   r  r  r  r   rF   r	  r  r   r  r   r  r4   )r*   r  r  rp   r  s        r,   rX   zPatchTSTPredictionHead.forward  st    (Aq!4  F*#,>>a>#8 ""e+#,==Q=#7#>#>  $- $$F4223 0#34==#34DQ1W4M#N #34==#34D#E  $74#3#3A#67G#H ./0 [[Q/F  $||,<=#||,<= __%56Ffe$=f==F  %%a+Fr-   r   )
rZ   r[   r\   r   r^   r!   rF   ra   rX   rb   rc   s   @r,   r  r    s*    )i~ )iC )iV1 1r-   r  z,
    The PatchTST for prediction model.
    c                        e Zd Zdef fdZ	 	 	 	 	 ddej                  deej                     deej                     dee   dee   dee   d	e	e
ef   fd
Z	 ddej                  deej                     d	efdZ xZS )PatchTSTForPredictionr   c                    t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        |j                  dk(  rd | _        n|j                  dk(  rt        |j                        | _        nn|j                  dk(  rt        |j                        | _        nC|j                  dk(  rt        |j                        | _        nt        d|j                         t        || j                  j                  j                   | j                  	      | _        | j%                          y )
Nr  Fmse	student_tr@   normalnegative_binomialUnknown distribution output )r  )r    r!   r  r  r  r  r   r=  r  r   r  r   r   r#   r  r  r   r  r+  rm   s     r,   r!   zPatchTSTForPrediction.__init__Q  s     NNHI#(F "6*
;;%'+D$))[8+9f>V>V+W(++x7+7F<T<T+U(++/BB+AfF^F^+_( #?@Z@Z?[!\]]*DJJ))554KcKc
	
 	r-   r   r  r  r,  r<   r  r=   c                    ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }d}	| j
                  r|}
n||j                  z  |j                  z   }
|u| j
                  rJ| j
                  j                  ||j                  |j                        }t        ||      }	t        |	      }	nt        j                  d      } ||
|      }	|j                  }|j                  }|s|
f|dd z   }|	|	f|z   }|S |}|S t        |	|
|j                  |j                  ||	      S )
aV	  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            future_values (`torch.Tensor` of shape `(bs, forecast_len, num_input_channels)`, *optional*):
                Future target values associated with the `past_values`
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*):
                Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTForPredictionOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
            `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTConfig, PatchTSTForPrediction

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> # Prediction task with 7 input channels and prediction length is 96
        >>> model = PatchTSTForPrediction.from_pretrained("namctin/patchtst_etth1_forecast")

        >>> # during training, one provides both past and future values
        >>> outputs = model(
        ...     past_values=batch["past_values"],
        ...     future_values=batch["future_values"],
        ... )

        >>> loss = outputs.loss
        >>> loss.backward()

        >>> # during inference, one only provides past values, the model outputs future values
        >>> outputs = model(past_values=batch["past_values"])
        >>> prediction_outputs = outputs.prediction_outputs
        ```NTr  r7  r8  r   r  r   r?   )r=  rD  r7   r/  r7  r8  )r   r  r   r  r.  r  r8  r7  distributionrO  rZ  r   r  rC  r7   r/  )r*   r   r  r  r,  r<   r  r  r  r  	y_hat_outr  r=  r7  r8  r   s                   r,   rX   zPatchTSTForPrediction.forwardn  sn   z &1%<k$++B]B] zz#1!5/ " 
 		,889##I 2 22\5E5EEI$''#77DD|//|7I7I  E   |];+H5zzF3	=9"" l\!B%77G/7/CxkG+GN JQGN*(&44#..
 	
r-   c                    | j                   j                  } | |d|d      }| j                  rz| j                  j                  |j                  |j
                  |j                        }t        |      D cg c]  }|j                          }}t        j                  |d      }n|j                  j                  d      }t        |      S c c}w )a   
        Generate sequences of sample predictions from a model with a probability distribution head.

        Parameters:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the future.
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
            samples, prediction_length, 1)` or `(batch_size, number of samples, prediction_length, num_input_channels)`
            for multivariate predictions.
        NF)r   r  r  r,  r  r   r@   rJ  )r   num_parallel_samplesr  r  rD  r7  r8  r  samplerF   r	  r   rI  r*   r   r  r  r   r  rN   sampless           r,   generatezPatchTSTForPrediction.generate  s    0  ${{?? #1!&	
 ##33@@**7== A L 7<<P6QR|**,RGRkk'q1G00::1=G#g66 Ss   8Cr  r   )rZ   r[   r\   r   r!   rF   ra   r   r`   r   r   rC  rX   rI  r  rb   rc   s   @r,   r  r  K  s    ~ @ 6:04/3,0&*k
\\k
 %U\\2k
  -	k

 'tnk
 $D>k
 d^k
 
u11	2k
` 6:-7\\-7 %U\\2-7 
	-7r-   r  c                   J     e Zd ZdZddef fdZdej                  fdZ xZ	S )PatchTSTRegressionHeadz
    Regression head
    r   c                    t         |           |j                  | _        |j                  | _        |j
                  | _        || _        |j                  |j                  z  }t        j                  d      | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |&t        j                   ||j"                        | _        y |j'                  |      | _        y r  )r    r!   output_rangey_ranger   r  r  r   rj   r   r  r  r  r   r   r   r%   r  r  r  )r*   r   r  r"   r+   s       r,   r!   zPatchTSTRegressionHead.__init__  s    **#11"//#6 ,,v~~=zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWd& ii&2D2DEDO1JJ8TDOr-   r  c                 2   | j                   r|dddddddf   }ng| j                  dk(  r|j                  d      }nE| j                  dk(  r|j                  d      j                  }nt        d| j                   d      | j                  | j                  |            }| j                  |      }| j                  du | j                  duz  rEt        j                  |      | j                  d	   | j                  d   z
  z  | j                  d   z   }|S )
aY  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, output_dim)`

        Nr   r   r2   r@   r   r  r  r   )r   r  r   r   r  r#   r   r  r  r  r  rF   sigmoidr  s       r,   rX   zPatchTSTRegressionHead.forward!  s    (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\  <<5E(FG !12$$,T1IJ]]6*dll1oQ.OPSWS_S_`aSbbFr-   r   rq   rc   s   @r,   r  r    s&    U~ U" r-   r  z,
    The PatchTST for regression model.
    c                       e Zd Zdef fdZe	 	 	 	 	 ddej                  deej                     deej                     dee	   dee	   dee	   d	e
eef   fd
       Z	 ddej                  deej                     d	efdZ xZS )PatchTSTForRegressionr   c                 J   t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        |j                  dk(  rd | _        n|j                  dk(  rt        |j                        | _        nn|j                  dk(  rt        |j                        | _        nC|j                  dk(  rt        |j                        | _        nt        d|j                         t        || j                        | _        | j!                          y )	Nr  Fr  r  r@   r  r  r  )r    r!   r  r  r  r  r   r=  r  r   r  r   r   r#   r  r  r+  rm   s     r,   r!   zPatchTSTForRegression.__init__H  s      NNHI#(F "6*
;;%'+D$))[8+9f>P>P+Q(++x7+7F<N<N+O(++/BB+AfFXFX+Y( #?@Z@Z?[!\]]*643K3KL	 	r-   r   r  r  r,  r<   r  r=   c           	      X   ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }d}	|| j
                  rp| j
                  j                  |      }
t        |D cg c](  }|j                  d| j                   j                        * c}      }t        |
|      }	t        |	      }	nt        j                  d      }	 |	||      }	|s|f|dd z   }|	|	f|z   }|S |}|S t        |	||j                  |j                   	      S c c}w )
a#  
        past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
            Input sequence to the model
        target_values (`torch.Tensor` of shape `(bs, num_input_channels)`):
            Target values associates with the `past_values`
        past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:

            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            Whether or not to return a `ModelOutput` instead of a plain tuple.

        Examples:

        ```python
        >>> from transformers import PatchTSTConfig, PatchTSTForRegression

        >>> # Regression task with 6 input channels and regress 2 targets
        >>> model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression")

        >>> # during inference, one only provides past values, the model outputs future values
        >>> past_values = torch.randn(20, 512, 6)
        >>> outputs = model(past_values=past_values)
        >>> regression_outputs = outputs.regression_outputs
        ```NTr  r?   r   r  r   r   )r=  rA  r7   r/  )r   r  r   r  r.  r  r  r  r3   r  rO  rZ  r   r  r@  r7   r/  )r*   r   r  r  r,  r<   r  r  r  r=  r  itemr   s                r,   rX   zPatchTSTForRegression.forwardb  s=   J &1%<k$++B]B]zz#1!5/ " 
 		,889$''#77DDUKRWX$tyyT[[-D-DEXY<7'-zzF3E=1ha!33G+/+;tg'GN BIGN*$&44#..	
 	
 Ys    -D'c                 v   | j                   j                  } | |d|d      }| j                  j                  |j                        }t        |      D cg c]  }|j                          }}t        j                  |d      j                  d|| j                   j                        }t        |      S c c}w )a  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Parameters:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the future.
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
            samples, num_targets)`.
        NF)r   r  r  r,  r   r@   r?   r  )r   r  r  r  rA  r  r  rF   r	  r3   r  rI  r  s           r,   r  zPatchTSTForRegression.generate  s    .  ${{?? #1!&	
 //<<W=W=WX278L2MNQ<&&(NN++g1-2227KT[[MdMde#g66 Os   B6r  r   )rZ   r[   r\   r   r!   r   rF   ra   r   r`   r   r  r@  rX   rI  r  rb   rc   s   @r,   r  r  B  s    ~ 4  1559/3,0&*G
\\G
  -G
 %U\\2	G

 'tnG
 $D>G
 d^G
 
u11	2G
 G
X 6:'7\\'7 %U\\2'7 
	'7r-   r  )r  r   r  r  r  r  )NFr   r  r4  )Fr]   r  dataclassesr   typingr   r   r   rF   r   activationsr	   modeling_outputsr
   modeling_utilsr   time_series_utilsr   r   r   utilsr   r   r   configuration_patchtstr   
get_loggerrZ   r  Moduler   re   ra   r_   listr`   r^   r   r   r   r   r   r   r  r   r   r6  r<  r@  rC  rF  rI  distributionsDistributionrO  rZ  r\  rl  rz  r~  r  r  r  r  r  r  r  r  r  __all__r   r-   r,   <module>r     s     ! ) )   " / - U U 9 9 2 
		H	%[B		 [B|&		 &2 04',7%LL7%7% 'tn7% !%	7%
 7%z 04	A%LLA%$T3Y/A% 'tnA% 	A%H-ryy -`9"bii 9"xG299 GT 2o 2 2B!		 !H5 5p;x- ;x| 4+ 4 4< :; : :8 :+ : :8 .+ . .D :k : :: 
2; 
2 
2#u""// # #%,, #*5<< *(5<<:P *fkfrfr *2 0		  0H3; 3;n 		  6 RYY  8 m
+ m
 m
`ryy 8 
l
4 l

l
^" "J 
T
 7 T

T
n 
]RYY ]
]@ 
x73 x7
x7v4RYY 4n 
L73 L7
L7^r-   