
    sh>C                        S SK r S SKJrJr  S SKJr  \ R                  " \S5      r\ R                  " \S5      r\" 5       (       Ga  S SK	r	S SK	J
r  \	R                    S1S j5       r\	R                    S1S	 j5       r\	R                      S2S
 j5       r\	R                      S2S j5       r\	R"                  " \	R$                  " SS0SSS9\	R$                  " SS0SSS9\	R$                  " SS0SSS9\	R$                  " SS0SSS9// S9\	R                    S1S j5       5       r\	R"                  " \	R$                  " SS0SSS9// S9\	R                    S1S j5       5       r\	R"                  " \	R$                  " SSS.SSS9\	R$                  " SSS.SSS9\	R$                  " SSS.SSS9\	R$                  " SSS.SSS9// S9\	R                      S3S j5       5       rS r\	R"                  " \	R$                  " SS0SSS9\	R$                  " SS0SSS9// SSS\0S9\	R                    S1S j5       5       r\	R                    S1S j5       r\	R                    S1S  j5       r\	R                      S3S! j5       r\	R                    S1S" j5       r\	R                    S1S# j5       r\	R                  S$ 5       r\	R                      S4S% j5       r\	R                      S5S& j5       r\	R                      S6S' j5       r \	R                      S6S( j5       r!\	R                  S\RD                  4S) j5       r#\	R                  S\RD                  4S* j5       r$S S+K%J&r&J'r'  \	R                    S1S, j5       r(\	R                    S1S- j5       r)\	R                    S1S. j5       r*\	R                    S1S/ j5       r+\	R                    S1S0 j5       r,gg)7    N)HAS_CUDAHAS_GPU)
has_tritonzrequires cudazrequires gpu)language
BLOCK_SIZEc                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " X'-   XS9  g Nr   axismasktl
program_idarangeloadstorein_ptr0in_ptr1out_ptr
n_elementsr   pidblock_startoffsetsr   xyoutputs               x/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/testing/_internal/triton_utils.py
add_kernelr       u     mm#&		!Z 88#GGG%D1GGG%D1
"F6    c                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-
  n[         R                  " X'-   XS9  g r	   r   r   s               r   
sub_kernelr$   "   r!   r"   c                    [         R                  " SS9nXe-  nU[         R                  " SU5      -   nX:  n	[         R                  " X-   U	S9n
US:X  a  [         R                  " X-   U	S9nX-   nOU
n[         R                  " X(-   XS9  g Nr   r
   r   twor   )r   r   r   r   ARGS_PASSEDr   r   r   r   r   r   r   r   s                r   add_kernel_with_optional_paramr)   3   s     mm#&		!Z 88#GGG%D1%)5AUFF
"F6r"   c                     [         R                  " SS9nXv-  nU[         R                  " SU5      -   n	X:  n
[         R                  " X	U-  -   U
S9nUS:X  a  [         R                  " X-   U
S9nX-   nOUn[         R                  " X)U-  -   XS9  g r&   r   )r   r   r   r   strider(   r   r   r   r   r   r   r   r   s                 r   -add_kernel_with_none_param_and_equal_to_1_argr,   H   s     mm#&		!Z 88#GGG..T:%)5AUFF
V++V?r"            )
num_stages	num_warps   @   )configskeyc                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " X'-   XS9  g r	   r   r   s               r   add_kernel_autotunedr7   ^   su    " mm#&		!Z 88#GGG%D1GGG%D1
"F6r"         c                    [         R                  " SS9nXS-  nU[         R                  " SU5      -   nXr:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " XG-   XS9  g r	   r   )r   r   r   r   r   r   r   r   r   r   r   r   s               r   &add_kernel_autotuned_weird_param_orderr;   x   su      mm#&		!Z 88#GGG%D1GGG%D1
"F6r"   )BLOCK_SIZE_XBLOCK_SIZE_Yc                    [         R                  " S5      U-  nU[         R                  " SU5      S S 2S 4   -   nX:  n	[         R                  " S5      U-  n
U
[         R                  " SU5      S S S 24   -   nX:  nUnUn[         R                  " XX>-  -   -   X-  5      n[         R                  " XXM-  -   -   X-  5      nUU-   n[         R                  " X-X>-  -   -   UX-  5        g )Nr      r   )r   r   r   
x_elements
y_elementsr<   r=   xoffsetxindexxmaskyoffsetyindexymaskx1y0tmp0tmp1tmp2s                     r   add_kernel_2d_autotunedrM      s    6 --"\1299Q5ag>>#--"\1299Q5dAg>>#www
"895=Iwww
"895=Id{
*/23T5=Ir"   c                     U $ )N )r4   ___s      r   _dummy_early_config_prunerR      s    r"   
      early_config_prune)r4   r5   warmuprepprune_configs_byc                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " X'-   XS9  g r	   r   r   s               r   *add_kernel_autotuned_with_unsupported_argsrZ      su    $ mm#&		!Z 88#GGG%D1GGG%D1
"F6r"   c                    [         R                  " SS9nXe-  nU[         R                  " SU5      -   nX:  n	[         R                  " X-   U	S9n
[         R                  " X-   U	S9nX-   U-  n[         R                  " X(-   XS9  g r	   r   )r   r   r   r   scaling_factorr   r   r   r   r   r   r   r   s                r   add_kernel_with_scalingr]      sz     mm#&		!Z 88#GGG%D1GGG%D1%>)
"F6r"   c                    [         R                  " SS9nXC-  n[         R                  " U U/U/[         R                  5      n[         R                  " UU/U/[         R                  5      nXg-   n[         R                  " UUU/5        g )Nr   r
   r   r   _experimental_descriptor_loadfloat32_experimental_descriptor_store)	in_desc_ptr0in_desc_ptr1out_desc_ptrr   r   offsetabr   s	            r   add_kernel_with_tma_1dri      s     mm#!,,HLJJ	
 ,,HLJJ	
 
))H	
r"   c                 B   [         R                  " SS9n[         R                  " SS9nXS-  nXd-  n[         R                  " U Xx/X4/[         R                  5      n	[         R                  " UXx/X4/[         R                  5      n
X-   n[         R                  " UUXx/5        g )Nr   r
   r?   r_   )rc   rd   re   r<   r=   pid_xpid_yoffset_xoffset_yr   r   r   s               r   add_kernel_with_tma_2dro     s     1%1%'',, (JJ	
 ,, (JJ	
 
)) 	
r"   c                     [         R                  " SS9nXC-  nU[         R                  " SU5      -   nXb:  n[         R                  " X-   US9nSU-  n	[         R                  " X-   XS9  g Nr   r
   r   r9   r   )
r   r   r   r   r   r   r   r   r   r   s
             r   mul2_kernelrr   *  sd     mm#&		!Z 88#GGG%D1Q
"F6r"   c                     [         R                  " SS9nX2-  nU[         R                  " SU5      -   nXQ:  n[         R                  " X-   US9nSU-  n[         R                  " X-   XS9  g rq   r   )	ptrr   r   r   r   r   r   r   r   s	            r   mul2_inplace_kernelru   9  sb     mm#&		!Z 88#GGCM-Q
2r"   c                 8    [         R                  " U S:  U S5      $ )Nr   )r   where)r   s    r   	zero_negsrx   G  s    xxQ1%%r"   c                    [         R                  " SS9nXS-  nU[         R                  " SU5      -   nXr:  nUS:X  a  [        XUS9  OUS:X  a  [	        X XUS9  [         R
                  " X-   US9n	[         R                  " X-   XS9  g )Nr   r
   ru   )r   r    r   )r   r   r   ru   r    r   r   )
r   r   r   r   
ACTIVATIONr   r   r   r   r   s
             r   indirection_kernelr{   K  s     mm#&		!Z 88#..
K<'wTGGG%D1
"A1r"   c                    [         R                  " SS9n[         R                  " SS9nXd-  nXu-  n	U[         R                  " SU5      -   n
U	[         R                  " SU5      -   nUS S 2S 4   U-  U
S S S 24   -   nUS S 2S 4   U-  U
S S S 24   -   n[         R                  " X-   5      n[         R                  " X-   US-  5        g )Nr   r
   r?   g       @r   )in_ptrr   in_y_strideout_y_strideX_BLOCK_SIZEY_BLOCK_SIZExidyidx_starty_start	x_offsets	y_offsetssrc_offsetsdst_offsetssrcs                  r   double_strided_kernelr   ^  s     mm#mm#$$bii<88	bii<88	4(;6479KK4(<7)D!G:LLggf*+
&c	2r"   c           	         [         R                  " U [         R                  " SU5      -   5      n[         R                  " U[         R                  " SU5      -   5      n[         R                  " U/U[         R                  5      n[         R
                  " SSXVU/[         R                  SSS9n[         R                  " U[         R                  " SU5      -   U5        g )Nr   shf.l.wrap.b32 $0, $1, $2, $3;
=r,r, r, rTr?   dtypeis_purepackr   r   r   fullint32inline_asm_elementwiser   	XYZnBLOCKr   r   szs	            r   inline_asm_kernel_is_pure_truer   r  s     GGA		!U++,GGA		!U++,GGUGQ)%%,1I((
 	RYYq%((!,r"   c           	         [         R                  " U [         R                  " SU5      -   5      n[         R                  " U[         R                  " SU5      -   5      n[         R                  " U/U[         R                  5      n[         R
                  " SSXVU/[         R                  SSS9n[         R                  " U[         R                  " SU5      -   U5        g )Nr   r   r   Fr?   r   r   r   s	            r   inline_asm_kernel_is_pure_falser     s     GGA		!U++,GGA		!U++,GGUGQ)%%,1I((
 	RYYq%((!,r"   c                 j   [         R                  " SS9nXT-  n[         R                  " [         R                  " U U/S/U/U/S/S9S/S9n[         R                  " [         R                  " UU/S/U/U/S/S9S/S9nXx-   n	[         R                  " [         R                  " UU/S/U/U/S/S9U	S/S9  g Nr   r
   r?   )baseshapestridesr   block_shapeorder)boundary_checkr   r   r   make_block_ptrr   )
x_ptry_ptr
output_ptrr   r   r   r   r   r   r   s
             r   add_kernel_with_block_ptrr     s     mm#&GG!l$'Lc 3

 GG!l$'Lc 3

 
!l$'Lc 3	
r"   c                    [         R                  " SS9nXC-  n[         R                  " [         R                  " U US/SS/US/US/SS/S9S/S9nUn[         R                  " [         R                  " UUS/SS/US/US/SS/S9US/S9  g r   r   )r   r   r   r   r   r   r   r   s           r   kernel_with_block_ptr_2dr     s     mm#&GG!1oA$a('O!f 3

 
!1oA$a('O!f 3	
r"   )r   r   c                     [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[        X-   US9n	[        X-   US9n
X-   n[	        X'-   XS9  g r	   r   r   s               r   add_kernel_with_importr     si     mm#&		!Z 88#".".g3r"   c                 @   [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
[         R                  " S5      S:X  a  X-   nOX-  n[         R                  " X'-   XS9  g r	   r   r   s               r   cond_op_kernelr     s     mm#&		!Z 88#GGG%D1GGG%D1==q UFUF
"F6r"   c                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " X'-   XS9  g r	   )r   r   r   r   
atomic_addr   s               r   atomic_add_kernelr     su     mm#&		!Z 88#GGG%D1GGG%D1
g';r"   c                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " X-   US9n
[	        S5       H  nX-   n[         R
                  " X'-   XS9  M      SnUS:  a)  US-  nX-   n[         R
                  " X'-   XS9  US:  a  M(  g g )Nr   r
   r   r9   r?   )r   r   r   r   ranger   )r   r   r   r   r   r   r   r   r   r   r   ir   s                r   add_4_times_kernelr     s     mm#&		!Z 88#GGG%D1GGG%D1qAUFHHW&:  !eFAUFHHW&: !er"   c                    [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXr:  n[         R                  " X-   US9n	[         R                  " X-   US9n
X-   n[         R                  " X7-   XS9  g r	   r   )r   r   r   r   r   r   r   r   r   r   r   r   s               r   add_kernel_out_of_order_fn2r   3  r!   r"   )r   tl.constexpr)r(   r   r   r   )r<   r   r=   r   )r   r   rz   r   )r   r   r   r   )r   r   r   r   )-unittest&torch.testing._internal.inductor_utilsr   r   torch.utils._tritonr   
skipUnlessrequires_cudarequires_gputritonr   r   jitr    r$   r)   r,   autotuneConfigr7   r;   rM   rR   rZ   r]   ri   ro   rr   ru   rx   r{   r   r   r   	constexprr   r   triton.languager   r   r   r   r   r   r   rO   r"   r   <module>r      st    D * ##Ho>""7N;<<% ZZ7
 #7 7  ZZ7
 #7 7  ZZ7
 $7 #7 7( ZZ@ $@ #@ @* __MM<-!qIMM<-!qIMM<,aHMM<,aH	
  ZZ7
 #7 7  __MM<,aH
 	 ZZ7 #	7 7$ __MM!$c:qTU MM!$c:qTU MM!#R8QRS MM!#R8QRS
 " ZZJ %J %J #$J, __MM<-!qIMM<,aH
 .0IJ	 ZZ7
 #7 	7  ZZ7 #7 7" ZZ
 #	
 
< ZZ
 %	

 %
 
B ZZ7 #	7 7 ZZ3 #3 3 ZZ& & ZZ2 #	2
 #2 2$ ZZ3
 %3 %3 3& ZZ-"-+9- -  ZZ-"-+9- -  ZZ+

 LL+
 +
Z ZZ
 LL	
 
B ,ZZ4
 #4 4  ZZ7
 #7 7& ZZ<
 #< <  ZZ;
 #; ;, ZZ7
 #7 7Q r"   