
    shh                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKJrJr  S SKJr  S SKJrJrJrJrJrJrJrJrJr  S SKJr  S SKrS SKrS SKrS SKJr  S SKJ r!  S S	K"J#r#  S S
K$J%r%  S SK&J'r(  S SK)J*r*J+r+J,r,  S SK-J.r.J/r/  SSK0J1r1J2r2  SSK3J4r4  SSK5J6r6J7r7  SSK8J9r9J:r:J;r;J<r<J=r=J>r>J?r?J@r@JArAJBrBJCrC  SSKDJErEJFrFJGrGJHrHJIrIJJrJ  \(       aW  S SKKJLrLJMrMJNrN  SSKOJPrPJQrQJRrRJSrS  SSKTJUrU  SSKVJWrWJXrXJYrY  SSKZJ[r[  \" S5      r\\\\X   /\W4   r]\^\[   r_\\`\R                  4   rb\`rc\R                  R                  \fS5      rg\R                  " \f5      riSS jrj " S S\R                  5      rk\=" SS 9 " S! S"5      5       rl\R                   " S# S$5      5       rn\R                   " S% S&5      5       ro\R                   " S' S(5      5       rp\R                   " S) S*5      5       rq\R                   " S+ S,5      5       rr\\l\n\o\q\p4   rs0 rtS-\uS.'    " S/ S05      rv0 rwS1\uS2'    S         SS3 jjrx " S4 S5\5      ry    SS6 jrz      SS7 jr{SS8 jr| S     SS9 jjr}\R                  " S5      SS: j5       r        SS; jr      GS S< jrGSS= jr\GR                  \GR                  \GR
                  \GR                  0\GR                  \GR                  \GR                  \GR                  \GR                  \GR                  \GR                  \GR                  \GR                  \GR                  \GR                   4 V s0 s H  n X _M     sn ErS>\uS?'           GSS@ jr " SA SB5      r " SC SD\(5      r' " SE SF5      r\
GR*                  " SG\
GR,                  SH9rGSSI jr " SJ SK\6\\F\   5      r\R                   " SL SM5      5       r\" GS0 SN\" \GR8                  SO SPSQ9_SR\" \GR8                  SS ST SUSV9_SW\" \GR8                  SX SY SZSV9_S[\" \GR8                  S\ S] S^SV9_S_\" \GR8                  S` Sa SbSV9_Sc\" \GR8                  Sd Se ScSf9_Sg\" \GR8                  Sh Si SjSV9_Sk\" \GR8                  Sl Sm Sn SkSo9_Sp\" \GR8                  Sq SpSQ9_Sr\" \GR8                  Ss SrSQ9_St\" \GR8                  Su SvSQ9_Sw\" \GR8                  Sx SySQ9_Sz\" \GR8                  S{ S| S} SzS~9_S\" \GR8                  S S SSf9_S\" \GR8                  S S SSV9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S S SSV9_S\" \GR8                  S S SSV9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_S\" \GR8                  S SSQ9_6rS\uS'   GSS jr " S S\:5      r " S S\<5      r " S S\5      r\R                   " S S5      5       r " S S5      r\" 5       r " S S5      r " S S5      r\" S\`S9r\" S\\S9r\(       a  \\GRT                  \H\\\\S4   4   4   r " S S\\\4   5      r " S S5      r " S S\\\   5      r\R                   " S S5      5       r\R                  " S5      GSS j5       r " S S5      r " S S\75      rgs  sn f (      )annotationsN)autoEnum)chain)	AnyCallablecastClassVarGeneric
NamedTupleOptionalTYPE_CHECKINGUnion)TypeVar)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)
OrderedSet)int_oo)PythonPrinter)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRanges   )configmetrics)DtypePropagationOpsHandler)BasicMathOpsMixinDefaultHandler)boolean_opsDeferredLineBasegenerate_assertIndentedBufferir_dataclass
ScopedDict	sympy_dotsympy_index_symbol
sympy_substriton_typeunique)ops
OpsHandlerOpsValueReductionType	StoreModeV)IteratorMutableMappingSequence)BufferChoiceCallerFixedLayoutIRNodeLoopBody)BaseScheduling	SchedulerSchedulerNode   PythonWrapperCodegen_Tschedulec                    [         R                  [        R                  5      (       a  [         R	                  SU 5        g g )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)msgs    r/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_inductor/codegen/common.pydata_type_loggerrK   P   s-      //6< 0    c                  H    \ rS rSrSrSrSr\S	S j5       r\S
S j5       r	Sr
g)WorkspaceZeroModeU   r   r>   r   c                    X:X  d  U[         R                  :X  a  U $ U [         R                  :X  a  U$ [        SU < SU< S35      e)NzWorkspaceZeroMode.combine(, ))rN   UNINITIALIZEDNotImplementedErrorabs     rJ   combineWorkspaceZeroMode.combineZ   sI    6Q+999H!///H!$>qe2aU!"LMMrL   c                P    U (       a  [         R                  $ [         R                  $ N)rN   ZERO_ON_CALLrS   )	zero_fills    rJ   	from_boolWorkspaceZeroMode.from_boolb   s    $111 ...rL    N)rV   rN   rW   rN   returnrN   )r]   boolra   rN   )__name__
__module____qualname____firstlineno__rS   r\   ZERO_PER_GRAPHstaticmethodrX   r^   __static_attributes__r`   rL   rJ   rN   rN   U   s9    MLNN N / /rL   rN   T)frozenc                  0   \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   SrS	\S'   \R                  r	S\S'   \
SSS jj5       r\
SS j5       r\
S S j5       r\
S S j5       rS!S jr\rS"S jrS#S jr\S#S j5       r\r\r\rS$S jrS$S jrS%S jrS&S jrSrg)'WorkspaceArgi   a  A temporary buffer used for a single kernel, then discarded.

Not registered as a traditional buffer since there are no users,
so it would be dead code eliminated.

Args:
    nbytes: The size of the buffer in bytes.
    zero_fill: Whether the buffer should be initialized to zero.


sympy.ExprcountrN   	zero_modetorch.devicedevicestr
outer_namews_ptr
inner_nametorch.dtypedtypec                P    U  [        [        R                  R                  5       3$ r[   )nextr1   graphworkspace_id)prefixs    rJ   unique_nameWorkspaceArg.unique_name}   s!    $qww334566rL   c                    U R                   UR                   :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r[   )rv   rx   rr   rU   s     rJ   can_joinWorkspaceArg.can_join   s@     LLALL(XQWW-?XAHHPQPXPXDX	
rL   c           	         [        U R                  UR                  -   [        R                  U R                  UR                  5      U R
                  U R                  U R                  U R                  S9$ N)ro   rp   rx   rr   rv   rt   )	rl   ro   rN   rX   rp   rx   rr   rv   rt   rU   s     rJ   joinWorkspaceArg.join   sS    ''AGG#'//Q[[I''88||||
 	
rL   c           	        U R                   UR                   :X  a4  U R                  UR                  :X  a  U R                  UR                  :X  d   e[        [        R
                  " U R                  UR                  5      [        R                  U R                  UR                  5      U R                   U R                  U R                  U R                  S9$ r   )rx   rr   rv   rl   sympyMaxro   rN   rX   rp   rt   rU   s     rJ   maximumWorkspaceArg.maximum   s     GGqww188qxx#7ALLALL<X	
X))AGGQWW-'//Q[[I''88||||
 	
rL   c                    U R                   $ r[   rr   selfs    rJ   
get_deviceWorkspaceArg.get_device   s    {{rL   c                    U R                   $ r[   rx   r   s    rJ   	get_dtypeWorkspaceArg.get_dtype   s    zzrL   c                `    SSK Jn  U" U R                  U R                  U R                  /S/S9$ )Nr   )r7   r>   )rr   rx   sizestride)irr7   rr   rx   ro   )r   r7   s     rJ   
get_layoutWorkspaceArg.get_layout   s.    $;;****3	
 	
rL   c                "    U R                  5       $ r[   )r   r   s    rJ   layoutWorkspaceArg.layout   s      rL   c                    U R                   /$ r[   )ro   r   s    rJ   get_sizeWorkspaceArg.get_size   s    

|rL   c                8    [         R                  R                  /$ r[   )r   SOner   s    rJ   
get_strideWorkspaceArg.get_stride   s    }rL   c                    U R                   $ r[   )rt   r   s    rJ   get_nameWorkspaceArg.get_name   s    rL   c                    / $ r[   r`   r   s    rJ   get_inputs_that_alias_output)WorkspaceArg.get_inputs_that_alias_output   s    	rL   r`   N)
workspace_)r}   rs   ra   rs   )rV   rl   rW   rl   ra   rb   )rV   rl   rW   rl   ra   rl   )ra   rq   )ra   rw   )ra   r7   )ra   list[sympy.Expr]ra   rs   )ra   	list[str])rc   rd   re   rf   __doc____annotations__rv   torchuint8rx   rh   r~   r   r   r   r   get_device_or_errorr   r   propertyr   get_output_specmaybe_get_output_specmaybe_get_layoutr   r   r   r   ri   r`   rL   rJ   rl   rl   i   s    	   OJE;$7 7 
 

 
 
 
 
 %
 ! ! !O&!rL   rl   c                  x    \ rS rSr% S\S'   S\S'   S\S'   \R                  R                  rS\S'   S	r	S
\S'   Sr
g	)	TensorArg   rs   namebufferrw   rx   rn   offsetNOptional[str]alias_ofr`   )rc   rd   re   rf   r   r   r   Zeror   r   ri   r`   rL   rJ   r   r      s.    
IKFJ%"Hm"rL   r   c                  >    \ rS rSr% S\S'   S\S'   \S	S j5       rSrg)
SizeArg   rs   r   rn   exprc                    g r[   r`   r   s    rJ   r   SizeArg.alias_of   s    rL   r`   Nra   r   )rc   rd   re   rf   r   r   r   ri   r`   rL   rJ   r   r      s    
I
 rL   r   c                       \ rS rSr% S\S'   Srg)ConstexprArg   rs   r   r`   Nrc   rd   re   rf   r   ri   r`   rL   rJ   r   r          
IrL   r   c                       \ rS rSr% S\S'   Srg)TMADescriptorArg   rs   r   r`   Nr   r`   rL   rJ   r   r      r   rL   r   c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
DeviceCodegen   SchedulingConstructor
schedulingWrapperConstructorwrapper_codegenNOptional[WrapperConstructor]cpp_wrapper_codegenr`   )rc   rd   re   rf   r   r   ri   r`   rL   rJ   r   r      s    %%''8<5<rL   r   zdict[str, DeviceCodegen]device_codegensc                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSrg)DeviceOpOverrides   c                    [         er[   rT   r   r   s     rJ   import_get_raw_stream_as*DeviceOpOverrides.import_get_raw_stream_as       !!rL   c                    [         er[   r   r   
device_idxs     rJ   
set_deviceDeviceOpOverrides.set_device   r   rL   c                    [         er[   r   r   s    rJ   synchronizeDeviceOpOverrides.synchronize   r   rL   c                    [         er[   r   r   s     rJ   device_guardDeviceOpOverrides.device_guard   r   rL   c                    [         er[   r   r   s    rJ   cpp_device_guard"DeviceOpOverrides.cpp_device_guard   r   rL   c                    [         er[   r   r   s    rJ   cpp_aoti_device_guard'DeviceOpOverrides.cpp_aoti_device_guard  r   rL   c                    [         er[   r   r   s    rJ   cpp_stream_guard"DeviceOpOverrides.cpp_stream_guard  r   rL   c                    [         er[   r   r   s    rJ   cpp_aoti_stream_guard'DeviceOpOverrides.cpp_aoti_stream_guard  r   rL   c                    [         er[   r   r   s    rJ   cpp_getStreamFromExternal+DeviceOpOverrides.cpp_getStreamFromExternal
  r   rL   c                    [         er[   r   r   s    rJ   kernel_headerDeviceOpOverrides.kernel_header  r   rL   c                    [         er[   r   r   s    rJ   kernel_driverDeviceOpOverrides.kernel_driver  r   rL   c                    [         er[   r   r   s    rJ   cpp_stream_type!DeviceOpOverrides.cpp_stream_type  r   rL   c                    [         er[   r   r   s    rJ   aoti_get_stream!DeviceOpOverrides.aoti_get_stream  r   rL   c                    [         er[   r   r   s    rJ   cpp_kernel_type!DeviceOpOverrides.cpp_kernel_type  r   rL   c                    [         er[   r   r   s    rJ   cpp_device_ptr DeviceOpOverrides.cpp_device_ptr  r   rL   c                    [         er[   r   r   s    rJ   tma_descriptor_helpers(DeviceOpOverrides.tma_descriptor_helpers  r   rL   c                    [         er[   r   )r   idxs     rJ   cpp_global_scratch$DeviceOpOverrides.cpp_global_scratch"      !!rL   r`   Nr   rs   ra   rs   )r   intra   rs   r   )r  r  ra   zOptional[tuple[str, str]])rc   rd   re   rf   r   r   r   r   r   r   r   r   r   r   r  r  r	  r  r  r  r  ri   r`   rL   rJ   r   r      sW    """""""""""""""""rL   r   zdict[str, DeviceOpOverrides]device_op_overrides_dictc                *    [        XU5      [        U '   g r[   )r   r   )rr   device_schedulingdevice_wrapper_codegendevice_cpp_wrapper_codegens       rJ   register_backend_for_devicer   ?  s     ,3MOFrL   c                      \ rS rSr\" 5       r\" 5       r\" 5       r\" 5       r\" 5       r	\" 5       r
\" 5       r\" 5       r\" 5       r\" 5       rSrg)BackendFeatureiJ  r`   N)rc   rd   re   rf   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTri   r`   rL   rJ   r"  r"  J  sL    fGIfO $6D6DfO"fv#vrL   r"  c                @   U c
  [        5       $ [        5         [        U [        R                  5      (       a  U R
                  nO/[        U [        5      (       d   eU n[        R                  " U5      n [        U5      nU(       d   eU" S 5      nUR                  U 5      $ r[   )	r   init_backend_registration
isinstancer   rr   typers   get_scheduling_for_deviceget_backend_features)rr   device_typescheduling_ctorr   s       rJ   r2  r2  W  s     ~|&%,,''kk&#&&&&k*/<O? &J**622rL   c                L    [        U[        5      (       d   eU[        U 5      ;   $ )zSee also V.graph.has_feature)r/  r"  r2  )rr   features     rJ   has_backend_featurer7  i  s(     g~....*6222rL   c                @    U [         ;   a  [         U    R                  $ S $ r[   )r   r   r   s    rJ   r1  r1  q  s     17?1J?6"--TPTTrL   c                h    U [         ;   a(  [         U    nU(       a  UR                  $ UR                  $ g r[   )r   r   r   )rr   cpp_wrapperwrapper_codegen_objs      rJ   get_wrapper_codegen_for_devicer<  u  sA      -<V-D   33	
 %44	

 rL   c                   ^^ SSK Jn   SSKJn  SSKJn  SSKJn  SSKJ	n  SSK
Jn  SSKJn  SS	KJn  SS
KJn  [%        S5      c9  U UUS.m['        SU4S jU[(        R*                  R,                  (       a  UOU5        [%        S5      c  UUS.m['        SU4S jUU5        [%        S5      c  ['        SUUU5        [%        S5      c  ['        SUUU5        [.        R0                  R3                  5       n	U	S:w  aS  [%        U	5      cF  SSKJn
   U
" S5      nU
" S5      nU
" S5      nU(       a  U(       a  U(       a  ['        U	UUU5        g g g g g g ! [8         a     g f = f)Nr>   )CppScheduling)CppWrapperCpu)CppWrapperCpuArrayRef)CppWrapperGpu)CUDACombinedScheduling)HalideScheduling)MetalScheduling)TritonSchedulingr?   cpu)cpphalidetritonc                6   > T[         R                     " U 5      $ r[   )r   cpu_backend)r   cpu_backendss    rJ   <lambda>+init_backend_registration.<locals>.<lambda>  s    |F,>,>?
KrL   cuda)rI  rH  c                6   > T[         R                     " U 5      $ r[   )r   cuda_backend)r   cuda_backendss    rJ   rM  rN    s    }V-@-@A*MrL   xpumpsprivateuseoner   )_get_custom_mod_func
Schedulingr@   CppWrapperCodegen)rG  r>  cpp_wrapper_cpur?  cpp_wrapper_cpu_array_refr@  cpp_wrapper_gpurA  cuda_combined_schedulingrB  rH  rC  rT  rD  rI  rE  wrapperr@   r1  r   r   aot_inductorallow_stack_allocationr   _C_get_privateuse1_backend_name torch.utils.backend_registrationrV  RuntimeError)r>  r?  r@  rA  rB  rC  rD  rE  r@   private_backendrV  r  r   r   rL  rR  s                 @@rJ   r.  r.    sm   ".@.@($(- '/ &&

 	$K ""99 "	
 !(0 -&
 	$M 		
 !'/# 		
 !'/# 		
 hh<<>O?*%o6>I	 4\ B23IJO"67J"K _9L+#%#'	 :M_  ? 	+   		s   ;E 
EEc                J    SSK Jn  / U Q[        XR                  U5      5      P$ )Nr   )FlexibleLayout)r   rf  r'   contiguous_strides)index
index_varssizesrf  s       rJ   index_prevent_reorderingrk    s*    
 $ UUTIj*K*KE*RSTTrL   c                    U[         U '   g r[   )r  )rr   device_op_overridess     rJ   register_device_op_overridesrn    s     (;V$rL   c                    [        U [        5      (       d   e[        (       d  SSKJnJn  SSKJn  SSKJn  [        U    $ )Nr>   )cpu_device_op_overridesmps_device_op_overrides)rm  )	r/  rs   r   rp  rq  rO  rm  rS  )rr   rp  rq  rm  xpu_op_overridess        rJ   get_device_op_overridesrt    s/    fc""""##F-@#F++rL   zdict[torch.dtype, torch.dtype]DTYPE_TO_COMPUTATION_DTYPEc                   U [        5       ;   a  [        R                  $ U S;   a  SU;   a  US   $ US   $ U S;   a  [        R                  $ U S;   a  [        R                  $ U S:X  a  SU;   a  US   $ US   $ U S:X  a  SU;   a  US   $ US   $ U S	;   a$  US   n[
        R                  R                  U5      $ U S
:X  a  SU;   a  US   $ US   $ g)zC
Given op name and a list of input dtypes, deduce the output dtype
)to_dtype
index_exprrx   )randrandn)	get_index	randint64	load_seed	reductionr>   constant)loadstorestore_reductionto_dtype_bitcastN)r!   r   rb   floatint64r1   r{   r   )op_nameargskwargsbuf_names       rJ   deduce_output_dtype_by_namer    s
    +-zz	  
 #*V"3vgAbA	  
 {{	  

 {{	K	")V"3vg@a@	J	")V"3vgAbA	  

 7ww  **	&	&")V"3vgAbArL   c                  x    \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	\
SS j5       r\
SS	 j5       rS
rg)DataTypePropagationi,  c                    Xl         SUR                  R                  0U l        UR                  R                  5        H  u  p#UR                  U R                  U'   M      g Nroot)body
root_blockr{   graphs	subblocksitems)r   r  kvs       rJ   __init__DataTypePropagation.__init__-  sL    	DOO))B
 NN((*DAWWDKKN +rL   c                   UR                   nU Vs/ s HB  n[        U[        R                  R                  5      (       d  M.  UR
                  S:w  d  M@  UPMD     nn[        U5      S:X  a  g [        S U 5       5      nU(       d  g [        R                  " [        R                  U Vs/ s H)  o3R                  [        R                     R                  PM+     sn5      $ s  snf s  snf )Nplaceholderr   c              3     #    U  HR  n[         R                  UR                  ;   =(       a)    UR                  [         R                     R                  S Lv   MT     g 7fr[   )OptimizationContextkeymetarx   ).0ns     rJ   	<genexpr>BDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>=  sR      )
 !  ##qvv- B*../55TAB s   AA)all_input_nodesr/  r   fxNodeoplenall	functoolsreducepromote_typesr  r  r  rx   )r   nodeinputsr  input_nodesall_input_nodes_propagateds         rJ   deduce_node_dtype_by_inputs/DataTypePropagation.deduce_node_dtype_by_inputs5  s    %%
!Auxx}}!=A!$$-BWAv 	 
 {q %( )
 !)
 &
"
 *<GHKqVV'++,22KH
 	

  Is   -C C C )0C%
c                l    U R                   UR                     nU R                  U5      nU(       d   eU$ r[   )r  targetpropagate_graph)r   r  	sub_graphrx   s       rJ   deduce_node_dtype_by_subgraph1DataTypePropagation.deduce_node_dtype_by_subgraphJ  s0    KK,	$$Y/urL   c                   UR                   S:X  a  g UR                  S:X  a  [        UR                  5      S:w  a  g UR                  [        R
                  :X  a  U R                  UR                  S   5      $ [        UR                  [        5      (       d   eUR                  R                  S5      (       a  U R                  U5      $ [        UR                  /UR                  Q70 UR                  D6=n b  U$ U R                  U5      $ )Nr  outputr>   r   masked_subblock)r  r  r  r  operatorgetitemdeduce_node_dtyper/  rs   
startswithr  r  r  r  )r   r  output_dtypes      rJ   r  %DataTypePropagation.deduce_node_dtypeP  s    77m#;;("s499~':;;(***))$))A,77$++s++++;;!!"34455d;; 8 ++ L
   //55rL   c                   UR                   (       d   eS nUR                    H  n[        R                  UR                  ;   a  UR                  [        R                     nO
[        5       nU R	                  U5      Ul        XCR                  [        R                  '   UR                  S:X  d  M  UR
                  nM     U$ )Nr  )nodesr  r  r  r  rx   r  )r   r{   graph_dtyper  opt_ctxs        rJ   r  #DataTypePropagation.propagate_graphk  s    {{{-1 KKD"&&$))3))$7$;$;<-/ 2248GM18II)--.{{h&%mm   rL   c                >    U R                  U R                  S   5      $ r  )r  r  r   s    rJ   	propagateDataTypePropagation.propagate}  s    ##DKK$788rL   c                .    U " U5      R                  5       $ r[   )r  )clsr  s     rJ   propagate_loopbody&DataTypePropagation.propagate_loopbody  s    4y""$$rL   c                    SSK Jn  SSKJn  [	        X5      (       d   e[	        UR
                  U5      (       d   e[        R                  UR
                  5      $ )Nr   r9   )r=   )	loop_bodyr:   	schedulerr=   r/  _bodyr  r  )r  r  r:   r=   s       rJ   propagate_scheduler_node,DataTypePropagation.propagate_scheduler_node  sE    (-$....$**h////"55djjAArL   )r  r  N)r  r:   ra   None)r  torch.fx.Nodera   Optional[torch.dtype])r  r  ra   rw   )r{   ztorch.fx.Graphra   r  )ra   r  )r  r:   ra   r  )r  r=   ra   r  )rc   rd   re   rf   r  r  r  r  r  r  classmethodr  r  ri   r`   rL   rJ   r  r  ,  sJ    %
*66$9 % % B BrL   r  c                  D   ^  \ rS rSrSSS.       SU 4S jjjrSrU =r$ )r   i  T)simplifypc                  > U(       ag  [        U[        R                  5      (       aH  [        [        R
                  S5      (       a)  [        R
                  R                  R                  U5      n[        TU ]%  U5      $ )Nsizevars)
r/  r   Exprhasattrr1   r{   r  r  superdoprint)r   r   r  r  	__class__s       rJ   r  PythonPrinter.doprint  sQ     
444*9U9U77##,,T2Dwt$$rL   r`   )r   rn   r  rb   r  rb   ra   rs   )rc   rd   re   rf   r  ri   __classcell__r  s   @rJ   r   r     s2    48D%%-1%=A%	% %rL   r   c                     \ rS rSrSr\SS j5       r\SS j5       r\SS j5       r\SS j5       r	\SS j5       r
\SS j5       r\SS	 j5       r\SS
 j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       rSrg)OpDecompositionsi  z
Decomposes inductor ops
c                    U $ r[   r`   )values    rJ   identityOpDecompositions.identity  s	     rL   c                v    [         R                  " [         R                  " S[        R                  5      U 5      $ Nr>   )r,   truedivr  r   int32xs    rJ   
reciprocalOpDecompositions.reciprocal  s"    {{3<<5;;7;;rL   c                .    [         R                  " X 5      $ r[   )r,   mulr  s    rJ   squareOpDecompositions.square  s    wwq}rL   c                    [         R                  " [         R                  " S[        R                  5      [         R
                  " U 5      5      $ r  )r,   subr  r   float32erfr  s    rJ   erfcOpDecompositions.erfc  s*    wws||Au}}5swwqzBBrL   c                    [         R                  " [         R                  " [         R                  " U 5      5      [         R                  " U 5      5      $ r[   )r,   r  expr  r  r  s    rJ   erfcxOpDecompositions.erfcx  s,    wwswwszz!}-sxx{;;rL   c                    [         R                  " [         R                  " U 5      [         R                  " S[        R
                  5      5      $ r  )r,   r  r  r  r   r  r  s    rJ   expm1OpDecompositions.expm1  s*    wwswwqz3<<5==#ABBrL   c           	         [         R                  " [         R                  " U 5      [         R                  " S[        R                  " S5      -  [
        R                  5      5      $ )Nr>   
   r,   r  logr  mathr   r  r  s    rJ   log10OpDecompositions.log10  s7    wwswwqz3<<DHHRL0@%--#PQQrL   c           	         [         R                  " [         R                  " U 5      [         R                  " S[        R                  " S5      -  [
        R                  5      5      $ )Nr>   r   r  r  s    rJ   log2OpDecompositions.log2  s6    wwswwqz3<<DHHQK#OPPrL   c           
         [         R                  " [         R                  " U [         R                  " [        R
                  " S5      [        R                  5      5      5      $ )Nr   )r,   r  r  r  r	  r  r   r  r  s    rJ   exp2OpDecompositions.exp2  s3    wwswwq#,,txx{EMM"JKLLrL   c           	         [         R                  " [         R                  " U [         R                  " S[        R
                  5      5      5      $ r  )r,   r  addr  r   r  r  s    rJ   log1pOpDecompositions.log1p  s+    wwswwq#,,q%++">?@@rL   c                    [         R                  " S[        R                  5      n[         R                  " U[         R
                  " U[         R                  " [         R                  " U 5      5      5      5      $ r  )r,   r  r   r  r  r  r  negr  ones     rJ   sigmoidOpDecompositions.sigmoid  sC    ll1ekk*{{3SWWSWWQZ-@ ABBrL   c                v    [         R                  " U [         R                  " S[        R                  5      5      $ Nr   )r,   r   r  r   r  r  s    rJ   reluOpDecompositions.relu  s"    {{1cll1ekk:;;rL   c                X    [         R                  " [         R                  " X5      U5      $ r[   )r,   r  r  r  yzs      rJ   fmaOpDecompositions.fma  s     wwswwq}a((rL   c                X    [         R                  " [         R                  " U 5      U5      $ r[   )r,   rw  floorrV   rx   s     rJ   floor_to_intOpDecompositions.floor_to_int      ||CIIaL%00rL   c                X    [         R                  " [         R                  " U 5      U5      $ r[   )r,   rw  ceilr(  s     rJ   ceil_to_intOpDecompositions.ceil_to_int  s    ||CHHQK//rL   c                X    [         R                  " [         R                  " U 5      U5      $ r[   )r,   rw  truncr(  s     rJ   trunc_to_intOpDecompositions.trunc_to_int  r+  rL   c           	        [         R                  " X5      n[         R                  " [         R                  " U[         R                  " S[
        R                  5      5      [         R                  " [         R                  " U5      [         R                  " U5      5      5      n[         R                  " U[         R                  " X!5      U5      $ r  )
r,   modand_ner  r   r  signbitwherer  )rV   rW   rconds       rJ   	remainderOpDecompositions.remainder  su    GGAMxxFF1cll1ekk23FF3;;q>3;;q>2
 yyswwq}a00rL   c                X    [         R                  " [         R                  " U 5      U5      $ r[   )r,   rw  roundr(  s     rJ   round_to_intOpDecompositions.round_to_int  r+  rL   r`   N)r  OpVarTra   rB  r  rB  ra   rB  )r  rB  r"  rB  r#  rB  ra   rB  )rV   rB  rx   rw   ra   rB  rV   rB  rW   rB  ra   rB  )rc   rd   re   rf   r   rh   r  r  r  r  r   r  r
  r  r  r  r  r  r$  r)  r.  r2  r<  r@  ri   r`   rL   rJ   r  r    s}      < <   C C < < C C R R Q Q M M A A C C < < ) ) 1 1 0 0 1 1 1 1 1 1rL   r  z[a-z0-9_.]+|\([^)]*\)|)flagsc                    U S   S:w  d  [        U 5      S:  a  gSn[        U SS  5       H8  u  p#US:X  a  US-  nOUS:X  a  US-  nUS:X  d  M$  U[        U 5      S-
  :w  d  M8    g   US:X  d   eg)Nr   (r   Fr>   rR   T)r  	enumerate)stringro   ichars       rJ   _all_in_parensrL    s    ayC3v;?EVABZ(3;QJES[QJEA:!s6{Q. ) A::rL   c                     \ rS rSr\S)S j5       r\S*S j5       r\S+S j5       r\S+S j5       r\S+S j5       r	\S+S j5       r
\S+S j5       r\S+S	 j5       r\S+S
 j5       r\S+S j5       r\S,S j5       r\S-S j5       r\S-S j5       r\S-S j5       r\S-S j5       r\S-S j5       r\S.S j5       r\S/S j5       r  S0         S1S jjr          S2S jrS3S jr S4         S5S jjrS6S jr          S7S jr        S8S jr          S9S jr  S:               S;S jjrS<S jr S\!RD                  SS S!.             S=S" jjr#S>S# jr$S?S$ jr%\S@S% j5       r&\'SAS& j5       r(\'SBS' j5       r)S(r*g)COpOverridesi   c                    [        U [        5      (       d*  [        R                  U 5      (       d  [	        U 5      (       a  U $ SU  S3$ NrG  rR   )r/  CSEVariable_RE_PAREN_NOT_NEEDED	fullmatchrL  )rI  s    rJ   parenOpOverrides.paren  sB     v{++#--f55f%% M6(!}rL   c                    [        U 5      $ r[   )repr)r  rx   s     rJ   r  OpOverrides.constant  s    E{rL   c                    [         R                  " S[        R                  5      n[         R                  " U[         R
                  " U[         R                  " [         R                  " U 5      5      5      5      $ r  )r,   r  r   r  r  r  libdevice_expr  r  s     rJ   libdevice_sigmoidOpOverrides.libdevice_sigmoid  sE    ll1ekk*{{3S->->swwqz-J KLLrL   c                .    [         R                  " U 5      $ r[   )r,   absr  s    rJ   libdevice_absOpOverrides.libdevice_abs      wwqzrL   c                .    [         R                  " U 5      $ r[   )r,   sqrtr  s    rJ   libdevice_sqrtOpOverrides.libdevice_sqrt  s    xx{rL   c                .    [         R                  " U 5      $ r[   )r,   cosr  s    rJ   libdevice_cosOpOverrides.libdevice_cos  ra  rL   c                .    [         R                  " U 5      $ r[   )r,   sinr  s    rJ   libdevice_sinOpOverrides.libdevice_sin!  ra  rL   c                .    [         R                  " U 5      $ r[   )r,   r  r  s    rJ   libdevice_logOpOverrides.libdevice_log%  ra  rL   c                .    [         R                  " U 5      $ r[   )r,   r  r  s    rJ   rZ  OpOverrides.libdevice_exp)  ra  rL   c                2    S[         R                  U 5       3$ )N~rN  rT  r  s    rJ   bitwise_notOpOverrides.bitwise_not-  s    ;$$Q'())rL   c                2    [         R                  U 5       S3$ )Nz == 0ru  )rV   s    rJ   logical_notOpOverrides.logical_not1  s    ##A&'u--rL   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz & ru  r  r"  s     rJ   bitwise_andOpOverrides.bitwise_and5  +    ##A&'s;+<+<Q+?*@AArL   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz | ru  r|  s     rJ   
bitwise_orOpOverrides.bitwise_or9  r  rL   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz ^ ru  r|  s     rJ   bitwise_xorOpOverrides.bitwise_xor=  r  rL   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz << ru  r|  s     rJ   bitwise_left_shiftOpOverrides.bitwise_left_shiftA  +    ##A&'tK,=,=a,@+ABBrL   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz >> ru  r|  s     rJ   bitwise_right_shiftOpOverrides.bitwise_right_shiftE  r  rL   c                .    [         R                  " X5      $ r[   )r,   r  rU   s     rJ   int_truedivOpOverrides.int_truedivI  s    
 {{1  rL   c                X    [         R                  " U [        R                  " U5      5      $ r[   )r,   r  r   Integer)r   r   s     rJ   r~  OpOverrides.load_seedP  s    xxemmF344rL   Tc                *    [        [        U5      5      $ r[   )r(   rs   )r   varr   checkwrap_negs        rJ   indirect_indexingOpOverrides.indirect_indexingT  s     "#c(++rL   c                D    [        [        U 5      R                   S35      e)Nz,: check_bounds should be handled by CSEProxyrT   r0  rc   r   r   r   loweruppers        rJ   check_boundsOpOverrides.check_bounds]  s'     "Dz""##OP
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz$: load should be handled by CSEProxyr  r   r   rh  s      rJ   r  OpOverrides.loadd  s%    !Dz""##GH
 	
rL   Nc                D    [        [        U 5      R                   S35      e)Nz%: store should be handled by CSEProxyr  r   r   rh  r  modes        rJ   r  OpOverrides.storei  s'     "Dz""##HI
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz/: store_reduction should be handled by CSEProxyr  r   r   rh  r  s       rJ   r  OpOverrides.store_reductionp  s%    !Dz""##RS
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz): reduction should be handled by CSEProxyr  r   rx   	src_dtypereduction_typer  s        rJ   r  OpOverrides.reductionu  s'     "Dz""##LM
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz$: scan should be handled by CSEProxyr  r   dtypes
combine_fnvaluess       rJ   scanOpOverrides.scan  s'     "Dz""##GH
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz$: sort should be handled by CSEProxyr  r   r  r  stable
descendings        rJ   sortOpOverrides.sort  s'     "Dz""##GH
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz): bucketize should be handled by CSEProxyr  r   r  
boundariesboundary_indicesindexing_dtyperightsortersorter_indicess           rJ   	bucketizeOpOverrides.bucketize  s'     "Dz""##LM
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz2: halide_clamp only implemented for Halide backendr  )r   r  r   r  s       rJ   halide_clampOpOverrides.halide_clamp  s%    !Dz""##UV
 	
rL   r>   )constraintsrx   is_purepackc               D    [        [        U 5      R                   S35      e)Nz<: inline_asm_elementwise only implemented for Triton backendr  )r   asmr  rx   r  r  r  s          rJ   inline_asm_elementwise"OpOverrides.inline_asm_elementwise  s'     "Dz""##_`
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz.: ops.output should not appear at codegen timeAssertionErrorr0  rc   )r   r  s     rJ   r  OpOverrides.output  s%    Dz""##QR
 	
rL   c                D    [        [        U 5      R                   S35      e)Nz3: ops.placeholder should not appear at codegen timer  r   rh  s     rJ   r  OpOverrides.placeholder  s%    Dz""##VW
 	
rL   c                4   ^  SU 4S jjnT Ul         SUl        U$ )Nc                J   > [        [        U 5      R                   ST 35      e)Nz does not implement ops.r  )r   r  r  r   s      rJ   unimplemented1OpOverrides._unimplemented.<locals>.unimplemented  s*    %:&&''?vF rL   T)r   rN  r  r   r  r   ra   rB  )rc   is_unimplemented)r   r  s   ` rJ   _unimplementedOpOverrides._unimplemented  s     	
 "&)-&rL   c                    [        XS 5      n[        [        US 5      nU(       + =(       d    X#:H  =(       d    [        USS5      $ )Nr  F)getattrr-   )r  r   fn
default_fns       rJ   _is_unimplementedOpOverrides._is_unimplemented  s;    S%Zt4
vS)SWR9KU-SSrL   c                `   US;   d   U5       e[         R                  5        H  u  p#[        X15      nUc5  U R                  U5      (       a  [	        XU R                  U5      5        MF  MH  X R                  ;  d   SU SU R                   35       eX$l        [	        X[        U5      5        M     g )N)rI  rG  cppvecrH  rT  zmultiple definitions of z on )	pointwise_overrides_datar  r  r  setattrr  __dict__rc   rh   )r  r  funcnamedataimpls        rJ   _initialize_pointwise_overrides+OpOverrides._initialize_pointwise_overrides  s    EEMvME6<<>NH4(D|((22C3+=+=h+GH 3  ||3 .xjS\\NK3 !)|D'9: ?rL   r`   )rI  rB  ra   rB  )r  zUnion[bool, float, int]rx   rw   ra   rB  rC  )rV   rB  ra   rB  )r  rB  r"  rB  ra   rB  rD  )r   rs   r   rB  ra   rB  TT)
r  rB  r   Union[sympy.Expr, int]r  rb   r  rb   ra   sympy.Symbol
r   rn   r   rn   r  rb   r  rb   ra   r  )r   rs   rh  rn   ra   rB  r[   )
r   rs   rh  rn   r  rB  r  r0   ra   r  )r   rs   rh  rn   r  rB  ra   r  )
rx   rw   r  rw   r  r/   r  !Union[OpVarT, tuple[OpVarT, ...]]ra   r  )r  tuple[torch.dtype, ...]r  zFCallable[[tuple[OpVarT, ...], tuple[OpVarT, ...]], tuple[OpVarT, ...]]r  tuple[OpVarT, ...]ra   r  )
r  r  r  r  r  rb   r  rb   ra   r  NN)r  rB  r  .tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]r  rB  r  rw   r  rb   r   Optional[tuple[str, sympy.Expr]]r  zOptional[OpVarT]ra   rB  )r  rB  r   rn   r  rb   ra   rB  )r  rB  r  rs   r  r   rx   rw   r  rb   r  r  ra   rB  )r  rB  ra   r  )rh  r  ra   rB  )r   rs   ra   zCallable[..., OpVarT]r   rs   ra   rb   )r  rs   ra   r  )+rc   rd   re   rf   rh   rT  r  r[  r_  rd  rh  rl  ro  rZ  rv  ry  r}  r  r  r  r  r  r~  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  ri   r`   rL   rJ   rN  rN     s       M M             * * . . B B B B B B C C C C ! ! 5 5 ,, %, 	,
 , 
,

&0
9=
FJ
	

 NR

 *
39
AJ
	


	
	
 	
 &		

 1	
 
+	

'


 #
 

	
'	
 #	
 		

 	
 
	
$ 48+/

 C
 !	

 $
 
 1
 )
 


 &*"]]

 
 #	

 
 
 
 





   T T
 ; ;rL   rN  c                      \ rS rSr% S\S'   S\S'   SrS\S'   SrS\S	'   \R                  r	S
\S'   Sr
S\S'   SrS\S'   Srg)OverridesDatai  rs   r   zCallable[..., str]rG  NzOptional[Callable[..., str]]rI  r  r   type_promotion_kindrH  rT  r`   )rc   rd   re   rf   r   rI  r  r   DEFAULTr  rH  rT  ri   r`   rL   rJ   r  r    sQ    
I	+/F(/+/F(/'// 8  ,0F(/(,C	%,rL   r  airy_aic                    SU  S3$ )Nzairy_ai_forward(rR   r`   r  s    rJ   rM  rM    s    (1-rL   special_airy_ai)r  rG  r   	bessel_j0c                    SU  S3$ )Nzbessel_j0_forward(rR   r`   r  s    rJ   rM  rM        *1#Q/rL   c                    SU  S3$ )Nzlibdevice.j0(rR   r`   r  s    rJ   rM  rM        =1-rL   special_bessel_j0)r  rG  rI  r   	bessel_j1c                    SU  S3$ )Nzbessel_j1_forward(rR   r`   r  s    rJ   rM  rM    r  rL   c                    SU  S3$ )Nzlibdevice.j1(rR   r`   r  s    rJ   rM  rM    r  rL   special_bessel_j1	bessel_y0c                    SU  S3$ )Nzbessel_y0_forward(rR   r`   r  s    rJ   rM  rM  	  r  rL   c                    SU  S3$ )Nzlibdevice.y0(rR   r`   r  s    rJ   rM  rM  
  r  rL   special_bessel_y0	bessel_y1c                    SU  S3$ )Nzbessel_y1_forward(rR   r`   r  s    rJ   rM  rM    r  rL   c                    SU  S3$ )Nzlibdevice.y1(rR   r`   r  s    rJ   rM  rM    r  rL   special_bessel_y1digammac                    SU  S3$ )Nzcalc_digamma(rR   r`   r  s    rJ   rM  rM    s    aS*rL   c                    U  S3$ )Nz
.digamma()r`   r  s    rJ   rM  rM    s    A3j)rL   )r  rG  r  r   r   c                    SU  S3$ )Nzcalc_erfcx(rR   r`   r  s    rJ   rM  rM        A3a(rL   c                    SU  S3$ )Nzlibdevice.erfcx(rR   r`   r  s    rJ   rM  rM    s    +A3a0rL   special_erfcxr$  c                    SU  SU SU S3$ )Nz	std::fma(rQ   rR   r`   r!  s      rJ   rM  rM  #  s    is"QCr!A6rL   c                    SU  SU SU S3$ )Nzfmadd(rQ   rR   r`   r!  s      rJ   rM  rM  $  s    s"QCr!A6rL   c                    SU  SU SU S3$ )Nzlibdevice.fma(rQ   rR   r`   r!  s      rJ   rM  rM  %  s    s"QCr!A>rL   )r  rG  r  rI  r   igammac                    SU  SU S3$ Nzcalc_igamma(rQ   rR   r`   r|  s     rJ   rM  rM  +      <s"QCq1rL   igammacc                    SU  SU S3$ Nzcalc_igammac(rQ   rR   r`   r|  s     rJ   rM  rM  0      =2aS2rL   gammaincc                    SU  SU S3$ r  r`   r|  s     rJ   rM  rM  5  r  rL   special_gammainc	gammainccc                    SU  SU S3$ r"  r`   r|  s     rJ   rM  rM  :  r#  rL   special_gammaincci0c                    SU  S3$ )Nzcalc_i0(rR   r`   r  s    rJ   rM  rM  ?      1orL   c                    SU  S3$ Nzlibdevice.cyl_bessel_i0(rR   r`   r  s    rJ   rM  rM  @      3A3a8rL   c                    U  S3$ )Nz.i0()r`   r  s    rJ   rM  rM  A  s
    A3erL   )r  rG  rI  r  r   i0ec                    SU  S3$ )Nz	calc_i0e(rR   r`   r  s    rJ   rM  rM  F      	!A&rL   c                    U  S3$ )Nz.i0e()r`   r  s    rJ   rM  rM  G  s
    A3frL   special_i0ei1c                    SU  S3$ )Nzcalc_i1(rR   r`   r  s    rJ   rM  rM  L  r,  rL   c                    SU  S3$ Nzlibdevice.cyl_bessel_i1(rR   r`   r  s    rJ   rM  rM  M  r/  rL   
special_i1i1ec                    SU  S3$ )Nz	calc_i1e(rR   r`   r  s    rJ   rM  rM  R  r3  rL   special_i1elog_ndtrc                    SU  S3$ )Nzcalc_log_ndtr(rR   r`   r  s    rJ   rM  rM  W  s    qc+rL   special_log_ndtrmodified_bessel_i0c                    SU  S3$ )Nzmodified_bessel_i0_forward(rR   r`   r  s    rJ   rM  rM  ]      3A3a8rL   c                    SU  S3$ r.  r`   r  s    rJ   rM  rM  ^  r/  rL   special_modified_bessel_i0modified_bessel_i1c                    SU  S3$ )Nzmodified_bessel_i1_forward(rR   r`   r  s    rJ   rM  rM  c  rC  rL   c                    SU  S3$ r9  r`   r  s    rJ   rM  rM  d  r/  rL   special_modified_bessel_i1modified_bessel_k0c                    SU  S3$ )Nzmodified_bessel_k0_forward(rR   r`   r  s    rJ   rM  rM  i  rC  rL   special_modified_bessel_k0modified_bessel_k1c                    SU  S3$ )Nzmodified_bessel_k1_forward(rR   r`   r  s    rJ   rM  rM  n  rC  rL   special_modified_bessel_k1ndtrc                    SU  S3$ )Nz
calc_ndtr(rR   r`   r  s    rJ   rM  rM  t  s    
1#Q'rL   special_ndtrndtric                    SU  S3$ )Nzcalc_ndtri(rR   r`   r  s    rJ   rM  rM  y  r  rL   special_ndtri	polygammac                    U  SU SU SU  S3$ )Nz == 0 ? calc_digamma(z) : calc_polygamma(rQ   rR   r`   r|  s     rJ   rM  rM  ~  s     A33A36I!BqcQRSrL   scaled_modified_bessel_k0c                    SU  S3$ )Nz"scaled_modified_bessel_k0_forward(rR   r`   r  s    rJ   rM  rM        :1#Q?rL   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                    SU  S3$ )Nz"scaled_modified_bessel_k1_forward(rR   r`   r  s    rJ   rM  rM    rZ  rL   !special_scaled_modified_bessel_k1spherical_bessel_j0c                    SU  S3$ )Nzspherical_bessel_j0_forward(rR   r`   r  s    rJ   rM  rM    s    4QCq9rL   special_spherical_bessel_j0zetac                    SU  SU S3$ )Nzzeta(rQ   rR   r`   r|  s     rJ   rM  rM    s    52aS*rL   special_zetachebyshev_polynomial_tc                    SU  SU S3$ )Nzchebyshev_polynomial_t_forward(rQ   rR   r`   r|  s     rJ   rM  rM        :1#Rs!DrL   special_chebyshev_polynomial_tchebyshev_polynomial_uc                    SU  SU S3$ )Nzchebyshev_polynomial_u_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rg  rL   special_chebyshev_polynomial_uchebyshev_polynomial_vc                    SU  SU S3$ )Nzchebyshev_polynomial_v_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rg  rL   special_chebyshev_polynomial_vchebyshev_polynomial_wc                    SU  SU S3$ )Nzchebyshev_polynomial_w_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rg  rL   special_chebyshev_polynomial_wlegendre_polynomial_pc                    SU  SU S3$ )Nzlegendre_polynomial_p_forward(rQ   rR   r`   r|  s     rJ   rM  rM        9!BqcCrL   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_t_forward(rQ   rR   r`   r|  s     rJ   rM  rM        B1#Rs!LrL   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_u_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rx  rL   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_v_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rx  rL   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_w_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rx  rL   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                    SU  SU S3$ )Nzhermite_polynomial_h_forward(rQ   rR   r`   r|  s     rJ   rM  rM    s    82aSBrL   special_hermite_polynomial_hhermite_polynomial_hec                    SU  SU S3$ )Nzhermite_polynomial_he_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rt  rL   special_hermite_polynomial_helaguerre_polynomial_lc                    SU  SU S3$ )Nzlaguerre_polynomial_l_forward(rQ   rR   r`   r|  s     rJ   rM  rM    rt  rL   special_laguerre_polynomial_lzdict[str, OverridesData]r  c                   ^  [        U 4S j[        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R
                  4 5       5      $ )Nc              3  .   >#    U  H
  nTU;   v   M     g 7fr[   r`   )r  r  r   s     rJ   r  $is_buffer_removed.<locals>.<genexpr>  s       
A 		
s   )anyr1   r{   removed_bufferskernelinplaced_to_remover   s   `rJ   is_buffer_removedr    sU      GG##HH$$GG&&HH''	
  rL   c                  D   ^  \ rS rSrSrSU 4S jjrSS jrS	S jrSrU =r	$ )
DeferredLinei  zHA line that can be 'unwritten' by adding name to V.graph.removed_buffersc                ^   > [         TU ]  U5        Xl        [        U[        5      (       a   eg r[   )r  r  r   r/  r"   )r   r   liner  s      rJ   r  DeferredLine.__init__  s,    	d$455555rL   c                P    [        U R                  5      (       d  U R                  $ g r[   )r  r   r  r   s    rJ   __call__DeferredLine.__call__  s     ++99rL   c                .    [        U R                  U5      $ r[   )r  r   )r   r  s     rJ   	_new_lineDeferredLine._new_line  s    DIIt,,rL   r  )r   rs   r  rs   r   )r  rs   ra   r  )
rc   rd   re   rf   r   r  r  r  ri   r  r  s   @rJ   r  r    s    R6

- -rL   r  c                  "    \ rS rSrSSS jjrSrg)BracesBufferi  c                L   ^ ^ [         R                  SUU 4S jj5       nU" 5       $ )Nc               3    >#    [        T5       H)  n TR                  S5        T=R                  S-  sl        M+     [        T* 5       H)  n T=R                  S-  sl        TR                  S5        M+     S v   [        T* 5       H)  n TR                  S5        T=R                  S-  sl        M+     [        T5       H)  n T=R                  S-  sl        TR                  S5        M+     g 7f)N{r>   })range	writeline_indent)_r   r   s    rJ   ctx BracesBuffer.indent.<locals>.ctx  s     6]s#! # F7^!s# $ F7^s#! $ 6]!s# #s   C(C+)ra   Iterator[None])
contextlibcontextmanager)r   r   r  s   `` rJ   indentBracesBuffer.indent  s$    		"	"	$ 
#	$ urL   r`   N)r>   )r   r  ra   z'contextlib.AbstractContextManager[None])rc   rd   re   rf   r  ri   r`   rL   rJ   r  r    s     rL   r  c                  *    \ rS rSr% S\S'   S\S'   Srg)InplacedBufferi	  rs   rv   r   other_namesr`   Nr   r`   rL   rJ   r  r  	  s    OrL   r  c                  8    \ rS rSr% S\S'   SrS\S'   S
S jrSrg	)ArgNamei  rs   r   Frb   is_constexprc                L    U R                    U R                  (       a  S 3$ S 3$ )Nz : tl.constexprrr  )r   r  r   s    rJ   	full_nameArgName.full_name  s*    ))$2C2C.LMMLMMrL   r`   Nr   )rc   rd   re   rf   r   r  r  ri   r`   rL   rJ   r  r    s    
IL$NrL   r  c                      \ rS rSrSS jrSrg)
RemovedArgi  c                    g)NREMOVEDr`   r   s    rJ   __str__RemovedArg.__str__  s    rL   r`   Nr   )rc   rd   re   rf   r  ri   r`   rL   rJ   r  r    s    rL   r  c                     \ rS rSr\        SS j5       rSS jrSS jr\SS j5       rSS jr	SS jr
SS jrSS	 jrSS
 jrS S jrS!S jrS"S jrS#S jrS$S jrS%S jrS&S jr  S'S jrS(S jrS)S jrS*S jrSrg)+
KernelArgsi   c                    UR                  U[        5      n[        U[        5      (       a  U  [	        U5       3=X'   nU$ U$ r[   )getr  r/  r  r  )r}   odictr   result
new_results        rJ   _lookupKernelArgs._lookup!  sE     */4)Afj))*0#e*'>>EK*rL   c                J    0 U l         0 U l        0 U l        0 U l        / U l        g r[   )input_buffersoutput_buffersinplace_buffersr  workspace_argsr   s    rJ   r  KernelArgs.__init__-  s)    -/ACMO/124rL   c                    SR                  SR                  [        [        U R                  U R
                  U R                  U R                  /5      5      5      $ )NzKernelArgs({})rQ   )formatr   maprW  r  r  r  r  r   s    rJ   __repr__KernelArgs.__repr__4  sS    &&II**++,,	

 	
rL   c                "    [        U [        5      $ r[   )r/  r  r  s    rJ   _buffer_is_marked_removed$KernelArgs._buffer_is_marked_removedC  s     $
++rL   c                T   [         R                  R                  (       a3  [         R                  R                  R                  R	                  X5      nU[         R                  R
                  ;  d   U5       eXR                  ;   a  [        [        U R                  U   5      $ XR                  ;   a'  [        [        U R                  U   5      R                  $ UR                  S5      (       a  U R                  SU R                  U5      $ U R                  SU R                  U5      $ )Nseedin_ptr)r1   r{   r  mutation_real_namer  r  r  r	   rs   r  r  rv   r  r  r  r   s     rJ   inputKernelArgs.inputH  s    7777$$77;;DGD1772228D82&&&T00677'''(<(<T(BCNNN??6""<<(:(:DAA||Hd&8&8$??rL   c                   [         R                  R                  (       a3  [         R                  R                  R                  R	                  X5      nU[         R                  R
                  ;  d   U5       eXR                  ;   a'  [        [        U R                  U   5      R                  $ U R                  SU R                  U5      $ )Nout_ptr)r1   r{   r  r  r  r  r  r	   r  rv   r  r  r   s     rJ   r  KernelArgs.outputT  s    7777$$77;;DGD1772228D82'''(<(<T(BCNNN||It':':DAArL   c                   X R                   ;  d   eXR                   ;   aP  U R                   U   n[        U[        5      (       a   eUR                  R	                  U5        X0R                   U'   g U R                   R                  5        Vs/ s H  n[        U[        5      (       a  M  UPM     nnU R                   R                  5        Vs/ s H  n[        U[        5      (       d  M  UPM     nn[        [        U5      5      [        U5      -   n[        SU 3X/5      nX0R                   U'   X0R                   U'   g s  snf s  snf )N
in_out_ptr)	r  r/  r  r  appendr  r  r+   r  )r   
input_nameoutput_namebufvalalive_buffersr  inplace_buffer_idxs           rJ   make_inplaceKernelArgs.make_inplace\  s9   "6"6666---&&z2C!#z2222OO"";/03  -  //6688C!#z2 8    //6688Cc:. 8  
 "%VM%:!;c/>R!R /01)C 03  ,03  -!
s   E *E E+Ec                F   [        U[        R                  U5      [        R                  R                  5       [         R                  5       S9n[        U R                  5       H  u  pE[         R                  XS5      (       a>  UR                  n[         R                  XS5      U R                  U'   UR                  U4s  $ UR                  UR                  :w  a  UR                  UR                  :w  a  M   e   U R                  R                  U5        UR                  S4$ )a  
Allocate or extend a workspace buffer of nbytes bytes.

This function manages the allocation of a workspace buffer. It either creates
a new WorkspaceArg or extends an existing one.

Note:
- Calling this function will in-place mutate the args by adding or updating
a WorkspaceArg.
- The codegen for generating the Python argdefs and call_defs will check
this field and allocate the buffer accordingly.
- A new argument "ws_ptr" will be present in the generated code.

Args:
    nbytes (sympy.Expr): The number of bytes to allocate.
    zero_fill (bool): Whether to initialize the buffer to zero.

Returns:
    Tuple[str, int]: A tuple containing:
        - "ws_ptr": A string identifier for the workspace pointer.
        - offset: An integer representing the byte offset in the workspace.
)ro   rp   rr   rt   r   )rl   rN   r^   r1   r{   get_current_device_or_throwr~   rH  r  r   ro   r   rv   rt   r  )r   nbytesr]   argrJ  existing_argr   s          rJ   	workspaceKernelArgs.workspacev  s    . '11)<77668#//1	
  ))<)<=OA$$\77%++)5):):<)M##A&#..66''3>>9 ++s~~=>  > 	""3'~~q  rL   c           
        [         R                  R                  5       n[        U[        R
                  [        R                  SSUR                   SUR                   3US9nU R                   H&  nUR                  UR                  :X  d  M  X4:X  a  M&   e   U R                  R                  U5        UR                  $ )a  
Lazily allocate a graph-wide semaphores buffer with at least min_size.  This is a single buffer shared by
all kernels and zero initialized once at graph start.  Each kernel must leave the buffer zeroed on exit.

Warning: multiple calls to this function will return the same buffer.

Args:
    min_size: the number of int32 semaphores required

Returns:
    name of the semaphores buffer
sem_ptrsemaphores_r  )ro   rp   rx   rv   rt   rr   )r1   r{   r  rl   rN   rg   r   uint32r0  rh  r  rv   r  )r   min_sizecurrent_devicer  r  s        rJ   
semaphoresKernelArgs.semaphores  s     <<>'66,, $^%8%8$9>;O;O:PQ!
 !//L&&#..8*** 0 	""3'~~rL   c                z  ^ [        U[        5      (       d   [        U5      U45       e[        R                  " U5      nX R
                  ;   a  U R
                  U   $ TU R
                  R                  5       ;   a1  T [        U4S jU R
                  R                  5        5       5       3mTU R
                  U'   T$ )Nc              3  V   >#    U  H  oR                  T5      (       d  M  S v   M      g7f)r>   N)r  )r  r  r   s     rJ   r  )KernelArgs.seed_offset.<locals>.<genexpr>  s     U(>1,,tBTQQ(>s   )	))r/  r  r0  r   r  r  r  sum)r   r   r  s    ` rJ   seed_offsetKernelArgs.seed_offset  s    %%%;UU';;%e$MM!==''4==''))&U(<(<(>UUVW   $erL   c                    [        U[        R                  5      (       d   [        U5      U45       eUR                  S:X  a  SU R
                  U'   gU R                  SU R
                  U5      $ )Nr  ks)r/  r   Symbolr0  r   r  r  r   s     rJ   r   KernelArgs.size  s[    $--AT
D/AA-99"(DMM$||D$--66rL   c                    [        U R                  R                  5       U R                  R                  5       U R                  R                  5       5      $ r[   )r   r  keysr  r  r   s    rJ   
call_namesKernelArgs.call_names  sA    ##%t':':'?'?'A4==CUCUCW
 	
rL   c                &   U R                   R                  US5      nUb!  [        U[        5      (       d  UR                  $ U R
                  R                  US5      nUb  [        U[        5      (       d  U$ U R                  R                  US5      $ )z+
Returns inner name of a given outer name.
N)r  r  r/  r  rv   r  r  )r   r   inplacedr  s       rJ   arg_nameKernelArgs.arg_name  s     ''++D$7
8Z(H(H&&&))--dD9":k:+N+N!!%%dD11rL   c                    U$ r[   r`   )r   r  rx   s      rJ   wrap_ptr_argKernelArgs.wrap_ptr_arg  s    
rL   c                    [        U5      $ r[   )rs   )r   r   s     rJ   wrap_size_argKernelArgs.wrap_size_arg  s    4yrL   c                
   SSK JnJn  / n/ n/ n[        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR                  S   nUR                  n[        R                  R                  U5      n	X   n
UR                  U
 SU 35        UR                  U R                  Xy5      5        UR                  U
 S35        M     U R                  R!                  5        H  u  pxXpR                  ;   a  M  [        R                  R                  U5      n	X   n
UR                  SU
 SU 35        UR                  U R                  Xy5      5        UR                  SU
 S35        M     U R"                  R!                  5        H  u  p{XpR                  ;   d  [        U[        5      (       a  M+  [        R                  R                  U5      n	X   n
UR                  U
 SU 35        UR                  U R                  Xy5      5        UR                  U
 S35        M     U R$                  R!                  5        H  u  pxUR                  SU SU 35        UR                  U R'                  U5      5        UR                  SU 35        [        R                  R(                  (       d  Mq  [        R                  R(                  R+                  U5        M     U R,                  (       a   S5       eXCU4$ )	Nr>   )DTYPE_TO_CPP
INDEX_TYPEry  z* *zconst  zWorkspace not supported on CPU )	cpp_utilsr  r  r+   r  r  r/  r  r  rv   r1   r{   r   r  r  r  r  r  r  r  wrapper_codeensure_size_computedr  )r   r  r  	call_argsarg_defs	arg_typesr	  outerinnerrx   	cpp_dtypemaybe_inners               rJ   cpp_argdefsKernelArgs.cpp_argdefs  s   7		t33::<=H(J//((,E''EGG%%e,E$+IOOykE734T..u<=	{!_- > !..446LE,,,GG%%e,E$+IOOfYKr%9:T..u<=vi[23 7 #'"5"5";";"=E,,,
;
0S0SGG%%e,E$+IOOykK=9:T..u<=	{!_- #> !MM//1LEOOfZL%9:T//67vj\23ww###$$99%@ 2 &&I(II&I--rL   c                   / n/ n/ n/ n[        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR                  [        UR                  5      5        UR                  UR                  S   5        UR                  [        R                  R                  UR                  S   5      5        UR                  [        UR                  UR                  S   [        R                  R                  UR                  S   5      S95        M     [        U R                  R                  5       U R                   R                  5       5       H  u  pgX`R                  ;   d  [        U[        5      (       a  M+  UR                  [        U5      5        UR                  U5        UR                  [        R                  R                  U5      5        UR                  [        UU[        R                  R                  U5      S95        M     U R"                  R                  5        H  u  pgUR                  [        U5      5        UR                  U5        UR                  [%        U5      5        UR                  ['        Xv5      5        [        R                  R(                  (       d  M  [        R                  R(                  R+                  U5        M     U R,                   Hn  nUR                  [        UR                  5      5        UR                  UR.                  5        UR                  U5        UR                  UR0                  5        Mp     XXC4$ )Nry  )r   r   rx   )r+   r  r  r/  r  r  r  rv   r  r1   r{   r   r   r   r  r  r  r  r0  r   r  r  r  rt   rx   )	r   r  r  r  precompile_argsr	  r  r  r  s	            rJ   python_argdefsKernelArgs.python_argdefs  s    #%!	')	/1t33::<=H(J//OOGH$7$789X11"56QWW..x/C/CB/GHI""!,,#//3''++H,@,@,DE > "$$&(;(;(A(A(C
LE ,,,
5*0M0MOOGEN+U#QWW..u56"" ''++E2
 !MM//1LEOOGEN+U#T%[)""75#89ww###$$99%@ 2 &&COOGCNN34S^^,""3'SYY'	 '
 O>>rL   c              #     #    [        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR
                   H  nU[        R                  R                  ;   d  U[        R                  R                  ;   a  MA  X R                  ;   a  U R                  U   UR                  4v   X R                  ;   d  M~  [        [        U R                  U   5      UR                  4v   M     M     g 7fr[   )r+   r  r  r/  r  r  r1   r{   r  r  r  rv   r  r	   rs   )r   r	  others      rJ   aliasesKernelArgs.aliases@  s     t33::<=H(J//!--QWW777 ; ;;...,,U3X5H5HHH///sD$7$7$>?ATATTT . >s   CC>
4C>c                    [        U R                  R                  U[        5      [        5      =(       a.    [        U R
                  R                  U[        5      [        5      $ r[   )r/  r  r  r  r  r  r   s     rJ   
is_removedKernelArgs.is_removedO  sK    ##D'2J
 N--11$@*M	NrL   c                   [        5       n[        U R                  R                  5       5       H8  n[	        U[
        5      (       a  M  UR                  UR                  S   5        M:     U R                  R                  5        H<  u  p4X0R                  ;   d  [	        U[
        5      (       a  M+  UR                  U5        M>     U$ )Nry  )
r   r+   r  r  r/  r  r  r  r  r  )r   	live_outsr	  r  r  s        rJ   live_output_buffersKernelArgs.live_output_buffersW  s    L	t33::<=H(J//MM(..r23 > !//557LE,,,
5*0M0MMM%  8 rL   )r  r  r  r  r  N)r}   rs   r  z6Union[dict[_T, Union[str, RemovedArg]], dict[_T, str]]r   rA   ra   rs   ra   r  r   )r   r   ra   rb   r  )r  rs   r  rs   ra   r  )r  rn   r]   rb   ra   ztuple[str, int])r  rn   ra   rs   )r   rs   r  r  ra   rs   )r   r  ra   rs   )ra   zIterator[str])r   rs   ra   r   )r  rs   rx   rw   ra   rs   )r   
SymbolLikera   rs   )ra   z&tuple[list[str], list[str], list[str]])ra   z?tuple[list[ArgName], list[str], list[KernelArgType], list[Any]])ra   zIterator[tuple[str, str]]r  )ra   zOrderedSet[str])rc   rd   re   rf   rh   r  r  r  r  r  r  r  r  r  r  r   r  r
  r  r  r!  r%  r)  r,  r0  ri   r`   rL   rJ   r  r     s    		E	 	 
		 	5
 , ,
@B44'!R87


2'.R/?	H/?bUN
rL   r  c                  r   ^  \ rS rSrSr S
     SU 4S jjjrSS jrSS jrSS jrSS jr	SS jr
S	rU =r$ )rQ  id  a4  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
To do so, the backends can simply overload `Kernel.create_cse_var`
The "CSEVariable.update_on_args" method gives you a hook for annotations
See example of TritonCSEVariable in triton.py
c                   > [         TU ]  5         [        U[        5      (       d   eXl        X l        SU l        X0l        g r  )r  r  r/  r   r   bounds	use_countrx   )r   r   r6  rx   r  s       rJ   r  CSEVariable.__init__k  s9     	&+....	
rL   c                    U R                   $ r[   r  r   s    rJ   r  CSEVariable.__str__x  s    yyrL   c                ,    [        U R                  5      $ r[   )hashr   r   s    rJ   __hash__CSEVariable.__hash__{  s    DIIrL   c                b    [        U[        5      =(       a    UR                  U R                  :H  $ r[   )r/  rQ  r   )r   r(  s     rJ   __eq__CSEVariable.__eq__~  s!    %-I%**		2IIrL   c                    g r[   r`   )r   r   r  r  s       rJ   update_on_argsCSEVariable.update_on_args  s    rL   c                P    U R                   R                   SU R                  < S3$ rP  )r  rc   r   r   s    rJ   r  CSEVariable.__repr__  s$    ..))*!DII=::rL   )r6  rx   r   r7  r[   )r   rs   r6  ValueRanges[Any]rx   r  r   )ra   r  )r(  objectra   rb   )r   rs   r  r   r  r   ra   r  )rc   rd   re   rf   r   r  r  r=  r@  rC  r  ri   r  r  s   @rJ   rQ  rQ  d  sR     (,	 ! %	 J; ;rL   rQ  AugmentedKeyT)defaultCSEVariableType)boundrJ  .c                  f   \ rS rSrSr       S             SS jjrSS jrSS jrSS jrSS jr	SS	 jr
SS
 jrSS jrSS jr\R                  " 5       SSSS.             SS jjr\R                  " 5       S4     SS jjr\R                  " 5       S4       SS jjrSrg)CSEi  z Common subexpression eliminationNc                    Xl         X l        0 U l        X0l        U=(       d    0 U l        U=(       d    0 U l        U=(       d    [        R                  " 5       U l        [        5       U l
        U=(       d    0 U l        g r[   )r}   suffix_cachename_prefixstore_cachereduction_cache	itertoolsro   iter_buffer_idsr   invalidated_storesvarname_map)r   r}   rP  rR  iter_buffersrS  rT  rX  s           rJ   r  CSE.__init__  sg     FH&ALARPR!r 	 6B5VY__EV3=<7B7HbrL   c                L   / U R                   R                  5       Q H4  u  p#X1;  d  M  U R                   U	 U R                  R                  U5        M6     U(       a<  U R                  R                  5        VVs0 s H  u  pEXQ;   d  M  XE_M     snnU l        g 0 U l        g s  snnf r[   )rS  r  rW  r  rQ  )r   	keep_varsr   tmpr  r  s         rJ   
invalidateCSE.invalidate  s    44++1134ID#$$T*''++D1 5 ,0KK,=,=,?R,?DA1>414,?RDKDK Ss   9B B c           
         [        U 5      " U R                  U R                  U R                  U R                  U R
                  U R                  U R                  S9$ )N)r}   rP  rR  rY  rS  rX  rT  )r0  r}   rP  rR  rV  rS  rX  rT  r   s    rJ   clone	CSE.clone  sN    Dz;;;;((--(((( 00
 	
rL   c                    U R                  5       n[        U R                  5      Ul        [        U R                  5      Ul        [        U R                  5      Ul        U$ )zNReturn a copy of using ScopedDict so changes to *_cache aren't visible in self)ra  r&   rQ  rT  rS  )r   new_cses     rJ   scoped_copyCSE.scoped_copy  sH    **,#DKK0",T-A-A"B()9)9:rL   c                "    [        [        U5      $ )z@Override this method to augment cache key with backend specifics)r	   rI  r   	cache_keys     rJ   augment_keyCSE.augment_key  s    M9--rL   c                >    X R                   U R                  U5      '   g r[   rQ  rj  )r   ri  r  s      rJ   putCSE.put  s    36D$$Y/0rL   c                >    U R                  U5      U R                  ;   $ r[   )rj  rQ  rh  s     rJ   containsCSE.contains  s    	*dkk99rL   c                X    U R                   R                  U R                  U5      S 5      $ r[   )rQ  r  rj  rh  s     rJ   try_getCSE.try_get  s"    {{t//	:DAArL   c                >    U R                   U R                  U5         $ r[   rm  rh  s     rJ   r  CSE.get  s    {{4++I677rL   T)r6  write
assignmentrx   c          	     B   [        U[        5      (       a  UR                  nU(       d	  U(       d   e[        U[        5      (       aE  UR                  R                  U5      Ul        U=R                  S-  sl        [        [        U5      $ [        U[        5      (       a  UR                  5       nO;[        U[        5      (       a  UR                  nO[        U[        5      (       d   eUnU R                  U5      nU(       Gd  U R                  X65      nU R!                  Xx5        U(       Ga  ["        R$                  R&                  (       a(  ["        R$                  R&                  R)                  USS9  [        U[        5      (       aU  U(       a   UR+                  U R,                   U S35        UR/                  U5        UR+                  U R0                  5        U$ [        U[        5      (       aR  U(       d   eUR+                  UR3                  U R,                   U SUR                   U R0                   35      5        U$ U(       a   U R,                   U SU U R0                   3n	OU U R0                   3n	UR+                  U	5        U(       aQ  [4        R6                  R8                  (       a2  Ub/  SU R,                   U S[;        U5       S3n
UR+                  U
5        U$ UR                  R                  U5      Ul        U=R                  S-  sl        U$ )	Nr>   T)	only_oncez =z = tl.static_assert(
.dtype == rR   )r/  r.   r  rQ  r6  tightenr7  r	   rK  r$   getvaluer"   r  rs   rt  newvarrn  r1   r  current_nodecodegen_originating_infor  r}   splicerP  r  r   test_configsruntime_triton_dtype_assertr*   )r   r   r   r6  rx  ry  rx   ri  r  r  assert_lines              rJ   generateCSE.generate  sz    dH%%::D
""dK(( ++--f5DKNNaN..n--I.//		IdC((((Ill9%++f,CHHY$88((HH))BB$ C  dN33!((DKK=R)@AMM$'$$T[[12 
1  &677%%:$$$++se3tyyk$++'WX, 
% ""&++se3tfT[[MJ"&}5$$T* #"//KK!-(9$++se:VabgVhUiij&k((5 
 ++F3CJMMQM
rL   c                    U R                    [        U R                  5       3n[        R                  R                  X1U5      nX@R                  U'   U$ r[   )rR  rz   rV  r1   r  create_cse_varrX  )r   r6  rx   var_namer  s        rJ   r  
CSE.newvar  sN    
 &&'T-A-A(B'CDhh%%h>%("
rL   c                   ^ [         R                  " TU R                  ;  U4S j5        [        R                  R                  TX#5      nX@R                  T'   U$ )Nc                    > ST  3$ )Nzduplicate name: r`   r  s   rJ   rM  CSE.namedvar.<locals>.<lambda>0  s    4DTF2KrL   )r   _check_valuerX  r1   r  r  )r   r   r6  rx   r  s    `   rJ   namedvarCSE.namedvar)  sO     	(((*K	
 hh%%dF:!$
rL   )	rQ  rW  rV  rR  r}   rT  rS  rP  rX  )rr  rr  r]  NNNN)r}   rs   rP  rs   rR  rs   rY  zOptional[itertools.count[int]]rS  z.Optional[MutableMapping[str, CSEVariableType]]rT  z<Optional[MutableMapping[ReductionCacheKey, CSEVariableType]]rX  z$Optional[dict[str, CSEVariableType]])r\  zOrderedSet[CSEVariable]ra   r  ra   ztyping.Self)ri  rs   ra   rI  )ri  rs   r  rK  ra   r  )ri  rs   ra   rb   )ri  rs   ra   zOptional[CSEVariableType])ri  rs   ra   rK  )r   r$   r   zCUnion[str, CSEVariable, OpsValue, IndentedBuffer, DeferredLineBase]r6  rG  rx  rb   ry  rb   rx   r  ra   rK  )r6  rG  rx   r  ra   rK  )r   rs   r6  rG  rx   r  ra   rK  )rc   rd   re   rf   r   r  r^  ra  re  rj  rn  rq  rt  r  r   unknownr  r  r  ri   r`   rL   rJ   rN  rN    sv   *  7;FJ <@II I 	I
 5I DI
I :I0	
.7:B8 $/#6#6#8'+BB RB
 !B B B %B 
BL $/#6#6#8'+  % 
	 $/#6#6#8'+	 ! %	
 
 rL   rN  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	CodeGeni7  c                V   > [         TU ]  5         [        R                  " 5       U l        g r[   )r  r  r  	ExitStack
exit_stackr   r  s    rJ   r  CodeGen.__init__8  s    $..0rL   c                :    U R                   R                  5         U $ r[   )r  	__enter__r   s    rJ   r  CodeGen.__enter__<  s    !!#rL   c                <    U R                   R                  XU5        g r[   )r  __exit__)r   exc_typeexc_valexc_tbs       rJ   r  CodeGen.__exit__@  s      F;rL   )r  r2  r  r  r   r  r   r  r   ra   r  )	rc   rd   re   rf   r  r  r  ri   r  r  s   @rJ   r  r  7  s    1< <rL   r  c                  `  ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'    S"     S#U 4S	 jjjr\	R                  S$S
 j5       r\	R                    S%       S&S jj5       rS'S jrS'S jrS(S jr S)         S*S jjr          S+S jr        S,S jr          S-S jrS.S jr  S%               S/S jjr\S0S j5       r S)         S1S jjr          S2S jrS3S jrS4U 4S jjrS5U 4S jjrS6S jrS7S jrS7S jr    S8S jr S9S jr!S:S  jr"S!r#U =r$$ );KerneliD  rr  rs   newvar_prefixrP  Nz'Optional[Callable[[], OpsHandler[Any]]]	overridesc                l  > [         TU ]  5         U(       a  [        =R                  S-  sl        U=(       d
    [	        5       U l        [        5       U l        [        5       U l        [        5       U l	        SU l
        SU l        [        U R                  U R                  5      U l        [         ["           " 5       U l        [         ["           " 5       U l        S U l        S U l        S U l        S U l        [         ["           " 5       U l        [         ["           " 5       U l        0 U l        SU l        S U l        g )Nr>   r   )r  r  r   generated_kernel_countr  r  r$   loadscomputestoresnum_loadnum_reductionrN  r  rP  cser   rs   must_keep_buffersstore_buffer_names
_load_mask_load_otherr  node_to_boundsr  r  inplace_update_buffersmin_elem_per_threadkernel_name)r   r  increase_kernel_countr  s      rJ   r  Kernel.__init__I  s     	 **a/*(JL	#%
%'$&.1$2D2Ddkk.R!+C!2",S/"3)-4859OS)#0",S/"3
 79##$ *.rL   c              #     #    U R                   nXl         UR                  R                  5       R                  5       U l         S v   X l         g ! X l         f = f7fr[   )r  r  r6  
get_boundsr  )r   r  priors      rJ   set_current_nodeKernel.set_current_nodek  sH     !! "jj//1<<>	& %s   A AA AAAc              #    #    Uc  UnUS L =n(       a
  [        5       nU R                  nU R                  nU R                  nU R                  nXl        X l        X0l        UR                  5       U l         S v   XPl        X`l        Xpl        Xl        U(       a  U(       a   S5       eg g ! XPl        X`l        Xpl        Xl        U(       a  U(       a   S5       ef f = f7f)Nz$unexpected store inside swap_buffers)r$   r  r  r  r  re  )	r   lbcbsbdisallow_storesr  r  r  r  s	            rJ   swap_buffersKernel.swap_buffersu  s      :B Dj(?(!B

,,hh
??$	FJ"L KHEEEv2 	 J"L KHEEEv2 s   A2C5B( 9/C(0CCc                    [         er[   r   r  s      rJ   r  Kernel.load  r   rL   c                    U R                   n U R                  U l         U R                  X5      X0l         $ ! X0l         f = f)z+A load the depends on an index we have read)r  r  r  )r   r   rh  r  s       rJ   indirect_loadKernel.indirect_load  s2    

	DJ99T)JJs   !6 >c                    [         er[   r   r  s       rJ   r  Kernel.store_reduction  r   rL   c                    [         er[   r   r  s        rJ   r  Kernel.store  
     "!rL   c                    [         er[   r   r  s        rJ   r  Kernel.reduction  
     "!rL   c                    [         er[   r   r  s       rJ   r  Kernel.scan  s
     "!rL   c                    [         er[   r   r  s        rJ   r  Kernel.sort  r  rL   c                    [         er[   r   r   s    rJ   
var_rangesKernel.var_ranges  r   rL   c                    [         e)z#
See [Note: Inductor bucketize op]
r   r  s           rJ   r  Kernel.bucketize  s
     "!rL   c                    [         er[   r   r   s    rJ   assert_functionKernel.assert_function  r  rL   c           	        [        U[        5      (       a  [        U5      n[        U[        5      (       d   eUb  [        U[        5      (       d   eUb  [        U[        5      (       d   eU(       a!  U(       a  SU SU SU SU S3	nU SU SU 3nO#U(       a
  U SU 3nUnOU(       d   eU SU 3nUnU(       a	  SU SU S3nU R                   SU SU S3$ )	NrG  z <= z) & (z < rR   z) | ~(z, "index out of bounds: z"))r/  rQ  rs   r  )r   r  r  r  maskr;  
cond_prints          rJ   indirect_assertKernel.indirect_assert  s    c;''c(C#s####}
5# 6 666}
5# 6 666U ugT#eC5E7!<D!7$se3ug6JWD&DJL5U#eW%DJtfF4&*D&&'q.FzlRTUUrL   c                    [         er[   r   r  s        rJ   r  Kernel.check_bounds  r  rL   c                    [         er[   r   r  s     rJ   index_to_strKernel.index_to_str  r   rL   c           	     4  > [         TU ]  5         U R                  (       d   eU R                  R	                  [
        R                  " [        X R                  5       5      5      5        U R                  R	                  [
        R                  " U 5      5        U $ r[   )	r  r  r  r  enter_contextr1   set_ops_handlerCSEProxyset_kernel_handlerr  s    rJ   r  Kernel.__enter__  sj    ~~~%%ht^^-=>?	
 	%%a&:&:4&@ArL   c                F   > U R                  5         [        TU ]	  XU5        g r[   )remove_kernel_local_buffersr  r  )r   r  r  r  r  s       rJ   r  Kernel.__exit__  s    ((*F3rL   c                  ^^ [         R                  R                  mT(       d  g[        U4S jU R                   5       5      n[        [
           " 5       mU R                   HX  nX R                  ;  d  M  X R                  R                  ;  d  M/  TR                  X!5      (       d  MG  TR                  U5        MZ     T H  nX R                  R                  ;   a  U R                  R                  U   n[        U[        5      (       a  ML  [        U4S jUR                   5       5      nU(       a  U R!                  U5        U R"                  R                  U5        M  U R%                  U5        M     g)z
Any buffers that are both created and have a last use in the
same kernel can be removed.

Note that V.graph.scheduler can be None when codegening triton template
kernels.
Nc              3     >#    U  H4  nUTR                   ;   d  M  TR                   U   R                  5       v   M6     g 7fr[   )name_to_bufdefining_op_name)r  r  r  s     rJ   r  5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>  s?      &
.i+++ :I!!#&7799.s   ?$?c              3  ,   >#    U  H	  oT;   v   M     g 7fr[   r`   )r  r  names_to_removes     rJ   r  r  ,  s     K?a/1?s   )r1   r{   r  r   r  rs   r  r  r  $can_buffer_be_removed_through_fusionr  r  r/  r  r  r  remove_inplace_bufferr  remove_buffer)r   fused_node_namesr   r  remover  r  s        @@rJ   r  "Kernel.remove_kernel_local_buffers  s    GG%%	% &
..&
 

 %S/+++D222		 7 77BB   ##D) , $Dyy000ii//5c:..K3??KK..t4''++D1""4( $rL   c                    [         R                  SU5        [        U R                  R                  U'   U R
                  R                  U5        g )Nzremove_buffer(%r))r  rH   r  r  r  r  r  r   s     rJ   r  Kernel.remove_buffer3  s;     			%t,)0		  &  &rL   c                    [         R                  SU5        [        U R                  R                  U'   U R
                  R                  U5        g )Nzremoving_inplace_buffer(%r))r  rH   r  r  r  r  r  r   s     rJ   r  Kernel.remove_inplace_buffer;  s9    		/6*1		!!$'  &rL   c           	        [        U[        [        45      (       a!  U Vs/ s H  o R                  U5      PM     sn$ [        R
                  R                  R                  U5      n[        UR                  S S9nU Vs0 s H^  n[        U[        R                  [        R                  [        R                  45      (       d  MC  X R                  R!                  U5      _M`     nn[#        X5      $ s  snf s  snf )Nc                    U R                   $ r[   r  )ss    rJ   rM  (Kernel.rename_indexing.<locals>.<lambda>H  s    !&&rL   )r  )r/  listtuplerename_indexingr1   r{   r  r  sortedfree_symbolsr   r   UNBACKED_INTSIZEPRECOMPUTED_SIZEr  r   r)   )r   rh  r  sorted_symbolsreplacementss        rJ   r  Kernel.rename_indexing@  s    
 edE]++5:;U((+U;;  ))%0 2 28HI $
#%%II)) !Ayy~~a  # 	 
 %.. <
s   C1?AC6C6c                    [        U0 UD6$ r[   )rQ  )r   r  r  s      rJ   r  Kernel.create_cse_varW  s    D+F++rL   c                \    Uc  gU R                   R                  UR                  5       5      $ )z3
Returns arg name of a given input or output node.
N)r  r
  r   )r   r  s     rJ   r
  Kernel.arg_nameZ  s'     <yy!!$--/22rL   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )NT)r  zOptional[KernelArgs]r  rb   ra   r  )r  r=   ra   r  r  )r  r$   r  Optional[IndentedBuffer]r  r  ra   r  r   rs   rh  rn   ra   rQ  r   rs   rh  rn   r  rQ  ra   r  r[   
r   rs   rh  rn   r  rQ  r  r0   ra   r  
rx   rw   r  rw   r  r/   r  +Union[CSEVariable, tuple[CSEVariable, ...]]ra   r  r  r  r  zUCallable[[tuple[CSEVariable, ...], tuple[CSEVariable, ...]], tuple[CSEVariable, ...]]r  tuple[CSEVariable, ...]ra   r  
r  r  r  r  r  rb   r  rb   ra   r  )ra   zdict[sympy.Symbol, sympy.Expr]r  rQ  r  r  r  rQ  r  rw   r  rb   r  r  r  zOptional[CSEVariable]ra   rQ  r   )
r  zUnion[CSEVariable, str]r  r   r  r   r  z!Optional[Union[CSEVariable, str]]ra   rs   r  )rh  rn   ra   rs   r  r  r2  r   rs   ra   r  )rh  z;Union[list[sympy.Expr], tuple[sympy.Expr, ...], sympy.Expr]ra   rn   )r  r   r  r   ra   rQ  )r  r8   ra   r   )%rc   rd   re   rf   r  r   rP  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r
  ri   r  r  s   @rJ   r  r  D  s   M3FC9=I6= PT /( /HL /	 /  /D & &  (,'+	FF %F %	F
 
F F8"" SW"" *"3>"FO"	"
"" " &	"
 ;" 
5""'"
" (" 
!""'" (" 	"
 " 
!"" 4804"" C" &	"
 $" " 1" ." 
" " " 37V$V V 	V
 0V 
V<""&0"9="FJ"	"
"4%)N''
/P/	/.,3 3rL   r  c                  @    \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   Srg)r  ic  r  zClassVar[str]r  Nr  rx   rr  rs   ops_namer`   )	rc   rd   re   rf   r  r   rx   r$  ri   r`   rL   rJ   r  r  c  s!    "C"#'E 'HcrL   r  c                 ^     SS K n U R                  U R                  S9$ ! [         a     g f = f)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)r'  s    rJ   
jinja2_envr+  k  s?    !!,, " 
 	
  s    
,,c                      \ rS rSrSr\ S       SS jj5       r\SS j5       r\    SS j5       rSS jr	      SS jr
SS jrS	rg
)KernelTemplateiw  z[
Base class for defining kernel templates.

Children classes: TritonTemplate, CUDATemplate
c                    U R                  S5      n[        U5      S:  a"  USS   Vs/ s H  nSU-  U-  U-   PM     snUSS & SR                  U5      $ s  snf )NTr>   r  rr  )
splitlinesr  r   )sourcenum_indentsindents_spacinglinesr  s        rJ   indent_except_first"KernelTemplate.indent_except_first~  sh     !!$'u:>INqrIR&4<E!"I wwu~s   Ac                    [        5       nUc  g [        R                  UR                  S'   SSKJn   UR                  U 5      $ ! U a  n " S SU5      nU" U5      UeS nAff = f)Nr4  r   )TemplateSyntaxErrorc                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrori  c                   > [         TU ]  UR                  UR                  UR                  UR
                  5        Xl        g r[   )r  r  messagelinenor   filenameoriginal_error)r   r>  r  s     rJ   r  RKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__  s<    G$&..&--&++&//	 +9'rL   c                X   SU R                    S3nUSU R                   S3-  n[        U R                  S5      (       a  U R                  R                  R                  S5      nUS-  n[        SU R                   S-
  5      n[        [        U5      U R                   S-   5      n[        X45       Hw  nXPR                   S-
  :X  aS  XS-    S	X%    S3-  n[        U R                  S
5      (       a'  USSU R                  R                  S-
  -  -   S-   -  nMf  Mh  XS-    SX%    S3-  nMy     U$ )NzError in template at line 
zError message: r0  z	Context:
r   r   r>   z: --> columnz     r  z^
z:     )r<  r;  r  r>  r0  splitmaxminr  r  rB  )r   
error_infor3  startendrJ  s         rJ   r  QKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__  s:   #=dkk]"!MJODLL>"DDJt22H== $ 3 3 : : @ @ F"l2
 #At{{Q 7!#e*dkkAo>!&u!2A KK!O3 *QvehZr.J J
#*4+>+>#I#I$.(/*-1D1D1K1Ka1O*P)Q*/)0%&J $J !+QvehZr.J J
 "3 &%rL   )r>  )r>  r7  ra   r  r   )rc   rd   re   rf   r  r  ri   r  r  s   @rJ   DetailedTemplateSyntaxErrorr9    s    9& &rL   rJ  )r+  r-  r4  filtersr'  r7  from_string)r0  envr7  erJ  s        rJ   _template_from_string$KernelTemplate._template_from_string  sj    l;-;-O-O)*.#	8??6**" !	8&.A &> .a0a7C!	8s   A A#
AA#c                0  ^^ [         R                  R                  m[        U [        [
        45      (       a0  U  Vs0 s H!  oR                  5       UR                  5       _M#     snmO U R                  5       U R                  5       0mSUU4S jjnU$ s  snf )Nc                @   > TR                  U 5      nUb  U$ T" U 5      $ r[   )r  )r   r  _get_dtype_reallookups     rJ   r   1KernelTemplate._fake_get_dtype.<locals>.get_dtype  s'    ZZ%F!"4((rL   )r   rs   ra   rw   )r1   r{   r   r/  r	  r
  r   )	fake_outsr  r   rS  rT  s      @@rJ   _fake_get_dtypeKernelTemplate._fake_get_dtype  s|     ''++i$//AJK#llncmmo5KF((*I,?,?,ABF	) 	)  Ls   (Bc                    Xl         g r[   r  r   s     rJ   r  KernelTemplate.__init__  s    	rL   c                     UR                  U R                  " S0 UD65        g! [         aN  n[        R	                  SU[        U 5      [        R                  5       [        R                  :  S9  Us SnA$ SnAff = f)z
Maybe generates a new ChoiceCaller and appends it into existing choices.
Returns None if success, otherwise returns the error.

choices: A list of ChoiceCallers.
kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
Nz3Cannot Append Choice: %s. KernelTemplate type is %s)
stack_infor`   )	r  r  rT   r  infor0  getEffectiveLevelrF   INFO)r   choicesr  rN  s       rJ   maybe_append_choice"KernelTemplate.maybe_append_choice  sl    
	NN4==2623" 	HHET
002W\\A	   H	s   !$ 
A<AA71A<7A<c                    [         e)z=
Generates a ChoiceCaller instance from the given arguments.
r   )r   r  s     rJ   r  KernelTemplate.generate  s
    
 "!rL   r  N)   )r0  rs   r1  r  r2  r  ra   rs   )r0  rs   ra   r   )rV  zUnion[list[Buffer], Buffer]ra   zCallable[[str], torch.dtype]r"  )r`  z	list[Any]r  r   ra   zOptional[NotImplementedError])r  r   ra   r6   )rc   rd   re   rf   r   rh   r4  rO  rW  r  ra  r  ri   r`   rL   rJ   r-  r-  w  s     >?"%8;	  *8 *8X .	% " ,/	&."rL   r-  c                  L  ^  \ rS rSrS rSU 4S jjrSS jrSS jr  S         SS jjr          SS jr	SS jr
SS jr S         SS	 jjrSS
 jr          SS jr        SS jr          SS jr  S               SS jjrSrU =r$ ) r  i  c                ^   > [         TU ]  5         SSKJn  U" 5       U l        Xl        X l        g )Nr   ValueRangeAnalysis)r  r  r6  ri  vr_analysisr  parent_handler)r   r  rk  ri  r  s       rJ   r  CSEProxy.__init__  s'    /-/,rL   c                   ^^^^^^^ U R                   " T/TQ70 TD6m[        U R                  T5      " T0 TD6m[        5       mSmSUUUUUUU4S jjn[        R
                  " UT5      $ )Nr   c                  > [         R                  R                  b]  [         R                  R                  5       R                  nUS:X  a  [
        R                  S:H  OUS:w  a  [
        R                  S:H  OSnOSnU(       a&  T
S:X  a  TR                  nO[        TT
5      " T0 T	D6nOS n[         R                  R                  R                  [         R                  R                  U TUS9n[
        R                  R                  (       ac  U(       a\  SSKJn  [%        U[&        [(        45      (       a  UT   n[         R                  R                  R+                  S	U S
U" U5       S35        TS-  mUR-                  T
TT	5        U$ )NrF  rI  rT  Fmasked)r6  rx   r   )r*   r|  r}  rR   r>   )r1   r{   r  r  r0  r   rK  rQ  rx   r  r  r  r  r  r  r  torch._inductor.codegen.tritonr*   r/  r	  r
  r  rC  )r  
device_strtriton_backendr  csevarr*   r  r6  dtype_handlerr  r   
output_idxr  s         rJ   do_cse!CSEProxy._default.<locals>.do_cse  sh   ww%%1WW@@BGG
 "U* &&(2 "U*  ,,8  "' 8##(;;L#*%$ $'  &$'L  $XX\\**  "	 + F "">>>F lT5M::#/
#;L  **'xz+l:S9TTUV !OJ!!$f5MrL   )r  rs   ra   rQ  )_bound_variabler  rk  r   pytreetree_map)	r   r   r  r  rv  r6  rt  ru  r  s	    ``` @@@@rJ   _defaultCSEProxy._default  sd    %%d<T<V<++T2DCFC24
1	 1	f vu--rL   c                  ^ SSK Jn  SSKJn  [	        [
        R                  U5      (       a  [        R                  " 5       $ [
        R                  R                  mTR                  U:X  a{  U R                  R                  bd  [	        U R                  R                  [        5      (       d   eU R                  R                  R                  T[        R                  " 5       5      $ [        R                   (       az  [#        XA5      (       aj  [%        U4S jS 5       5      (       a  [        R                  " 5       $ U(       a   eSS jn['        [)        Xb5      5      n[+        U R,                  U5      " U6 $ [        R                  " 5       $ )z
If the variable comes from an FX node, we forward the bound we have already computed
Else, if the variable when codegen'ing another op, we try to compute its bounds
r   rh  )TritonTemplateKernelc              3  @   >#    U  H  oTR                   ;   v   M     g 7fr[   )r  )r  r  fx_nodes     rJ   r  +CSEProxy._bound_variable.<locals>.<genexpr>D	  s     V0U1&0Us   )set_indirectr  r  c                    [        U [        5      (       a  U R                  $ [        U [        R                  5      (       a  [        U 5      $ U $ r[   )r/  rQ  r6  r   r  r   r  s    rJ   arg_to_bound.CSEProxy._bound_variable.<locals>.arg_to_boundM	  s8    a--88O5::..&q>)HrL   )r  r   ra   r   )r6  ri  select_algorithmr~  r/  r1   r  r   r  interpreterr  r  r  dictr  r   compute_all_boundsr  r  r	  r  r  rj  )	r   r   r  r  ri  r~  r  
arg_boundsr  s	           @rJ   rx  CSEProxy._bound_variable2	  s   
 	0;ahh 455&&((--,,>>T!dkk&@&@&Ldkk88$????;;--11';;N;N;PQQ&&73E+L+L V0UVVV"**,, : c,56J4++T2J??""$$rL   c                V   [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       d   U5       eUR
                  R                  S:  Ga  U(       a  [        R                  " U[        R                  " U[        R                  5      5      nUR
                  R                  S:  a.  [        R                  " US5      n[        R                  " XeU5      nOUn[        R                   " 5       nUR
                  [        R                   " 5       :w  a  [        U[        R"                  5      (       a  UR
                  [        [$        * S5      -  n[        UR                  U-   UR                  U-   5      nUR
                  R                  S:  a!  UR
                  [        S[$        5      -  n	Xy-  nU R&                  R(                  R+                  U R&                  R,                  XWS9nU R.                  R1                  XU5      n
[3        U5      (       av  UR
                  R                  S:  + n[        U[        R"                  5      (       + =(       d    UR
                  R                  U:  + nU R&                  R5                  XX5        U
$ )Nr   ry  )r6  )r/  r  r   r  r  r6  r  r,   r  rx  r   longr  ltr9  r   r  Numberr   r  r  r  r  rk  r  r#   r  )r   r  r   r  r  stmr  
new_bounds
neg_boundspos	sympy_varassert_lowerassert_uppers                rJ   r  CSEProxy.indirect_indexingY	  s    dC  ==&D$

++1T1+ ::aggc3>>$

#CD::##q(QB))BS1C %,,.Jzz[0022z$7U7U !ZZ+vgr*BB
($$t+Z-=-=-D
 ::##q(**{1f'==C!+!1J++//**4;;+>+>*WC''99#UK	5!! #

 0 0A6L)$== 

  4BL KK$$YlQrL   c                :    U R                   R                  XX45      $ r[   )r  r  r  s        rJ   r  CSEProxy.check_bounds	  s     {{''EAArL   c                   XR                   R                  R                  ;   a)  [        R                   R                  R                  U5        [        U[        R                  5      (       a  U R                   R                  X5      $ U R                   R                  R                  nX;   a  X1   $ U R                   R                  X5      nUR                  S:X  a  U R                   =R                  S-  sl        U$ r  )r  r  rW  r1   r  r  r   r   TMPr  rS  r  r7  r  )r   r   rh  rS  outs        rJ   r  CSEProxy.load	  s    ;;??555 HH&&**40udhh//;;,,T99kkoo11$$kkt+ ==AKK  A% 
rL   c                z   X R                   R                  R                  U'   U R                   R                  (       a~  U[        R
                  R                  ;   a_  U R                   R                  R                  U5      nUR                  5        H%  nX R                   R                  R                  U'   M'     g g g r[   )	r  r  rS  r  r1   r{   name_to_buffer
get_outputget_mutations)r   r   r  r  
other_names        rJ   _update_store_cacheCSEProxy._update_store_cache	  s    ,1##D);;##0F0F(F++**55d;C!//1
:?++J7 2 )G#rL   c                    U R                   R                  R                  U5        Uc  U R                  X5        U[        R
                  R                  ;  a  U R                   R                  XX4S9$ g )N)r  )r  r  r  r  r1   r{   r  r  r  s        rJ   r  CSEProxy.store	  s^     	&&**40<$$T1qww...;;$$T%$CCrL   c                    U R                   R                  R                  U5        U R                  X5        U[        R
                  R                  ;  a  U R                   R                  XU5      $ g r[   )r  r  r  r  r1   r{   r  r  r  s       rJ   r  CSEProxy.store_reduction	  sV    &&**40  -qww...;;..tEBB /rL   c                x    U R                   =R                  S-  sl        U R                   R                  XX45      $ r  )r  r  r  r  s        rJ   r  CSEProxy.reduction	  s0     	!!Q&!{{$$U~MMrL   c                :    U R                   R                  XU5      $ r[   )r  r  r  s       rJ   r  CSEProxy.scan	  s     {{F;;rL   c                :    U R                   R                  XX45      $ r[   )r  r  r  s        rJ   r  CSEProxy.sort	  s     {{CCrL   c           	     D    U R                   R                  UUUUUUU5      $ )a
  
[Note: Inductor bucketize op]

Inputs:
-------
values: the values to be bucketized.
boundaries: a tuple containing
  (a) the name of the boundaries tensor (which must be sorted, unless
  the sorting tensor is present),
  (b) the length of the tensor in the last dimension (i.e. the length of
  one set of boundaries),
  (c) the number of elements in the underlying storage (i.e. the length
  of the flattened tensor, ignoring striding), and
  (d) the stride of the tensor in the last dimension.
boundary_indices: indices into a flattened version of the boundaries
tensor, of the same size and shape as "values".  Each index points to
the first element in the set of boundaries to be used for the
corresponding value.
indexing_dtype: the dtype to use when indexing into the boundaries
tensor.  This must be int64 or int32.  This additionally specifies the
dtype of the return value.
right: see "Details" below.
sorter: an optional tuple containing
  (a) the name of an optional sorting tensor, used to access unsorted
  boundaries without reordering the boundaries tensor, and
  (b) the stride of the tensor in the last dimension.
The values in the sorting tensor are used as indices into the *last*
dimension of the boundaries tensor, with all other indices matching.
The size of the sorting and boundaries tensors must be equivalent.
sorter_indices: must be present if the sorting array is present; see
"boundary_indices" for the equivalent definition for the boundaries
tensor.

Output:
-------
The buckets each value belongs in, within a given set of boundaries.  0
indicates a position before the first boundary, and len(boundaries_set)
represents a position after the last boundary.

Details:
--------
Given a value and a set of boundaries, calculate the bucket that each
value belongs to.  This works differently in 1-D and N-D cases.

for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [0, 4, 4, 8], right=True
return =   [[ 0, 1, 1, 1], [1, 3, 3, 4]].

for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [[0, 4], [4, 8]], right=True
return =   [[ 0, 1, 1, 1], [0, 1, 1, 2]]

Note that in the N-D boundaries case, the shape of "values" and
"boundaries" must match in every dimension _except_ the last.

When right == False, bucket i refers to range (boundaries[i], boundaries[i+1]].
When right == True,  bucket i refers to range [boundaries[i], boundaries[i+1]).

Boundaries must be non-decreasing, or a sorter must be provided which
would re-index offsets in a non-decreasing order (e.g. the second output
of torch.sort(offsets)).  Otherwise, the result is undefined.
)r  r  r  s           rJ   r  CSEProxy.bucketize	  s1    L {{$$
 	
rL   )r  rk  rj  )r  zKernel[Any]rk  zOpsHandler[Any])r   rs   r  ztuple[Any, ...]r  zdict[str, Any]ra   r   )r   rs   r  r   r  r   ra   rG  r  )
r  rQ  r   r  r  rb   r  rb   ra   r  r  r  )r   rs   r  rQ  ra   r  r[   r  r  r  r  r   r  r!  )rc   rd   re   rf   r   r  r{  rx  r  r  r  r  r  r  r  r  r  r  ri   r  r  s   @rJ   r  r    s   D-;.z%%V // %/ 	/
 / 
/bBB&0B9=BFJB	B
"@ SW *3>FO	CNN N &	N
 ;N 
5N	<'	<
	< (	< 
!	<D'D (D 	D
 D 
!D  4804N
N
 CN
 &	N

 $N
 N
 1N
 .N
 
N
 N
rL   r  )rI   rs   ra   r  r[   )
rr   rs   r  r   r  r   r  r   ra   r  )rr   Union[torch.device, str, None]ra   zOrderedSet[BackendFeature])rr   r  r6  r"  ra   rb   )rr   rs   ra   zOptional[SchedulingConstructor])F)rr   rs   r:  rb   ra   r   r2  )rh  Sequence[sympy.Expr]ri  r  rj  r  ra   r   )rr   rs   rm  r   ra   r  )rr   rs   ra   r   )r  rs   r  r   r  r   ra   r  )rI  rs   ra   rb   r`   r  )ra   r   )
__future__r   r  dataclassesenumr  rU  rF   r	  r  retypingr   r   r   r   r   r	   r
   r   r   r   r   r   typing_extensionsr   r   r   torch.fxtorch._prims_commonr   torch.utilsr   ry  torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.printersr   _PythonPrintertorch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   rr  r   r   dtype_propagationr   ops_handlerr   r    utilsr!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   virtualizedr,   r-   r.   r/   r0   r1   collections.abcr2   r3   r4   r   r5   r6   r7   r8   r  r:   r  r;   r<   r=   r]  r@   rA   r   r0  r   rs   r  r3  rB  _logginggetArtifactLoggerrc   rD   	getLoggerr  rK   rN   rl   	dataclassr   r   r   r   r   KernelArgTyper   r   r   r  r   r"  r2  r7  r1  r<  	lru_cacher.  rk  rn  rt  bfloat16r  float16rb   r  float64int8int16r  r  r   uint16r  uint64ru  r  r  r  compile
IGNORECASErR  rL  rN  r  r  INT_TO_FLOATr  r  r  r  r  r  r  r  r  rQ  rI  rK  r
  rx   ReductionCacheKeyrN  r  r  r  r+  r-  r  r   s   0rJ   <module>r     s7   "         	   
 
 
 &    ? ) / - G O O D  : ;    Q P BB>>$DD-	B$hy&9%:N%JK23sELL()J F~~//*E!=
/		 /( T[ [ [| # # #          = = = lIw8H,VW,.) .3" 3"l :< 6 ;8 @D	, / !=	
 

&T 
&3*33$3*35C3	3U
 &+

"
!
 TJ JZUU$U  U 	U;;&7;	;, 
NNEKK	MM5;;> JJMMMMJJKKKKKKKKLLLLLL

E 	
> : ,''' ' 	'T_B _BD%N %S1 S1l zz";2==Q a;#%5z# a;H - - -  6: _6;HH-_6 ;HH/- 	_6 ;HH/- 	_6$ ;HH/- 	%_60 ;HH/- 	1_6< ;HH*)	=_6L ;HH(0	M_6X 	;HH66>	Y_6h ;HH1i_6r ;HH2s_6| ;HH1}_6F ;HH2 G_6P ;HH%8$Q_6^ 	;HH&%		__6j ;HH%8	k_6v 	;HH&	w_6@ ;HH+A_6L %;HH88)	M_6X %;HH88)	Y_6d %;HH8)e_6n %;HH8)o_6z 
;HH'
{_6D ;HH(E_6N ;HHSO_6\ ,;HH?0]_6f ,;HH?0g_6r &;HH9*s_6| 
;HH*
}_6F );HHD-G_6P );HHD-Q_6Z );HHD-[_6d );HHD-e_6n (;HHC,o_6x $1;HHL5$y_6B $1;HHL5$C_6L $1;HHL5$M_6V $1;HHL5$W_6` ';HHB+a_6j (;HHC,k_6t (;HHC,u_6 2 _D	-# -"> *Z 
 N N N 
 ,A AH
!; !;H 5+;Tk5c!1223	5a'/=0
1 aH
< 
<\3Wgo. \3~    T p" p"fz
~ z
w7s   "a+