
    shZ                   	   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJr  S S	KJrJrJ r J!r!J"r"J#r#  S S
K$J%r%  S SK&r'S SK(r'S SK)J*s  J+r,  S SK-J.r.  S SK'J/r/  S SK0J1r1  S SK2J3r3J4r5Jr6J*r7  S SK8J9r9  S SK:J;r;  S SK<J=r=J>r>J?r?J@r@JArAJBrBJCrCJDrDJErE  S SKFJ4rG  S SKHJIrI  S SKJJKrKJLrLJMrM  S SKNJOrOJPrPJQrQ  S SKRJSrSJTrTJUrUJVrV  S SKWJXrX  S SKYJZrZJ[r[J\r\J]r]J^r^J_r_  S SK`Jara  S SKbJcrcJdrdJereJfrfJgrgJhrhJiriJjrj  S SKkJlrl  S SKmJnrn  S SK(Joro  S SKpJqrqJrrr  S SKsJtrt  S S KuJvrv  S S!KwJxrx  S"S#KyJzrz  S"S$K{J|r|J}r}  S"S%K~Jr  S"S&KJr  S"S'KJr  S(S)KJ4r4Jr  S(S*KJr  S(S+KJr  S(S,KJr  S(S-KJr  S(S.KJrJr  S(S/KJr  S(S0KJr  S(S1KJrJr  S(S2KJr  S(S3KJr  S(S4K*JrJrJrJrJrJrJrJrJr  S(S5KJr  \(       a  S S6KJrJr  S S7KYJr  S S8KJr  S(S9KJr  \ " S:5      r\" S;5      r\(       d  \4GR`                  " 5       (       d  SzS< jrS{S= jrOS S>KJrJr  \(       a
  S S?KJrJrJr   " S@ SA\GRp                  5      rS|SB jr\" 5       r\GRx                  " \5      r\'GR~                  GR                  \SC5      r\'GR~                  GR                  \SD5      r\'GR~                  GR                  \SE5      r\'GR~                  GR                  \SF5      rS}SG jrS~SH jr\GR                  " S5      SSI j5       r\GR                  " S5      SSJ j5       r        SSK jrSSL jr      SSM jrS~SN jrSSSP jjr   S         SSQ jjrSSR jr    SSS jr      SST jr S       SSU jjr S   SSV jjr\GR                  SSW j5       r " SX SY\"SOSZ9r " S[ S\\!5      r        SS] jr\" S^S_9        SS` j5       r " Sa Sb5      r " Sc Sd\5      r " Se Sf\5      r          SSg jr      SSh jr SSiSiSiSj.                   SSk jjjrSSl jr        SSm jr S       SSn jjr\S4         SSo jjr\" S 5      r                  SSp jrSSq jrSSr jr\SS4           SSs jjrSSt jr        SSu jr        SSv jrSSw jr SSSx.         SSy jjjrg)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)currentframe)count)AnyCallableOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_modulemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)save_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)	BoxedBoolcount_tangentsfresh_inductor_cache	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   metrics)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)	GeneratorSequence)_StrideExprStr)
OpOverload)ExternKernelNode_P_Tc                "    [         R                  $ N)dynamo_utilsidentityattrs    n/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_inductor/compile_fx.pytime_and_logry      s    $$$    c                     g rs    )argskwargss     rx   log_optimus_to_scubar      s    rz   )r   ry   )FQNGraphInputNameGraphSignaturec                       \ rS rSrSrSrSrSrg)FxCompileMode   r   rS   rL   r|   N)__name__
__module____qualname____firstlineno__NORMAL	SERIALIZE
SUBPROCESS__static_attributes__r|   rz   rx   r   r      s    F IJrz   r   c                    Sn [         R                  R                  U 5      nUc  [        R                  $  UR                  5       n[        U   $ ! [         a    SS KnUR                  " [        5      nUR                  SUU SR                  [        S [        R                  R                  5        5       5      5      5        [         R                  R                  U 5        [        R                  s $ f = f)NTORCHINDUCTOR_FX_COMPILE_MODEr   z>Invalid value of %s for %s. Expected one of %s. Using default., c              3  8   #    U  H  n[        U5      v   M     g 7frs   )repr.0xs     rx   	<genexpr>+_fx_compile_mode_default.<locals>.<genexpr>   s     O.NT!WW.N   )osenvirongetr   r   upperKeyErrorr   	getLoggerr   errorjoinsorted__members__keyspop)namevaluer   logs       rx   _fx_compile_mode_defaultr      s    *DJJNN4 E}###$U## $)		LIIfOm.G.G.L.L.NOOP		
 	

t###$s   A B"C43C4
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsc                    [         R                  R                  R                  5       n[	        [        U 5      5      nU(       a  UR                  (       d  U$ UR                  R                  $ rs   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      rx   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333rz   c                   U R                   R                  SS9S   n/ nUR                  S    H  n[        U[        R
                  R                  5      (       a^  UR                  R                  S5      =nb@  [        U[        R                  5      (       a!  UR                  UR                  5       5        M  UR                  S 5        M     X!R                  S'   g )Noutputopr   valoriginal_output_strides)graph
find_nodesr}   
isinstancer   r   Nodemetar   Tensorappendstride)gmrg   output_stridesr   r   s        rx   record_original_output_stridesr      s    ((%%%215KN""1%vuxx}}--..;3--!!#**,/!!$' & 3A./rz   c                 6    [         R                  " [        5      $ rs   )dynamo_loggingget_step_loggerr   r|   rz   rx   _step_loggerr      s    ))#..rz   c                 &   [         R                  R                  5       (       an  [         R                  R                  R                  R
                  (       d:  [         R                  R                  5       S:  a  [        R                  " S5        g g g g )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr|   rz   rx   _warn_tf32_disabledr      sf     	

!!##**55JJ,,.&8d	
 9 6 	"rz   c           	        SSK JnJn  0 nU R                  SS9 H  u  pgXuU'   U" UUUUR                  S9  M     U R                  SS9 H  u  phXU'   U" UUUUR                  S9  M     UR                  R                  SS9n	/ n
U	 H  nUR                  nXR                  ;   a"  UR                  U   nU
R                  U5        M@  XR                  ;   aE  UR                  U   nU
R                  U5        [        X^   5      UR                  [        U5      '   M  XR                   ;   d   eU
R                  S 5        M     SSKJn  ['        UR                  R(                  5      S	   R*                  S   n/ nUR,                  nUR.                  nUR0                  n[3        U5       H  u  nnS nU[5        U5      [5        U5      -   [5        U5      -   :  a?  UR                  U;   a  UUR                     nOUR                  U;   a  UUR                     nUR                  U5        M     U" UU
U[6        R8                  " 5       S U0 5      nU$ )
Nr   )_assign_attr	_AttrKindF)remove_duplicate)	attr_kindplaceholderr   )_unlift)torch.export.unflattenr   r   named_parameters	PARAMETERnamed_buffersBUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrb   r   rd   user_inputstorch.export._unliftr   r   nodesr}   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumeratelenpytreeLeafSpec)modr   graph_signaturer   r   
state_dictr   parambufferplaceholder_nodeslifted_inputsnode	node_nameparameter_namebuffer_namer   outputsmutated_outputsbuffer_mutationsuser_input_mutationsr   idxoutr   unlifted_gms                            rx   _unlift_graphr     sO    ?OQJ++U+C 4))		
 D ))5)A!4&&		
 B ++}+=)+M "II	<<<,AA)LN  0;;;);;IFK  -&z'>? GG4[AB  ; ;;;;  & " -288>>"2&++A.GO&88*@@#11Mg&S6:%&-A)BBSEWWWxx++(211,SXX6u% ' 

K rz   c           
   #    #    [        [        R                  " U R                  R	                  S[
        R                  R                  R                  S9U R                  R	                  S[
        R                  R                  R                  S95      5       H  nUR                  [
        R                  R                  R                  :X  a<  UR                  S   R                  nUR                  S   R                  nUv   Uv   Mq  UR                  [
        R                  R                  R                  :X  d  M  UR                  S   R                  nUR                  S   R                  nUv   Uv   M     g 7f)Ncall_functionr   targetrS   rL   r   )r   	itertoolschainr   r   r   opshigher_ordercond
while_loopr	  r}   r   )r   r   true_subgraph_namefalse_subgraph_namecond_subgraph_namebody_subgraph_names         rx   _get_subgraph_namesr  >  s    HH?599;Q;Q;V;VWHH"599+A+A+L+L   	
 ;;%))00555!%1!2!2"&))A,"3"3$$%%[[EII22===!%1!2!2!%1!2!2$$$$#s   D6E;<?E;c                   [        SSSS9   [        R                  n[        R                  n[	        U 5       H&  n[        X5      n[        US5      n[        XU5        M(     [        XX#5      sS S S 5        $ ! , (       d  f       g = f)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr|   )	r$   r   add_pre_grad_passesremove_pre_grad_passesr  getattrr  setattrr[   )r   example_inputs
add_passesremove_passessubgraph_namesubgraphnew_subgraphs          rx   r  r  S  sx     
$"!8

 //
5504Mr1H5hCLB|4	 5
 r:M
 
 
s   A!A77
Bc                    [        SSSS9   [        U 5       H  n[        X5      n[        U5        M     [	        U 5        S S S 5        g ! , (       d  f       g = f)N_recursive_joint_graph_passesTjoint_graph_pass_time_usr  )r$   r  r  r&  rX   )r   r"  r#  s      rx   r&  r&  f  sP    	'"!;

 14Mr1H)(3 5 	2
 
 
   4A		
AFc                    [        SSSS9   [        U 5       H  n[        X5      n[        X15        M     [	        X5        S S S 5        g ! , (       d  f       g = f)N_recursive_post_grad_passesTpost_grad_pass_time_usr  )r$   r  r  r*  rY   )r   is_inferencer"  r#  s       rx   r*  r*  r  sP    	%"!9

 14Mr1H'? 5 	*
 
 
r(  c                   SSK JnJnJnJnJn  U" XX#5      n	Uc  U	" 5       OSn
[        [        U	R                  R                  5      S   R                  S   5       VVs0 s H  u  pUR                  U_M     nnn/ n/ n0 nU R                  R                   H^  nUR                  U;   a  UR                  U5        M&  UR                  U   U:X  d  M;  UR                  S:w  d  MM  UR                  U5        M`     U HC  nSUR                  -   nU" U UUc  XUR                        OSU5        UUR                     UU'   ME     USSS2    Hb  nUR                  (       a3  UR                   H!  nUR                  U   U:X  a  M   SU S35       e   MG  U R                  R!                  U5        Md     U R#                  5         U	U4$ s  snnf )	a  
This function takes an GraphModule input "gm".
The gm will be split into 2 components,
  1) const_gm, which consists the subgraph of gm that can be constant folded.
  2) gm (being inplace modified,) which returns the graph after constant folding.

If an additional "lifted_constants" argument is passed in, we will assume the gm has
been lifted and run the transformation accordingly.

When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
the nodes for which the callback returns True.

const_output_index is a mapping of corresponding node name from gm to the
output index of const_gm.
Returns (const_gm, const_output_index)
r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr   r   _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingr.  r/  r0  r1  r2  r   tupler   r   r}   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnr.  r/  r0  r1  r2  const_gmconst_resultr  r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       rx   split_const_gmrD  ~  s   ,  *
3H "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"T"T"T   MO99%""4(YYx $44M9Q  &	   )DII5" )0 49956		
 .;499-E>*   dd#::ZZvvh':5VvEU7VV5   HH% $ LLN'''Es   Gc                n   [         R                  R                  n[        UR                  R
                  UR                  R
                  UR                  R
                  UR                  R
                  /5      nU H  nU R                  R                  SUS9 H  n[        UR                  R                  SS 5      [         R                  5      (       d  M>  UR                  S   R                  [         R                   :X  d  Mk  UR                  S   R"                  R$                  S:X  d  M      g   M     g)Nr  r  r   r   TF)r   r  atenrK   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   dtypefloat32devicetype)r   rF  tf32_opsr	  r   s        rx   is_tf32_warning_applicablerQ    s    99>>DGGOOJJHHLL  		
H HH''?6'JD499==5u||DDIIe$**emm;IIe$++00F: K  rz   c                   [        S U  5       5      n[        R                  (       aE  [        R                  (       a0  U(       d)  [        R                  S5        [        R                  " SS9$ [        R                  R                  (       a)  [        R                  S5        [        R                  " SS9$ [        R                  " 5       $ )z
For CPU backend, enable comprehensive padding causes some unit tests
fail due to changing number of generated kernels. Skip for now.
c              3     #    U  HE  n[        U[        R                  5      (       d  M$  [        UR                  R
                  5      v   MG     g 7frs   )r   r   r   r@   rN  rO  )r   ts     rx   r   6maybe_disable_comprehensive_padding.<locals>.<genexpr>  s3      '5!Au||9Tqxx}}~s
   #A&Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpurV  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)r  has_gpus     rx   #maybe_disable_comprehensive_paddingra    s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''rz   c                p    U (       d  U(       a  [         R                  " SS9$ [        R                  " 5       $ )z@
graph partition does not support cpp_wrapper and aot_mode yet.
F)graph_partition)r   r[  r^  r_  )cpp_wrapperaot_modes     rx   maybe_disable_graph_partitionrf    s'     h||E22%%''rz   c                   [        5          [        U5      nU(       d4  [        R                  R	                  SS9n[        XS9R                  " U6   O^U(       d  [        R                  " 5       O [        R                  R                  USS5      nU   [        XS9R                  " U6   SSS5        SSS5        U$ ! , (       d  f       N= f! , (       d  f       W$ = f)zq
If we can not detect fake mode from the context of inputs, create one.

The created fake mode will be returned.
Tallow_non_fake_inputs)moderi  N)r   r#   r   _subclassesFakeTensorModerI   	propagater^  r_  r   r[  objectpropagate_dont_convert_inputs)r   r  force_allow_non_fake_inputs	fake_modectxs        rx   fake_tensor_proprs    s     
"	#$^4	))88t8TI2.88.I 3 &&(ZZ&&y2I4P 
 r2PP#  
$    
$	#  s$   BCB:(C:
C	C
Cc                    [         R                  " U 5         [         R                  " 5       sS S S 5        $ ! , (       d  f       g = frs   )r   r[  get_config_copy)config_patchess    rx   get_patched_config_dictrw    s(     
n	%%%' 
&	%	%s	   6
Ac               #     #    [         R                  (       a   [        [        5       SS9   S v   S S S 5        g S v   g ! , (       d  f       g = f7f)NF)dirdelete)r   force_disable_cachesr>   r;   r|   rz   rx   with_fresh_cache_if_configr|  $  s6     "" "ik%@ A@ 	 A@s   (A=A
AAc                  z    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   Srg)_CompileFxKwargsi0  zOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsboolis_backwardzOptional[int]graph_idrd  re  r,  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_indexr|   N)r   r   r   r   __annotations__r   r|   rz   rx   r~  r~  0  s=    ##$$NMM ::rz   r~  )totalc                  .    \ rS rSr        SS jrSrg)_CompileFxCallablei=  c                    g rs   r|   )selfr   r  r~   s       rx   __call___CompileFxCallable.__call__>  s    
 rz   r|   Nr   rF   r  Sequence[InputType]r~   Unpack[_CompileFxKwargs]returnr:   )r   r   r   r   r  r   r|   rz   rx   r  r  =  s-     , +	
 
rz   r  c           
        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  S	S 5        UR                  S
S 5        UR                  SS 5        [        R                  " 5        nUR                  [        R
                  R                  R                  5       5        UR                  [        [        R                  5      5        UR                  [        R                  " SSSSS95        UR                  [        S5      R                  5       5        [        R                  R                   R"                  (       a7  UR                  [        R                  R                   R%                  5       5        UR                  ['        5       5        UR                  [)        5       5        [*        R,                  " SUS   S9  [/        [0        SS9" U U40 UD6sS S S 5        $ ! , (       d  f       g = f)Nr  r  r|   r  Fr  rd  r,  r  r  r  compile_fx_innerinductor_compileT#inductor_cumulative_compile_time_us)
phase_namer  r  z#pytorch.wait_counter.dynamo_compile)r  inductor)compiler_name)
setdefaultr^  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrP   dynamo_configuse_lazy_graph_modulert   r$   rJ   guard_dynamocallback_handlerprevent_duplicate_callbacksinstall_callbacksr|  rU   r!   pt2_compiler   _compile_fx_inner)r   r  r~   stacks       rx   r  r  F  s   
 lD)
)2.
mU+
j$'
mU+
ne,
2D9
lD)
.5 
			5EKK88OOQR2=3V3VWX%%"-&*)N		
 	L)NOUUWX==))EE > > P P RS689LN+&&}-	
 ##4JO
 
7 
 		s   8E-H//
H=zcompilation time (in seconds)rv   c                  ^^ [         R                  n[        R                  " U R                  5      S:X  a3  U(       d,  SSKJn  UR                  U 5        [        U R                  5      $ UR                  SS5      n[        R                  SU5        [        X5      n[        [        [!        [#        U R                  R$                  5      5      5      R&                  S   [(        [*        45      (       d   SU R                   35       eUR-                  S5      =nc(  [/        [0        R2                  R4                  5      =US'   n[0        R6                  (       a  [9        U U40 UD6  [:        R:                  " 5       n[=        5       n	[?        S	5      RA                  5        n
[?        S
5      RA                  5          [0        RB                  (       + =(       a'    [0        RD                  =(       d    U	=(       a    U(       + n[0        RD                  nU	n[G        SU5        [I        U5       HZ  u  p[        U[J        RL                  5      (       d  M&  [O        URP                  RR                  5      (       d  ML  X;   d  MS  SUl*        M\     SnSnSmSn[W        U 5      n[:        RX                  " 5       nU(       al  [Z        R\                  " XX&U5      u  nmUbN  Uu  nnU(       a  [Z        R^                  " 5       n[Z        R`                  " UUUUUUR-                  SS5      US9u  nmTb	  TS   S:X  a  Ub   e[c        XU40 UD6nOTS   S:X  a  Ub   eUc   e[d        Rf                  " 5          [c        XU40 UD6nUc   e[:        RX                  " 5       U-
  Ul4        US   nUUl5        [d        Rl                  " 5       u  nnURo                  U5         [d        R~                  " 5         Ub  [        U5      TS'   URh                  TS'   [Z        R                  " UUUUU5        O!TS   S:X  d   eUc   eUc   eUS   nUUl5        Uc   eUnTb  TS   OSm[        R                  " ST 3T=(       d    0 US9  [        R                  " STUT(       a  TR-                  S5      OST(       a  TR-                  S5      OST(       a  TR-                  S5      OSUUS9  Tb  [        SU4S  jU4S! jS"9  UR                  XU5        SSS5        SSS5        [        R                  S#[:        R:                  " 5       U-
  5        [        R                  S$S%R                  S& [        S'   R                  5        5       5      5        [J        R                  R                  R                  R                  5         [        5       " [        R                  S(US   (       a  S)OS* S+US,    35        W$ ! [p        [r        4 a    e [t         a3  n[w        U[y        5       5      R{                  UR|                  5      SeSnAff = f! [d        R~                  " 5         f = f! , (       d  f       GNV= f! , (       d  f       GN`= f)-z
Inductor API that compiles a single graph.

If you change the argument list for this function, make sure you
also update the call to save_args_for_compile_fx_inner below accordingly.
r   )_LazyGraphModuler  r|   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nz+pytorch.wait_counter.fx_codegen_and_compilez*pytorch.wait_counter.all_compilation_typesfx_cacheTr  F)r  	constantscache_statebypassmisstriton_bundler_metatime_taken_nshitdisabledfx_graph_cache_)metadatatime_nsr  key
componentscache_bypass_reasonzcache not enabled)r  cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                    > ST  3SS.$ )Nr  jsonr   encodingr|   )r  s   rx   <lambda>#_compile_fx_inner.<locals>.<lambda>:  s    -k]; &%rz   c                 0   > [         R                  " T 5      $ rs   r  dumps)
cache_infos   rx   r  r  >  s    4::j#9rz   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsz&Overview info of inductor aten mms: %sr   c              3  8   #    U  H  u  pS U SU S3v   M     g7f)(: )Nr|   )r   r  r   s      rx   r   $_compile_fx_inner.<locals>.<genexpr>G  s%      
2RJCauBugQ2Rr   aten_mm_infoztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )Srj   aot_compilationrt   count_callsr   torch.fx._lazy_graph_moduler  force_recompiler+   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr   r}   r5  r   r   r<   r   tritonr  	save_argsr4   timerB   rJ   r  r{  fx_graph_cacher(   r   r   r   r@   rN  rO  _is_inductor_staticr7   r  r.   prepare_keyget_remote_cacheload_with_keyfx_codegen_and_compiler`   begin_compile_time_taken_ns_fx_graph_cache_keycollectset_triton_bundlerN   rO   	ExceptionrW   r   with_traceback__traceback__end_compilestr_save_graphr!   instantr  rD   post_compiler   rZ  r   r"   items	_inductorasync_compileCompiledTritonKernelscache_clearr   r   INFO)r   r  graph_kwargsre  r  r  inputs_to_checkr  startfx_graph_remote_cache_	use_cachelocalremoteiinputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_lines	cache_keytriton_bundler  ecompiled_graphr  r  s                               @@rx   r  r  {  s    &&H)Q.x 	A((,rzz**'3'>'>?RTV'WDFWX-nPOd4 89:??BUDMRR 
QRTRZRZQ[\R #&&|44
=2;FMM<T<T2UU\"Z&	
 	
 IIKE<> 	BCIIKqABHHJ +++ &&?*? 	
 %%&
I. ".1HA5%,,//5<<,,--*,0) 2 37
226	 \\^
%1%=%=L6&"Xz
 ##+ [#/#@#@#BL0<0J0J"  , 0 0 F'1-!: M!:h!F$,,, 6O!7C!
 &&0$,,,'''''),$:%;G%! )44437<<>J3N!0$QK	8A!5 "))+!'!33MB ))+".478K4L
01*;*J*JJ'$$! m,555$000''' I4=1 ,,,* *4)?J}%Z 	 	""k]+%2	
 	&&#')3
u%7Az~~l3t  45(!' %	
  ! : 	##N	Jq 	K 	Lv II5tyy{U7JK HH0		 
2:>2J2P2P2R
 	
 
OO!!77CCEN'&}5;:
F Gj)*	, C %i0  #A|~6EEOO
 ))+i 	KJ 	LKsj   Y8BY	"Y	1Y	8C-Y	&A$WD/Y	:YX,9.X''X,,X//YY		
Y	Y
Y*c                  .    \ rS rSr% SrS\S'   SS jrSrg)	_FxCompileStatiY  r   intcodegen_and_compilec                     SU R                    3$ )Nzcodegen_and_compile: )r  )r  s    rx   __repr___FxCompileStat.__repr__]  s    &t'?'?&@AArz   r|   Nr  r  )r   r   r   r   r  r  r  r   r|   rz   rx   r  r  Y  s      Brz   r  c                  p    \ rS rSr% Sr\" \5      rS\S'   \	          S	S j5       r
\S
S j5       rSrg)	FxCompileia  zU
An FxCompile represents a mechanism that can turn a GraphModule into an
OutputCode.
z%dict[type[FxCompile], _FxCompileStat]_compile_statsc                    g rs   r|   )r  r   r  r  r  s        rx   r  FxCompile.codegen_and_compilem  s     rz   c                8    U R                   R                  5         g rs   )r  clear)clss    rx   _reset_statsFxCompile._reset_statsv  s      "rz   r|   N
r   rF   r  r  r  r  r  r~  r  r:   r  None)r   r   r   r   __doc__r   r  r  r  r   r  classmethodr  r   r|   rz   rx   r  r  a  sr     =H<WN9W
  , '	
 ' 
  # #rz   r  c                  <    \ rS rSr\          SS j5       rSrg)_InProcessFxCompilei{  c                  ^^^-^.^/ SU;   a  US   c   eUS   nUR                  SS5      nUR                  SS5      nUR                  SS 5      nUR                  SS5      n	[        R                  n
UR                  SS5      nUR                  S	S 5      nUR                  S
S 5      n[        S5      R	                  5          [
        R                  " 5          [        R                  =nb,  SS K	n[        R                  SU5        UR                  " U5        [        T5      (       a
  [        5         [        S   R!                  5       n["        R$                  " ['        ["        R(                  " 5       S5      5        [+        5       " [,        R.                  SU(       a  SOS SU 35        S>UU4S jjm-[1        SS U-4S jS9  [        R2                  R5                  TT5        [7        T5      n[9        T5        [:        R<                  " 5          [?        TT5      nS S S 5        [A        T5        [        RB                  " W5         [E        T5      nU   [G        TUS9  S S S 5        [        R2                  RI                  TT5        [J        R3                  S[M        STSSSS95        [1        SU4S jS 9  [        RN                  RP                  (       ab  [:        RR                  RT                  RW                  TRX                  5      m/[1        SS! U/4S" jS9  T/[:        RZ                  R2                  l.        [_        5       nURa                  5       (       aa  ["        Rb                  S#:  a!  [e        [        S$   Rg                  5       5      nO[        S$   Ri                  5       n[j        Rl                  " SUS%9  [        Rn                  " 5       (       a   [q        S&[s        [u        5       5      0S'9  S S S 5        [        RB                  " U5         [y        T5         [{        X5         S nS nS nS nU
(       a  [        R|                  R~                  (       ap  [        T5      u  nn[        U/ UUU	U
UUUSS)9
n[        R                  " U5         U	(       d   S*5       eUR                  5         UR                  5       u  nnS S S 5        [        TTUUU	U
UUUUU(       a  UR                  OS U(       a  UR                  OS UUS+9n[        R                  " 5       n[        R                  " U5         UR                  " T6   / nUR                  b  [        5       m.UR                   H  n[        U[        5      (       ay  UR                  5       (       ad  [        [        UR                  5       5      5      S:X  a>  UR                  [        U.4S, jUR                  5       R                   5       5      5        M  UR                  S 5        M     [        U5        [        S-SS.9   UR                  5         UR                  (       Ga8  S/S0KXJYn  UR                  (       d   S*5       eUR                  5       u  n n![        R2                  " S1U R                  5        U!R                  (       a!  [        R2                  " S2U!R                  5        S n"UR                  (       a2  UR                  UR                  5      n"[        R2                  " S3U"5        [        S4SS.9   UR                  UU R                  U!R                  U"UR                  / [        R                  UR                  R                  5      QS59n#S S S 5        OUR                  5       R                  n#S S S 5        UR                  5       u  n$n%n&[        =R                  U$-  slg        [        =R                  U&-  slh        [        =R                  U%-  sli        U(       GaW  [        R                  R                  (       Ga7  [        RX                  R                  (       Gd  [:        RZ                  R                  R                  " T6 (       a  S n'TRX                  R                   H  n(U(R                  R                  S6S 5      n)U(R                  S7:X  dM  [        U)[:        R                  5      (       a.  [:        RZ                  R                  R                  U)5      (       d  M~  U(R                  R                  S8S 5      =n'(       d  M    O   S9n*U'(       a	  U* S:U' S;3n*OU* S;3n*U*[        RX                  ll        U(       a  [        RX                  R                  (       da  [        T5      n+U+(       aO  S<U+R                   3n*U+R                  R                  S8S 5      =n'(       a  U* S:U' S;3n*U*[        RX                  ll        [        R                  (       a^  [        W#[r        [        45      (       d   e[        U#5      sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ U(       aX  [        RX                  R                  (       d9  SS=KwJxn,  U," [        RX                  R                  5      [        RX                  ll        U R                  [        U 5         =R                  S/-  sl|        [        W#UTU[        RX                  R                  UR                  5       [        S   U-
  UTUUUU5      sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ ! , (       d  f       G	N@= f! , (       d  f       G	N= f! [v         a    [        R                  S(5         GNf = f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GND= f! , (       d  f       GNN= f! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)?Nr  r  r|   r  Fr  rd  r,  r  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r  r  r  c                    > [         R                  " 5       n [        R                  R                  R
                  R                  U TTSS S9  U R                  5       $ )Nr  )save_dir)ioStringIOr   r  repro	after_aotsave_graph_reprogetvalue)fdr  r   s    rx   log_graph_runnableC_InProcessFxCompile.codegen_and_compile.<locals>.log_graph_runnable  sJ    [[]##-->>NJ ?  {{}$rz   r  c                     SSS.$ )Nfx_graph_runnablestringr  r|   r|   rz   rx   r  9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (%rz   c                    > T " 5       $ rs   r|   )r2  s   rx   r  r7    s	    #5#7rz   r  r,  %szAFTER POST GRADTinclude_strideinclude_devicecoloredinductor_post_grad_graphc                 &   > T R                  SSSS9$ )NFTprint_outputr<  r=  )print_readable)r   s   rx   r  r7    s    r'8'8%*4PT (9 (rz   r  c                     SSS.$ )Ninductor_post_to_pre_grad_nodesr  r  r|   r|   rz   rx   r  r7    s    $E(.-rz   c                 0   > [         R                  " T 5      $ rs   r  )provenance_tracking_jsons   rx   r  r7  
  s    4::6N+Orz   )   
   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configs)	r  	shape_envr  rd  re  r  r,  r  is_const_graphz"AOT mode only supports C++ wrapper)r  rP  r  rd  re  r  r,  r  rA  const_wrapper_codeconst_kernel_codeconst_moduler  c              3  F   >#    U  H  nTR                  U5      v   M     g 7frs   )doprint)r   sps     rx   r   :_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>m  s     )X@W1!))A,,@Ws   !zGraphLowering.compile_to_fn)r  rS   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesr   r   stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op ) check_lowering_disable_cudagraphr  )r   rj   r  rJ   r  rt   preserve_rng_stater   sleep_sec_TESTING_ONLYr  r   warningsleeprQ  r   r"   copysyssetrecursionlimitmaxgetrecursionlimitr   r   r  rD   r  fx_graphri   rZ   r   no_gradrs  r   set_fake_modeget_cuda_device_contextr*  fx_graph_transformedpost_grad_graphs_logr'   traceenabledr   	tracebackget_graph_provenance_jsonr   r   _inductor_post_to_pre_grad_nodesr&   in_progressversion_infosumvaluesr  r!   compilation_metric	is_fbcoder   r  rw  
ValueErrorra  rf  r\  r]  rD  r\   set_graph_handlerruncodegen_with_cpp_wrapperr   rT   CachedMetricsHelpergraph_outputsrH   r   r^   has_tensor_outputr   rG   
get_strider   r5  
get_layoutr   _check_triton_bf16_supportr$   freeze_runtime_assertsre  	codecacherZ  rd  r/   extern_kernel_nodesr  compiler[  dictfromkeyswrapper_coder\  compile_to_modulecallcount_bytesnum_bytes_accessednode_runtimesnodes_num_elemr  cudagraph_skip_dynamic_graphsdisable_cudagraphs_reasonr   any_is_symbolicr   r   r   r   re   r	  r   r5   torch._inductor.cudagraph_utilsr_  device_node_mappingr  rO  r  r6   
get_deltas)0r  r   r  r  r  r  r  r  r  rd  re  r,  r  r  	sleep_secr  inductor_countersrP  rq  cuda_contextmetrics_contextrM  rA  const_graphrR  rS  r<  r   metrics_helperr   r  rZ  r  kernel_codeserialized_extern_kernel_nodescompiled_fn	num_bytesr  r  r]  r   meta_valdisablemaybe_incompat_noder_  r2  rX  rH  s0    ``                                          @@@rx   r  '_InProcessFxCompile.codegen_and_compile|  sd
    |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB"."2"2:t"D(,,]EB**)--neD5t< 	 BNAQAQ($B
"
 JKQQS++-#:::	GI9 

9%)"--#% ( 4 9 9 ; !!#c&;&;&=t"DEN*"-;:> ?!
$% %  8 GGR0
 .n=I$ B ,R@	 ! +2. +6r:!/N ",,R@$***)'+'+ $	 !.  <<''**DDRXXN - %"% $P 1 OO))J #6"7"..00'''1+.x/F/M/M/O+P(+3M+B+H+H+J(&99"&9I ##%%	A, -s3J3L/M+e ,z 	*3NC-kD%)""%)"$(! 3 3 P P3A"3E0H0"/ ')"+!)$/!)/E%1$/'+#K ,,[9*P,PP{#)'@@B >*,= : & $2'% +%+A!- +'9) (:'?'?( '8&=&=!,$3+. ")!<!<!>((/II~.QSN**6 +,#(#6#6C *3 7 7$'$9$9$;$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ; $7 /u5 &5T
 446 >>>B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B:$88$)$@$@(-(A(A%& !?
 !0 5 5$J$B!"
 ". 9QU" />.E.E$)$0$6$6$/$5$5$B050A0A6&)-,1,>,>,O,O**6& /F /"	" "" +0*A*A*C*H*HKgj @E?P?P?R<I~}..);.))]:)**n<* #"MMGGG ! A A A!OO11AA>R&*$&HHNND'+yy}}UD'AH $= 8'1(ELL'I'I','<'<'L'LX'V'V (.2iimmM4.PP{P % %3 #|&)0	k]"&MG)0	nG<C9!!''*K*K.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG=(()+T{CCCC+K8w 0/k ED DC +*[ .- TSF
 "!''*K*K
 = ! ; ; 9 ''T
3GG1LG*#&99&113 ,/@@"&)$'2W 0/k ED DC +*[ .- TSP ! "\h & A $?@Ao ,+j :9z" "E 9 0//k EDD DCC +**[ .-- TSSs~   q'D6qm)qnm$	E2nm6*qp3p	 A%o?2n.7A/o?&C0o$C6oAo (#oFo$!C3o$	o?	p	&	p3/	q8	q'Co$	o?$	p	-	p36	q?	q'
m!q$
m3.n6n	nn	n
n+&q.
n=8o? 
o
o
o!o$$
o2.o?6	p	?
p	p		p3
p'#p3*	q3
q=q	q'
q	q''
q5r|   Nr!  )r   r   r   r   r   r  r   r|   rz   rx   r'  r'  {  sK    zz ,z '	z
 'z 
z zrz   r'  c                   [         [        R                  :X  a  [        5       nOK[         [        R                  :X  a  SSKJn  U" 5       nO%[         [        R                  :X  a  SSKJ	n  U" 5       nWR                  XX#5      $ )NrS   )_DebugSerdeFxCompile)_SubprocessFxCompile)fx_compile_moder   r   r'  r   compile_fx_extr  r   compile_fx_subprocr  r  )r   r  r  r  schemer  r  s          rx   r  r    s_     -...$&	M33	38%'	M44	4<%'%%b/XXrz   c                   / n[        U 5       H  u  p4[        U[        R                  5      (       d  M&  [	        UR
                  R                  5      (       d  ML  [        5          X1;   a  [        U5      (       a   SSS5        Mw  [        U5      (       d   SSS5        M   SSS5        UR                  U5        M     U$ ! , (       d  f       N%= f)z
This function runs at compile time, and generates a list of indices for which we
might need to do a copy to preserve alignment requirements.
N)r   r   r   r   r@   rN  rO  rf   rC   rA   r   )inputsr  ids_to_checkr  r  s        rx   r  r    s     Lf%%..ell''((02 %*;E*B*B	 32
 /u55 32
 6 3 	A) &,  32s   $C C  
C	r|   )r  placeholdersmutated_input_idxsc                 ^ ^^^ SSK Jn	  [        R                  R                  (       aC  [
        R                  " U	UUUUUUU[        R                  R                  R                  5       S9	mO[        mS mSUUU U4S jjn
U
$ )Nr   )cudagraphify_impl)device_indexstack_tracesr  r,  r  r  r  
compile_idc                   > Tc(  [         R                  " 5          T" TU T5      mS S S 5        T" U 5      $ ! , (       d  f       N= frs   )rt   r`  )
new_inputsr  cudagraphify_fnmodelr  s    rx   r|  cudagraphify.<locals>.runW  s=    002-eZARS 3:&& 32s	   5
A)r  r  r  r
   )torch._inductor.cudagraph_treesr  r   r  cudagraph_trees	functoolspartialr   r   CompileContextcurrent_compile_id)r  r  r  r  r  r,  r  r  r  new_cudagraphify_implr|  r  r  s   ``         @@rx   cudagraphifyr  5  sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jrz   c                    [         R                  " U R                  5       U R                  5       U R                  U R
                  S9$ )z)
Copy and input while preserving strides
)rL  rN  )r   empty_stridedsizer   rL  rN  )r   s    rx   static_inputr  a  s/     qvvx177188TTrz   c                R    [        X5      n [        X5      nU R                  U5        g)z=Index into expanded dimensions of both dst and src then copy_N)r9   copy_)dstsrcexpanded_dimss      rx   index_expanded_dims_and_copy_r  h  s#     c
1C
c
1CIIcNrz   c                ~  ^^	^
^^^ [        UT5      n[        [        UT5      5      m[        X5        [	        U[
        5      (       d   e[        U5       VVs/ s H  u  pEUT;  a  [        U5      O/ PM     snnm[        U5       VVs/ s HG  u  pE[	        U[        R                  5      (       d  UO UT;  a  [        U5      OUR                  5       PMI     snnm[        [        UT5      5       H@  u  nu  pV[	        U[        R                  5      (       d  M)  UT;  d  M1  [        TU   XV5        MB     [        R                  R                  5         [        R                  R!                  5       nUR#                  [        R                  R%                  5       5        [        R                  R'                  U5         U " [        T5      5        SSS5        UR                  5         [        R                  R%                  5       R#                  U5        [        R                  R                  5         [        R                  R)                  5       m
[        R                  R+                  T
USS9   U " [        T5      5      mSSS5        [	        T[
        [,        45      (       d  T4m[.        R0                  (       a  SU
UUUU4S jjnO8[3        [5        T5      5       Vs/ s H  oDT;  d  M
  UPM     snm	SU	U
UUU4S jjn[7        X5      $ s  snnf s  snnf ! , (       d  f       GNP= f! , (       d  f       N= fs  snf )zI
Assumes inputs[static_input_idxs[i]] are always the same memory address
Nthread_local)streamcapture_error_modec                  > [        T5      [        U 5      :X  d   e[        [        TU T5      5       H  u  nu  p#n[        U[        R
                  5      (       d  M*  [        U[        R
                  5      (       d   eUT;   a&  UR                  5       UR                  5       :X  d   eMw  [        X#U5        M     U R                  5         TR                  5         T	$ rs   )
r   r   zipr   r   r   data_ptrr  r  replay)
r  r  r  r  r  r   inps_expanded_dimsr  static_inputsstatic_outputss
        rx   r|  cudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3..c "#u||44!#u||4444++<<>S\\^;;;
 2#MJ3 LLN!!rz   c                   > T H<  nTU   nX   n[        U[        R                  5      (       d   e[        TU   X25        M>     U R	                  5         TR                  5         T$ rs   )r   r   r   r  r  r  )	r  r  r  r  copy_indicesr   r  r  r  s	       rx   r|  r    sa    # 23 7 o!#u||4444-mC.@#U	 $
 LLN!!rz   )r  list[InputType]r   Callable[[list[InputType]], Any])r  rK   rh   rc   r   r   r   r8   r   r   r  detachr  r  r   synchronizeStreamwait_streamcurrent_streamr  	CUDAGraphr   r5  r   size_assertsr   r   ra   )r  r  r  check_input_idxsr  r   r  r  r|  r  r   r  r  r  s     `      @@@@@rx   r  r  s  s    /v7HI)3#F,=>* 64fd####  ''FC !$+< <!"D'  '	 (FC a..  ++ a	 (	M $-S9K-L#Maa&&36G+G)-*<aO $N
 
JJZZ F
uzz0023			6	"d=!" 
#
	JJ++F3	JJ JJ  "E			%>		RtM23 
SntUm44(*	" 	"* !]!34
4CCT8TC4
	" 	" (>>Y	* 
#	" 
S	R6
s1   L	AL<L.L)	L:+L:
L&)
L7c                   [        U [        5      (       d   U 5       e[        U 5        Uc  SS0O0 UESS0EnUR                  S[        R
                  R                  5      nU(       a  UR                  S5      (       a   S5       eO0 UES[        U R                  5      0EnUR                  SS 5      nU R                  R                  SS 5      n[        R                  R                  U5      n[        R                   " S5         [        R                  R#                  U5         [%        SSSS	9   ['        5          [)        U U[*        R,                  " UUS
9US9n[        U[.        5      (       d   eUR0                  sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)Nrd  Tzaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.r  dynamo_compile_idcompile_fx_aot)r  reset_event_log_on_exit)r  )inner_compilerv  )r   rF   r)   r   r   r\  output_pathendswithr-   coder   r   r   r   r  rj   set_aot_compilationcompile_contextr    r&   
compile_fxr  r  r5   filename)	model_example_inputs_r  rv  r  r  saved_compile_idsaved_compile_contextcompiled_artifactss	            rx   r  r    s    fk**2F2* &f- ! 
44t4  !$$"F$7$7$C$CK ''// 	
R	
//

&	&++(>

 ,//0H$O{{':DA!MM889IJ	d#%%&;<"&$(	

 	'#++'= *
 ,l;;;;!** 		
 	
 	=< 	$# 		
 	
 	
 	=<< 	$##sa    G1$G0F=;AF"	=	F=	G	G1"
F0,F=4	G=
GG	G1
G%	!G11
G?c                  ^^^^ SSK JnJn	  [        U 5        [        R
                  " U SS9n
U
(       a  [        XS5        U" U 5        U	" UU U5      u  nmT Vs/ s H  oU   PM	     nn[        U5      nUR                  R                  Gt pUR                  S   n[        U5       VVs/ s H3  u  nn[        U[        R                  R                  5      (       d  M1  UPM5     snnUR                   S'   / n[        R"                  R$                  R'                  5       nS/mSmUGbA  UR(                  c   eUR(                  n[+        S[-        U5      S-
  5      m[.        [0           " 5       nUR2                  nUc   eSn[-        U5      S:  a  / m[5        [-        U5      5       HN  nUT;  a   S UU'   US:  a  UU   UUS-
     :X  a  US-  nOUR7                  UU   5        TR9                  U5        MP     UR:                  c   e[5        [-        UR:                  5      5       H  nUU;  d  M  S UR:                  U'   M     UR<                  (       a  UR<                  R>                  n[@        RB                  RE                  USS5         U" UUUUUSUU
S9mS S S 5        [F        RH                  (       a  T$ S
UUUU4S	 jjnSUl%        U$ s  snf s  snnf ! , (       d  f       ND= f)Nr   )%convert_conv_weights_to_channels_lastfreezeTr9  user_visible_output_idxsrS   ri  )r  r  r  r,  r  r  c           
        > T Vs/ s H  nXT[        UT5         -
     PM     nnU R                  5         T" U5      $ s  snf rs   )minr  )r}   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      rx   wrapper%fw_compiler_freezing.<locals>.wrapperl  sU     +
* +C>,BCCD* 	 
 	

!(++
s   >)r}   zlist[object]r  zSequence[torch.Tensor])&torch._inductor.freezingr  r  r&  r\   decide_layout_optrs  r#   r   r   r}   r   r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesrg  r   rK   r  params_unwrapped_to_flat_indexr   addr   params_flatr   r   r   r[  rn  rj   r  _boxed_call)aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsr  r  r  forward_devicer  r  r  	opt_modelindrq  r   model_outputs_nodemodel_outputsr  rC  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  r  r  r  r  r   s                              @@@@rx   fw_compiler_freezingr    s    W ""45001CRVWJ+F-.@A'-($I$ >SS=RcS1=RS !34I '__22Q&++A.M#M2;2QjEHHMM6R2;67 mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9 0 **666s?6678A5515++A. 9 && / ; ; P P			9&=t	D*/!'5!	
 
E 	!!, , GNQ T;L 
E	Ds   K
)0KK	K
K#c                     [         R                  R                  (       a  [        [	        S5      5        [         R                  R
                  b  [         R                  R
                  O	[        5       SSSS.$ )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r  r  r2   r1   autotune_at_compile_timerR   r|   rz   rx   get_cpp_wrapper_configr  y  sY    }}+'(=>	
 }}55A MM22$)""
 
rz   c                   [         R                  R                  5       (       d  [        R                  " 5       $ U R
                  R                  SS9n[        S U 5       5      n[        S [        U 5      R                  S    5       5      n[        S X#-   5       5      n[        U5      S:X  a1  [         R                  R                  [        [        U5      5      5      $ [        R                  " 5       $ )zP
Returns a cuda device context manager if there is a single device in the graph
r   r   c              3     #    U  HX  n[        UR                  R                  S 5      [        R                  5      (       d  M=  UR                  S    R
                  v   MZ     g7fr   N)r   r   r   r   r   rN  )r   r   s     rx   r   *get_cuda_device_context.<locals>.<genexpr>  sC      9%DdiimmE*ELL9 	 		%%s   <A" A"c              3  
  #    U  Hy  n[        U[        R                  5      (       d  M$  [        UR                  R	                  S 5      [
        R                  5      (       d  M^  UR                  S    R                  v   M{     g7fr  )r   r   r   r   r   r   r   rN  )r   args     rx   r   r    sU      7*Cc277# 	(2388<<3F(U 	*s   #B6B# Br   c              3  H   #    U  H  oR                   S :X  d  M  Uv   M     g7f)r   N)rO  )r   rN  s     rx   r   r    s      896kkV>S9s   "	"rS   )r   r   r   r^  r_  r   r   rK   rg   r}   r   rN  r  r  )r   r   input_devicesout_devicescuda_devicess        rx   rl  rl    s     ::""$$%%''++}+=.8 9%9 /M -7 7r?''*7 -K
 .8 8+98 .L |! 	

$tL123 ##%rz   c                  ^ ^^^^^ U(       aF  [         R                  " U5         [        T U[         R                  " U5      " T5      US9sSSS5        $ [         R                  (       Ga  [         R                  " SS0[	        5       E5         [
        R                  " U5         Un[        T [        5      (       Ga$  T R                  R                   Vs/ s H0  nUR                  S:X  d  M  UR                  R                  S5      PM2     nnU Vs/ s H&  n[        U[        R                  5      (       a  UOSPM(     nn[!        S U 5       5      (       a  [#        [%        5       Xu5       Ho  u  pnU
c  M  [        U[        R                  5      (       d   eU
R&                  UR&                  :w  d  MH  [)        SU	 S	U
R&                   S
UR&                   S35      e   Un[        T U[*        R,                  " TSS9US9sSSS5        sSSS5        $ [*        R,                  " [        TUS9n[/        T 5      (       d  [1        T UU5      $ [        T [        5      (       a6  [        T R                  R2                  [4        5      (       a  [7        T UU5      $ [9        [:        R<                  5         [?        5          [        R@                  RB                  RE                  [         RF                  RH                  5         [        T [        5      (       am  [K        SU 4S jS9  [L        RO                  S[Q        ST SSSS95        [S        T R                  5      [        RT                  RN                  l+        [Y        T U5      m [!        S U 5       5      (       a(  [[        T UU5      sSSS5        sSSS5        sSSS5        $ [         R\                  (       a   e[_        U5      m[a        [         Rb                  Rd                  5      m[g        S5      m[i        [j        5      mUb  UO	[m        5       n        S&UUUUU U4S jjn[*        R,                  " USS9n[o        [p        U5      n[         Rr                  (       a9  [        Rt                  " 5       (       d  [*        R,                  " [v        T TTTTTS9nO%[*        R,                  " USS9n[o        [p        U5      n        S'S jn[y        SS9      S(UUUU4S jj5       n[o        [p        U5      n[{        U5      =(       d    [        R|                  R                  SS9n[        R                  R                  R                  5       =(       d    [        R                  R                  U5      n[
        R                  (       Ga  [        R                  " SS9   [        T USUS9u  nnSS KFJGn  U" U5      nUR                  R                   H{  nUR                  S!:X  d  M  SUR                  ;  d  M'  [        UUR                  5      n[        U[        R                  5      (       d  M^  UR                  USS"9UR                  S'   M}     SSS5        [        T WW5      nS#T R                  ;   a  T R                  S#   UR                  S#'   S$T R                  ;   a  T R                  S$   UR                  S$'   [        R                  R                  5       nU(       a  [        R                  R                  O[        R                  n[
        R                  " U5         [        R                  " 5          U" 5          U" UU5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ [
        R                  " U5         [        R                  R                  U5         [        R                  " 5          [        R                  " SS9    [        UUUUUSTS%9" T U5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ ! , (       d  f       GN= fs  snf s  snf ! , (       d  f       O= f SSS5        GMz  ! , (       d  f       GN= f! , (       d  f       GN:= f! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        GMK  ! , (       d  f       GNZ= f! [         a  nUR                  5       SeSnAff = f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f))a  
Main entry point for compiling given FX graph.  Despite the fact that this
lives in :mod:`torch._inductor`, this function is responsible for calling
into AOT Autograd (and we will eventually get a callback to
``inner_compile`` to perform actual compilation.  In other words, this
function orchestrates end-to-end compilation for the inductor backend when
you use :func:`torch.compile`.

NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
mutate it!  Make a copy if you need to preserve the original GraphModule.
)r  decompositionsNrd  Fr   r   c              3  (   #    U  H  oS Lv   M
     g 7frs   r|   )r   vs     rx   r   compile_fx.<locals>.<genexpr>  s     :k}ks   zBDevice mismatch between fake input and example input at position #r  z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.T)rd  inductor_pre_grad_graphc                 X   > T R                  SSSS9S[        T R                  5       3-   $ )NFTrA  z

 # graph id: )rC  idr   )r  s   rx   r  compile_fx.<locals>.<lambda>  s7    6#8#8!&tD $9 $ &b&6%78$9rz   rD  r:  zBEFORE PRE GRADr;  c              3  X   #    U  H   n[        U[        [        [        45      v   M"     g 7frs   )r   r   r5  r  r   s     rx   r   r*  3  s!     K?az!dE4011?s   (*c                <  > [         R                  " S5         U(       a  [        U 5        [        R                  R
                  R                  T[        U5      5      n[        U 5      n[        R                  (       Ga\  [        R                  " UR                  6 n[        U5      n[        R                  R                  R!                  5       nUb/  UR"                  (       a  U(       d  UR"                  R$                  nOSn['        T[(        5      (       aX  TR*                  R,                  Gt pU
R.                  S:X  d   e[        R0                  " U
R                  5      u  p[        U5      nOUnX::  d   eX-   nX::  d   e[3        X5       Vs/ s H2  n['        X^   [        R4                  R6                  5      (       d  M0  UPM4     snUR8                  S'   O/ UR8                  S'   T" U U[;        U5      TTUTS9sS S S 5        $ s  snf ! , (       d  f       g = f)Nz$compile_fx.<locals>.fw_compiler_baser   r   r  )r  r  r  r,  r  )rt   r$   r&  r   r  r   num_fw_fixed_argumentsr   rg   r   keep_output_strider   arg_tree_leavesr}   r   r   r   r   num_mutated_inp_runtime_indicesr   rF   r   r   r   tree_flattenr   r   r   r   r   )r   r  r,  r   r  r  num_model_outputsr   original_output_start_indexr   orig_model_outputs_nodeorig_model_outputsnum_orig_model_outputsorig_output_end_idxr  r  r  r  r  r  r  s                  rx   fw_compiler_base$compile_fx.<locals>.fw_compiler_baseQ  s   
 **+QR1"5--DD&N(; &1_",,,$*$:$:<N<S<S$TM(+M(:%#mm::BBDG*w/B/B<#//OO 4 783!&+666<ll6H6H3699XEEE060C0C3881-* 255G1H.1B.1FFF  4L (
 /CCC $)7$K$C &m&8%((--H	 $K&++,FG KM&++,FG$"&;E&B)%!-/=C SRnKo SRs$   E9H/HH
4HH
Hr9  )r  r  r  r  r  r  c                    [        U 5      nU   [        U 5        S S S 5        [        X40 UDSS0D6$ ! , (       d  f       N= f)Ncompilerr  )rl  r&  r   )r   joint_inputsr~   r  s       rx   partition_fn compile_fx.<locals>.partition_fn  sF    
 326L-b1 6$*5?  s	   2
A backward)r  c                @  > SSK Jn  [        R                  " S5         U   [	        U 5      n[
        R                  (       av  [        R                  " UR                  6 n[        U5       VVs/ s H2  u  pV[        U[        R                  R                  5      (       d  M0  UPM4     snnUR                  S'   O/ UR                  S'   [!        U 5      n[
        R"                  (       a  [
        R$                  " ['        5       5      O[(        R*                  " 5          T" U U[-        [/        U5      5      TST
T	S9sS S S 5        sS S S 5        sS S S 5        $ s  snnf ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)Nr   )compile_lockzcompile_fx.<locals>.bw_compilerr  T)r  r  r  r  r  )torch._dynamo.convert_framerE  rt   r$   rg   r   bw_outputs_user_visibler   r3  r}   r   r   r   r   r   r   r=   rd  r[  r  r^  r_  r   r   )r   r  rE  r  r  r  rC  r   r  r  r  r  s           rx   bw_compilercompile_fx.<locals>.bw_compiler  sK    A ))*KL%0_"11$*$:$:<N<S<S$TM '0&>K&>FC%a7 &>K&++,FG KM&++,FG&r* )) LL!7!9:#//12 )&*.uU|*<#-$(!)3A	2 2  MLK2 2 2  MLLsZ   FAE5-/E
 E
&A4E5E8	E5	FE5
E($E5,	F5
F	?F
Frh  )unlift_effect_tokens)trace_jointr'  r   )_detect_fake_mode_from_gmget_attr)static_shapes dynamo_flat_name_to_original_fqnr  )fw_compilerrH  inference_compilerr'  rA  keep_inference_input_mutationsr  )r   rF   r  r  r,  r  r  r:   )r   rF   r@  zSequence[object]r~   rn  r  ztuple[GraphModule, GraphModule])r   rF   r  r  r  r:   )Xr   r[  r  rd  r  rj   set_real_inputsr   rF   r   r   r   r   r   r   r   rW  r  r	   rN  rz  r  r  graph_returns_tuplemake_graph_return_tuple_codegenrQ   handle_dynamo_export_graphrP   r  r  r   r   rq  preserve_node_metaro  rp  rD   pre_grad_graphs_logr  r'   r-  r  _pre_grad_graph_idr  r%   _raise_error_for_testingr   r<   r  r  r0   r  _graph_counterrV   r,   r:   freezingis_grad_enabledr  rE   r#   rk  rl  r   r   r   r  functorch_configr*   torch._export.utilsrL  r  r	  from_tensorr  _C_is_any_autocast_enabled_DisableAutocastr^  r_  rk  r   _disabletracingrM   rN   remove_dynamo_frames) r  r  r  rv  r'  inputs_r   fake_inputsinpr  fir  recursive_compile_fxr<  rP  rQ  rA  rH  rq  r  r   r   rL  r	  r  disable_ampr   r  r  r  r  r  s    ` `                         @@@@rx   r  r    sW   , \\.)$ll>:=I- *) LL!5,. o.+:G&+.. !' 2 2 2ww-/ )DIIMM%( 2    +* &c5<<88CdB*  
 :k:::&)%';&H
>#-a#>#>>#>!yyAHH4&0&hilhmmo')yykahhZ @o%o'" !"	 'I *G'//4P-	; /. T %,,#% v&&& 
 	
 &+&&:~, , * 
 	
 	}BBC "--fll.B.BC fk**)9  %%&%#'#' 	 8:&,,7GEOO!!4/HF
 K?KKK'$C 	DC 	#" 	DCR 2222 1 v}}778
 *$/ ' -8N>Q>S 	N	N	/N	 N	 	N	 N	b .UC 	 6j+N??5#8#8#:#:5>5F5F$##5+%!-6 "+!2!23CRV!W!@."
	
	*
	 
	 -	
	 
'*	="	"	-@"	"	 "	 
>"	H 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 !''TB&7# %#1	'#O J5b9	 HHNNDww*,dii1G!(T[[!9%fell;;/8/D/D &d 0E 0DIIe,	 +# C2 (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  +->-G-G-I79)+G LU9-I-I++W 	DC 	#" 	DCb OOI&MM!!/2&&(""=9# + +'9#1!-37) /+ >= )( 32 '&_ 	DC 	#" 	DCa *). /..   J	 CBX LU9-I-I-I++++& $ 9 ,,.D89 >== )(( 322 '&&_ 	DCC 	#"" 	DCCs  &ab#1a.a$,a$a.-a)>4a.69a.3Aa.>	b,f77Af9B&f	f(	f7;G<f7Ab	 b	3b		!b	*Cf<c%	c		b/#	c	,	c%	5	f>	f	f7f0 e)	e&d5;d=c7	d5	e$	e)	-	f6	f?	f7
a!$
a..
a<	8b
b
b,'f/
b=9c	 	c%		
cc%	f%
c4/f7
dddd
d($d5,	e5
e?e	e)	
ee)	 	f)
e73f:	f
ff	f7
f+	'f77
gc                   [        U [        5      (       d  g[        U 5      R                  u  n[        U[        [
        45      (       a  g[        U[        R                  R                  R                  5      (       a~  [        UR                  S5      (       ac  [        UR                  R                  R                  5      S:  a6  [        S UR                  R                  R                   5       5      (       a  gg)z"True if a FX graph returns a tupleT_schemarS   c              3  R   #    U  H  n[        UR                  5      S :H  v   M     g7f)r   N)r  rO  )r   rets     rx   r   &graph_returns_tuple.<locals>.<genexpr>B  s     O5NcCHH)5Ns   %'F)r   rF   rg   r}   r   r5  r   r   r   r   hasattrr	  r   ro  returnsall)r   rvs     rx   rT  rT  7  s    b+&&O  ER"tUm$$2uxx}}))**BIIy))		!!))*Q.ORYY5F5F5N5NOOO rz   c                  ^^ [        U 5      nUR                  u  n[        R                  " U5      u  nmU R                  R                  U5         U R                  R                  U5        SSS5        U R                  R                  U5        [        U 5      (       d   eU" X5      m[        R                  " T5      SUU4S jj5       nU$ ! , (       d  f       Ng= f)zu
Mutate gm so it returns a tuple.  This is only needed for graphs
not created by torchdynamo that return non-tuples.
Nc                 >   > [         R                  " T" U 0 UD6T5      $ rs   )r   tree_unflatten)r}   r~   r  specs     rx   r  (make_graph_return_tuple.<locals>.wrapper\  s     $$[$%A&%A4HHrz   )r}   r
   r~   r
   r  r
   )rg   r}   r   r5  r   inserting_beforer   r7  rT  r  wraps)r   r  
compile_gmr   rv  r  r  rz  s         @@rx   rU  rU  I  s     r?DIIER""2&HB		"	"4	(
 
)HHr""""R(K__[!I "I N 
)	(s   C
Cc                4  ^^ U R                   R                  m[        R                  R                   R	                  5       U R                   l        U R                  5         U" U TR                  " U6 5      m[        R                  " T5      SUU4S jj5       nU$ )z
`torch._dynamo.export` embeds pytrees in the FX graph codegen object,
convert that to a normal FX graph so inductor can compile it.
c                 F   > TR                  T" TR                  " U 6 6 5      $ rs   )process_outputsprocess_inputs)r}   codegenr  s    rx   r  +handle_dynamo_export_graph.<locals>.wrapperr  s$    &&{G4J4JD4Q'RSSrz   )r}   r
   r  r
   )	r   rV  r   r   CodeGenr8  r  r  r}  )r   r  r~  r  r  r  s       @@rx   rW  rW  c  su     hhG..0BHHLLNR!7!7!@AK__[!T "T Nrz   c                   SS jn[         R                  " U R                  R                  5       U R                  5       H  n[        U[        5      (       d  M  [        U5      nU(       a2  [        U5      (       a"  UR                  5       [        R                  :w  a  M`  [        U5      nUR                  SS9(       a    g U" UR                  5       5        M     g )Nc                    SSK Jn  U c   e[        U R                  5      nUR	                  U 5      n[
        R                  " UR                   S35        U" S5      e)Nr   )rO   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrO   r   rO  get_device_propertiesr   r   r   )rN  rO   device_interfacedevice_propss       rx   warn_and_skip1_check_triton_bf16_support.<locals>.warn_and_skipz  s\    /!!!3FKK@'==fE  !!Z[	
 /00rz   F)including_emulation)rN  zOptional[torch.device]r  r   )r
  r  graph_inputsrw  r  r   r^   r]   r@   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   r  r   r[  r  s        rx   r  r  y  s    
1  2 2 9 9 ;U=P=PQ$''%d++&&~~5>>1 4K@--%-Hdoo'( Rrz   )optionsc               6   SSK Jn  U" U 5      (       d   S5       eSnSn[        U R                  R                  [
        R                  R                  R                  5      (       a  U R                  R                  n[
        R                  R                  R                  5       U R                  l        U R                  5         UR                  R                  b  UR                  R                  nUR                  R                  b  UR                  R                  nO:[        U S5      (       a  U R                  n[        U S5      (       a  U R                  nUb  [         R"                  " U5      OSnUb  [         R"                  " U5      OSn	[         R$                  " X=(       d    0 45      u  pU
 Vs/ s H,  n[        US   [
        R&                  5      (       a  US   OSPM.     nnUb  X:w  a  [)        SU S	U 35      eUc  UU	S
.O0 UEUU	S
.EnX4$ s  snf )z
Flatten the inputs to the graph module and return the flat inputs and options.
Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
rS   )rT  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)r  rT  r   r   rV  r   r   rQ   r  r8  pytree_infoin_specout_specrs  r  r  r   treespec_dumpstree_flatten_with_pathr   rz  )r   r}   r~   r  rT  r  r  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr   flat_example_inputss                 rx   _aoti_flatten_inputsr    s    0r"" 	" GH"((##UXX^^%B%BCC((##!HHNN224
&&2))11G''3**33H 2z""kkG2{##||H;B;N..w7TV+3+?h'R  *0)F)F	|*& CVBUQ
1Q4..!D8BU   }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   43H)rw   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r}   rn  r~   rn  r  r#  )r  r   )r   r  r  	list[int])r   rF   r  r#  )r  zCallable[..., None]r"  )r   rF   r   rF   r   r   r  rF   )r   rF   r  zGenerator[str, None, None])r   rF   r  r  r  rF   )F)r   rF   r,  r  r  r#  )TNN)
r   rF   r9  r  r:  zOptional[list[str]]r;  z)Optional[Callable[[torch.fx.Node], bool]]r  z"tuple[GraphModule, dict[str, int]])r   rF   r  r  )r  r  r  "AbstractContextManager[None, None])rd  r  re  r  r  r  )r   rF   r  r  rp  r  r  z torch._subclasses.FakeTensorModers   )rv  z$Optional[Union[str, dict[str, Any]]]r  zdict[str, Any])r  zGenerator[None, None, None]r  )r   rF   r  r  r  r  r  r:   )
r   rF   r  r  r  r  r  r  r  r:   )r  r  r  r  r  r  )r|   )r  Callable[..., Any]r  r  r  r  r  zlist[Optional[str]]r  r  r,  r  r  ztuple[torch.Tensor, ...]r  zSequence[PlaceholderInfo]r  ztuple[int, ...]r  r  )r   torch.Tensorr  r  )r  r  r  r  r  r  r  r#  )r  r  r  zlist[torch.Tensor]r  r  r  r  )
r  rF   r  r  r  r  rv  zOptional[dict[str, str]]r  zUnion[list[str], str])r
  rF   r  r  r  rF   r  r  r  r  r  r<   r  r  r  r0   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r  rF   r  r  r  zCallable[..., OutputCode]rv  Optional[dict[str, Any]]r'  z.Optional[dict[OpOverload, Callable[..., Any]]]r  zGUnion[Callable[[list[object]], Sequence[torch.Tensor]], str, list[str]])r   rF   r  r  r~  r  r  r  )r   r\   r  r#  )
r   r  r}   z!Union[list[Any], tuple[Any, ...]]r~   r  r  r  r  z tuple[list[Any], dict[str, Any]])
__future__r   r^  enumr  r+  r
  r  r   r   re  r  r   abcr   r   collectionsr   r   inspectr   r	   typingr
   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer   functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   rt   torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr    r!   r"   r#   r$   r%   r&   r'   r(   torch._functorchr_  7torch._functorch._aot_autograd.subclass_parametrizationr)   torch._functorch.aot_autogradr*   r+   r,   torch._inductor.codecacher-   r.   r/   r  r0   r1   r2   r3   torch._inductor.debugr4   torch._inductor.output_coder5   r6   r7   r8   r9   r:   %torch._inductor.runtime.runtime_utilsr;   torch._inductor.utilsr<   r=   r>   r?   r@   rA   rB   rC   torch._loggingrD   torch._utils_internalrE   rF   %torch.fx.experimental.symbolic_shapesrG   rH    torch.fx.passes.fake_tensor_proprI   torch.monitorrJ   torch.utils._ordered_setrK   _dynamo.backends.commonrM   _dynamo.excrN   rO   fx._lazy_graph_modulerP   fx.graphrQ   utils._tritonrR   r  rT   r  rU   decompositionrV   excrW   fx_passes.joint_graphrX   fx_passes.post_gradrY   rZ   fx_passes.pre_gradr[   r   r\   irr]   r^   output_coder_   triton_bundlerr`   ra   rb   rc   rd   re   rf   rg   rh   ri   virtualizedrj   collections.abcrk   rl   rm   
torch._opsrn   ro   rp   rq   ry  ry   r   torch._inductor.fb.utils&torch._functorch._aot_autograd.schemasr   r   r   Enumr   r   r  r   r   r   _logginggetArtifactLoggerrY  rY  rn  r  r   r   	lru_cacher   r   r  r  r  r&  r*  rD  rQ  ra  rf  rs  rw  contextmanagerr|  r~  r  r  r  r  r  r'  r  r  r  r  r  r  r  r\  r  r  rl  r  rT  rU  rW  r  r  r|   rz   rx   <module>r     s&   "    	    	 
   # # -    I I U U  $  $ $ A  ;  D =
 
 
 8 
 O N  A  <	 	 	 , ?   W ; & / 2 5 : % &   .  5 B /   ' I )
 
 
  3:%$ t_T]((**% L DII $. +,!00<Hnn66xARS ~~77BTU NN44' 

4A T/ / T	
 	
I	I%I8FIIX%*NN'N N&		+ "15FJ	E(E(E( /E( D	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ <@(8((  
;y 
; 2
2
'2
 '2
 	2
j 23ZZ'Z -Z 	Z 4ZzB B# #4|) |~YY'Y
 #Y -Y Y0  $   J (*) +-.0*,))$) 	)
 &) ) ) () ,) () )XU		  
	 (*\?\?\? %\? &	\?D )9/3	;+;+$;+ &;+ -	;+
 ;+| qc#c+c c 	c
 &c c c %c 6cL&B 0@/3EIJ9J9(J9 -J9 -	J9
 CJ9 MJ9Z$ # 	4 # 	,)D (,I(
 )-I(I(
+I( %I(
 &I( &I(rz   