
    sh+                    p   S SK Jr  S SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKrS SKJrJr  S SKJr  S SKJr  \	(       a  S SKJr  \R*                  R-                  \S	5      r\R*                  R-                  \S
5      r\\\
\\R8                  4         r\\\   /\4   r\R>                  " SS9 " S S5      5       r \R>                  " SS9 " S S5      5       r!\R>                  " SS9 " S S5      5       r"    S(S jr#S)S jr$S*S jr%S+S jr&S,S jr'      S-S jr(        S.S jr)S/S jr*    S0S jr+    S0S jr,S1S jr-\R>                   " S S5      5       r.          S2S  jr/S3S! jr0 " S" S#\5      r1            S4S$ jr2      S5S% jr3\R>                  " SS9 " S& S'5      5       r4g)6    )annotationsN)Enum)AnyCallableOptionalTYPE_CHECKINGUnion)countersget_metrics_context)	InputType)
OrderedSet)Sequence
perf_hintscudagraph_static_inputsT)frozenc                  $    \ rS rSr% SrS\S'   Srg)
FunctionID   z9Unique counter of a function wrapped in cudagraphify_implintid N__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r       s/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_inductor/cudagraph_utils.pyr   r      s
    ?Gr    r   c                  B    \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
rg)PlaceholderInfo#   z
A serializable version of torch.fx.Node that contains information
pertinent to placeholder stack traces. We use these in logging and error messages
related to cudagraphs, and will cache these results.
strnameOptional[str]stack_tracelist[PlaceholderInfo]usersmutating_use_stack_tracer   Nr   r   r    r!   r#   r#   #   s      I  ++r    r#   c                  V    \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   Srg)WrappedFunction2   z
Represents a function that you want to record for CUDA graph replay,
with a little more metadata so we can identify if we have an applicable
CUDA graph in our CUDA graph tree for it.
zCallable[..., Any]modelSequence[int]static_input_idxsr   r   ztuple[torch.Tensor, ...]	constantsSequence[PlaceholderInfo]placeholdersmutated_input_idxsr   Nr   r   r    r!   r-   r-   2   s,     $$N''++%%r    r-   c                   [        U R                  5      S:X  a8  [        [        U R                  5      5      R                  R                  SS 5      $ U R                   Hh  nUR                  [        R                  R                  R                  R                  :X  d  MA  UR                  R                  SS 5      =n(       d  Mf  Us  $    g )N   r(   )lenr*   nextitermetagettargettorchopsatencopy_default)placeholder_nodeuser(   s      r!   &get_mutating_use_stack_trace_from_noderE   B   s     !!"a'D)//0166::=$OO%%::--555!hhll=$??{?"" &
 r    c                    U R                   $ N)r+   )placeholder_infos    r!   get_mutating_use_stack_tracerI   Q   s    444r    c                    U R                   nU R                  R                  SS 5      n/ nS nU R                  S:X  a0  U R                   Vs/ s H  n[        U5      PM     nn[        U 5      n[        XX45      $ s  snf )Nr(   placeholder)r&   r;   r<   opr*   to_placeholder_inforE   r#   )rC   r&   r(   r*   r+   is         r!   rM   rM   U   s      D"''++M4@KE#m+1A1G1GH1GA$Q'1GH#I$
  4eNN Is   A9c                z    U R                    Vs/ s H  oR                  S:X  d  M  [        U5      PM!     sn$ s  snf )NrK   )nodesrL   rM   )graphnodes     r!   get_placeholder_inforS   d   s9    .3kk.9dWW=U!D!k  s   88c                    SU  3$ )Nzskipping cudagraphs due to r   )reasons    r!   format_default_skip_messagerV   j   s    (11r    c                    SnU H  nX   n[        U5      =n(       d  M    O   [        S[        U5       S35      nU(       a  U SU 3$ U$ )N zmutated inputs (z instances). Found from : 
 )rI   rV   r8   )r4   mutation_indicesr(   idxrK   msgs         r!   get_mutation_stack_tracer]   n   si     "$K"'6{CC;C  
 &
3/01=C (66Jr    c                   [         R                  R                  R                  R                  (       a?  U R
                   Vs/ s H'  nX0R                  ;   a  M  U" X   5      (       a  M%  UPM)     nnOU R
                  n[        R                  SU R                  5        [        R                  SU5        U(       a  [        U R                  U5      $ S $ s  snf )Nz'check mutation static input indices: %sz#check mutation mutation indices: %s)r>   	_inductorconfigtritoncudagraph_treesr5   r1   static_inputs_logdebugr]   r4   )funcinputsis_cuda_graph_recorded_tensorr[   rZ   s        r!   check_for_mutationrh      s     $$44 ..+
.---  1=	 . 	 +
  22143I3I ACST  	!!2!24DE !+
s   CC*Cc                x    U R                    H*  nUR                  R                  SS 5      =n(       d  M(  Us  $    g )Nr(   )r*   r;   r<   )rR   rD   r(   s      r!   _get_use_stack_tracerj      s5    zz((,,}d;;;;  r    c                   U R                  [        R                  " S5      5      =n(       a=  SUR                   S3n[	        U5      =n(       a  [        U SU 35      $ [        U5      $ [        U 5      S:X  a1  [        [        U R                  5       5      5      R                  S:X  a  g S U R                  5        5       n[        SS	R                  U5       35      $ )
Ncpuzcpu device ()rY   r7   cudac              3  8   #    U  H  n[        U5      v   M     g 7frG   )repr).0keys     r!   	<genexpr>:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>   s     A&@sc&@s   zmultiple devices: z, )r<   r>   devicer&   rj   rV   r8   r9   r:   keystypejoin)device_node_mappingcpu_noder\   r(   	keys_reprs        r!   'check_multiple_devices_or_any_cpu_nodesr|      s     '**5<<+>??x?X]]O1-.x88;8.#6H/VWW*3// 	 A%)..012776AA&9&>&>&@AI&);DIIi<P;Q'RSSr    c                    [        U 5      $ rG   )r|   )ry   s    r!    check_lowering_disable_cudagraphr~      s     33FGGr    c                    [         R                  U 5        [        S   S==   S-  ss'   [        5       nUR	                  5       (       a  UR                  SU SS9  g g )Ninductorcudagraph_skipsr7   cudagraph_skip_reasonT)	overwrite)perf_hint_logwarningr
   r   in_progressset)r\   metrics_contexts     r!   #log_cudagraph_skip_and_bump_counterr      sX    #Z*+q0+)+O""$$3SDI %r    c                  *    \ rS rSr% S\S'   SS jrSrg)BoxedDeviceIndex   Optional[int]valuec                D    Ub  [        U[        5      (       d   eXl        g rG   )
isinstancer   r   )self
device_idxs     r!   r   BoxedDeviceIndex.set   s    !Z
C%@%@@@
r    )r   N)r   r   returnNone)r   r   r   r   r   r   r   r   r    r!   r   r      s     r    r   c                n   [        S5      n[        R                  R                  R                  R
                  (       aZ  [        U5      nU Vs/ s H  ofU;  d  M
  UPM     nn[        U5      S:g  nU(       d  g [        U R                  5      n	[        X5      $ [        U5      S:g  nU(       d  S $ U$ s  snf )Nzmutated inputsr   )rV   r>   r_   r`   ra   rb   r   r8   rS   rQ   r]   )
gmmutated_inputsr5   r1   default_msgunique_idxsr[   rZ   has_mutationr4   s
             r!   3check_for_mutation_ignore_cuda_graph_managed_tensorr      s     ..>?K $$44 !23+=X+=CKAWC+=X+,1+BHH5'GG >*a/'t8[8 Ys   	B2B2c                    U R                   (       a  U R                   $ U R                   H"  nUR                   (       d  M  UR                   s  $    g)zE
Gets the first non-empty stack trace of a placeholder or its users.
N)r(   r*   )rK   users     r!   get_placeholder_stack_tracer      sF     &&&!!### " r    c                  .    \ rS rSrSrSrSrSrS	S jrSr	g)
CheckInvariantStatus   r7            c                    U R                   S:X  a  gU R                   S:X  a  gU R                   S:X  a  gU R                    SU R                   3$ )NCudagraphManagedIdxMismatchz-cudagraph managed tensor data pointer changedStaticInputIdxMismatchz!static input data pointer changed&ExpectedDeadIndicesBeforeGraphMismatchz+expected dead indices before graph are livez: )r&   r   )r   s    r!   __str__CheckInvariantStatus.__str__  sK    9955BYY226YYBB@ii[4::,//r    r   Nr   r%   )
r   r   r   r   SUCCESSr   r   r   r   r   r   r    r!   r   r      s$    G #$  ./*0r    r   c                   [        U5      [        U5      :X  a  [        U5      [        U 5      :X  d   S5       eU Vs/ s H  oQU   PM	     nnU Vs/ s H  oRU   PM	     nnU S3n[        [        Xg5      5       Hy  u  nu  p[        U	[        R
                  5      (       d   eX5   nU	R                  5       U
:w  d  MC  X   nU SUR                   SU
 SU	R                  5        S[        U5       S3
nM{     U$ s  snf s  snf )zq
Logs the mismatch between input data pointers and recorded data pointers.
This checks only idxs in target_idxs.
zClength mismatch between inputs, recorded_data_ptr, and placeholdersz.
zinput name: z. data pointer changed from z to z. input stack trace: 
)	r8   	enumeratezipr   r>   Tensordata_ptrr&   r   )r4   rf   recorded_data_ptrtarget_idxsmismatchrN   	t_tensorst_data_ptrs	error_msgtensorr   indexrK   s                r!   log_data_ptr_mismatchr     s    v;#/00S[CDU5U MU %00KqKI01<=AQ'K=*C I!*3y+F!GF&%,,////??(&-K+\+*:*:); <--5Jd6??;L:M N&&A+&N%OrS  "H  1=s   C8C=c                N  ^ [        U R                  5       5      S-   mSU4S jjn[        R                  R                  R
                  R                  (       aM  T[        R                  R                  R
                  R                  :  a  [        R                  U" 5       5        gg)Nr7   c                    > ST  S3$ )NzCUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed a0   distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.r   )num_cudagraphss   r!   warn_msg4maybe_warning_due_to_dynamic_shape.<locals>.warn_msg5  s    00>/? @''		
r    TFr   )	r8   rv   r>   r_   r`   ra   "cudagraph_dynamic_shape_warn_limitr   r   )fn_cachenew_int_keyr   r   s      @r!   "maybe_warning_due_to_dynamic_shaper   /  st     )A-N

 	%%HH
//
 
 
'
'
J
JK 	hj)r    c                  8    \ rS rSr% SrS\S'   S\S'   S\S'   S	rg
)CudagraphCachedInfoiL  z
Info needed to realign inputs
r3   r4   zlist[Optional[str]]stack_tracesz	list[str]cudagraph_fail_reasonsr   Nr   r   r    r!   r   r   L  s     ,+%%%%r    r   )rC   torch.fx.Noder   r'   )rH   r#   r   r'   )rC   r   r   r#   )rQ   ztorch.fx.Graphr   r)   )rU   r%   r   r%   )r4   r3   rZ   r0   r   r%   )re   r-   rf   list[InputType]rg   zCallable[[torch.Tensor], bool]r   r'   )rR   r   r   r'   )ry   z!dict[torch.device, torch.fx.Node]r   r'   )r\   r%   r   r   )
r   ztorch.fx.GraphModuler   zOrderedSet[str]r5   zOrderedSet[int]r1   r0   r   r'   )rK   r#   r   r'   )r4   r3   rf   r   r   zSequence[Optional[int]]r   r0   r   r   r   r%   )r   z)dict[tuple[int, ...], Callable[..., Any]]r   r   r   bool)5
__future__r   dataclassesenumr   typingr   r   r   r   r	   r>   torch._dynamo.utilsr
   r   torch._inductor.utilsr   torch.utils._ordered_setr   collections.abcr   _logginggetArtifactLoggerr   r   rc   listr   r   
OutputType	ModelType	dataclassr   r#   r-   rE   rI   rM   rS   rV   r]   rh   rj   r|   r~   r   r   r   r   r   r   r   r   r   r    r!   <module>r      s   "   @ @  = + / ( 00<HNN44' 
 (5ell!2345
d9o&
23	 d#  $ d#, , $, d#& & $&#5O2+?L&
 $B 	>T:TT(H:HHJ      99#9 (9 %	9
 9004 00+ / 	
 # 	>7 
: d#& & $&r    