
    shx1                         S SK r S SKJr  S SKrS SKJs  Jr  S SKJ	r	  S SK
Jr  SSKJrJr  SSKJr  \ R"                  " \5      rS\R(                  S	\R*                  S
\4S jrS\R(                  S	\R*                  S\S
S4S jrS\R(                  S
\R4                  4S jr\\\\4      " 5       rS\R4                  S
S4S jrS\R4                  S
\4S jr S\R(                  S\S\4S jr!S\R(                  4S jr"S r#g)    N)cast)is_symbolic)
OrderedSet   )configir)Vxcomm_buffer_typereturnc                 6   [        U 5      n[        U[        R                  5      (       a  gUR	                  5       n[        U[        R
                  5      (       a  g[        U[        R                  5      (       a  [        UR                  5       5      (       d  gg)zY
Check if an input can be realized as a comm buffer of the specified
`comm_buffer_type`.
TF)		_get_data
isinstancer   Loopsget_output_specCommBufferLayoutFlexibleLayoutr   	get_numel)r
   r   datalayouts       q/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_inductor/comm_lowering.pycan_realize_as_comm_bufferr   8   so     Q<D$!!!!#F&"--..&"++,,[AQ5R5R    
group_namec                    U R                  5         [        U 5      n[        U[        R                  5      (       d   eUR                  5       n[        U[        R                  5      (       a  g[        U[        R                  5      (       d  [        SU S35      e[        UR                  5       5      (       a  [        SU S35      e[        R                  " UUUS9Ul        g)z
Realize an input as a comm buffer of the specified `comm_buffer_type`.

Specifically, this realizes the underlying buffer if it's still unrealized
and changes the layout of the buffer to `ir.CommBufferLayout`.
NzOA buffer can only be realized as a comm buffer if it has `FlexibleLayout` (got ).zGA buffer with symbolic shape cannot be converted to a comm buffer (got )r   r   r   )realizer   r   r   Bufferr   r   r   AssertionErrorr   r   r   )r
   r   r   bufferr   s        r   realize_as_comm_bufferr!   N   s     IIKq\Ffbii((((##%F&"--..fb//00))/4
 	

 6##%&&""(-
 	

 '')FMr   c                 |   [        U R                  [        R                  5      (       a$  U R                  R	                  5       R                  $ [        U R                  [        R
                  5      (       a.  [        [        R                  U R                  R                  5      $ [        SU R                   S35      e)Nz\Expect the data attr of a `TensorBox` to be either an `ir.BaseView` or `ir.StorageBox` (got r   )	r   r   r   BaseViewunwrap_view
StorageBoxr   r   r   r
   s    r   r   r   r   s    !&&"++&&vv!!#(((	AFFBMM	*	*BIIqvv{{++889xrC
 	
r   c                 |    [         R                  [        [        R                  5      U R                  5       45        g)z
If a non-blocking collective is lowered as a blocking collective, the wait
node in the original graph becomes useless and we can skip the lowering it.
N)_bufs_to_skip_waitaddidr	   graphget_namer&   s    r   mark_as_skip_waitr-      s%    
 BqwwK67r   c                 b    [        [        R                  5      U R                  5       4[        ;   $ N)r*   r	   r+   r,   r(   r&   s    r   should_skip_waitr0      s!    qwwK&*<<<r   inp	reduce_opc                 l   SSK Jn  U R                  5       U R                  5       R                  -  n[
        R                  R                  =(       ac    U" U5      =(       aT    [        U [        R                  R                  5      =(       a)    US;   =(       a    U[
        R                  R                  :*  $ )Nr   )is_symm_mem_enabled_for_group)sum)#torch.distributed._symmetric_memoryr4   r   	get_dtypeitemsizer   _collectiveauto_selectr   r   CommBufferTypeSYMM_MEM#one_shot_all_reduce_threshold_bytes)r1   r2   r   r4   inp_sizes        r   $_should_lower_as_one_shot_all_reducer?      s     R}}!9!99H&& 	O)*5	O&sB,=,=,F,FG	O !	O **NNNr   c           	      B   [        U [        R                  R                  U5        [        R
                  " [        R                  R                  [        R                  R                  [        R                  R                  R                  R                  U UU5      5      $ r/   )r!   r   r;   r<   pytreetree_map	TensorBoxcreateFallbackKerneltorchopssymm_memone_shot_all_reducedefault)r1   r2   r   s      r   _one_shot_all_reducerK      sn    3 1 1 : :JG??

  II22::		
 r   c            	        ^^^^^^  [         R                  R                  R                    SSKJmJ	mJ
mJmJm  UUU4S jn [         R                  R                  mU " TR                  5      S[        R                  S[        S[        S[        R                  4UU4S	 jj5       nU " TR                   5      S[        R                  S[        S[        S[        R                  4UU4S
 jj5       nU " TR"                  5      UU4S j5       nU " TR$                  5      U4S j5       nU " TR&                  5      U4S j5       nU " TR(                  5      U4S j5       nU " TR*                  5      U4S j5       nU " TR,                  5      U4S j5       nU " TR.                  5      U4S j5       n	U " TR0                  5      U4S j5       n
U " TR2                  5      UU4S j5       nU " TR4                  5      U4S j5       nU " [         R                  R6                  R8                  5      S 5       nU " TR:                  5      U4S j5       ng ! [         a    [
        R                  S5         g f = f)NzRInductor support for distributed collectives depends on building torch.distributedr   )add_layout_constraintcloneconstrain_to_fx_stridescopy_register_loweringc                 &   > T" U T5        T" U 5      $ r/    )fnrM   rO   rQ   s    r   register_comm_lowering7register_comm_lowerings.<locals>.register_comm_lowering   s    b"9: $$r   r1   r2   r   r   c                   > [        XU5      (       a  [        XU5      $ T" U 5      n [        R                  (       aG  U R	                  5         [
        R                  R                  R                  U R                  5       5        [        R                  R                  U 5      n [        R                  R                  TR                  R                   XU5        U $ r/   )r?   rK   r    reorder_for_compute_comm_overlapr   r	   r+   no_fuse_buffer_namesr)   r,   r   ExternKernelrequire_contiguous_CollectiveKernelcreate_inplaceall_reduce_rJ   )r1   r2   r   c10drN   s      r   _all_reduce,register_comm_lowerings.<locals>._all_reduce   s    /
KK'
CC Cj22 KKMGG((,,S\\^<oo005
++$$cj	
 
r   c                   > [        XU5      (       a   T" U [        XU5      5      n[        U5        U $ [        R                  R                  U 5      n [        R                  R                  TR                  R                  XU5        U $ r/   )
r?   rK   r-   r   rZ   r[   r\   r]   r^   rJ   )r1   r2   r   retr_   rP   s       r   _all_reduce_-register_comm_lowerings.<locals>._all_reduce_   s|     0
KK$SZ@C c"J oo005
++$$cj	
 
r   c                    > U  Vs/ s H  nT" U5      PM     n n[         R                  R                  TR                  R                  U UU5        U $ s  snf r/   r   r\   r]   all_reduce_coalesced_rJ   )inputsr2   r   r1   r_   rN   s       r   _all_reduce_coalesced6register_comm_lowerings.<locals>._all_reduce_coalesced   sS    (./%*/
++&&..		
  0s   Ac                 t   > [         R                  R                  TR                  R                  U UU5        U $ r/   rg   )ri   r2   r   r_   s      r   _all_reduce_coalesced_7register_comm_lowerings.<locals>._all_reduce_coalesced_   s6    
++&&..		
 r   c                    > [         R                  R                  [         R                  R	                  TR
                  R                  U UU5      5      $ r/   )r   rC   rD   r\   create_out_of_placeall_gather_into_tensorrJ   )r1   
group_sizer   r_   s      r   _all_gather_into_tensor8register_comm_lowerings.<locals>._all_gather_into_tensor   sF    ||""  44++33	
 	
r   c           	         > [         R                  " [        R                  R                  [        R
                  R                  TR                  R                  U UU5      5      $ r/   )	rA   rB   r   rC   rD   r\   rp    all_gather_into_tensor_coalescedrJ   )ri   rr   r   r_   s      r   !_all_gather_into_tensor_coalescedBregister_comm_lowerings.<locals>._all_gather_into_tensor_coalesced  sM    LL  4455==	
 	
r   c                r   > [         R                  R                  TR                  R                  U UUUS9  U$ )N)out)r   r\   r]   all_gather_into_tensor_outrJ   )r1   rr   r   rz   r_   s       r   _all_gather_into_tensor_out<register_comm_lowerings.<locals>._all_gather_into_tensor_out  s>    
++++33 	, 	
 
r   c           	         > [         R                  R                  [         R                  R	                  TR
                  R                  U UUU5      5      $ r/   )r   rC   rD   r\   rp   reduce_scatter_tensorrJ   )r1   r2   rr   r   r_   s       r   _reduce_scatter_tensor7register_comm_lowerings.<locals>._reduce_scatter_tensor"  sI    ||""  44**22
 	
r   c           
         > [         R                  " [        R                  R                  [        R
                  R                  TR                  R                  U UUU5      5      $ r/   )	rA   rB   r   rC   rD   r\   rp   reduce_scatter_tensor_coalescedrJ   )ri   r2   rr   r   r_   s       r    _reduce_scatter_tensor_coalescedAregister_comm_lowerings.<locals>._reduce_scatter_tensor_coalesced.  sP    LL  4444<<	
 		
r   c           	         > [         R                  R                  [         R                  R	                  TR
                  R                  U UUU5      5      $ r/   )r   rC   rD   r\   rp   all_to_all_singlerJ   )r1   output_split_sizesinput_split_sizesr   r_   s       r   _all_to_all_single3register_comm_lowerings.<locals>._all_to_all_single;  sI    ||""  44&&.."!
 	
r   c                    > T" U 5      n [         R                  R                  TR                  R                  XU5        U $ r/   r   r\   r]   
broadcast_rJ   )r1   srcr   r_   rN   s      r   
_broadcast+register_comm_lowerings.<locals>._broadcastG  s8    Cj
++OO##Sz	
 
r   c                 r   > [         R                  R                  TR                  R                  XU5        U $ r/   r   )r1   r   r   r_   s      r   _broadcast_,register_comm_lowerings.<locals>._broadcast_O  s/    
++OO##Sz	
 
r   c           	          [         R                  R                  [         R                  R	                  [
        R                  R                  R                  R                  U UUU5      5      $ r/   )
r   rC   rD   r\   rp   rF   rG   _dtensorshard_dim_alltoallrJ   )r1   
gather_dim	shard_dimr   s       r   _shard_dim_alltoall4register_comm_lowerings.<locals>._shard_dim_alltoallV  sR    ||""  44		""55==
 	
r   c                    > [        U 5      (       a  U $ [        R                  R                  TR                  R
                  U 5        U $ r/   )r0   r   _WaitKernelcreate_waitwait_tensorrJ   )r1   r_   s    r   _wait_tensor-register_comm_lowerings.<locals>._wait_tensorb  s8    C  J
""4#3#3#;#;SA
r   )rF   rG   _c10d_functional
all_reduceAttributeErrorloginfoloweringrM   rN   rO   rP   rQ   r   rC   strr^   all_reduce_coalescedrh   rq   rv   r{   r   r   r   	broadcastr   r   r   r   )rU   r`   rd   rj   rm   rs   rw   r|   r   r   r   r   r   r   r   rM   r_   rN   rO   rP   rQ   s                  @@@@@@r   register_comm_loweringsr      sf   		""-- % 99%%DDOO, # 3 2<<  -( D,,-\\&)7:	 .$ D556 7 D667 8 D778
 9
 DAAB	
 C	
 D;;< = D667	
 8	
 D@@A

 B

 D223	
 4	
 DNN+ , DOO, - EII..AAB	
 C	
 D,,- .k   	
 	s   $I I.-I.)$loggingtypingr   rF   torch.utils._pytreeutils_pytreerA   torch._inductor.utilsr   torch.utils._ordered_setr    r   r   virtualizedr	   	getLogger__name__r   rC   r;   boolr   r   r!   IRNoder   tupleintr(   r-   r0   r?   rK   r   rS   r   r   <module>r      s"      $ $ - /   !T	||')'8'8	,!	||!')'8'8!FI!	!H
 
")) 
  c3h02 8 8t 8=		 =d =	"%36
bll 
}r   