
    sh                     b   S SK r S SKrS SKrS SKJr  S SKrS SKJr  S SK	J
r
  S SKJr  S SKJrJrJrJr  S SKJr  S SKJrJrJrJr  S r SS	\R2                  S
\S\S\S\R6                  S\\R8                     S\4S jjrS	\R2                  S
\S\S\4S jrS	\S\\   S\R2                  4S jrg)    N)Optional)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorProperties)ShardMetadata)
DeviceMeshDTensor	Replicater   c                     UR                  5       S:X  a  SU  SU 3$ UR                  5       S:X  a"  SU  SU S[        U5      R                  5        3$ SU  SU SX-   3$ )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_nodes      w/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_strr      s}    e#tfAk]++					%tfAk]!,>{,K,Z,Z,\+]^^tfAk]!D,G+HII    tensorr   
world_sizer   pgdevicereturnc                    U R                  USS9n[        U5      U:  a{  Xa   R                  5       nU R                  5        Vs/ s H  nSPM     n	n[        R
                  " U R                  5       S   U-  5      U-  U	S'   [        R                  " XyU5      /n
O/ n
U Vs/ s H  n[        UR                  5       5      PM     nnS/[        [        R                  " U Vs/ s H  oS   PM	     sn5      5      SS -   nS/[        US   5      S-
  -  n	U Vs/ s H  o/U	-   PM
     nnUc   [        R                  " U5      R                  OUR                  n[        [        U5      5       Vs/ s H%  n[        [         R"                  " UU5      UU5      PM'     nn[        U5      [        U5      s=:X  a  [        U5      :X  d   e   e[%        UUU5       VVVs/ s H  u  nnn['        UUU5      PM     nnnn[)        UU R                  5       [+        U R,                  U R.                  S[0        R2                  U R5                  5       S9S9n[6        R8                  " U
UUS	9$ s  snf s  snf s  snf s  snf s  snf s  snnnf )
z
Shard a tensor to chunks along the first dimension. The local rank will gets its
corresponding chunk as the local shard to create a ShardedTensor.
r   )dimN   F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatasizetensor_properties)sharded_tensor_metadataprocess_group)chunklencloner*   mathceilr   from_tensor_and_offsetslist	itertools
accumulater   _get_pg_default_devicetyperanger   distget_global_rankzipr
   r   r	   r$   r%   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shard_offsetslocal_shardsr.   chunk_sizes
chunk_sizedim0_offsetsd0chunk_offsetsr   r
placementsoffsetr*   	placementshard_metadatar,   s                            r   _create_chunk_sharded_tensorrP      sx    \\*!\,F
6{Tl((*$kkm,m1m,YYv{{}Q/*<=D
55kDQR 4::6%4

%6K:3kJk
mkJK	r L cSQ(1,-G.:;lTG^lM; > 	//388[[  s;'( )A 	  Q' 	

 )   {s=1DS_DDDDD (+=+z'R'R#FD) 	fdI.'R   4&[[]*,,==11'')

 DD.EUW U - ;J <s$   I#!#I(!I-I2.,I7I<device_meshc                 h   U R                  5       R                  5       n [        UR                  5       Vs/ s H  n[	        5       PM     nn[        UR                  5       Vs/ s H  n[	        5       PM     nn[        S5      US'   [        R                  " XUSS9R                  US9$ s  snf s  snf )z
Shard a tensor to chunks along the first dimension. The local rank will gets its
corresponding chunk as the local tensor to create a DTensor.
r   r"   F)	run_check)rL   )	detachr0   r9   ndimr   DShardr   
from_localredistribute)r   r   rQ   rC   replicate_placementsshard_placementss         r   _create_chunk_dtensorr[   \   s     ]]_""$F 27{7G7G1HI1HAIK1HI-2;3C3C-DE-D	-DE!!9R1Ul#  	 JEs   B*#B/	root_meshc                     XR                   :X  d   S5       e[        [        R                  " U R                  5      5      n[        5       US'   U R                  U R                   US9n U R                  5       $ )zL
All gather a DTensor in its sharded dimension and return the local tensor.
z2The device mesh of a tensor should be a root mesh.r"   )rQ   rL   )rQ   r4   copydeepcopyrL   r   rX   to_local)r   r\   rL   s      r   _all_gather_dtensorra   u   sx     *** <* dmmF$5$567J [JrN  && ! F
 ??r   )N) r^   r5   r1   typingr   r=   torch.distributeddistributedr:   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r	   &torch.distributed._shard.sharding_specr
   torch.distributed.tensorr   r   r   rV   r   TensorintProcessGroupr   rP   r[   ra    r   r   <module>rm      s           + .  A T TJ &*;LL;
; ; 	;
 	; U\\"; ;|LL
  	2
# \\r   