
    sh;?                        % S SK r S SKrS SKJrJr  S SKrS SKJr  S SKJ	r	  S SK
JrJr  S SKJr  S SKJr  / SQrSq\S   \S	'   S
\S\4S jrS\S
\SS4S jr " S S5      r " S S\5      rS
\S\R4                  4S jrg)    N)OptionalUnion)Tensor)_get_device_handle
DeviceMesh)DTensorSpec)Shard)is_rng_supported_meshmanual_seedOffsetBasedRNGTracker_RNGStateTracker_rng_trackerdevice_meshreturnc                     [        U R                  5      nU(       a  [        US5      (       a  g[        R                  " SU R                   S35        g)a  Checks if the current device of ``device_mesh`` supports DTensor's random APIs.
Currently DTensor Random APIs only supports cuda/cuda-like devices. We suggest
users call this API to test the availability before using our random APIs.

Args:
    device_mesh (:class:`DeviceMesh`): The device mesh on which we check if the
        random ops APIs are supported.

Returns:
    A bool value. True if ``device_mesh`` supports DTensor Random APIs; False otherwise.

.. warning::
    Currently we only support correct RNG on cuda/cuda-like devices.
set_rng_stateTz:DTensor random operators may not have complete support on  device meshF)r   device_typehasattrwarningswarn)r   device_handles     t/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/distributed/tensor/_random.pyr
   r
      sM     '{'>'>?M@@ 	HI`I`Haamn	
     seedc                     [        U5      (       d%  [        R                  " SUR                   S35        g[        (       d
  [        USS9qUR                  5       b  [        R                  U 5        g[        S5      e)a9  Sets the seed for generating random numbers for the calling rank.

Args:
    seed (int): The desired seed.
    device_mesh (:class:`DeviceMesh`): The device mesh to set the seed. It is
        required that the ``device_mesh`` include the calling rank. This is
        to ensure that the SPMD region maintains a synchronous RNG state, which
        means no ranks should be initialized with values other than ``seed``.

Returns:
    None

.. warning::
    :func:`manual_seed` does not check the ``seed`` value correctness. Users must
    ensure on their own that the value passed in is the desired ``seed`` for ranks
    within ``device_mesh``.
    If ``device_mesh`` is a sub-mesh and the calling rank is not a part of it,
    ``manual_seed`` will throw an error.
    Current implementation only supports a GPU device mesh.
z7DTensor manual_seed() may not have complete support on r   NF)run_state_synczmanual_seed requires the current rank to be a part of the device mesh otherwise DTensor RNG state on the rank will not be initialized and the behavior of DTensor random ops is undefined.)	r
   r   r   r   r   r   get_coordinate_manual_seedRuntimeError)r   r   s     r   r   r   2   s|    * !--))*,8	
 	
 <,[O !!#/!!$'?
 	
r   c                       \ rS rSrSrS\R                  4S jr\S\	\
\4   4S j5       r\S\4S j5       r\R                  SS	 j5       rS\4S
 jrS\
S\4S jrS\
S\SS4S jrS\4S jrS\SS4S jrSrg)r   _   ay  
_RNGStateTracker stores Random Number Generator (RNG) state (a ByteTensor object)
in a dict, mapping from a corresponding tag to each state tensor. It also provides
a set of convenient utility methods to help access/modify the state tensors. The most
important interface is _distribute_region which will be used when DTensor executes
a random op (an operator that calls RNG).
devicec                 2   Xl         [        U R                   R                  5      U l        U R                  (       a  U R                  R	                  5       (       d/  [        U R                  R                   SUR                   S35      e0 U l        SU l	        g )Nz( instantiation requires the presence of z device but couldn't find.T)
_devicer   type_device_handleis_availabler    	__class____name___states_use_distribute_region)selfr#   s     r   __init___RNGStateTracker.__init__h   s}    01B1BC##(;(;(H(H(J(J>>**++S;;-9; 
 +-&*#r   r   c                     U R                   $ N)r+   r-   s    r   
rng_states_RNGStateTracker.rng_statest   s    ||r   c                     U R                   $ r1   r,   r2   s    r   distribute_region_enabled*_RNGStateTracker.distribute_region_enabledx   s    ***r   Nc                     Xl         g r1   r6   )r-   values     r   r7   r8   |   s    &+#r   c                     XR                   ;   $ r1   )r3   )r-   names     r   rng_state_is_sync"_RNGStateTracker.rng_state_is_sync   s    &&r   r<   c                     XR                   ;  a$  [        U R                  R                   SU 35      eU R                   U   SS R	                  [
        R                  S9n[        UR                  5       5      $ )N  does not have random state for r      dtype	r3   r    r)   r*   viewtorchint64intitem)r-   r<   seed_tensors      r   get_seed_RNGStateTracker.get_seed   sp    &>>**++KD6R  t,a277ekk7J;##%&&r   r   c                 T   [         R                  " U/[         R                  SS9R                  [         R                  5      n[         R                  " S/[         R                  SS9R                  [         R                  5      n[         R
                  " X4/5      U R                  U'   g )NcpurC   r#   r   )rF   tensoruint64rE   uint8catr3   )r-   r<   r   rJ   offset_tensors        r   set_seed_RNGStateTracker.set_seed   st    llD6eLQQKK
 aSUKPPKK
 !&		;*F Gr   specc                     g r1    )r-   rW   s     r   _distribute_region#_RNGStateTracker._distribute_region       r   parallel_seedc                     g r1   rY   r-   r]   s     r   r   _RNGStateTracker._manual_seed   r\   r   )r%   r'   r+   r,   )r   N)r*   
__module____qualname____firstlineno____doc__rF   r#   r.   propertydictstrr   r3   boolr7   setterr=   rH   rK   rU   r   rZ   r   __static_attributes__rY   r   r   r   r   _   s    
+u|| 
+ Df-   +4 + + %%, &,' ''S 'S 'HS H H H{ # $ r   c                      ^  \ rS rSrSr SS\S\4U 4S jjjrS\SS4S	 jr	\
R                  S
\4S j5       rS\S\4S jrS\S\SS4S jrS
\SS4S jrS
\S\SS4S jrS\\   S\\   S\4S jrSrU =r$ )r      z
This subclass of ``_RNGStateTracker`` defines the default policy of how RNG states
should be shared and synchronized among all ranks to respect the semantics of DTensor
random operators.

note: _RNGStateTracker only supports cuda/cuda-like device.
r   r   c                   > [         TU ]  [        US95        U R                  c   eU R                  R
                  S:X  a9  [        U R                  R                   SU R                  R
                   S35      eU R                  R                  5       R                  U R                  5      nU(       a  [        R                  " US5        UR                  S5      U R                  S'   g )N)r   rN   zG instantiation requires the presence of CUDA/CUDA-like/XPU device. Got z	 instead.r   parallel-rng)superr.   _resolve_devicer'   r%   r&   r    r)   r*   get_rng_statetodist	broadcastr3   )r-   r   r   	rng_stater)   s       r   r.   OffsetBasedRNGTracker.__init__   s    
 	[AB""...<<%>>**+ ,226,,2C2C1DIO 
 ''557::4<<H	NN9a(*3,,u*='r   r]   r   Nc                 (    U R                  SU5        g )Nrn   )rU   r_   s     r   r   "OffsetBasedRNGTracker._manual_seed   s    nm4r   rW   c              #   4  #    U R                  S5      (       d  [        S5      eU R                  (       a  U R                  S5      nU R	                  U5        [
        R                  R                  U R                  /U R                  R                  S9   U R                  c   eU R                  R                  U R                  S   5         S v   U R                  X5         S S S 5        g S v   g ! U R                  X5        f = f! , (       d  f       g = f7f)Nrn   zlOffsetBasedRNGTracker requires the random state to be synchronized before entering into a distribute region!)devicesr   )r=   r    r7   
get_offset_set_pre_op_offsetrF   randomfork_rngr%   r&   r'   r   r3   _set_post_op_offset)r-   rW   
old_offsets      r   rZ   (OffsetBasedRNGTracker._distribute_region   s      %%n55< 
 ))8J##D)&&DLL4E4E '  **666##11$//.2QR? ,,T>   ,,T> s6   BD8DC1D#D1DD
DDr<   c                     XR                   ;  a$  [        U R                  R                   SU 35      eU R                   U   SS  R	                  [
        R                  S9n[        UR                  5       5      $ )Nr@   rA   rB   rD   )r-   r<   rT   s      r   r{    OffsetBasedRNGTracker.get_offset   sp    &>>**++KD6R  .388u{{8K=%%'((r   offsetc                 Z   XR                   ;  a$  [        U R                  R                   SU 35      eU R                   U   SS n[        R
                  " U/[        R                  SS9R                  [        R                  5      n[        R                  " X4/5      U R                   U'   g )Nr@   r   rA   rN   rO   )
r3   r    r)   r*   rF   rP   rQ   rE   rR   rS   )r-   r<   r   rJ   rT   s        r   
set_offset OffsetBasedRNGTracker.set_offset   s    &>>**++KD6R  t,a2fXU\\%PUUKK
 !&		;*F Gr   c                 b   UR                   nUR                  nS/UR                  -  n[        UR                  5       Hc  u  pV[        U[        5      (       d  M  UR                  nXG   S:X  a  U/XG'   M7  XG   n[        U[        5      (       d   eUR                  U5        Me     UR                  5       n	U	c   eUR                   n
/ n/ nU H  nSnSn[        U[        5      (       aM  U Vs/ s H  nU	U   PM
     nnU Vs/ s H  nU
U   PM
     nn[        UU5       H  u  nnUU-  U-   nUU-  nM     UR                  U5        UR                  U5        M     U R                  X5      n[        U5      n[        UR                  5       HT  u  nn[        U[        5      (       d  M  UR                  U5      nUR                  nUR                  X'   USSS9S   UU'   MV     SSKJn  U" U5      nU R#                  S5      nUU-  S	-   S
-  S
-  nU R%                  SUU-   5        gs  snf s  snf )aO	  Set the starting RNG offset for current device's local shard before actual
op execution. The pre_op_offset value should start from the current RNG offset
and increment by the size of local shard until it reaches the size of the whole
DTensor. For different ranks that hold the same DTensor shard, their pre_op_offset
will be the same.

Args:
    spec (:class:`DTensorSpec`): the spec of the DTensor object on which
        we prepare the offset for running random ops.

Returns:
    None

.. warning::
    Note that, current implementation does not consider DTensor's continguity.

Example:
    take a DTensor of shape [8, 16] as an example. Assume that the DTensor
    is placed on a device mesh with placements ([Shard(1), Replicate(), Shard(0)]),
    and the mesh is:
        [[[0, 1], [2, 3]], [[4, 5], [6, 7]]]
    ``spec.mesh.get_coordinate()`` provides the coordinate of the current rank
    in the mesh. For example, the coordinate of rank 5 is (1, 0, 1).

    Another concept to introduce besides rank coordinate is shard coordinate.
    Each rank holds a local shard of the DTensor. In the example, the DTensor
    is partitioned into 4 [4, 8] shards. The first shard has 2 replicas and
    rank 0 (coord (0, 0, 0)) and rank 2 (coord (0, 1, 0)) have 1 replica each.
    That being said, the local shard on rank 0 and rank 2 correspond to the same
    shard of the DTensor. To denote each DTensor shard, we use a shard coordinate
    (in the example, it will be a tuple (i, j) where shard (i, j) has the slice
    DTensor[4 * i : 4 * (i + 1), 8 * j : 8 * (j + 1)], 0 <= i < 2, 0 <= j < 2).

    Once we have rank coordinate and shard coordinate, we can calculate on each rank
    what shard of the DTensor the rank holds, with the help of dim_map. The dim_map
    of the above DTensor is [2, 0] so the shard coordinate of a rank with rank coord
    (x, y, z) is simply (z, x) by taking(rank_coord[dim_map[0]],rank_coord[dim_map[1]]).
    Following this calculation,
    rank 0 and rank 2 holds the shard of coord (0, 0);
    rank 1 and rank 3 holds the shard of coord (0, 1);
    rank 4 and rank 6 holds the shard of coord (1, 0);
    rank 5 and rank 7 holds the shard of coord (1, 1);

    The last value to calculate before obtaining the starting offset is the shard linear index.
    The starting offset for each rank will be its shard_linear_index * local_tensor_numel.
Nr      F)return_offsetprodrn         )shapemeshndim	enumerate
placements
isinstancer	   dimlistappendr   zip_calc_shard_linear_idxsize_local_shard_size_on_dim#torch.distributed.tensor._ops.utilsr   r{   r   )r-   rW   dtensor_shaper   dim_mapi	placement	shard_dimmesh_dim_listmesh_coordinate	mesh_sizeshard_idx_by_dimtotal_num_shards_by_dimmesh_dim	shard_idxtotal_num_shardsd
rank_coord
num_shardsidxr   shard_linear_idxlocal_size_on_rank_0mesh_dim_sizer   
local_sizecurrent_offsetoffset_incrs                               r   r|   (OffsetBasedRNGTracker._set_pre_op_offset   sN   ^ 

yy 13tdii/?%doo6LA)U++%MM	%+*+G&$+$6M%mT::::!((+ 7 --/***JJ	"$HI (D)):BC(Qoa0(
C4<=HqilH
=!$Z!<IC )D 03 6I$,$ "= ##I.#**+;<     66

  $M2'8NC)U++ $		#%MM	2;2T2T!,!"'	 3U 3
 3$Y/	 9 	=./
 8 (*4q8Q>B(DEM D=s   ,H'H,r   c                 v    UR                   nSSKJn  U" U5      nUS-   S-  S-  nU R                  SX%-   5        g)a  Sets the RNG to a synchronized state after running the local random op. Every
rank should set its RNG offset to `old_offset + DTensor.numel()` where old_offset is
the offset before calling `set_pre_op_offset` i.e. the offset before running DTensor
random ops.

Args:
    spec (:class:`DTensorSpec`): the spec of the DTensor object on which
        we post-process the offset for running random ops.

Returns:
    None
r   r   r   r   rn   N)r   r   r   r   )r-   rW   r   r   r   numels         r   r   )OffsetBasedRNGTracker._set_post_op_offset_  s@     

<]# q 1$
(:;r   shard_coord
shard_sizec                 r    SnSn[        [        U5      [        U5      5       H  u  pVX5U-  -  nXF-  nM     U$ )Nr   r   )r   reversed)r-   r   r   r   shard_coord_strider   r   s          r   r   ,OffsetBasedRNGTracker._calc_shard_linear_idxv  sN     Xk2HZ4HIIC&8 88& J  r   rY   )T)r*   ra   rb   rc   rd   r   rh   r.   rH   r   
contextlibcontextmanagerr   rZ   rg   r{   r   r|   r   r   r   rj   __classcell__)r)   s   @r   r   r      s      $>> > >*5# 5$ 5 {  0)s )s )
Hs 
HC 
HD 
HrF{ rFt rFh< < < <.
 9
 26s)
 	
  
 r   r   c                     U R                   n[        U5      nUc   eU R                  5       UR                  5       -  n[        R
                  " U SUS 35      $ )N:r   )r   r   get_rankdevice_countrF   r#   )r   r   r   
device_idxs       r   rp   rp     s[    ))K&{3M$$$%%'-*D*D*FFJ<<;-qA788r   )r   r   typingr   r   rF   torch.distributeddistributedrs   r   torch.distributed.device_meshr   r   &torch.distributed.tensor._dtensor_specr   (torch.distributed.tensor.placement_typesr	   __all__r   __annotations__rh   r
   rH   r   r   r   r#   rp   rY   r   r   <module>r      s      "     H > : .2h)* 1z d 4*
c *

 *
t *
Z: :zd , d N9 9 9r   