import inspect
import os
import warnings
from concurrent.futures import Future
from enum import Enum
from typing import cast, Optional, Union

from typing_extensions import deprecated

import torch
import torch.distributed as dist
from torch.distributed._state_dict_utils import (
    _copy_state_dict,
    _create_cpu_state_dict,
)
from torch.distributed.checkpoint._async_executor import _AsyncCheckpointExecutor
from torch.distributed.checkpoint._async_process_executor import (
    _ProcessBasedAsyncCheckpointExecutor,
)
from torch.distributed.checkpoint._async_thread_executor import (
    _ThreadBasedAsyncCheckpointExecutor,
)
from torch.distributed.checkpoint._storage_utils import _storage_setup
from torch.distributed.checkpoint.default_planner import DefaultSavePlanner
from torch.distributed.checkpoint.logger import _dcp_method_logger
from torch.distributed.checkpoint.metadata import Metadata, STATE_DICT_TYPE
from torch.distributed.checkpoint.planner import SavePlan, SavePlanner
from torch.distributed.checkpoint.staging import AsyncStager
from torch.distributed.checkpoint.stateful import Stateful
from torch.distributed.checkpoint.storage import StorageWriter
from torch.distributed.distributed_c10d import _get_default_group

from .utils import _api_bc_check, _DistWrapper, _profile


__all__ = ["save_state_dict", "save", "async_save", "AsyncCheckpointerType"]


class AsyncCheckpointerType(Enum):
    """Enum for async checkpointer type."""

    THREAD = "thread"
    PROCESS = "process"


@deprecated(
    "`save_state_dict` is deprecated and will be removed in future versions. "
    "Please use `save` instead.",
    category=FutureWarning,
)
def save_state_dict(
    state_dict: STATE_DICT_TYPE,
    storage_writer: StorageWriter,
    process_group: Optional[dist.ProcessGroup] = None,
    coordinator_rank: int = 0,
    no_dist: bool = False,
    planner: Optional[SavePlanner] = None,
) -> Metadata:
    """This method is deprecated. Please switch to 'save'."""
    storage_writer.reset()

    with _profile():
        return _save_state_dict(
            state_dict,
            storage_writer,
            process_group,
            coordinator_rank,
            no_dist,
            planner,
        )


@_dcp_method_logger(log_exceptions=True)
@_api_bc_check
def save(
    state_dict: STATE_DICT_TYPE,
    *,
    checkpoint_id: Union[str, os.PathLike, None] = None,
    storage_writer: Optional[StorageWriter] = None,
    planner: Optional[SavePlanner] = None,
    process_group: Optional[dist.ProcessGroup] = None,
    no_dist: bool = False,
) -> Metadata:
    """
Save a distributed model in SPMD style.

This function is different from ``torch.save()`` as it handles
``ShardedTensor`` and ``DTensor`` by having each rank save only their local shards.

For each ``Stateful`` object (having both a ``state_dict`` and a ``load_state_dict``),
save will call ``state_dict`` before serialization.

.. warning::
    There are no guarantees of backwards compatibility across PyTorch versions
    for saved state_dicts.

.. warning::
    If using the `process_group` argument, make sure that only its ranks
    call `save` and that all data in the state_dict belongs to it.

.. note::
    When saving a checkpoint for FSDP's `ShardingStrategy.HYBRID_SHARD`, only one of
    the shard groups should call `save`, and the corresponding process group
    needs to be passed in.

.. note::
    If no process group is available, this function assumes the intention is to save the
    state_dict in the local process.

.. note::
    Rank 0 is assumed to be the coordinator rank.


Args:
    state_dict (Dict[str, Any]): The state_dict to save.
    checkpoint_id (Union[str, os.PathLike, None]):
        The ID of this checkpoint instance. The meaning of the checkpoint_id
        depends on the storage. It can be a path to a folder or to a file.
        It can also be a key if the storage is a key-value store.
        (Default: ``None``)
    storage_writer (Optional[StorageWriter]):
        Instance of StorageWriter used to perform writes. If this is not
        specified, DCP will automatically infer the writer based on the
        checkpoint_id. If checkpoint_id is also None, an exception will
        be raised. (Default: ``None``)
    planner (Optional[SavePlanner]):
        Instance of SavePlanner. If this is not specified, the default
        planner will be used. (Default: ``None``)
    process_group (Optional[ProcessGroup]):
        ProcessGroup to be used for cross-rank synchronization.
        (Default: ``None``)
    no_dist (bool):
        If ``True``, this function will assume the intent is to save
        a checkpoint without using cross-rank synchronization.
        (Default: ``False``)

Returns:
    Metadata: Metadata object for the saved checkpoint.

Example:
    >>> # xdoctest: +SKIP
    >>> my_model = MyModule()

    >>> state_dict = {"model": my_model}

    >>> fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(
    ...     "/checkpoint/1"
    ... )
    >>> torch.distributed.checkpoint.save(
    >>>     state_dict=state_dict,
    >>>     storage_writer=fs_storage_writer,
    >>> )
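    >>>
    >>> # The same pattern works for any ``Stateful`` object (anything exposing
    >>> # ``state_dict`` / ``load_state_dict``). A sketch, assuming ``my_optim``
    >>> # is a torch.optim optimizer for ``my_model``:
    >>> torch.distributed.checkpoint.save(
    >>>     state_dict={"model": my_model, "optim": my_optim},
    >>>     storage_writer=fs_storage_writer,
    >>> )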

.. note::
    ``save`` uses collectives to coordinate writes across ranks.
    For NCCL-based process groups, internal tensor representations of
    objects must be moved to the GPU device before communication takes place.
    In this case, the device used is given by ``torch.cuda.current_device()``
    and it is the user's responsibility to ensure that this is set so that
    each rank has an individual GPU, via ``torch.cuda.set_device()``.
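
.. note::
    A minimal sketch of that device setup, assuming one process per GPU with
    ``rank`` supplied by the launcher (e.g. ``torchrun``):

    >>> # xdoctest: +SKIP
    >>> torch.cuda.set_device(rank)  # give this rank its own GPU before any collectives
    >>> torch.distributed.init_process_group(backend="cpu:gloo,cuda:nccl")
    >>> torch.distributed.checkpoint.save(state_dict, checkpoint_id="/checkpoint/1")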
    """
    torch._C._log_api_usage_once("torch.distributed.checkpoint.save")

    no_dist = no_dist or not dist.is_available() or not dist.is_initialized()
    if no_dist:
        warnings.warn(
            "torch.distributed is disabled, unavailable or uninitialized, "
            "assuming the intent is to save in a single process."
        )

    with _profile():
        storage_writer = cast(
            StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False)
        )

        return _save_state_dict(
            state_dict=_stateful_to_state_dict(state_dict),
            storage_writer=storage_writer,
            process_group=process_group,
            no_dist=no_dist,
            planner=planner,
        )


@_dcp_method_logger(log_exceptions=True)
def async_save(
    state_dict: STATE_DICT_TYPE,
    *,
    checkpoint_id: Union[str, os.PathLike, None] = None,
    storage_writer: Optional[StorageWriter] = None,
    planner: Optional[SavePlanner] = None,
    process_group: Optional[dist.ProcessGroup] = None,
    async_checkpointer_type: AsyncCheckpointerType = AsyncCheckpointerType.THREAD,
) -> Future:
    """Asynchronous version of ``save``. This function first stages the ``state_dict``
onto staging storage (defaults to CPU memory), and then calls ``save`` in a separate thread.

.. warning::
    This feature is experimental and subject to change.

Args:
    state_dict (Dict[str, Any]): The state_dict to save.
    checkpoint_id (Union[str, os.PathLike, None]):
        The ID of this checkpoint instance. The meaning of the checkpoint_id
        depends on the storage. It can be a path to a folder or to a file.
        It can also be a key if the storage is a key-value store.
        (Default: ``None``)
    storage_writer (Optional[StorageWriter]):
        Instance of StorageWriter used to perform 'stage' and 'save'. If
        this is not specified, DCP will automatically infer the writer based on the
        checkpoint_id. If checkpoint_id is also None, an exception will
        be raised. (Default: ``None``)
    planner (Optional[SavePlanner]):
        Instance of SavePlanner. If this is not specified, the default
        planner will be used. (Default: ``None``)
    process_group (Optional[ProcessGroup]):
        ProcessGroup to be used for cross-rank synchronization.
        (Default: ``None``)
    async_checkpointer_type (AsyncCheckpointerType):
        Whether to run the checkpoint save in a background thread or a separate
        background process. (Default: ``AsyncCheckpointerType.THREAD``)

Returns:
    Future: A future holding the resultant Metadata object from ``save``.

Example:
    >>> # xdoctest: +SKIP
    >>> my_model = MyModule()

    >>> state_dict = {"model": my_model}

    >>> fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(
    ...     "/checkpoint/1"
    ... )
    >>> checkpoint_future = torch.distributed.checkpoint.async_save(
    >>>     state_dict=state_dict,
    >>>     storage_writer=fs_storage_writer,
    >>> )
    >>>
    >>> # ... do some work ...
    >>>
    >>> checkpoint_future.result()
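    >>>
    >>> # A sketch of the same save using the process-based executor rather
    >>> # than the default background thread; ``AsyncCheckpointerType`` is the
    >>> # enum defined in this module:
    >>> checkpoint_future = torch.distributed.checkpoint.async_save(
    >>>     state_dict=state_dict,
    >>>     storage_writer=fs_storage_writer,
    >>>     async_checkpointer_type=AsyncCheckpointerType.PROCESS,
    >>> )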

z'torch.distributed.checkpoint.async_savecpuzfA CPU backend must be enabled for async save; try initializing process group with 'cpu:gloo,cuda:nccl'Fr>   )
type_check)r<   r2   r6   r3   )r@   rA   rB   rC   rD   rE   r   device_device_typesr   r   r   rH   
isinstancer   stager
   r	   r    r,   r   r   execute_save should_synchronize_after_executesynchronize_staging)
r1   r<   r2   r6   r3   rI   pgstaged_state_dictexecutorfs
             r/   r   r      s7   r 
HH  !JKt2244202LL2#3#33	
 u	
3
 ~nERN )4J.+..*00<2:>5I #&;&C&CC 	-.02  %%#%# & A 	>;//;;**,Hr.   c                     0 nU R                  5        H.  u  p#[        U[        5      (       a  UR                  5       OUX'   M0     U$ )z]Creates a shallow copy of `state_dict` where `state_dict` is called for each Stateful object.)itemsrO   r   r1   )r1   stateful_state_dictkeyelems       r/   rH   rH     sE    %%'	!+D(!;!;DOO 	  ( r.   c                   ^ ^^^^^ [         R                  R                  S5        [        X$(       + U5      mTc
  [	        5       mTc   eS m0 n[        TSS 5      =nb  XvS'   TR                  US'   [        S
0 UD6UUU U4S j5       n[        S
0 UD6UUU4S j5       n	TR                  SX5      m[        S
0 UD6UUU4S j5       n
[        S
0 UD6UU4S j5       nTR                  S	X5      $ )Nz,torch.distributed.checkpoint.save_state_dictr<   r3   c                    > Tc   eTR                  5       n S[        R                  " TR                  5      R                  ;  a3  [
        R                  " S5        TR                  TTR                  5        OTR                  TU TR                  S9  TR                  TR                  5        TR                  5       nTR                  U5      nU$ )Nstorage_metazThe function definition for SavePlanner.set_up_planner has been updated to include the storage_meta argument. Please update your implementation to include this parameter.)r1   r_   is_coordinator)r_   inspect	signatureset_up_planner
parametersrF   rG   r`   set_up_storage_writercreate_local_planprepare_local_plan)r_   
local_plandistWr6   r1   r2   s     r/   
local_step$_save_state_dict.<locals>.local_step>  s    """%224!2!273I3I!J!U!UUMM.
 "":u/C/CD""%)$33 # 
 	,,U-A-AB..0
#66zB
r.   c                 \   > Tc   eTR                  U 5      u  n mTR                  U 5      n U $ N)create_global_planprepare_global_plan)all_local_plansglobal_metadatar6   r2   s    r/   global_step%_save_state_dict.<locals>.global_stepU  s<     """+2+E+Eo+V((<<_Mr.   planc                     > Tc   eTR                  T5      n TR                  U T5      nUR                  5         UR                  5       $ rm   )finish_plan
write_datawaitvalue)final_local_plan
all_writescentral_planr6   r2   s     r/   rw   $_save_state_dict.<locals>.write_data`  sL    """"..|<#../?I
!!r.   c                 2   > Tc   eTR                  TU S9  T$ )N)metadataresults)finish)all_resultsrq   r2   s    r/   finish_checkpoint+_save_state_dict.<locals>.finish_checkpointi  s(    ***Lr.   writer%   )
r@   rA   rB   r   r   getattrgroupr   reduce_scatter
all_reduce)r1   r2   r3   r4   r5   r6   ckpt_kwargsckpt_idrj   rr   rw   r   r|   ri   rq   s   ``   `      @@@r/   r:   r:   (  s	    
HH  !OP5EFE$&OK>?DAAN'.O$',{{O$&+& ', &+& ' #11&*RL&+&" '" &+& '
 GZCCr.   )Nr   FN)@ra   osrF   concurrent.futuresr   enumr   typingr   r   r   typing_extensionsr   r@   torch.distributeddistributedrC   #torch.distributed._state_dict_utilsr	   r
   ,torch.distributed.checkpoint._async_executorr   4torch.distributed.checkpoint._async_process_executorr   3torch.distributed.checkpoint._async_thread_executorr   +torch.distributed.checkpoint._storage_utilsr   ,torch.distributed.checkpoint.default_plannerr   #torch.distributed.checkpoint.loggerr   %torch.distributed.checkpoint.metadatar   r   $torch.distributed.checkpoint.plannerr   r   $torch.distributed.checkpoint.stagingr   %torch.distributed.checkpoint.statefulr   $torch.distributed.checkpoint.storager   "torch.distributed.distributed_c10dr   utilsr   r   r   __all__r    FutureWarningProcessGroupintboolr   strPathLiker   r+   r   rH   r:   r%   r.   r/   <module>r      s    	  %  ( ( (    X G K B K F < : > A 8 8 MD  ! 26%)

!
 D--.
 	

 
 k"
 


. 4( 48.2%)15j
j
 bkk4/0j
 ]+	j

 k"j
 D--.j
 j
 j
  )j
Z 4( 48.2%)155J5Q5Qaa bkk4/0a ]+	a
 k"a D--.a 3a a )aH O  26%)GDGD!GD D--.GD 	GD
 GD k"GD GDr.   