
    shB0                     $   S SK r S SKJr  S SKJrJr  S SKJrJrJ	r	J
r
  S SKrS SKJr  S SKJrJr  S SKJr  / SQr\
" S	S
S9rS\4S jr\" S5       " S S\\   5      5       r\" S5       " S S\5      5       r\" S5       " S S\\   5      5       rg)    N)defaultdict)IteratorSized)AnyCallableOptionalTypeVar)functional_datapipe)	DataChunkIterDataPipe)_check_unpickable_fn)BatcherIterDataPipeGrouperIterDataPipeUnBatcherIterDataPipe_T_coT)	covariantnamec                     U S;   ac  [         R                  " SU  SU  S3[        SS9  [        [        R
                  R                  R                  R                  R                  U 5      $ [        S[         SU  35      e)	N)SHARDING_PRIORITIESShardingFilterIterDataPipe`zc` from `torch.utils.data.datapipes.iter.grouping` is going to be removed in PyTorch 2.1Please use `z5` from the `torch.utils.data.datapipes.iter.sharding`   )category
stacklevelzmodule z has no attribute )warningswarnFutureWarninggetattrtorchutilsdata	datapipesitershardingAttributeError__name__)r   s    |/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/utils/data/datapipes/iter/grouping.py__getattr__r(      s{    DDv & UW"		
 u{{''1166??FF
78*,>tfE
FF    batchc                      ^  \ rS rSr% Sr\\S'   \\S'   \\S'   S\	4S\S\S\S\
\	   SS	4
U 4S
 jjjrS\\	   4S jrS\4S jrSrU =r$ )r   %   a  
Creates mini-batches of data (functional name: ``batch``).

An outer dimension will be added as ``batch_size`` if ``drop_last`` is set to ``True``, or ``length % batch_size`` for the
last batch if ``drop_last`` is set to ``False``.

Args:
    datapipe: Iterable DataPipe being batched
    batch_size: The size of each batch
    drop_last: Option to drop the last batch if it's not full
    wrapper_class: wrapper to apply onto each batch (type ``List``) before yielding,
        defaults to ``DataChunk``

Example:
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> dp = IterableWrapper(range(10))
    >>> dp = dp.batch(batch_size=3, drop_last=True)
    >>> list(dp)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
datapipe
batch_size	drop_lastFwrapper_classreturnNc                 l   > US:  d   S5       e[         TU ]  5         Xl        X l        X0l        X@l        g )Nr   z+Batch size is required to be larger than 0!)super__init__r-   r.   r/   r0   )selfr-   r.   r/   r0   	__class__s        r'   r4   BatcherIterDataPipe.__init__A   s7     A~LLL~ $"*r)   c              #   "  #    / nU R                    HD  nUR                  U5        [        U5      U R                  :X  d  M/  U R	                  U5      v   / nMF     [        U5      S:  a&  U R
                  (       d  U R	                  U5      v   g g g 7fNr   )r-   appendlenr.   r0   r/   )r5   r*   xs      r'   __iter__BatcherIterDataPipe.__iter__O   sx     ALLO5zT__,((//	 
 u:>>>((// " s   :B ABc                 L   [        U R                  [        5      (       ae  U R                  (       a"  [	        U R                  5      U R
                  -  $ [	        U R                  5      U R
                  -   S-
  U R
                  -  $ [        [        U 5      R                   S35      e)N   z# instance doesn't have valid length)	
isinstancer-   r   r/   r;   r.   	TypeErrortyper&   r5   s    r'   __len__BatcherIterDataPipe.__len__Z   sv    dmmU++~~4==)T__<<DMM*T__<q@T__TTtDz2233VWXXr)   )r.   r-   r/   r0   )r&   
__module____qualname____firstlineno____doc__r   __annotations__intboolr   rC   r4   r   r=   rE   __static_attributes____classcell__)r6   s   @r'   r   r   %   s    , OO  )2++ + 	+
 I+ 
+ +	0(9- 	0Y Y Yr)   r   unbatchc                   :    \ rS rSrSrS
S\S\4S jjrS rS r	Sr
g	)r   d   a  
Undos batching of data (functional name: ``unbatch``).

In other words, it flattens the data up to the specified level within a batched DataPipe.

Args:
    datapipe: Iterable DataPipe being un-batched
    unbatch_level: Defaults to ``1`` (only flattening the top level). If set to ``2``,
        it will flatten the top two levels, and ``-1`` will flatten the entire DataPipe.

Example:
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> source_dp = IterableWrapper([[[0, 1], [2]], [[3, 4], [5]], [[6]]])
    >>> dp1 = source_dp.unbatch()
    >>> list(dp1)
    [[0, 1], [2], [3, 4], [5], [6]]
    >>> dp2 = source_dp.unbatch(unbatch_level=2)
    >>> list(dp2)
    [0, 1, 2, 3, 4, 5, 6]
r-   unbatch_levelc                     Xl         X l        g Nr-   rS   )r5   r-   rS   s      r'   r4   UnBatcherIterDataPipe.__init__|   s     *r)   c              #   x   #    U R                    H$  nU R                  XR                  S9 S h  vN   M&     g  N	7f)NrS   )r-   _diverS   )r5   elements     r'   r=   UnBatcherIterDataPipe.__iter__   s0     }}Gzz'9K9KzLLL %Ls   ,:8
:c              #     #    US:  a  [        S5      eUS:X  aB  [        U[        [        45      (       a"  U H  nU R	                  USS9 S h  vN   M     g Uv   g US:X  a  Uv   g [        U[        [        45      (       a$  U H  nU R	                  X2S-
  S9 S h  vN   M     g [        SU R                   S35      e Nq N$7f)Nz unbatch_level must be -1 or >= 0rY   r   r@   zunbatch_level z" exceeds the depth of the DataPipe)
ValueErrorrA   listr   rZ   
IndexErrorrS   )r5   r[   rS   items       r'   rZ   UnBatcherIterDataPipe._dive   s     2?@@B'D)#455#D#zz$bzAAA $ aM'D)#455#D#zz$a>OzPPP $ !$T%7%7$88Z[  B Qs%   ACB?ACC#CCrV   N)r@   )r&   rG   rH   rI   rJ   r   rL   r4   r=   rZ   rN    r)   r'   r   r   d   s%    ,+ +c +Mr)   r   groupbyc                       \ rS rSrSrSSSSSS.S\\   S\\/\4   S	\	S
\
S\\
   S\\
   S\	4S jjrS rS rSS jrS rS rS rSrg)r      a	  
Groups data from IterDataPipe by keys from ``group_key_fn``, yielding a ``DataChunk`` with batch size up to ``group_size``.

(functional name: ``groupby``).

The samples are read sequentially from the source ``datapipe``, and a batch of samples belonging to the same group
will be yielded as soon as the size of the batch reaches ``group_size``. When the buffer is full,
the DataPipe will yield the largest batch with the same key, provided that its size is larger
than ``guaranteed_group_size``. If its size is smaller, it will be dropped if ``drop_remaining=True``.

After iterating through the entirety of source ``datapipe``, everything not dropped due to the buffer capacity
will be yielded from the buffer, even if the group sizes are smaller than ``guaranteed_group_size``.

Args:
    datapipe: Iterable datapipe to be grouped
    group_key_fn: Function used to generate group key from the data of the source datapipe
    keep_key: Option to yield the matching key along with the items in a tuple,
        resulting in `(key, [items])` otherwise returning [items]
    buffer_size: The size of buffer for ungrouped data
    group_size: The max size of each group, a batch is yielded as soon as it reaches this size
    guaranteed_group_size: The guaranteed minimum group size to be yielded in case the buffer is full
    drop_remaining: Specifies if the group smaller than ``guaranteed_group_size`` will be dropped from buffer
        when the buffer is full

Example:
    >>> import os
    >>> # xdoctest: +SKIP
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> def group_fn(file):
    ...     return os.path.basename(file).split(".")[0]
    >>> source_dp = IterableWrapper(["a.png", "b.png", "a.json", "b.json", "a.jpg", "c.json"])
    >>> dp0 = source_dp.groupby(group_key_fn=group_fn)
    >>> list(dp0)
    [['a.png', 'a.json', 'a.jpg'], ['b.png', 'b.json'], ['c.json']]
    >>> # A group is yielded as soon as its size equals to `group_size`
    >>> dp1 = source_dp.groupby(group_key_fn=group_fn, group_size=2)
    >>> list(dp1)
    [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
    >>> # Scenario where `buffer` is full, and group 'a' needs to be yielded since its size > `guaranteed_group_size`
    >>> dp2 = source_dp.groupby(group_key_fn=group_fn, buffer_size=3, group_size=3, guaranteed_group_size=2)
    >>> list(dp2)
    [['a.png', 'a.json'], ['b.png', 'b.json'], ['a.jpg'], ['c.json']]
Fi'  N)keep_keybuffer_size
group_sizeguaranteed_group_sizedrop_remainingr-   group_key_fnrh   ri   rj   rk   rl   c                4   [        U5        Xl        X l        X0l        X@l        [        [        5      U l        SU l        XPl	        S U l
        Ub  Ub  SUs=:  a  U::  d   e   eXPl
        Ub  Ub  SUs=:  a  U::  d   e   eX`l
        Xpl        [        U l        g r9   )r   r-   rm   rh   max_buffer_sizer   r`   buffer_elementscurr_buffer_sizerj   rk   rl   r   r0   )r5   r-   rm   rh   ri   rj   rk   rl   s           r'   r4   GrouperIterDataPipe.__init__   s     	\* ( *7B47H !$%)"!k&=z0[00000)3& ,)a2G.U:.UUU.UUU)>&,&r)   c                    S nSnS nU R                   R                  5        H;  n[        U R                   U   5      U:  d  M!  [        U R                   U   5      nUnM=     U R                  bB  X R                  :  a3  U R                  (       d"  [        S[        U R                   U   5      5      eU R                  b  X R                  :  a  U R                   U   nU =R                  U-  sl        U R                   U	 U$ )Nr   zFailed to group items)rp   keysr;   rk   rl   RuntimeErrorstrrq   )r5   biggest_keybiggest_sizeresult_to_yieldfindkeys        r'   _remove_biggest_key'GrouperIterDataPipe._remove_biggest_key   s    ++002G4''01L@"4#7#7#@A% 3 &&2999'''T-A-A+-N)O 
 &&.999"22;?O-  -r)   c              #     #    U R                    GHE  nU R                  U5      nU R                  U   R                  U5        U =R                  S-  sl        U R
                  b  U R
                  [        U R                  U   5      :X  an  U R                  U R                  U   5      nU R                  (       a  X#4OUv   U =R                  [        U R                  U   5      -  sl        U R                  U	 U R                  U R                  :X  d  GM  U R                  5       nUc  GM  U R                  U5      nU R                  (       a  X#4OUv   GMH     [        U R                  R                  5       5       Hc  nU R                  U R                  R                  U5      5      nU =R                  [        U5      -  sl        U R                  (       a  X#4OUv   Me     g 7f)Nr@   )r-   rm   rp   r:   rq   rj   r;   r0   rh   ro   r{   tuplert   pop)r5   r<   keyresultry   s        r'   r=   GrouperIterDataPipe.__iter__  s    A##A&C  %,,Q/!!Q&!*t#$$S)C 0 *.););D<P<PQT<U)V'+}}sm&@%%T-A-A#-F)GG%((-$$(<(<<"&":":"<".!//@F+/==3-fD% ( --2245C''(<(<(@(@(EFF!!S[0!#'==3-f< 6s   DG$G$+B9G$c                 :    SU l         [        [        5      U l        g r9   )rq   r   r`   rp   rD   s    r'   resetGrouperIterDataPipe.reset  s     !*40r)   c           
      4   U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  4
n[        R                  b  [        R                  " U5      $ U$ rU   )r-   rm   rh   ro   rj   rk   rl   r0   _valid_iterator_id_number_of_samples_yieldedr   getstate_hookr5   states     r'   __getstate__ GrouperIterDataPipe.__getstate__   s    MMMM  OO&&##++
 %%1--e44r)   c                     Uu
  U l         U l        U l        U l        U l        U l        U l        U l        U l        U l	        SU l
        [        [        5      U l        g r9   )r-   rm   rh   ro   rj   rk   rl   r0   r   r   rq   r   r`   rp   r   s     r'   __setstate__ GrouperIterDataPipe.__setstate__1  sZ     	
MM O&#+ !*40r)   c                 8    U R                   R                  5         g rU   )rp   clearrD   s    r'   __del__GrouperIterDataPipe.__del__A  s    ""$r)   )r   r   rp   rq   r-   rl   rm   rj   rk   rh   ro   r0   )r1   N)r&   rG   rH   rI   rJ   r   r   r   r   rM   rL   r   r4   r{   r=   r   r   r   r   rN   rd   r)   r'   r   r      s    *b  $(/3$'u%' w|,'
 ' ' SM'  (}' '<:=41"1 %r)   r   )r   collectionsr   collections.abcr   r   typingr   r   r   r	   (torch.utils.data.datapipes.iter.shardingr   %torch.utils.data.datapipes._decoratorr
   #torch.utils.data.datapipes.datapiper   r   'torch.utils.data.datapipes.utils.commonr   __all__r   rv   r(   r   r   r   rd   r)   r'   <module>r      s     # + 3 3 / E G H 	4(Gc G W;Y,y1 ;Y ;Y| Y1L 1  1h Yh%,y1 h%  h%r)   