
    shR)                        S SK Jr  S SKJrJrJr  S SKrS SKJr  S SKJ	r	  S SK
Jr  \" S5        SSSS	.S
SS\\\\	4   \\\\	4      4   S\\\\4      S\\\\4      S\S\4S jjj5       r\" S5      S\\\R"                     \R$                  4   S\\\\4   \\\4   4   4S j5       rS\\\	S4   \\	   4   S\S\	4S jrg)    )Sequence)AnyOptionalUnionN)Tensor)
exposed_inz
torch.funcTFtie_weightsstrictmoduleztorch.nn.Moduleparameter_and_buffer_dictsargskwargsr
   r   c          	         [        U[        5      (       a  UnGO[        U[        5      (       a  [        S U 5       5      (       d  [	        S5      eU VVs/ s H  owR                  5         H  oPM     M     n	nn0 n
U	 H  nU
R                  US5      nUS-   X'   M     U
R                  5        VVs/ s H  u  pUS:  d  M  UPM     nnn[        U5      S:  a  [	        U S35      eU VVVs0 s H  owR                  5         H  u  pX_M	     M      nnnnO[	        S[        U5       35      e[        R                  R                  R                  U UUUUUS9$ s  snnf s  snnf s  snnnf )a  Performs a functional call on the module by replacing the module parameters
and buffers with the provided ones.

.. note:: If the module has active parametrizations, passing a value in the
    :attr:`parameter_and_buffer_dicts` argument with the name set to the regular parameter
    name will completely disable the parametrization.
    If you want to apply the parametrization function to the value passed
    please set the key as ``{submodule_name}.parametrizations.{parameter_name}.original``.

.. note:: If the module performs in-place operations on parameters/buffers, these will be reflected
    in the ``parameter_and_buffer_dicts`` input.


     Example::

        >>> a = {'foo': torch.zeros(())}
        >>> # xdoctest: +SKIP
        >>> mod = Foo()  # does self.foo = self.foo + 1
        >>> print(mod.foo)  # tensor(0.)
        >>> functional_call(mod, a, torch.ones(()))
        >>> print(mod.foo)  # tensor(0.)
        >>> print(a['foo'])  # tensor(1.)

.. note:: If the module has tied weights, whether or not functional_call respects the tying is determined by the
    tie_weights flag.

    Example::

        >>> a = {'foo': torch.zeros(())}
        >>> # xdoctest: +SKIP
        >>> mod = Foo()  # has both self.foo and self.foo_tied which are tied. Returns x + self.foo + self.foo_tied
        >>> print(mod.foo)  # tensor(1.)
        >>> mod(torch.zeros(()))  # tensor(2.)
        >>> functional_call(mod, a, torch.zeros(()))  # tensor(0.) since it will change self.foo_tied too
        >>> functional_call(mod, a, torch.zeros(()), tie_weights=False)  # tensor(1.)--self.foo_tied is not updated
        >>> new_a = {'foo': torch.zeros(()), 'foo_tied': torch.zeros(())}
        >>> functional_call(mod, new_a, torch.zeros()) # tensor(0.)

An example of passing multiple dictionaries

.. code-block:: python

        a = ({'weight': torch.ones(1, 1)}, {'buffer': torch.zeros(1)})  # two separate dictionaries
        mod = nn.Bar(1, 1)  # return self.weight @ x + self.buffer
        print(mod.weight)  # tensor(...)
        print(mod.buffer)  # tensor(...)
        x = torch.randn((1, 1))
        print(x)
        functional_call(mod, a, x)  # same as x
        print(mod.weight)  # same as before functional_call


And here is an example of applying the grad transform over the parameters
of a model.

.. code-block:: python

    import torch
    import torch.nn as nn
    from torch.func import functional_call, grad

    x = torch.randn(4, 3)
    t = torch.randn(4, 3)
    model = nn.Linear(3, 3)

    def compute_loss(params, x, t):
        y = functional_call(model, params, x)
        return nn.functional.mse_loss(y, t)

    grad_weights = grad(compute_loss)(dict(model.named_parameters()), x, t)

.. note:: If the user does not need grad tracking outside of grad transforms, they can detach all of the
    parameters for better performance and memory usage

    Example::

        >>> detached_params = {k: v.detach() for k, v in model.named_parameters()}
        >>> grad_weights = grad(compute_loss)(detached_params, x, t)
        >>> grad_weights.grad_fn  # None--it's not tracking gradients outside of grad

    This means that the user cannot call ``grad_weight.backward()``. However, if they don't need autograd tracking
    outside of the transforms, this will result in less memory usage and faster speeds.

Args:
    module (torch.nn.Module): the module to call
    parameters_and_buffer_dicts (Dict[str, Tensor] or tuple of Dict[str, Tensor]): the parameters that will be used in
        the module call. If given a tuple of dictionaries, they must have distinct keys so that all dictionaries can
        be used together
    args (Any or tuple): arguments to be passed to the module call. If not a tuple, considered a single argument.
    kwargs (dict): keyword arguments to be passed to the module call
    tie_weights (bool, optional): If True, then parameters and buffers tied in the original model will be treated as
        tied in the reparameterized version. Therefore, if True and different values are passed for the tied
        parameters and buffers, it will error. If False, it will not respect the originally tied parameters and
        buffers unless the values passed for both weights are the same. Default: True.
    strict (bool, optional): If True, then the parameters and buffers passed in must match the parameters and
        buffers in the original module. Therefore, if True and there are any missing or unexpected keys, it will
        error. Default: False.

Returns:
    Any: the result of calling ``module``.
c              3   B   #    U  H  n[        U[        5      v   M     g 7fN)
isinstancedict).0ds     t/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_functorch/functional_call.py	<genexpr>"functional_call.<locals>.<genexpr>}   s     K0J1:a&&0J   zFExpected all elements of parameter_and_buffer_dicts to be dictionariesr      zL appeared in multiple dictionaries; behavior of functional call is ambiguouszTExpected parameter_and_buffer_dicts to be a dict, or a list/tuple of dicts, but got r	   )r   r   r   all
ValueErrorkeysgetitemslentypennutils	stateless_functional_call)r   r   r   r   r
   r   parameters_and_buffersr   kall_keysall_keys_countervkeynrepeated_keyss                  r   functional_callr/      s   ^ ,d33!;	.	9	9K0JKKKX   :L9!668aA8A9L+-A $$Q*A"#a%  ,<+A+A+CM+Cq1u+CM}! /!mn  5"
4QADA4 	 "
 678:
 	

 88.. /  ' M
 N
"
s   "E2E!E!.%E'modelsreturnc           
        ^^ [        U 5      S:X  a  [        S5      e[        S U  5       5      (       d"  [        S U  5       5      (       d  [        S5      e[        U S   5      m[        U4S jU  5       5      (       d  [        S5      eU  Vs/ s H  n[	        UR                  5       5      PM     nnUS    V^s0 s H#  mT[        [        U4S jU 5       5      T5      _M%     nnU  Vs/ s H  n[	        UR                  5       5      PM     nnUS    V^s0 s H#  mT[        [        U4S	 jU 5       5      T5      _M%     nnXF4$ s  snf s  snf s  snf s  snf )
a  stack_module_state(models) -> params, buffers

Prepares a list of torch.nn.Modules for ensembling with :func:`vmap`.

Given a list of ``M`` ``nn.Modules`` of the same class, returns two dictionaries
that stack all of their parameters and buffers together, indexed by name.
The stacked parameters are optimizable (i.e. they are new leaf nodes in the
autograd history that are unrelated to the original parameters and can be
passed directly to an optimizer).

Here's an example of how to ensemble over a very simple model:

.. code-block:: python

    num_models = 5
    batch_size = 64
    in_features, out_features = 3, 3
    models = [torch.nn.Linear(in_features, out_features) for i in range(num_models)]
    data = torch.randn(batch_size, 3)

    def wrapper(params, buffers, data):
        return torch.func.functional_call(models[0], (params, buffers), data)

    params, buffers = stack_module_state(models)
    output = vmap(wrapper, (0, 0, None))(params, buffers, data)

    assert output.shape == (num_models, batch_size, out_features)

When there's submodules, this follows state dict naming conventions

.. code-block:: python

    import torch.nn as nn
    class Foo(nn.Module):
        def __init__(self, in_features, out_features):
            super().__init__()
            hidden = 4
            self.l1 = nn.Linear(in_features, hidden)
            self.l2 = nn.Linear(hidden, out_features)

        def forward(self, x):
            return self.l2(self.l1(x))

    num_models = 5
    in_features, out_features = 3, 3
    models = [Foo(in_features, out_features) for i in range(num_models)]
    params, buffers = stack_module_state(models)
    print(list(params.keys()))  # "l1.weight", "l1.bias", "l2.weight", "l2.bias"

.. warning::
    All of the modules being stacked together must be the same (except for
    the values of their parameters/buffers). For example, they should be in the
    same mode (training vs eval).
r   z7stack_module_state: Expected at least one model, got 0.c              3   8   #    U  H  oR                   v   M     g 7fr   trainingr   ms     r   r   %stack_module_state.<locals>.<genexpr>   s     +Fq

F   c              3   B   #    U  H  oR                   (       + v   M     g 7fr   r4   r6   s     r   r   r8      s     2R6azz>>6r   zLstack_module_state: Expected all models to have the same training/eval mode.c              3   @   >#    U  H  n[        U5      T:H  v   M     g 7fr   )r"   )r   r7   
model0_typs     r   r   r8      s     5ftAw*$fs   z@stack_module_state: Expected all models to be of the same class.c              3   ,   >#    U  H	  oT   v   M     g 7fr    )r   paramsr(   s     r   r   r8      s     'K
fq	
   c              3   ,   >#    U  H	  oT   v   M     g 7fr   r>   )r   buffersr(   s     r   r   r8      s     'N+w
+r@   )	r!   RuntimeErrorr   r"   r   named_parametersconstruct_stacked_leaftuplenamed_buffers)r0   model
all_paramsr(   r?   all_buffersrB   r<   s      `   @r   stack_module_staterK      sf   t 6{aTUU+F+++s2R62R/R/RZ
 	
 fQiJ5f555N
 	
 ?EEfU$u--/0fJE AA 	
!%'K
'K"KQOO   =CCF54++-.FKC QA 	
!%'N+'N"NPQRR  
 ? F Ds   #E:*E*#E*Etensors.namec                     [        S U  5       5      n[        S U  5       5      nU(       d  U(       d  [        SU S35      e[        R                  " U 5      nU(       a  UR	                  5       R                  5       nU$ )Nc              3   8   #    U  H  oR                   v   M     g 7fr   requires_gradr   ts     r   r   )construct_stacked_leaf.<locals>.<genexpr>   s     =WOOWr9   c              3   B   #    U  H  oR                   (       + v   M     g 7fr   rP   rR   s     r   r   rT      s     B'Q00'r   z	Expected z0 from each model to have the same .requires_grad)r   rC   torchstackdetachrequires_grad_)rL   rM   all_requires_gradnone_requires_gradresults        r   rE   rE      sr     =W==B'BB%7vMN
 	
 [[!F//1M    )NN)collections.abcr   typingr   r   r   rV   torch.nnr#   r   torch._functorch.utilsr   r   strrF   boolr/   Module
ModuleListrK   listrE   r>   r]   r   <module>rg      sY   $ ' '    - L )-'+	O OO %d3;&7$sF{BS9T&T UO 5e$
%O T#s(^$	O O O Od LO(299%r}}45O
4S>4S>)*O Od5%tF|34<?r]   