
    sh"                         S SK Jr  S SKJr  S SKJr  S SKJrJ	r	J
r
   " S S\5      r " S S\
5      r " S	 S
\	5      r " S S\5      rSr " S S\R"                  5      rSr " S S\R(                  5      rg)    )cuda)array)deviceufunc)UFuncMechanismGeneralizedUFuncGUFuncCallStepsc                   4    \ rS rSrSrS rS rS	S jrS rSr	g)
CUDAUFuncDispatcher   z<
Invoke the CUDA ufunc specialization for the given inputs.
c                 2    Xl         UR                  U l        g N)	functions__name__)selftypes_to_retty_kernelspyfuncs      j/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/vectorizers.py__init__CUDAUFuncDispatcher.__init__   s    /    c                 B    [         R                  U R                  X5      $ )af  
*args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
       Cannot mix the two types in one call.

**kws:
    stream -- cuda stream; when defined, asynchronous mode is used.
    out    -- output array. Can be a numpy array or DeviceArrayBase
              depending on the input arguments.  Type must match
              the input arguments.
)CUDAUFuncMechanismcallr   )r   argskwss      r   __call__CUDAUFuncDispatcher.__call__   s     "&&t~~tAAr   c                    [        [        U R                  R                  5       5      S   5      S:X  d   S5       eUR                  S:X  d   S5       eUR
                  S   n/ nUS:X  a  [        S5      eUS:X  a  US   $ U=(       d    [        R                  " 5       nUR                  5          [        R                  R                  R                  U5      (       a  UnO[        R                  " X5      nU R                  XTU5      n[        SUR                   S9nUR#                  XrS	9  S S S 5        US   $ ! , (       d  f       WS   $ = f)
Nr      zmust be a binary ufunc   zmust use 1d arrayzReduction on an empty array.)r    )dtypestream)lenlistr   keysndimshape	TypeErrorr   r#   auto_synchronizecudadrvdevicearrayis_cuda_ndarray	to_device_CUDAUFuncDispatcher__reducenp_arrayr!   copy_to_host)r   argr#   ngpu_memsmemoutbufs           r   reduceCUDAUFuncDispatcher.reduce   s'   4++-.q12a7 	A :A 	A7xx1}111}IIaL6:;;!Vq6M (4;;=$$&||''77<<nnS1--v6C4syy1CS0 ' 1v '& 1vs   -A=D77
E	c                    UR                   S   nUS-  S:w  ab  UR                  US-
  5      u  pVUR                  U5        UR                  U5        U R                  XRU5      nUR                  U5        U " XvXsS9$ UR                  US-  5      u  pUR                  U5        UR                  U	5        U " XXS9  US-  S:  a  U R                  XU5      $ U$ )Nr   r   r    )r6   r#   )r(   splitappendr/   )
r   r5   r4   r#   r3   fatcutthincutr6   leftrights
             r   __reduceCUDAUFuncDispatcher.__reduce;   s    IIaLq5A:!iiA.OFOOF#OOG$--&9COOC #==))AF+KDOOD!OOE"$6Avz}}TV<<r   )r   r   Nr   )
r   
__module____qualname____firstlineno____doc__r   r   r8   r/   __static_attributes__ r   r   r
   r
      s    (B:r   r
   c                   R   ^  \ rS rSrS/rU 4S jrS rS rS rS r	S r
S	 rS
rU =r$ )_CUDAGUFuncCallStepsS   _streamc                 T   > [         TU ]  XX45        UR                  SS5      U l        g )Nr#   r   )superr   getrM   )r   ninnoutr   kwargs	__class__s        r   r   _CUDAGUFuncCallSteps.__init__X   s$    D1zz(A.r   c                 .    [         R                  " U5      $ r   r   is_cuda_arrayr   objs     r   is_device_array$_CUDAGUFuncCallSteps.is_device_array\       !!#&&r   c                     [         R                  R                  R                  U5      (       a  U$ [         R                  " U5      $ r   r   r+   r,   r-   as_cuda_arrayrY   s     r   as_device_array$_CUDAGUFuncCallSteps.as_device_array_   5     <<##33C88J!!#&&r   c                 >    [         R                  " XR                  S9$ Nr"   )r   r.   rM   )r   hostarys     r   r.   _CUDAGUFuncCallSteps.to_devicei   s    ~~gll;;r   c                 8    UR                  X R                  S9nU$ re   )r1   rM   )r   devaryrf   r6   s       r   to_host_CUDAGUFuncCallSteps.to_hostl   s    !!',,!?
r   c                 @    [         R                  " XU R                  S9$ N)r(   r!   r#   )r   device_arrayrM   )r   r(   r!   s      r   allocate_device_array*_CUDAGUFuncCallSteps.allocate_device_arrayp   s      u$,,OOr   c                 <    UR                  X R                  S9" U6   g re   )forallrM   )r   kernelnelemr   s       r   launch_kernel"_CUDAGUFuncCallSteps.launch_kernels   s    eLL148r   )rM   )r   rD   rE   rF   	__slots__r   r[   ra   r.   rj   ro   ru   rH   __classcell__rT   s   @r   rK   rK   S   s6    I/''<P9 9r   rK   c                   D   ^  \ rS rSrU 4S jr\S 5       rS rS rSr	U =r
$ )CUDAGeneralizedUFuncw   c                 F   > UR                   U l         [        TU ]	  X5        g r   )r   rO   r   )r   	kernelmapenginer   rT   s       r   r   CUDAGeneralizedUFunc.__init__x   s    +r   c                     [         $ r   )rK   r   s    r   _call_steps CUDAGeneralizedUFunc._call_steps|   s    ##r   c                 ~    [         R                  R                  R                  USUR                  UR
                  S9$ NrC   r(   stridesr!   gpu_data)r   r+   r,   DeviceNDArrayr!   r   )r   aryr(   s      r   _broadcast_scalar_input,CUDAGeneralizedUFunc._broadcast_scalar_input   s9    ||''55E>B<?II?B|| 6 M 	Mr   c                     [        U5      [        UR                  5      -
  nSU-  UR                  -   n[        R                  R
                  R                  UUUR                  UR                  S9$ r   )	r$   r(   r   r   r+   r,   r   r!   r   )r   r   newshapenewax
newstridess        r   _broadcast_add_axis(CUDAGeneralizedUFunc._broadcast_add_axis   sa    HCII.E\CKK/
||''55H>H<?II?B|| 6 M 	Mr   )r   )r   rD   rE   rF   r   propertyr   r   r   rH   rx   ry   s   @r   r{   r{   w   s.    , $ $MM Mr   r{   c                   F    \ rS rSrSrSrS rS rS rS r	S r
S	 rS
 rSrg)r      z
Provide CUDA specialization
r   c                 (    UR                  X#S9" U6   g re   )rr   )r   funccountr#   r   s        r   launchCUDAUFuncMechanism.launch   s    E)40r   c                 .    [         R                  " U5      $ r   rW   rY   s     r   r[   "CUDAUFuncMechanism.is_device_array   r]   r   c                     [         R                  R                  R                  U5      (       a  U$ [         R                  " U5      $ r   r_   rY   s     r   ra   "CUDAUFuncMechanism.as_device_array   rc   r   c                 *    [         R                  " XS9$ re   )r   r.   )r   rf   r#   s      r   r.   CUDAUFuncMechanism.to_device   s    ~~g55r   c                      UR                  US9$ re   )r1   )r   ri   r#   s      r   rj   CUDAUFuncMechanism.to_host   s    ""&"11r   c                 ,    [         R                  " XUS9$ rm   )r   rn   )r   r(   r!   r#   s       r   ro   (CUDAUFuncMechanism.allocate_device_array   s      u&IIr   c                    [        [        U5      5       Vs/ s H+  nX1R                  :  d  UR                  U   X#   :w  d  M)  UPM-     nn[        U5      [        UR                  5      -
  nS/U-  [	        UR
                  5      -   nU H  nSXc'   M	     [        R                  R                  R                  UUUR                  UR                  S9$ s  snf )Nr   r   )ranger$   r'   r(   r%   r   r   r+   r,   r   r!   r   )r   r   r(   ax
ax_differs
missingdimr   s          r   broadcast_device#CUDAUFuncMechanism.broadcast_device   s    #(U#4 5#4Rxx2%)3 #4
 5 Z#cii.0
#
"T#++%66BGK  ||''55E>E<?II?B|| 6 M 	M5s   (CCrI   N)r   rD   rE   rF   rG   DEFAULT_STREAMr   r[   ra   r.   rj   ro   r   rH   rI   r   r   r   r      s3     N1''62JMr   r   z
def __vectorized_{name}({args}, __out__):
    __tid__ = __cuda__.grid(1)
    if __tid__ < __out__.shape[0]:
        __out__[__tid__] = __core__({argitems})
c                   <    \ rS rSrS rS rS rS r\S 5       r	Sr
g)	CUDAVectorize   c                     [         R                  " USSS9" U R                  5      nX"R                  UR                     R
                  R                  4$ )NT)deviceinline)r   jitr   	overloadsr   	signaturereturn_type)r   sigcudevfns      r   _compile_coreCUDAVectorize._compile_core   sA    ((3tD9$++F))#((3==IIIIr   c                 ~    U R                   R                  R                  5       nUR                  [        US.5        U$ )N__cuda____core__)r   __globals__copyupdater   )r   corefnglbls      r   _get_globalsCUDAVectorize._get_globals   s5    {{&&++-!') 	*r   c                 .    [         R                  " U5      $ r   r   r   r   fnobjr   s      r   _compile_kernelCUDAVectorize._compile_kernel   s    xxr   c                 B    [        U R                  U R                  5      $ r   )r
   r~   r   r   s    r   build_ufuncCUDAVectorize.build_ufunc   s    "4>>4;;??r   c                     [         $ r   )vectorizer_stager_sourcer   s    r   _kernel_templateCUDAVectorize._kernel_template   s    ''r   rI   N)r   rD   rE   rF   r   r   r   r   r   r   rH   rI   r   r   r   r      s,    J@ ( (r   r   zy
def __gufunc_{name}({args}):
    __tid__ = __cuda__.grid(1)
    if __tid__ < {checkedarg}:
        __core__({argitems})
c                   6    \ rS rSrS rS r\S 5       rS rSr	g)CUDAGUFuncVectorize   c                     [         R                  " U R                  U R                  5      n[	        U R
                  UU R                  S9$ )N)r~   r   r   )r   GUFuncEngineinputsig	outputsigr{   r~   r   )r   r   s     r   r   CUDAGUFuncVectorize.build_ufunc   s9    ))$--H#dnn+1+/;;8 	8r   c                 :    [         R                  " U5      " U5      $ r   r   r   s      r   r   #CUDAGUFuncVectorize._compile_kernel   s    xx}U##r   c                     [         $ r   )_gufunc_stager_sourcer   s    r   r   $CUDAGUFuncVectorize._kernel_template   s    $$r   c                     [         R                  " USS9" U R                  5      nU R                  R                  R                  5       nUR                  [         US.5        U$ )NT)r   r   )r   r   r   py_funcr   r   r   )r   r   r   glblss       r   r    CUDAGUFuncVectorize._get_globals   sN    #d+DKK8((--/$"(* 	+r   rI   N)
r   rD   rE   rF   r   r   r   r   r   rH   rI   r   r   r   r      s%    8$ % %r   r   N)numbar   numpyr   r0   
numba.cudar   numba.cuda.deviceufuncr   r   r   objectr
   rK   r{   r   r   DeviceVectorizer   r   DeviceGUFuncVectorizer   rI   r   r   <module>r      s     # "5 5H& HV!9? !9HM+ M2-M -M` (K// (2 +;; r   