
    sh7x                     F   S r SSKJrJr  SSKJr  SSKrSSKrSSKJ	r	  SSK
rSSKJrJr  SSKJrJr  SSKJr  SS	KJr  S
 rS rS r " S S\5      rS r " S S\5      r " S S\5      rS rS rS r S r! " S S\5      r" " S S\5      r# " S S\5      r$ " S S\S 9r%g)!zA
Implements custom ufunc dispatch mechanism for non-CPU devices.
    )ABCMetaabstractmethod)OrderedDictN)reduce)_BaseUFuncBuilderparse_identity)typessigutils)	signatureparse_signaturec                 d    X:X  a  U $ U S:X  a  U$ US:X  a  U $ [        SR                  X5      5      e)-
Raises
------
ValueError if broadcast fails
   zfailed to broadcast {0} and {1})
ValueErrorformat)abs     j/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/deviceufunc.py_broadcast_axisr      s=     	v	
a	
a:AA!GHH    c                 :   [        [        X/5      u  p[        U 5      [        U5      :  a  SU -   n [        U 5      [        U5      :  a  M  [        U 5      [        U5      :  a  SU-   n[        U 5      [        U5      :  a  M  [        S [        X5       5       5      $ )r   r   c              3   <   #    U  H  u  p[        X5      v   M     g 7fN)r   ).0r   r   s      r   	<genexpr>&_pairwise_broadcast.<locals>.<genexpr>1   s     G3F41&&3Fs   )maptuplelenzip)shape1shape2s     r   _pairwise_broadcastr%   #   s      01NF
f+F
# f+F
# f+F
# f+F
# G3v3FGGGr   c                      U (       d   eU S   nU SS n [        USS9 H  u  p4[        X5      nM     U$ ! [         a    [        SR                  W5      5      ef = f)r   r   r   N)startz!failed to broadcast argument #{0})	enumerater%   r   r   )	shapelistresultothersieachs        r   _multi_broadcastr.   4   ss     9q\Fqr]F q1GA(6F 2
   H<CCAFGGHs	   5 %Ac                       \ rS rSrSrSrSrS rS rS r	S r
S	 rS
 rS rS rS rS rS rS r\S 5       rS rS rS rS rSrg)UFuncMechanismG   z(
Prepare ufunc arguments for vectorize.
NFc                     Xl         X l        [        U R                  5      nS/U-  U l        / U l        SU l        S/U-  U l        g)zFNever used directly by user. Invoke by UFuncMechanism.call().
        N)typemapargsr!   argtypes	scalarposr   arrays)selfr3   r4   nargss       r   __init__UFuncMechanism.__init__N   sC     	DIIfunr   c                    [        U R                  5       H  u  pU R                  U5      (       a   U R                  U5      U R                  U'   M;  [        U[        [        [        [        R                  45      (       a  U R                  R                  U5        M  [        R                  " U5      U R                  U'   M     g)z!
Get all arguments in array form
N)r(   r4   is_device_arrayas_device_arrayr7   
isinstanceintfloatcomplexnpnumberr6   appendasarray)r8   r,   args      r   _fill_arraysUFuncMechanism._fill_arraysY   s      		*FA##C((!%!5!5c!:AC#ugryy!ABB%%a(!#CA +r   c                     [        U R                  5       HG  u  pUc  M
  [        US5      nUc   [        R                  " U5      R
                  nX0R                  U'   MI     g)z
Get dtypes
Ndtype)r(   r7   getattrrC   rF   rK   r5   )r8   r,   aryrK   s       r   _fill_argtypesUFuncMechanism._fill_argtypesf   sN      ,FAW-=JJsO11E#(a  -r   c                    / nU R                   (       a  U R                   H  n/ n[        [        UU R                  5      5       HK  u  nu  pVUc-  [
        R                  " U R                  U   5      R                  nUR                  Xe:H  5        MM     [        U5      (       d  M  UR                  U5        M     U(       dT  / nU R                   HB  n[        S [        X R                  5       5       5      nU(       d  M1  UR                  U5        MD     U(       d  [        S5      e[        U5      S:  a  [        S5      eUS   U l        g)z,Resolve signature.
May have ambiguous case.
Nc              3   D   #    U  H  u  pUS L =(       d    X:H  v   M     g 7fr    )r   formalactuals      r   r   4UFuncMechanism._resolve_signature.<locals>.<genexpr>   s,      "A"? '5f #)D."DF4D"D"?s    zNo matching version.  GPU ufunc requires array arguments to have the exact types.  This behaves like regular ufunc with casting='no'.r   zqFailed to resolve ufunc due to ambiguous signature. Too many untyped scalars. Use numpy dtype object to type tag.r   )r6   r3   r(   r"   r5   rC   rF   r4   rK   rE   all	TypeErrorr!   )r8   matches	formaltys	match_mapr,   rS   rT   all_matchess           r   _resolve_signature!UFuncMechanism._resolve_signatureq   s.    >>!\\		+4S9=6H ,I'A'~!#DIIaL!9!?!?$$V%56,I y>>NN9- * G!\\	! "A"%i"?"A A ;NN9- *  D E E w<! B C C
  
r   c                     U R                    H@  n[        R                  " U R                  U   /U R                  U   S9U R
                  U'   MB     U R
                  $ )z@Return the actual arguments
Casts scalar arguments to np.array.
rK   )r6   rC   arrayr4   r5   r7   )r8   r,   s     r   _get_actual_argsUFuncMechanism._get_actual_args   sI     AXXtyy|nDMM!<LMDKKN   {{r   c                    U Vs/ s H  o"R                   PM     nn[        U6 n[        U5       GH  u  pVUR                   U:X  a  M  U R                  U5      (       a  U R	                  Xd5      X'   MC  [        [        U5      5       Vs/ s H+  nXvR                  :  d  UR                   U   XG   :w  d  M)  UPM-     nn[        U5      [        UR                   5      -
  n	S/U	-  [        UR                  5      -   n
U H  nSX'   M	     [        R                  R                  R                  UUU
S9nU R                  U5      X'   GM     U$ s  snf s  snf )z)Perform numpy ufunc broadcasting
        r   )shapestrides)rd   r.   r(   r=   broadcast_deviceranger!   ndimlistre   rC   libstride_tricks
as_stridedforce_array_layout)r8   arysr   r)   rd   r,   rM   ax
ax_differs
missingdimre   strideds               r   
_broadcastUFuncMechanism._broadcast   s@    '++dWWd	+ ),oFAyyE! '',,"33C?DG 05SZ/@ "A/@%'88^%(YYr]ei%? #%/@J "A "%Uc#))n!<J cJ.ckk1BBG(&' ) !ff22==cDIFM > OG #55g>DG/ &2 9 ,"As   E	(E4Ec                     U R                  5         U R                  5         U R                  5         U R                  5       nU R	                  U5      $ )zKPrepare and return the arguments for the ufunc.
Does not call to_device().
)rH   rN   r\   ra   rs   )r8   rn   s     r   get_argumentsUFuncMechanism.get_arguments   sE     	!$$&t$$r   c                 4    U R                   U R                     $ )z)Returns (result_dtype, function)
        )r3   r5   r8   s    r   get_functionUFuncMechanism.get_function   s     ||DMM**r   c                     g)z2Is the `obj` a device array?
Override in subclass
FrR   r8   objs     r   r=   UFuncMechanism.is_device_array   s     r   c                     U$ )ziConvert the `obj` to a device array
Override in subclass

Default implementation is an identity function
rR   r}   s     r   r>   UFuncMechanism.as_device_array   s	     
r   c                     [        S5      e)zDHandles ondevice broadcasting

Override in subclass to add support.
z'broadcasting on device is not supportedNotImplementedErrorr8   rM   rd   s      r   rf   UFuncMechanism.broadcast_device   s    
 ""KLLr   c                     U$ )zCEnsures array layout met device requirement.

Override in sublcass
rR   )r8   rM   s     r   rm   !UFuncMechanism.force_array_layout   s	    
 
r   c                 n  ^^ UR                  SU R                  5      mUR                  SS5      nU(       a(  [        R                  " SSR	                  U5      -  5        U " X5      mTR                  5       nTR                  5       u  pVUS   R                  nUb'  TR                  U5      (       a  TR                  U5      nUU4S jnUS   R                  S:  a  U V	s/ s H
  o" U	5      PM     nn	/ n
S	nU HO  n	TR                  U	5      (       a  U
R                  U	5        S
nM.  TR                  U	TS9nU
R                  U5        MQ     US   R                  nUco  TR                  XTS9nU
R                  U/5        TR                  XmS   TU
5        U(       a  UR!                  U5      $ UR#                  5       R!                  U5      $ TR                  U5      (       aS  UR                  S:  a  U" U5      nUnU
R                  U/5        TR                  XmS   TU
5        UR!                  U5      $ UR                  U:X  d   eUR$                  U:X  d   eTR                  XTS9nU
R                  U/5        TR                  XmS   TU
5        UR#                  UTS9R!                  U5      $ s  sn	f )z1Perform the entire ufunc call mechanism.
        streamoutNzunrecognized keywords: %s, r   c                   > TR                   (       a  [        e U R                  5       $ ! [         aL    TR                  U 5      (       d  e TR	                  U T5      R                  5       nTR                  UT5      s $ f = fr   )SUPPORT_DEVICE_SLICINGr   ravelr=   to_host	to_device)r   hostarycrr   s     r   attempt_ravel*UFuncMechanism.call.<locals>.attempt_ravel  sr    (())9wwy & 9))!,, !jjF399;G<<889s   * AB ?B r   FT)r   )popDEFAULT_STREAMwarningswarnjoinrv   rz   rd   r=   r>   rh   rE   r   allocate_device_arrayextendlaunchreshapecopy_to_hostrK   )clsr3   r4   kwsr   restyfuncoutshaper   r   devarys
any_devicedev_ard   devoutr   r   s                  @@r   callUFuncMechanism.call   s   
 3#5#56ggeT"MM5		#FG !oo'7== ?r11#66$$S)C	9" 7<<!.23dM!$dD3 
A!!!$$q!!
Qv6u%  Q;--e6-JFNNF8$IId!Hfg6 ~~h// **,44X>>$$ xx!|#C(FNNF8$IId!Hfg6>>(++
 99%%%99%%%--e6-JFNNF8$IId!Hfg6&&s6&:BB8LLa 4s   !J2c                     [         e)z2Implement to device transfer
Override in subclass
r   )r8   r   r   s      r   r   UFuncMechanism.to_deviceK  
     "!r   c                     [         e)z0Implement to host transfer
Override in subclass
r   )r8   devaryr   s      r   r   UFuncMechanism.to_hostQ  r   r   c                     [         e)z2Implements device allocation
Override in subclass
r   )r8   rd   rK   r   s       r   r   $UFuncMechanism.allocate_device_arrayW  r   r   c                     [         e)z;Implements device function invocation
Override in subclass
r   )r8   r   countr   r4   s        r   r   UFuncMechanism.launch]  r   r   )r4   r5   r7   r6   r   r3   )__name__
__module____qualname____firstlineno____doc__r   r   r:   rH   rN   r\   ra   rs   rv   rz   r=   r>   rf   rm   classmethodr   r   r   r   r   __static_attributes__rR   r   r   r0   r0   G   s     N"	%1	))#VB%+
M WM WMr""""r   r0   c                     [        U [        R                  5      (       a  U R                  n [        R                  " [        U 5      5      $ r   )r?   r	   
EnumMemberrK   rC   str)tys    r   to_dtyper   d  s1    "e&&''XX88CGr   c                   \    \ rS rSrSS0 4S jr\S 5       rSS jrS rS r	S	 r
S
 rS rSrg)DeviceVectorizeij  NFc                     U(       a  [        S5      eU H8  nUS:X  a  [        R                  " S[        5        M&  SnUS-  n[	        Xe-  5      e   Xl        [        U5      U l        [        5       U l	        g )Ncaching is not supportednopythonz+nopython kwarg for cuda target is redundantzUnrecognized options. z3cuda vectorize target does not support option: '%s')
rW   r   r   RuntimeWarningKeyErrorpy_funcr   identityr   	kernelmap)r8   r   r   cachetargetoptionsoptfmts          r   r:   DeviceVectorize.__init__k  sp    677 Cj K,. /LLsy)) ! &x0$r   c                     U R                   $ r   r   ry   s    r   pyfuncDeviceVectorize.pyfunc{      ||r   c           	      2   [         R                  " U5      u  p#[        U/UQ76 nU R                  R                  nU R                  U R                  XE5      nU R                  U5      u  psU R                  U5      n[        [        R                  /U V	s/ s H  oS S  PM	     sn	US S  /-   Q76 n[        Xh5        USU-     n
U R                  X5      n[        S UR                   5       5      n[        U5      nX4U R                   [        U5      '   g s  sn	f )Nz__vectorized_%sc              3   8   #    U  H  n[        U5      v   M     g 7fr   )r   )r   ts     r   r   &DeviceVectorize.add.<locals>.<genexpr>  s     =}!(1++}   )r
   normalize_signaturer   r   r   _get_kernel_source_kernel_template_compile_core_get_globalsr	   voidexec_compile_kernelr    r4   r   r   )r8   sigr4   return_typedevfnsigfuncnamekernelsourcecorefnglblr   stagerkernel	argdtypesresdtypes                 r   addDeviceVectorize.add  s   $88=[040;;''..t/D/D/7C"00:  (

OT&:TtT&:k!n=M&MO\ '(23%%f2=x}}==	K(+3+;uY'( ';s   D
c                     [         er   r   ry   s    r   build_ufuncDeviceVectorize.build_ufunc      !!r   c                     [        [        UR                  5      5       Vs/ s H  nSU-  PM
     nn[        USR	                  U5      SR	                  S U 5       5      S9nUR
                  " S0 UD6$ s  snf )Nza%dr   c              3   ,   #    U  H
  nS U-  v   M     g7f)z%s[__tid__]NrR   r   r,   s     r   r   5DeviceVectorize._get_kernel_source.<locals>.<genexpr>  s     &G$Q}q'8$s   )namer4   argitemsrR   )rg   r!   r4   dictr   r   )r8   templater   r   r,   r4   fmtss          r   r   "DeviceVectorize._get_kernel_source  sl    #(SXX#78#7a	#784!YY&G$&GGI &&&	 9s   A4c                     [         er   r   r8   r   s     r   r   DeviceVectorize._compile_core  r   r   c                     [         er   r   )r8   r   s     r   r   DeviceVectorize._get_globals  r   r   c                     [         er   r   r8   fnobjr   s      r   r   DeviceVectorize._compile_kernel  r   r   )r   r   r   r   )r   r   r   r   r:   propertyr   r   r   r   r   r   r   r   rR   r   r   r   r   j  s@    &*%r '   <("'"""r   r   c                   L    \ rS rSrSS0 S4S jr\S 5       rSS jrS rS	 r	S
r
g)DeviceGUFuncVectorizei  NFrR   c                    U(       a  [        S5      eU(       a  [        S5      eUR                  SS5      (       d  [        S5      eU(       aU  SR                  UR                  5        Vs/ s H  n[	        U5      PM     sn5      nSn	[        U	R                  U5      5      eXl        [        U5      U l        X l	        [        U R                  5      u  U l        U l        [        5       U l        g s  snf )Nr   zwritable_args are not supportedr   Tznopython flag must be Truer   z3The following target options are not supported: {0})rW   r   r   keysreprr   r   r   r   r   r   inputsig	outputsigr   r   )
r8   r   r   r   r   r   writable_argskoptsr   s
             r   r:   DeviceGUFuncVectorize.__init__  s    677=>>   T2289999}/A/A/CD/C!d1g/CDEDGCCJJt,--&x0(7(G%t~ % Es   +C1c                     U R                   $ r   r   ry   s    r   r   DeviceGUFuncVectorize.pyfunc  r   r   c                 @   U R                    Vs/ s H  n[        U5      PM     nnU R                   Vs/ s H  n[        U5      PM     nn[        R                  " U5      u  pVU[
        R                  S 4;   nU(       d  [        SU SU S35      eU R                  R                  n[        U R                  UXHU5      n	U R                  U5      n
[        X5        U
SR                  US9   n[        [!        XSU-   5      5      nU R#                  U[%        U5      S9n[        U5      nU Vs/ s H,  n[&        R(                  " [+        UR(                  5      5      PM.     nn[%        US U*  5      n[%        UU* S  5      nUU4U R,                  U'   g s  snf s  snf s  snf )Nz7guvectorized functions cannot return values: signature z specifies z return typez__gufunc_{name})r   )r   )r  r!   r  r
   r   r	   nonerW   r   r   expand_gufunc_templater   r   r   r   ri   _determine_gufunc_outer_typesr   r    rC   rK   r   r   )r8   r   xindimsoutdimsr4   r   valid_return_typer   srcglblsr  outertysr   noutr   dtypesindtypes	outdtypess                      r   r   DeviceGUFuncVectorize.add  s   "&--0-Q#a&-0#'>>2>a3q6>2$88= (EJJ+==  )),[ F## $ $ <<(($T%:%:F%,> !!#&S'..H.=>5dW<LMN%%ex%A7|2:;(Q"((3qww<((;$(&$.)	#,f#4x ; 120 <s   FF,3Fc                     [         er   r   r  s      r   r   %DeviceGUFuncVectorize._compile_kernel  r   r   c                     [         er   r   r   s     r   r   "DeviceGUFuncVectorize._get_globals  r   r   )r   r  r   r  r   r   r   )r   r   r   r   r:   r  r   r   r   r   r   rR   r   r   r	  r	    s5    +/uB!'0  5@""r   r	  c              #      #    [        X5       Hc  u  p#[        U[        R                  5      (       a  UR	                  US-   S9v   M:  US:  a  [        S5      e[        R                  " USSS9v   Me     g 7f)Nr   )rh   r   z,gufunc signature mismatch: ndim>0 for scalarA)rK   rh   layout)r"   r?   r	   Arraycopyr   )argtysdimsatnds       r   r  r    sb     f#b%++&&''rAv'&&Av !OPP++BQs;; $s   A4A6c           
      r   X-   n[        [        U5      5       Vs/ s H  nSR                  U5      PM     nnSR                  SR                  U Vs/ s H  nSR                  U5      PM     sn5      5      n	[	        XqU5       V
VVs/ s H  u  pn[        XU5      PM     nnn
n[	        U[        U5      S UU[        U5      S 5       V
VVs/ s H  u  pn[        XU5      PM     nnn
nX-   nU R                  USR                  U5      U	SR                  U5      S9nU$ s  snf s  snf s  snnn
f s  snnn
f )z"Expand gufunc source template
    zarg{0}zmin({0})r   z{0}.shape[0]N)r   r4   
checkedargr   )rg   r!   r   r   r"   _gen_src_for_indexing)r   r  r  r   r5   argdimsr,   argnamesr   r3  arefadimsatypeinputsoutputsr   r  s                    r   r  r    sa    G,1#g,,?@,?q",?H@""4997?.A7?! /=.C.CA.F7?.A $B CJ ),Hh(GI(G$$u $D7(G  I *-Xc&kl-CW-5c&kl-C*EF*E%4 %T%8*E  F H
//xdii.A%/#'99X#6  8C J A.AIFs   D!D&D+D2c                 4    SR                  U [        X5      S9$ )Nz{aref}[{sliced}])r7  sliced)r   _gen_src_index)r7  r8  r9  s      r   r4  r4    s&    $$$,:5,H % J Jr   c                     U S:  a  SR                  S/S/U -  -   5      $ [        U[        R                  5      (       a  UR                  S-
  U :X  a  gg)Nr   ,__tid__:r   z__tid__:(__tid__ + 1))r   r?   r	   r,  rh   )r8  r9  s     r   r>  r>    sM    qyxxseem344	E5;;	'	'EJJNe,C 'r   c                   4    \ rS rSrSr\S 5       rS rS rSr	g)GUFuncEnginei  zRDetermine how to broadcast and execute a gufunc
base on input shape and signature
c                     U " [        U5      6 $ r   r   )r   r   s     r   from_signatureGUFuncEngine.from_signature  s    OI.//r   c                     Xl         X l        [        U R                   5      U l        [        U R                  5      U l        g r   )sinsoutr!   ninr   )r8   r  r  s      r   r:   GUFuncEngine.__init__"  s*    	txx=		N	r   c                 
   [        U5      U R                  :w  a  [        S5      e0 n/ n/ n[        [	        XR
                  5      5       H  u  nu  pgUS-  n[        U5      n[        U5      U:  a  Sn	[        X4-  5      eU(       a  Xh* S  n
US U*  nOSn
Un[        [	        X5      5       H8  u  nu  pU[        U5      -  nX;   a  X.   U:w  a  Sn	[        XU4-  5      eXU'   M:     UR                  U5        UR                  U
5        M     / nU R                   H;  n/ nU H  nUR                  X.   5        M     UR                  [        U5      5        M=     U Vs/ s H  n[        [        R                  US5      PM      nn[        R                  " U5      nUU   nS/U R                  -  n[        U5       H3  u  nnUU:w  d  M  US:X  d  US:X  a  SUU'   M!  S	n	[        U	US-   4-  5      e   [        XUUU5      $ s  snf )
Nz invalid number of input argumentr   z%arg #%d: insufficient inner dimensionrR   z$arg #%d: shape[%d] mismatch argumentFr   Tz!arg #%d: outer dimension mismatch)r!   rK  rW   r(   r"   rI  r   rE   rJ  r    r   operatormulrC   argmaxGUFuncSchedule)r8   ishapes	symbolmapouter_shapesinner_shapesargnrd   symbols
inner_ndimr   inner_shapeouter_shapeaxisdimsymoshapesoutsigoshapessizes	largest_iloopdimspinnedr,   ds                            r   scheduleGUFuncEngine.schedule*  s   w<488#>?? 	&/GXX0F&G"D"5AIDWJ5zJ&= w//#KL1#Lj[1 #$-c+.G$H jsK((# ~,D(d|);<<!$# %I ,,- 'H2 iiFFin- NN5=)	   6BB\a+\BIIe$		*488#l+DAqH}9R $F1I=C$SAE8^44 , d'8VLL Cs   %H )rK  r   rI  rJ  N)
r   r   r   r   r   r   rF  r:   rg  r   rR   r   r   rD  rD    s&     0 0#7Mr   rD  c                        \ rS rSrS rS rSrg)rQ  id  c                     Xl         X l        X0l        X@l        [	        [
        R                  US5      U l        XPl        U Vs/ s H  odU-   PM	     snU l	        g s  snf )Nr   )
parentrR  r^  rd  r   rN  rO  loopnre  output_shapes)r8   rk  rR  r^  rd  re  ra  s          r   r:   GUFuncSchedule.__init__e  sK     HLL(A6
4;<GqlG<<s   Ac                     SS K nSnU Vs/ s H  o3[        X5      4PM     nnUR                  [        U5      5      $ s  snf )Nr   )rR  r^  rd  rl  re  )pprintrL   pformatr   )r8   rp  attrsr  valuess        r   __str__GUFuncSchedule.__str__r  s>    C167Agd&'7~~d6l++ 8s   =)rR  rd  rl  r^  rm  rk  re  N)r   r   r   r   r:   rt  r   rR   r   r   rQ  rQ  d  s    =,r   rQ  c                   D    \ rS rSrS rS rS rS rS rS r	S r
S	 rS
rg)GeneralizedUFunciz  c                 *    Xl         X l        SU l        g )Ni   @)r   enginemax_blocksize)r8   r   ry  s      r   r:   GeneralizedUFunc.__init__{  s    "$r   c                    U R                  U R                  R                  U R                  R                  X5      nU R	                  UR
                  UR                  5      u  pEpgUR                  U5        UR                  XV5      nUR                  5       n	U R                  XYU5      n
UR                  XuR                  U
5        UR                  U5      $ r   )_call_stepsry  rK  r   	_scheduler:  r;  adjust_input_typesprepare_outputsprepare_inputsrs   launch_kernelrl  post_process_outputs)r8   r4   r   	callstepsr"  rg  r#  r   r;  r:  
parameterss              r   __call__GeneralizedUFunc.__call__  s    $$T[[__dkk6F6F%)0	04i//11-I$$X.++H@))+__Xw?

C--g66r   c                    U Vs/ s H  o3R                   PM     nnU R                  R                  U5      n[        S U 5       5      n U R                  U   u  px[        UR                  U5       H$  u  pU
c  M
  XR                   :w  d  M  [        S5      e   XeXx4$ s  snf ! [
         a%    U R                  U5      nU R                  U   u  px Nuf = f)Nc              3   8   #    U  H  oR                   v   M     g 7fr   r_   r   s     r   r   -GeneralizedUFunc._schedule.<locals>.<genexpr>  s     1&Q&r   zoutput shape mismatch)
rd   ry  rg  r    r   r   _search_matching_signaturer"   rm  r   )r8   r:  outsr   input_shapesrg  r"  r#  r   sched_shaper   s              r   r~  GeneralizedUFunc._schedule  s    )/0A0;;''5 1&11	9 $x 8I !$H$:$:D AK;))#; !899 !B 944) 1  	9 66x@H $x 8Iv	9s   B
B" ",CCc                     U R                   R                  5        H'  n[        S [        X!5       5       5      (       d  M%  Us  $    [	        S5      e)z
Given the input types in `idtypes`, return a compatible sequence of
types that is defined in `kernelmap`.

Note: Ordering is guaranteed by `kernelmap` being a OrderedDict
c              3   R   #    U  H  u  p[         R                  " X5      v   M     g 7fr   )rC   can_cast)r   rT   desireds      r   r   >GeneralizedUFunc._search_matching_signature.<locals>.<genexpr>  s%      =*;v ;;v//*;s   %'zno matching signature)r   r  rV   r"   rW   )r8   idtypesr   s      r   r  +GeneralizedUFunc._search_matching_signature  sM     >>&&(C =*-c*;= = =
 )
 344r   c                    UR                   S:  d   S5       eUR                  (       d  SOUR                   n/ n[        X!R                  5       Ha  u  pgU(       d4  UR                  S:X  a$  U R                  Xd5      nUR                  U5        M@  UR                  U R                  XdU5      5        Mc     / n	[        X1R                  5       H'  u  pU	R                  U
R                  " U/UQ76 5        M)     [        U5      [        U	5      -   $ )Nr   zzero looping dimensionr   )rl  rd  r"   rR  size_broadcast_scalar_inputrE   _broadcast_arrayr^  r   r    )r8   rg  paramsretvalsodim	newparamspcsr   
newretvalsretvalr`  s               r   rs   GeneralizedUFunc._broadcast  s    ~~!;#;;! ))qx~~	!1!12EA!&&A+55a>  (   !6!6q!CD 3 
!'+;+;<NFfnnT;F;< =Y%
"333r   c                    U4U-   nUR                   U:X  a  U$ [        UR                   5      [        U5      :  a?  U[        UR                   5      * S  UR                   :X  d   S5       eU R                  X5      $ UR                  " U6 $ )Nz+cannot add dim and reshape at the same time)rd   r!   _broadcast_add_axisr   )r8   rM   newdiminnerdimnewshapes        r   r  !GeneralizedUFunc._broadcast_array  s    9x'99 J ^c(m+S^O,-: >=>:++C:: ;;))r   c                     [        S5      e)Nzcannot add new axisr   )r8   rM   r  s      r   r  $GeneralizedUFunc._broadcast_add_axis  s    !"788r   c                     [         er   r   r   s      r   r  (GeneralizedUFunc._broadcast_scalar_input  r   r   )ry  r   rz  N)r   r   r   r   r:   r  r~  r  rs   r  r  r  r   rR   r   r   rw  rw  z  s*    %
75.54&* 9"r   rw  c                       \ rS rSrSr/ SQr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       rS	 rS
 rS rS rS rSrg)GUFuncCallStepsi  aJ  
Implements memory management and kernel launch operations for GUFunc calls.

One instance of this class is instantiated for each call, and the instance
is specific to the arguments given to the GUFunc call.

The base class implements the overall logic; subclasses provide
target-specific implementations of individual functions.
)r;  r:  _copy_result_to_hostc                     g)zImplement the kernel launchNrR   )r8   r   nelemr4   s       r   r  GUFuncCallSteps.launch_kernel      r   c                     g)zJ
Return True if `obj` is a device array for this target, False
otherwise.
NrR   r}   s     r   r=   GUFuncCallSteps.is_device_array  r  r   c                     g)zk
Return `obj` as a device array on this target.

May return `obj` directly if it is already on the target.
NrR   r}   s     r   r>   GUFuncCallSteps.as_device_array  r  r   c                     g)z;
Copy `hostary` to the device and return the device array.
NrR   )r8   r   s     r   r   GUFuncCallSteps.to_device  r  r   c                     g)zK
Allocate a new uninitialized device array with the given shape and
dtype.
NrR   )r8   rd   rK   s      r   r   %GUFuncCallSteps.allocate_device_array	  r  r   c           
        ^  UR                  S5      nUcJ  [        U5      XU-   4;  a7  S nSU" U5       SU" X-   5       SU" [        U5      5       S3n[        U5      eUb  [        U5      U:  a  [        S5      eU/U-  nSn/ T l        U Hb  n	T R                  U	5      (       a.  T R                  R                  T R                  U	5      5        S	nMG  T R                  R                  U	5        Md     [        U V
s/ s H  n
T R                  U
5      PM     sn
5      (       + nU=(       a    UT l	        U 4S
 jnU V
s/ s H
  o" U
5      PM     nn
US U T l
        XS  nU(       a  UT l        g g s  sn
f s  sn
f )Nr   c                     U  SSU S:g  -   3$ )Nz positional argumentra  r   rR   )ns    r   pos_argn*GUFuncCallSteps.__init__.<locals>.pos_argn  s    0Q0@AAr   zThis gufunc accepts z  (when providing input only) or z( (when providing input and output). Got .z<cannot specify argument 'out' as both positional and keywordTFc                 z   > TR                  U 5      (       a  TR                  nO[        R                  nU" U 5      $ r   )r=   r>   rC   rF   )r   convertr8   s     r   normalize_arg/GUFuncCallSteps.__init__.<locals>.normalize_arg;  s2    ##A&&..**1:r   )getr!   rW   r   r;  r=   rE   r>   anyr  r:  )r8   rK  r   r4   kwargsr;  r  msgall_user_outputs_are_hostoutputr   all_host_arraysr  normalized_argsunused_inputss   `              r   r:   GUFuncCallSteps.__init__  s   **U# ?s4yTz0CCB *(3- 9%%-cj%9$: ;--5c$i-@,ADC C. 3t9s? + , , i$&G
 %)!F##F++##D$8$8$@A,1)##F+  "D"IDq4#7#7#:D"IJJ &5 &?%> 	!
	 6::T=+T:%ds+ (-(DL - #J" ;s   =E/>E4c                 "   [        [        XR                  5      5       Hm  u  nu  p4X4R                  :w  d  M  [	        US5      (       d%  SR                  [        U5      5      n[        U5      eUR                  U5      U R                  U'   Mo     g)z
Attempt to cast the inputs to the required types if necessary
and if they are not device arrays.

Side effect: Only affects the elements of `inputs` that require
a type cast.
astypezNcompatible signature is possible by casting but {0} does not support .astype()N)	r(   r"   r:  rK   hasattrr   typerW   r  )r8   r"  r,   ityvalr  s         r   r  "GUFuncCallSteps.adjust_input_typesK  sq     's8[['ABMAziisH--<=CVDI=N #C.(!$CA Cr   c                     / n[        UR                  UU R                  5       H<  u  pEnUb  U R                  (       a  U R	                  XE5      nUR                  U5        M>     U$ )z
Returns a list of output parameters that all reside on the target
device.

Outputs that were passed-in to the GUFunc are used if they reside on the
device; other outputs are allocated as necessary.
)r"   rm  r;  r  r   rE   )r8   rg  r#  r;  rd   rK   r  s          r   r  GUFuncCallSteps.prepare_outputs\  s^     $'(>(>	(,%6 E&~!:!:33EANN6"	%6 r   c                 ^   ^  U 4S jnT R                    Vs/ s H
  o!" U5      PM     sn$ s  snf )zJ
Returns a list of input parameters that all reside on the target device.
c                 r   > TR                  U 5      (       a  TR                  nOTR                  nU" U 5      $ r   )r=   r>   r   )	parameterr  r8   s     r   ensure_device5GUFuncCallSteps.prepare_inputs.<locals>.ensure_deviceq  s3    ##I......9%%r   )r:  )r8   r  r  s   `  r   r  GUFuncCallSteps.prepare_inputsm  s+    	& +/++6+Qa +666s   *c                    U R                   (       a9  [        XR                  5       VVs/ s H  u  p#U R                  X#5      PM     nnnOU R                  S   b  U R                  n[	        U5      S:X  a  US   $ [        U5      $ s  snnf )a  
Moves the given output(s) to the host if necessary.

Returns a single value (e.g. an array) if there was one output, or a
tuple of arrays if there were multiple. Although this feels a little
jarring, it is consistent with the behavior of GUFuncs in general.
r   r   )r  r"   r;  r   r!   r    )r8   r;  r  self_outputs       r   r  $GUFuncCallSteps.post_process_outputs{  s     $$25g||2LN2L.6 ||F82L  NG\\!_(llGw<11:>!Ns   B)r  r:  r;  N)r   r   r   r   r   	__slots__r   r  r=   r>   r   r   r:   r  r  r  r  r   rR   r   r   r  r    s    I * *      
  9)v1""7"r   r  )	metaclass)&r   abcr   r   collectionsr   rN  r   	functoolsr   numpyrC   numba.np.ufunc.ufuncbuilderr   r   
numba.corer	   r
   numba.core.typingr   numba.np.ufunc.sigparser   r   r%   r.   objectr0   r   r   r	  r  r  r4  r>  rD  rQ  rw  r  rR   r   r   <module>r     s    ( #     I & ' 3I H"&Z"V Z"z:"' :"zA"- A"H<&J
	HM6 HMV,V ,,a"v a"Hn" n"r   