
    sh                     2   S SK Jr  S SKrS SKrS SKJr  S SKJr  S SK	J
r
Jr  S SKJr  S SKJr  S SKJrJr  S SKJr  S S	KJr  S
SKJr  S SKJr  S SKJrJrJr  S SKJ r J!r!  \
" 5       r"\"RF                  r#\"RH                  r%\"RL                  r&S r'\%" \RP                  " \5      S5      S 5       r)\%" \RP                  " \5      S5      S 5       r*\%" \RP                  " \5      S5      S 5       r+\%" \RP                  " \5      S5      S 5       r,\%" \RP                  " \5      S5      S 5       r-\%" \ S5      S 5       r.\%" \ S5      S 5       r/\%" \ S5      S 5       r0\#" \Rb                  Rd                  \Rf                  5      S  5       r4S q5S! r6\#" \Rn                  Rp                  \Rr                  \Rt                  5      S" 5       r;\#" \Rn                  Rp                  \Rx                  \Rt                  5      \#" \Rn                  Rp                  \Rz                  \Rt                  5      S# 5       5       r>\#" \R~                  Rp                  \Rr                  \Rt                  5      S$ 5       r@\#" \R~                  Rp                  \Rx                  \Rt                  5      \#" \R~                  Rp                  \Rz                  \Rt                  5      S% 5       5       rA\#" \R                  5      S& 5       rC\#" \R                  5      S' 5       rE\#" \R                  5      S( 5       rG\#" \R                  5      S) 5       rI\#" \R                  \R                  5      S* 5       rK\#" \R                  \R                  \R                  \R                  \R                  \R                  5      \#" \R                  \R                  \R                  \R                  \R                  \R                  5      \#" \R                  \R                  \R                  \R                  \R                  \R                  5      \#" \R                  \R                  \R                  \R                  \R                  \R                  5      S+ 5       5       5       5       rP\#" \R                  \R                  \R                  \R                  5      S, 5       rS\#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      S- 5       5       5       5       rU\#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      S. 5       5       5       5       rW\#" \R                  5      S/ 5       rY\#" \R                  5      S0 5       r[\#" \R                  \Rt                  5      S1 5       r]\#" \R                  \Rt                  \Rt                  \Rt                  5      S2 5       r_S3 r`\" \R                  \R                  5      S4 5       rc\" \R                  \R                  5      S5 5       rdS6 re\" \R                  \R                  5      S7 5       rg\" \R                  \R                  5      \" \Rr                  \R                  5      S8 5       5       rhS9 ri\i" \R                  R                  S:5        \i" \R                  S:5        \i" \R                  S:5        \i" \R                  R                  S;5        \i" \R                  S;5        \i" \R                  S;5        \i" \R                  R                  S<5        \i" \R                  S<5        \i" \R                  S<5        \#" \R                  R                  \R                  5      S= 5       ru\#" \R                  \R                  5      S> 5       rw\#" \R                  R                  \R                  5      S? 5       ry\#" \z\R                  5      S@ 5       r{\#" \R                  R                  \R                  \R                  \R                  5      SA 5       r}\#" \R                  \R                  \R                  5      \#" \R                  \R                  \R                  5      SB 5       5       rSCrSD r\#" \R                  GR                  \R                  \R                  5      " \" SE5      5        \#" \GR                  \R                  \R                  5      " \" SE5      5        \#" \R                  GR
                  \R                  \R                  5      " \" SF5      5        \#" \GR                  \R                  \R                  5      " \" SF5      5        \#" \R                  GR                  \R                  \R                  5      " \" SG5      5        \#" \GR                  \R                  \R                  5      " \" SG5      5        \#" \R                  GR                  \R                  \R                  5      " \" SH5      5        \#" \GR                  \R                  \R                  5      " \" SH5      5        \#" \R                  GR                  \R                  \R                  5      " \" SI5      5        \#" \GR                  \R                  \R                  5      " \" SI5      5        \#" \R                  GR                  \R                  \R                  5      " \" SJ5      5        \#" \GR                  \R                  \R                  5      " \" SJ5      5        SK r\" \R                  GR                   SLSH5        \" \R                  GR"                  SMSJ5        \GR$                  SN\GR&                  SO0r\#" \GR*                  \GR$                  5      \#" \GR*                  \GR&                  5      SP 5       5       r\#" \GR.                  \GR0                  5      SQ 5       r\#" \GR.                  \GR4                  5      SR 5       r\#" \GR8                  \Rt                  5      SS 5       r\#" \GR<                  \R                  5      \#" \GR<                  \GR0                  5      ST 5       5       r\#" \GR<                  \R                  5      \#" \GR<                  \GR4                  5      SU 5       5       r\#" \GRB                  \Rt                  \Rt                  \Rt                  5      SV 5       r\#" \\R                  \R                  5      SW 5       r\#" \\R                  \R                  5      \#" \\R                  \R                  5      \#" \\R                  \R                  5      SX 5       5       5       r\#" \\R                  \R                  5      SY 5       r\#" \\R                  \R                  5      \#" \\R                  \R                  5      \#" \\R                  \R                  5      SZ 5       5       5       r\#" \\R                  5      \#" \\R                  5      S[ 5       5       r\#" \\R                  \R                  5      \#" \\R                  \R                  5      S\ 5       5       rS] r\GRZ                  S^-  rS^\GRZ                  -  r\#" \GR`                  \R                  5      " \" \5      5        \#" \GR`                  \R                  5      " \" \5      5        \#" \GRb                  \R                  5      " \" \5      5        \#" \GRb                  \R                  5      " \" \5      5        S_ rS` r\#" \GRh                  R                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  R                  \Rf                  \Rz                  \Rt                  5      \#" \GRh                  R                  \Rf                  \Rx                  \Rt                  5      \Sa 5       5       5       5       r\#" \GRh                  R                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  R                  \Rf                  \Rz                  \Rt                  5      \#" \GRh                  R                  \Rf                  \Rx                  \Rt                  5      \Sb 5       5       5       5       r\#" \GRh                  GRp                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GRp                  \Rf                  \Rz                  \Rt                  5      \#" \GRh                  GRp                  \Rf                  \Rx                  \Rt                  5      \Sc 5       5       5       5       r\#" \GRh                  GRt                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GRt                  \Rf                  \Rz                  \Rt                  5      \#" \GRh                  GRt                  \Rf                  \Rx                  \Rt                  5      \Sd 5       5       5       5       rSe r\" \GRh                  GRz                  Sf5        \" \GRh                  GR|                  Sg5        \" \GRh                  GR~                  Sh5        \#" \GRh                  GR                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rz                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rx                  \Rt                  5      \Si 5       5       5       5       r\#" \GRh                  GRF                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GRF                  \Rf                  \Rx                  \Rt                  5      \#" \GRh                  GRF                  \Rf                  \Rz                  \Rt                  5      \Sj 5       5       5       5       r\#" \GRh                  GRL                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GRL                  \Rf                  \Rx                  \Rt                  5      \#" \GRh                  GRL                  \Rf                  \Rz                  \Rt                  5      \Sk 5       5       5       5       r\#" \GRh                  GR                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rx                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rz                  \Rt                  5      \Sl 5       5       5       5       r\#" \GRh                  GR                  \Rf                  \GRj                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rx                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rz                  \Rt                  5      \Sm 5       5       5       5       r\#" \GRh                  GR                  \Rf                  \Rt                  \Rt                  5      Sn 5       r\#" \GRh                  GR                  \Rf                  \GRj                  \Rt                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rx                  \Rt                  \Rt                  5      \#" \GRh                  GR                  \Rf                  \Rz                  \Rt                  \Rt                  5      So 5       5       5       r\#" \GR                  \GR                  5      Sp 5       r SsSq jr\&" \!5      Sr 5       r\" \GR                  " 5       \#5        g)t    )reduceN)ir)Registry
lower_cast)parse_dtype)models)typescgutils)ufunc_db)register_ufuncs   )nvvm)cuda)	nvvmutilsstubserrors)dim3CUDADispatcherc                     [         R                  " U SU-  5      n[         R                  " U SU-  5      n[         R                  " U SU-  5      n[        R                  " XX445      $ )Nz%s.xz%s.yz%s.z)r   	call_sregr
   pack_struct)builderprefixxyzs        g/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/cudaimpl.pyinitialize_dim3r      sY    GVf_5AGVf_5AGVf_5AwA	22    	threadIdxc                     [        US5      $ )Ntidr   contextr   sigargss       r   cuda_threadIdxr(       s    7E**r   blockDimc                     [        US5      $ )Nntidr#   r$   s       r   cuda_blockDimr,   %   s    7F++r   blockIdxc                     [        US5      $ )Nctaidr#   r$   s       r   cuda_blockIdxr0   *   s    7G,,r   gridDimc                     [        US5      $ )Nnctaidr#   r$   s       r   cuda_gridDimr4   /   s    7H--r   laneidc                 0    [         R                  " US5      $ )Nr5   )r   r   r$   s       r   cuda_laneidr7   4   s    w11r   r   c                 &    UR                  US5      $ Nr   extract_valuer$   s       r   dim3_xr<   9         q))r   r   c                 &    UR                  US5      $ )Nr   r:   r$   s       r   dim3_yr?   >   r=   r   r   c                 &    UR                  US5      $ )N   r:   r$   s       r   dim3_zrB   C   r=   r   c                     US   $ r9    r$   s       r   cuda_const_array_likerE   J   s     7Nr   c                 @    [         S-  q SR                  U [         5      $ )zDue to bug with NVVM invalid internalizing of shared memory in the
PTX output.  We can't mark shared memory to be internal. We have to
ensure unique name is generated for shared memory symbol.
r   z{0}_{1})_unique_smem_idformatnames    r   _get_unique_smem_idrK   T   s!     qOD/22r   c           
          UR                   S   R                  n[        UR                   S   5      n[        XU4U[	        S5      [
        R                  SS9$ Nr   r   _cudapy_smemTshapedtypesymbol_name	addrspacecan_dynsizedr'   literal_valuer   _generic_arrayrK   r   ADDRSPACE_SHAREDr%   r   r&   r'   lengthrQ   s         r   cuda_shared_array_integerr[   ^   sP    XXa[&&F$E'6)5&9.&I$($9$9'+- -r   c           
          UR                   S    Vs/ s H  oDR                  PM     nn[        UR                   S   5      n[        XXV[	        S5      [
        R                  SS9$ s  snf rM   rU   r%   r   r&   r'   srP   rQ   s          r   cuda_shared_array_tupler_   h   s^     (+xx{4{!oo{E4$E'%&9.&I$($9$9'+- - 5s   A%c           
          UR                   S   R                  n[        UR                   S   5      n[        XU4US[        R
                  SS9$ Nr   r   _cudapy_lmemFrO   r'   rV   r   rW   r   ADDRSPACE_LOCALrY   s         r   cuda_local_array_integerre   s   sK    XXa[&&F$E'6)5&4$($8$8',. .r   c           
          UR                   S    Vs/ s H  oDR                  PM     nn[        UR                   S   5      n[        XXVS[        R
                  SS9$ s  snf ra   rc   r]   s          r   ptx_lmem_alloc_arrayrg   }   sY     (+xx{4{!oo{E4$E'%&4$($8$8',. . 5s   Ac                     U(       a   eSnUR                   n[        R                  " [        R                  " 5       S5      n[        R
                  " XVU5      nUR                  US5        U R                  5       $ )Nzllvm.nvvm.membar.ctarD   moduler   FunctionTypeVoidTyper
   get_or_insert_functioncallget_dummy_valuer%   r   r&   r'   fnamelmodfntysyncs           r   ptx_threadfence_blockru      \    O8"E>>D??2;;="-D))$e<DLLr""$$r   c                     U(       a   eSnUR                   n[        R                  " [        R                  " 5       S5      n[        R
                  " XVU5      nUR                  US5        U R                  5       $ )Nzllvm.nvvm.membar.sysrD   ri   rp   s           r   ptx_threadfence_systemrx      rv   r   c                     U(       a   eSnUR                   n[        R                  " [        R                  " 5       S5      n[        R
                  " XVU5      nUR                  US5        U R                  5       $ )Nzllvm.nvvm.membar.glrD   ri   rp   s           r   ptx_threadfence_devicerz      s\    O8!E>>D??2;;="-D))$e<DLLr""$$r   c                     U R                  [        R                  S5      n[        R                  " [        R                  5      n[	        XXT/5      $ )Nl    )get_constantr	   int32noneptx_syncwarp_mask)r%   r   r&   r'   maskmask_sigs         r   ptx_syncwarpr      s9    Z8Dzz%++&HWx@@r   c                    SnUR                   n[        R                  " [        R                  " 5       [        R                  " S5      45      n[
        R                  " XVU5      nUR                  Xs5        U R                  5       $ )Nzllvm.nvvm.bar.warp.sync    )	rj   r   rk   rl   IntTyper
   rm   rn   ro   rp   s           r   r   r      s^    %E>>D??2;;=2::b>*;<D))$e<DLL""$$r   c           
      *   Uu  pEpgnUR                   S   n	U	[        R                  ;   a0  UR                  U[        R
                  " U	R                  5      5      nSn
UR                  n[        R                  " [        R                  " [        R
                  " S5      [        R
                  " S5      45      [        R
                  " S5      [        R
                  " S5      [        R
                  " S5      [        R
                  " S5      [        R
                  " S5      45      n[        R                  " XU
5      nU	R                  S:X  a  UR                  XXVXx45      nU	[        R                  :X  ab  UR                  US5      nUR                  US5      nUR                  U[        R                  " 5       5      n[        R                   " UUU45      nU$ UR#                  U[        R
                  " S5      5      nUR%                  X`R'                  [        R(                  S5      5      nUR#                  U[        R
                  " S5      5      nUR                  XUUXx45      nUR                  XUUXx45      nUR                  US5      nUR                  US5      nUR                  US5      nUR+                  U[        R
                  " S5      5      nUR+                  U[        R
                  " S5      5      nUR-                  UU R'                  [        R(                  S5      5      nUR/                  UU5      nU	[        R0                  :X  a%  UR                  U[        R2                  " 5       5      n[        R                   " XU45      nU$ )aw  
The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
function supports both 32 and 64 bit ints and floats, so for feature parity,
i64, f32, and f64 are implemented. Floats by way of bitcasting the float to
an int, then shuffling, then bitcasting back. And 64-bit values by packing
them into 2 32bit values, shuffling thoose, and then packing back together.
rA   zllvm.nvvm.shfl.sync.i32r   r   r   @   )r'   r	   real_domainbitcastr   r   bitwidthrj   rk   LiteralStructTyper
   rm   rn   float32r;   	FloatTypemake_anonymous_structtrunclshrr|   i8zextshlor_float64
DoubleType)r%   r   r&   r'   r   modevalueindexclamp
value_typerq   rr   rs   funcretrvpredfvvalue1
value_lshrvalue2ret1ret2rv1rv2rv1_64rv2_64rv_shls                               r   ptx_shfl_sync_i32r      s     '+#De!JU&&&rzz*2E2E'FG%E>>D??
bjjnbjjm<=ZZ^RZZ^RZZ^ZZ^RZZ^=D
 ))$e<Db ll4U!BC&&&sA.B((a0DR\\^4B//"dDC" J ubjjn5\\%)=)=ehh)KL
z2::b>:||Dvu"DE||Dvu"DE##D!,##D!,$$T1-c2::b>2c2::b>2VW%9%9%((B%GH[[(&R]]_5B++G$Z@Jr   c                    SnUR                   n[        R                  " [        R                  " [        R                  " S5      [        R                  " S5      45      [        R                  " S5      [        R                  " S5      [        R                  " S5      45      n[
        R                  " XVU5      nUR                  Xs5      $ )Nzllvm.nvvm.vote.syncr   r   )rj   r   rk   r   r   r
   rm   rn   )r%   r   r&   r'   rq   rr   rs   r   s           r   ptx_vote_syncr      s    !E>>D??2//B13A1@ AJJrNBJJrNBJJqMJLD ))$e<D<<##r   c                    Uu  pEUR                   S   R                  nUR                   S   [        R                  ;   a&  UR	                  U[
        R                  " U5      5      nSR                  U5      nUR                  n[
        R                  " [
        R                  " S5      [
        R                  " S5      [
        R                  " U5      45      n	[        R                  " XU5      n
UR                  XU45      $ )Nr   zllvm.nvvm.match.any.sync.i{}r   )r'   r   r	   r   r   r   r   rH   rj   rk   r
   rm   rn   r%   r   r&   r'   r   r   widthrq   rr   rs   r   s              r   ptx_match_any_syncr      s    
 KDHHQK  E
xx{e'''rzz%'89*11%8E>>D??2::b>BJJrNBJJu<M+NOD))$e<D<<Um,,r   c                 V   Uu  pEUR                   S   R                  nUR                   S   [        R                  ;   a&  UR	                  U[
        R                  " U5      5      nSR                  U5      nUR                  n[
        R                  " [
        R                  " [
        R                  " S5      [
        R                  " S5      45      [
        R                  " S5      [
        R                  " U5      45      n	[        R                  " XU5      n
UR                  XU45      $ )Nr   zllvm.nvvm.match.all.sync.i{}r   )r'   r   r	   r   r   r   r   rH   rj   rk   r   r
   rm   rn   r   s              r   ptx_match_all_syncr     s    
 KDHHQK  E
xx{e'''rzz%'89*11%8E>>D??2//B13A1@ AJJrNBJJu,=>@D ))$e<D<<Um,,r   c                     [         R                  " [         R                  " [         R                  " S5      / 5      SSSS9nUR	                  U/ 5      $ )Nr   zactivemask.b32 $0;=rTside_effectr   	InlineAsmrk   r   rn   r%   r   r&   r'   
activemasks        r   ptx_activemaskr     s>    boobjjnbA2DdLJ<<
B''r   c                     [         R                  " [         R                  " [         R                  " S5      / 5      SSSS9nUR	                  U/ 5      $ )Nr   zmov.u32 $0, %lanemask_lt;r   Tr   r   r   s        r   ptx_lanemask_ltr   $  s@    boobjjnbA94*.0J <<
B''r   c                 *    UR                  US   5      $ r9   )ctpopr$   s       r   ptx_popcr   ,  s    ==a!!r   c                      UR                   " U6 $ N)fmar$   s       r   ptx_fmar   1  s    ;;r   c                 j    SSS.n X   $ ! [          a    SU  S3n[        R                  " U5      ef = f)N)f32f)f64d)r   r   z$Conversion between float16 and float unsupportedKeyErrorr   CudaLoweringErrorr   typemapmsgs      r   float16_float_ty_constraintr   6  sH    \2G,   ,4XJlK&&s++,s    '2c                 B   UR                   UR                   :X  a  U$ [        UR                   5      u  pV[        R                  " U R	                  U5      [        R
                  " S5      /5      n[        R                  " USU S3SU S35      nUR                  X/5      $ )N   zcvt..f16 $0, $1;=,h)r   r   r   rk   get_value_typer   r   rn   	r%   r   fromtytotyvalty
constraintrs   asms	            r   float16_to_float_castr   @  s    $--'
0?NB??711$7"**R.9IJD
,,ttB4|4*R6H
IC<<U##r   c                 @   UR                   UR                   :X  a  U$ [        UR                   5      u  pV[        R                  " [        R                  " S5      U R                  U5      /5      n[        R                  " USU S3SU 35      nUR                  X/5      $ )Nr   cvt.rn.f16. $0, $1;=h,)r   r   r   rk   r   r   r   rn   r   s	            r   float_to_float16_castr   L  s    $--'
0ANB??2::b>G,B,B6,J+KLD
,,t{2$h73zl9K
LC<<U##r   c                 n    SSSSS.n X   $ ! [          a    SU  S3n[        R                  " U5      ef = f)Nchrl)   r   r   r   z"Conversion between float16 and intr   r   r   s      r   float16_int_constraintr   X  sL    CSc3G,   ,28*LI&&s++,s    '4c                 8   UR                   n[        U5      nUR                  (       a  SOSn[        R                  " U R                  U5      [        R                  " S5      /5      n[        R                  " USU U S3SU S35      n	UR                  X/5      $ )Nr^   ur   zcvt.rni.r   r   r   )	r   r   signedr   rk   r   r   r   rn   
r%   r   r   r   r   r   r   
signednessrs   r   s
             r   float16_to_integer_castr   b  s    }}H'1JJ??711$7"**R.9IJD
,,t!*hZ|D:,b)+C <<U##r   c                 6   UR                   n[        U5      nUR                  (       a  SOSn[        R                  " [        R
                  " S5      U R                  U5      /5      n[        R                  " USU U S3SU 35      n	UR                  X/5      $ )Nr^   r   r   r   r   r   )	r   r   r   r   rk   r   r   r   rn   r   s
             r   integer_to_float16_castr   o  s     H'1J3J??2::b>#226:;=D
,,t$ZL
(CZL)+C <<U##r   c                 j   ^ [        U [        R                  [        R                  5      U4S j5       ng )Nc                   > [         R                  " [         R                  " S5      [         R                  " S5      [         R                  " S5      /5      n[         R                  " UT S3S5      nUR	                  XS5      $ )Nr   z.f16 $0,$1,$2;=h,h,hr   rk   r   r   rn   )r%   r   r&   r'   rs   r   ops         r   ptx_fp16_binary*lower_fp16_binary.<locals>.ptx_fp16_binary  s\    rzz"~ "

2

2?All4B4~!6A||C&&r   lowerr	   float16)fnr   r   s    ` r   lower_fp16_binaryr   ~  s&    
2u}}emm,' -'r   addsubmulc                     [         R                  " [         R                  " S5      [         R                  " S5      /5      n[         R                  " USS5      nUR	                  XS5      $ )Nr   zneg.f16 $0, $1;=h,hr   r%   r   r&   r'   rs   r   s         r   ptx_fp16_hnegr    G    ??2::b>BJJrN+;<D
,,t.
7C<<""r   c                     [        XX#5      $ r   )r  r$   s       r   operator_hnegr
        355r   c                     [         R                  " [         R                  " S5      [         R                  " S5      /5      n[         R                  " USS5      nUR	                  XS5      $ )Nr   zabs.f16 $0, $1;r  r   r  s         r   ptx_fp16_habsr    r  r   c                     [        XX#5      $ r   )r  r$   s       r   operator_habsr    r  r   c                 ,   [         R                  " S5      [         R                  " S5      [         R                  " S5      /n[         R                  " [         R                  " S5      U5      n[         R                  " USS5      nUR	                  Xc5      $ )Nr   zfma.rn.f16 $0,$1,$2,$3;z=h,h,h,h)r   r   rk   r   rn   )r%   r   r&   r'   argtysrs   r   s          r   ptx_hfmar    s`    jjnbjjnbjjn=F??2::b>62D
,,t6

CC<<""r   c                 ,    S nU R                  XX#5      $ )Nc                 @    [         R                  R                  X5      $ r   )r   fp16hdiv)r   r   s     r   fp16_divfp16_div_impl.<locals>.fp16_div  s    yy~~a##r   compile_internal)r%   r   r&   r'   r  s        r   fp16_div_implr    s    $ ##GsAAr   z{{
          .reg .pred __$$f16_cmp_tmp;
          setp.{op}.f16 __$$f16_cmp_tmp, $1, $2;
          selp.u16 $0, 1, 0, __$$f16_cmp_tmp;
        }}c                    ^  U 4S jnU$ )Nc                   > [         R                  " [         R                  " S5      [         R                  " S5      [         R                  " S5      /5      n[         R                  " U[        R                  T	S9S5      nUR                  XS5      nU R                  [        R                  S5      nUR                  U[         R                  " S5      5      nUR                  SX5      $ )Nr   )r   r   r   z!=)r   rk   r   r   	_fp16_cmprH   rn   r|   r	   int16r   icmp_unsigned)
r%   r   r&   r'   rs   r   resultzero
int_resultr   s
            r   ptx_fp16_comparison*_gen_fp16_cmp.<locals>.ptx_fp16_comparison  s    rzz"~

2

2/OPll4!1!1R!1!8(Cc(##EKK3__VRZZ^<
$$T:<<r   rD   )r   r$  s   ` r   _gen_fp16_cmpr&    s    = r   eqnegegtleltc                 j   ^ [        U [        R                  [        R                  5      U4S j5       ng )Nc                 X   > [        T5      " XX#5      nUR                  XCS   US   5      $ )Nr   r   )r&  select)r%   r   r&   r'   choicer   s        r   ptx_fp16_minmax*lower_fp16_minmax.<locals>.ptx_fp16_minmax  s-    r"7S?~~f1gtAw77r   r   )r   rq   r   r1  s     ` r   lower_fp16_minmaxr3    s&    
2u}}emm,8 -8r   maxmin
__nv_cbrtf	__nv_cbrtc                     UR                   n[        U   nU R                  U5      nUR                  n[        R
                  " Xf/5      n[        R                  " XxU5      n	UR                  X5      $ r   )	return_type
cbrt_funcsr   rj   r   rk   r
   rm   rn   )
r%   r   r&   r'   r   rq   ftyrr   rs   r   s
             r   ptx_cbrtr<    s`     
BrNE

 
 
$C>>D??3&D		'	'E	:B<<!!r   c           	          [         R                  " UR                  [        R                  " [        R
                  " S5      [        R
                  " S5      45      S5      nUR                  XC5      $ )Nr   	__nv_brevr
   rm   rj   r   rk   r   rn   r%   r   r&   r'   r   s        r   ptx_brev_u4rA    sP    
 
	'	'


2B(9:
B <<!!r   c           	          [         R                  " UR                  [        R                  " [        R
                  " S5      [        R
                  " S5      45      S5      nUR                  XC5      $ )Nr   __nv_brevllr?  r@  s        r   ptx_brev_u8rD  	  sP    
 
	'	'


2B(9:
B <<!!r   c                 h    UR                  US   U R                  [        R                  S5      5      $ r9   )ctlzr|   r	   booleanr$   s       r   ptx_clzrH    s.    <<QU]]A.0 0r   c           	          [         R                  " UR                  [        R                  " [        R
                  " S5      [        R
                  " S5      45      S5      nUR                  XC5      $ )Nr   __nv_ffsr?  r@  s        r   
ptx_ffs_32rK    sP     
	'	'


2B(9:
B <<!!r   c           	          [         R                  " UR                  [        R                  " [        R
                  " S5      [        R
                  " S5      45      S5      nUR                  XC5      $ )Nr   r   
__nv_ffsllr?  r@  s        r   
ptx_ffs_64rN  &  sP     
	'	'


2B(9:
B <<!!r   c                 0    Uu  pEnUR                  XEU5      $ r   )r/  )r%   r   r&   r'   testabs          r   ptx_selprS  0  s    JDQ>>$1%%r   c           	         [         R                  " UR                  [        R                  " [        R
                  " 5       [        R
                  " 5       [        R
                  " 5       45      S5      nUR                  XC5      $ )N
__nv_fmaxfr
   rm   rj   r   rk   r   rn   r@  s        r   
ptx_max_f4rW  6  X    		'	'
LLN\\^R\\^,	. 	
B <<!!r   c           
         [         R                  " UR                  [        R                  " [        R
                  " 5       [        R
                  " 5       [        R
                  " 5       45      S5      nUR                  UU R                  XS   UR                  S   [        R                  5      U R                  XS   UR                  S   [        R                  5      /5      $ )N	__nv_fmaxr   r   r
   rm   rj   r   rk   r   rn   castr'   r	   doubler@  s        r   
ptx_max_f8r^  A       
	'	'
MMO]]_bmmo.	0 	
B <<W1gsxx{ELLAW1gsxx{ELLA  r   c           	         [         R                  " UR                  [        R                  " [        R
                  " 5       [        R
                  " 5       [        R
                  " 5       45      S5      nUR                  XC5      $ )N
__nv_fminfrV  r@  s        r   
ptx_min_f4rb  R  rX  r   c           
         [         R                  " UR                  [        R                  " [        R
                  " 5       [        R
                  " 5       [        R
                  " 5       45      S5      nUR                  UU R                  XS   UR                  S   [        R                  5      U R                  XS   UR                  S   [        R                  5      /5      $ )N	__nv_fminr   r   r[  r@  s        r   
ptx_min_f8re  ]  r_  r   c           	      D   [         R                  " UR                  [        R                  " [        R
                  " S5      [        R                  " 5       45      S5      nUR                  UU R                  XS   UR                  S   [        R                  5      /5      $ )Nr   __nv_llrintr   )r
   rm   rj   r   rk   r   r   rn   r\  r'   r	   r]  r@  s        r   	ptx_roundrh  n  s}     
	'	'
JJrN]]_	  	
B <<W1gsxx{ELLA  r   c                 ,    S nU R                  XX#5      $ )Nc                    [         R                  " U 5      (       d  [         R                  " U 5      (       a  U $ US:  a=  US:  a  SUS-
  -  nSnOSU-  nSnX-  U-  n[         R                  " U5      (       a  U $ O
SU* -  nX-  n[        U5      n[         R                  " XE-
  5      S:X  a  S[        US-  5      -  nUS:  a
  UW-  U-  nU$ XR-  nU$ )Nr      g      $@gMDg      ?g      ?g       @)mathisinfisnanroundfabs)r   ndigitspow1pow2r   r   s         r   round_ndigits$round_to_impl.<locals>.round_ndigits  s    ::a==DJJqMMHa<| "-wT!Azz!}}  WH%DA!HIIae#eAGn$Aa<TT!A  IAr   r  )r%   r   r&   r'   rt  s        r   round_to_implrv    s    B ##GCHHr   c                    ^  U 4S jnU$ )Nc                 l   > UR                   u  nU R                  UT5      nUR                  XSS   5      $ r9   )r'   r|   fmul)r%   r   r&   r'   argtyfactorconsts         r   implgen_deg_rad.<locals>.impl  s3    %%eU3||FG,,r   rD   )r|  r}  s   ` r   gen_deg_radr    s    - Kr   g     f@c           
         U[         R                  ;   a  [         R                  " USS9nU/nO[        R                  " X[        U5      S9n[        X&5       VVs/ s H&  u  pxU R                  XU[         R                  5      PM(     nnnUR                  n	X:w  a  [        SU	< SU< 35      eUR                  [        U5      :w  a#  [        SUR                  [        U5      4-  5      eX&4$ s  snnf )z,
Convert integer indices into tuple of intp
r   )rQ   count)r  zexpect z	 but got z#indexing %d-D array with %d-D index)r	   integer_domainUniTupler
   unpack_tuplelenzipr\  intprQ   	TypeErrorndim)
r%   r   indtyindsarytyvaltyindicestirQ   s
             r   _normalize_indicesr    s     $$$U!4&&&wCJGu.0.41 ||G5::6.  0 KKE~%?@@zzSZ=SZ01 2 	2 >0s   -C-c                    ^  U 4S jnU$ )Nc           	         > UR                   u  pEnUu  pxn	UR                  n
[        XXXXF5      u  p[U R                  U5      " XU5      n[        R
                  " XXLUSS9nT" XXU	5      $ )NT
wraparound)r'   rQ   r  
make_arrayr
   get_item_pointer)r%   r   r&   r'   r  r  r  aryr  r   rQ   r  laryptrdispatch_fns                 r   imp_atomic_dispatcher.<locals>.imp  sy    !hhe3+Ge,1: !!%(3?&&wg268 7U==r   rD   )r  r  s   ` r   _atomic_dispatcherr    s    > Jr   c                 D   U[         R                  :X  a3  UR                  nUR                  [        R
                  " U5      X445      $ U[         R                  :X  a3  UR                  nUR                  [        R                  " U5      X445      $ UR                  SX4S5      $ )Nr  	monotonic)	r	   r   rj   rn   r   declare_atomic_add_float32r   declare_atomic_add_float64
atomic_rmwr%   r   rQ   r  r   rr   s         r   ptx_atomic_add_tupler        
 ~~||I@@F J( 	(	%--	~~||I@@F J( 	( !!%;??r   c                 D   U[         R                  :X  a3  UR                  nUR                  [        R
                  " U5      X445      $ U[         R                  :X  a3  UR                  nUR                  [        R                  " U5      X445      $ UR                  SX4S5      $ )Nr  r  )	r	   r   rj   rn   r   declare_atomic_sub_float32r   declare_atomic_sub_float64r  r  s         r   ptx_atomic_subr    r  r   c                     U[         R                  R                  ;   aD  UR                  nUR                  n[        [        SU 35      nUR                  U" U5      X445      $ [        SU S35      e)Ndeclare_atomic_inc_intzUnimplemented atomic inc with  array	r   cudadeclunsigned_int_numba_typesr   rj   getattrr   rn   r  r%   r   rQ   r  r   bwrr   r   s           r   ptx_atomic_incr    h    
 666^^~~Y"8 =>||BtHsj118vFGGr   c                     U[         R                  R                  ;   aD  UR                  nUR                  n[        [        SU 35      nUR                  U" U5      X445      $ [        SU S35      e)Ndeclare_atomic_dec_intzUnimplemented atomic dec with r  r  r  s           r   ptx_atomic_decr    r  r   c                    ^ [         U4S j5       n[        R                  [        R                  [        R                  4 H3  n[        U [        R                  U[        R                  5      " U5        M5     g )Nc                    > U[         R                  R                  ;   a  UR                  TX4S5      $ [	        ST SU S35      e)Nr  zUnimplemented atomic z with r  r   r  integer_numba_typesr  r  )r%   r   rQ   r  r   r   s        r   impl_ptx_atomic+ptx_atomic_bitwise.<locals>.impl_ptx_atomic  sE    T]]667%%b#K@@3B4veWFKLLr   )r  r	   r  r  Tupler   ArrayAny)stubr   r  r   s    `  r   ptx_atomic_bitwiser    sQ    M M zz5>>5;;7dEKKUYY/@ 8r   andorxorc                     U[         R                  R                  ;   a  UR                  SX4S5      $ [	        SU S35      e)Nxchgr  zUnimplemented atomic exch with r  r  )r%   r   rQ   r  r   s        r   ptx_atomic_exchr  /  s@    
 223!!&#K@@9%GHHr   c                    UR                   nU[        R                  :X  a'  UR                  [        R
                  " U5      X445      $ U[        R                  :X  a'  UR                  [        R                  " U5      X445      $ U[        R                  [        R                  4;   a  UR                  SX4SS9$ U[        R                  [        R                  4;   a  UR                  SX4SS9$ [        SU-  5      eNr4  r  orderingumaxz&Unimplemented atomic max with %s array)rj   r	   r   rn   r   declare_atomic_max_float64r   declare_atomic_max_float32r}   int64r  uint32uint64r  r  s         r   ptx_atomic_maxr  :      
 >>D||I@@F J( 	(	%--	||I@@F J( 	(	5;;,	,!!%K!HH	5<<.	.!!&#[!II@5HIIr   c                    UR                   nU[        R                  :X  a'  UR                  [        R
                  " U5      X445      $ U[        R                  :X  a'  UR                  [        R                  " U5      X445      $ U[        R                  [        R                  4;   a  UR                  SX4SS9$ U[        R                  [        R                  4;   a  UR                  SX4SS9$ [        SU-  5      eNr5  r  r  uminz&Unimplemented atomic min with %s array)rj   r	   r   rn   r   declare_atomic_min_float64r   declare_atomic_min_float32r}   r  r  r  r  r  r  s         r   ptx_atomic_minr  N  r  r   c                    UR                   nU[        R                  :X  a'  UR                  [        R
                  " U5      X445      $ U[        R                  :X  a'  UR                  [        R                  " U5      X445      $ U[        R                  [        R                  4;   a  UR                  SX4SS9$ U[        R                  [        R                  4;   a  UR                  SX4SS9$ [        SU-  5      er  )rj   r	   r   rn   r   declare_atomic_nanmax_float64r   declare_atomic_nanmax_float32r}   r  r  r  r  r  r  s         r   ptx_atomic_nanmaxr  b      
 >>D||ICCDI J( 	(	%--	||ICCDI J( 	(	5;;,	,!!%K!HH	5<<.	.!!&#[!II@5HIIr   c                    UR                   nU[        R                  :X  a'  UR                  [        R
                  " U5      X445      $ U[        R                  :X  a'  UR                  [        R                  " U5      X445      $ U[        R                  [        R                  4;   a  UR                  SX4SS9$ U[        R                  [        R                  4;   a  UR                  SX4SS9$ [        SU-  5      er  )rj   r	   r   rn   r   declare_atomic_nanmin_float64r   declare_atomic_nanmin_float32r}   r  r  r  r  r  r  s         r   ptx_atomic_nanminr  v  r  r   c                    UR                  UR                  S   [        R                  UR                  S   UR                  S   5      nUS   U R	                  [        R                  S5      US   US   4n[        XX#5      $ )Nr   r   rA   )r9  r'   r	   r  r|   ptx_atomic_casr$   s       r   ptx_atomic_compare_and_swapr    sj    
//#((1+uzz388A;
LCGW))%**a8$q'47KD'C66r   c           	         UR                   u  pEpgUu  pp[        XXYUU5      u  p\U R                  U5      " XU5      n[        R                  " XXMUSS9nUR
                  [        R                  R                  ;   a;  UR                  nUR
                  R                  n[        R                  " XUXU5      $ [        SUR
                  -  5      e)NTr  z&Unimplemented atomic cas with %s array)r'   r  r  r
   r  rQ   r   r  r  rj   r   r   atomic_cmpxchgr  )r%   r   r&   r'   r  r  oldtyr  r  r  oldr   r  r  r  rr   r   s                    r   r  r    s     "%E%Cs'%u(-/NE e$Ws;D

"
"7U'.24C {{t}}889~~;;''''x3OO@5;;NOOr   c                     [         R                  " [         R                  " [         R                  " 5       [         R                  " S5      /5      SSSS9nUS   nUR                  XE/5        g )Nr   znanosleep.u32 $0;r   Tr   r   )r   r   rk   rl   r   rn   )r%   r   r&   r'   	nanosleepnss         r   ptx_nanosleepr    sM    R__R[[]RZZ^<LM0#4II	aBLLD!r   c           
         [        [        R                  US5      nUS:*  =(       a    U=(       a    [        U5      S:H  nUS::  a  U(       d  [	        S5      eU R
                  U   n	[        U[        R                  [        R                  45      =(       d4    [        U	[        R                  5      =(       d    U[        R                  :H  n
U[        R                  ;  a  U
(       d  [        SU-  5      eU R                  U5      n[         R"                  " X5      nU[$        R&                  :X  a  [(        R*                  " XUS9nOUR,                  n[(        R.                  " XUU5      nU R1                  U5      nSUS-
  R3                  5       -  Ul        U(       a  SUl        O*[         R8                  " U[         R:                  5      Ul        UR?                  U[         R@                  " [         RB                  " S5      5      S5      n[D        RF                  " [$        RH                  " 5       RJ                  5      nU R                  U5      nURM                  U5      nUn/ n[O        [Q        U5      5       H  u  nnURS                  U5        UU-  nM     [Q        U5       Vs/ s H  nUPM     nnU Vs/ s H#  nU RU                  [        RV                  U5      PM%     nnU(       a  [         RX                  " [         RZ                  " [         RB                  " S	5      / 5      S
SSS9nUR]                  UR_                  U/ 5      [         RB                  " S5      5      nU RU                  [        RV                  U5      nURa                  UU5      /nO0U Vs/ s H#  nU RU                  [        RV                  U5      PM%     nn[        U5      n[        Rb                  " UUSS9nU Re                  U5      " X5      n U Rg                  U URi                  UU Rj                  Rl                  5      UUU RU                  [        RV                  U5      S S9  U Ro                  5       $ s  snf s  snf s  snf )Nr   r   zarray length <= 0zunsupported type: %srI   externalr   genericr   zmov.u32 $0, %dynamic_smem_size;r   Tr   r   C)rQ   r  layout)datarP   stridesitemsizememinfo)8r   operatorr  r  
ValueErrordata_model_manager
isinstancer	   RecordBooleanr   StructModelr   number_domainr  get_data_typer   	ArrayTyper   rd   r
   alloca_oncerj   add_global_variableget_abi_sizeof
bit_lengthalignlinkageConstant	UndefinedinitializeraddrspacecastPointerTyper   llcreate_target_dataNVVMdata_layoutget_abi_size	enumeratereversedappendr|   r  r   rk   r   rn   udivr  r  populate_arrayr   r  type	_getvalue)!r%   r   rP   rQ   rR   rS   rT   	elemcountdynamic_smem
data_modelother_supported_typelldtypelarytydataptrrr   gvmemr  
targetdatar  
laststriderstridesr  lastsizer^   r  kstridesget_dynshared_sizedynsmem_size	kitemsizekshaper  r  r  s!                                    r   rW   rW     s   x||UA.I >FlFs5zQLA~l,-- ++E2J55<<78 	"j&"4"45	"EMM! 
 E'''0D.677##E*G\\'-FD((( %%gKH~~ ++D+,57 &&w/ EAI2244&EM !#FBLL AE ''r~~bjjm/L(13 &&tyy{'>'>?J##E*G##J/H JH %18
#h
 2 #8,-,Qq,G-=DEW$$UZZ3WHE 
  \\"//"**R."*M*K*.DB ||GLL1CR$H$&JJrN4 ((X>	,,|Y78?DEu!'&&uzz15uE u:DKKe$s;E


U
#G
5C3 ' G!'#+$+$8$8X$N#'  ) ==?A .E$ Fs   #Q 5*Q%*Q*c                 "    U R                  5       $ r   )ro   )r%   r   r   pyvals       r   cuda_dispatcher_constr-    s    ""$$r   )F)	functoolsr   r  rl  llvmliter   llvmlite.bindingbindingr  numba.core.imputilsr   r   numba.core.typing.npydeclr   numba.core.datamodelr   
numba.corer	   r
   numba.npr   numba.np.npyimplr   cudadrvr   numbar   
numba.cudar   r   r   numba.cuda.typesr   r   registryr   lower_getattr
lower_attrlower_constantr   Moduler(   r,   r0   r4   r7   r<   r?   rB   r|  
array_liker  rE   rG   rK   sharedarrayIntegerLiteralr  r[   r  r  r_   localre   rg   threadfence_blockru   threadfence_systemrx   threadfencerz   syncwarpr   i4r   shfl_sync_intrinsicr   f4f8r   vote_sync_intrinsicrG  r   match_any_syncr   match_all_syncr   r   r   lanemask_ltr   popcr   r   r   r   r   Floatr   r   r   Integerr   r   r   r  haddr  iaddhsubr  isubhmulr  imulhnegr  negr
  habsr  absr  hfmar  truedivitruedivr  r  r&  heqr'  hner(  hger)  hgtr*  hler+  hltr,  r3  hmaxhminr   r   r:  cbrtr<  brevu4rA  u8rD  clzrH  ffsrK  rN  selprS  r4  rW  r^  r5  rb  re  ro  rh  rv  r  pi_deg2rad_rad2degradiansdegreesr  r  atomicr  r  r  incr  decr  r  and_r   r  exchr  r  r  nanmaxr  nanminr  compare_and_swapr  casr  r  r  r  rW   r-  
get_ufuncsrD   r   r   <module>r     s-        4 1 ' %  ,   / / 1:##
((3 ELL,+ -+ ELL
+, ,, ELL
+- ,- ELL	*. +. ELL)2 *2 D#* * D#* * D#* * tzzekk* + 3 t{{%..		:- ;- t{{%++uyy1t{{%..%))4- 5 2- tzz--uyy9. :. tzzeii0tzz3. 4 1. u%  % u % !% u% % u~~A A u~~uxx % !% u  %((EHHehhxxu  %((EHHehhxxu  %((EHHehhxxu  %((EHHehhxx++\ u  %((EHHemmD$ E$ uUXXuxx0uUXXuxx0uUXXuxx0uUXXuxx0	- 1 1 1 1	- uUXXuxx0uUXXuxx0uUXXuxx0uUXXuxx0- 1 1 1 1- u( ( u( ( uzz599" " uyy%))UYY		2 3, EMM5;;'$ ($ EKK'$ ($, EMM5==)	$ *	$ EMM5==)E  %--0
$ 1 *
$' %**//5 ) (,, & (-- ' %**//5 ) (,, & (-- ' %**//5 ) (,, & (-- ' uzz&# '# x||U]]#6 $6 uzz&# '# sEMM6 6 uzzu}}emmD# E# x6x%--7B 8 7B		 ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E ejjnnemmU]] 3M$4G H hkk5==%-- 0t1D E8 %**//5$ / %**//5$ / 
MM<	MM;
 uzz5==!uzz5==!" " "" uzz588" " uzz588" " uyy%))0 0 uyy%((uyy%(("  " uyy%((uyy%(("  " uzz599eii3& 4&
 sEHHehh"  " sEHHehhsEHHehhsEHHehh       sEHHehh"  " sEHHehhsEHHehhsEHHehh       uehhuehh	  	  uehh&uehh&"I ' '"IJ 77T>$''> dllEHH k(3 4 dllEHH k(3 4 dllEHH k(3 4 dllEHH k(3 4.$ u||ejj%))<u||enneii@u||ekk599=
@  > A =
@ u||ejj%))<u||enneii@u||ekk599=
@  > A =
@ u||ejj%))<u||enneii@u||ekk599=H  > A =H u||ejj%))<u||enneii@u||ekk599=H  > A =H	A 5<<$$e , 5<<##T * 5<<##U + u||%++uzz599=u||%++u~~uyyAu||%++u{{EII>I  ? B >I u||ejj%))<u||ekk599=u||enneii@J  A > =J  u||ejj%))<u||ekk599=u||enneii@J  A > =J  u||EKKUYY?u||EKKeii@u||EKKCJ  D A @J  u||EKKUYY?u||EKKeii@u||EKKCJ  D A @J  u||$$ekk599eiiH7 I7 u||ejj%))UYYGu||ekk599eiiHu||enneiiKP L I HP* u%" &" !&aH %  % ##%u -r   