
    shA                     f   S SK r S SKJr  S SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJrJr  S SKJr  S SKJr  S SKJrJr  S SKJr  S S	KJr  S
SKJr  S SKJrJrJr  S SK J!r!   " S S\R&                  5      r"\ RF                  " S\ RH                  5      r% " S S\5      r& " S S\5      r' " S S\5      r(g)    N)cached_property)ir)cgutilsconfig	debuginfoitanium_manglertypestypingutils)
Dispatcher)BaseContext)BaseCallConvMinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_managerc                   .   ^  \ rS rSrS rU 4S jrSrU =r$ )CUDATypingContext   c                    SSK JnJnJnJn  SSKJnJn  U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        U R                  [        R                  5        U R                  UR                  5        U R                  UR                  5        U R                  UR                  5        g )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr    r!   install_registryregistryr   typing_registry)selfr   r   r   r   r    r!   s          e/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/target.pyload_additional_registries,CUDATypingContext.load_additional_registries   s    EE:h//0j112h//0i001m445h//0l::;    c                   > SSK Jn  [        U[        5      (       a  [        X5      (       d   UR                  n[        [        U ];  U5      $ ! [
         a    UR                  (       d  [        S5      eUR                  R                  5       nSUS'   UR                  SS5      US'   UR                  SS5      US'   U" UR                  U5      nXAl        Un Nf = f)	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTdevicedebugFopt)numba.cuda.dispatcherr-   
isinstancer   _CUDATypingContext__dispatcherAttributeError_can_compile
ValueErrortargetoptionscopygetpy_funcsuperr   resolve_value_type)r'   valr-   r7   disp	__class__s        r(   r<   $CUDATypingContext.resolve_value_type#   s    8sJ''3//&&  &@EE " ''$ &G H H # 1 1 6 6 8*.h')6):):7E)Jg&'4'8'8'Ee$%ckk=A $( s   A B
CC )__name__
__module____qualname____firstlineno__r)   r<   __static_attributes____classcell__r?   s   @r(   r   r      s    
<F Fr+   r   z	[^a-z0-9]c                      ^  \ rS rSrSrSrSU 4S jjr\S 5       r\S 5       r	S r
S rS rS	 r\S
 5       r\S 5       r\S 5       rSSS.S jr SS jrS rS rS rS rS rS rSrU =r$ )CUDATargetContextC   Tc                 v   > [         TU ]  X5        [        R                  " [        R
                  5      U l        g N)r;   __init__r   chainr   default_managerdata_model_manager)r'   	typingctxtargetr?   s      r(   rN   CUDATargetContext.__init__G   s,    +"3"9"9%%#
r+   c                 "    [         R                  $ rM   )r   	DIBuilderr'   s    r(   rV   CUDATargetContext.DIBuilderM   s    """r+   c                     g)NFrA   rW   s    r(   enable_boundscheck$CUDATargetContext.enable_boundscheckQ   s     r+   c                 8    U R                   R                  U5      $ rM   )_internal_codegen_create_empty_module)r'   names     r(   create_moduleCUDATargetContext.create_moduleW   s    %%::4@@r+   c                 H    [         R                  " S5      U l        S U l        g )Nznumba.cuda.jit)r   JITCUDACodegenr]   _target_datarW   s    r(   initCUDATargetContext.initZ   s    !(!7!78H!I r+   c                    SSK JnJnJn  SSK JnJnJn  SSK JnJn  SSK J	n	  SSK
Jn
  SSKJn  SSKJn  S	S
KJnJnJnJnJn  SSKJn  U R/                  UR0                  5        U R/                  U
R0                  5        U R/                  UR0                  5        U R/                  UR0                  5        U R/                  U	R0                  5        U R/                  UR0                  5        U R/                  UR2                  5        g )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)numba.cpythonrh   ri   rj   rk   rl   rm   rn   ro   rp   
numba.miscrq   numba.nprr   rs   r"   rt   ru   rv   rw   r   numba.np.unsaferx   r$   r%   impl_registry)r'   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   r   rx   s                      r(   r)   ,CUDATargetContext.load_additional_registries^   s     	=<??2+'%'	
 	
 	,h//0h//0i001m445i001h//0l889r+   c                     U R                   $ rM   )r]   rW   s    r(   r   CUDATargetContext.codegenv   s    %%%r+   c                     U R                   c8  [        R                  " [        R                  " 5       R
                  5      U l         U R                   $ rM   )rd   llcreate_target_datar   NVVMdata_layoutrW   s    r(   target_dataCUDATargetContext.target_datay   s9    $ " 5 5diik6M6M ND   r+   c                     SSK Jn  Sn[        U Vs/ s H  n[        R                  " U5      U4PM     sn5      nU$ s  snf )z
Some CUDA intrinsics are at the module level, but cannot be treated as
constants, because they are loaded from a special register in the PTX.
These include threadIdx, blockDim, etc.
r   cuda)	threadIdxblockDimblockIdxgridDimlaneidwarpsize)numbar   tupler	   Module)r'   r   	nonconstsncnonconsts_with_mods        r(   nonconst_module_attrs'CUDATargetContext.nonconst_module_attrs   sM     	!	".7$9.7 &+\\$%7$<.7$9 :!!$9s   "=c                     [        U 5      $ rM   )CUDACallConvrW   s    r(   	call_convCUDATargetContext.call_conv   s    D!!r+   rA   Nabi_tagsuidc                .    [         R                  " XUUS9$ )Nr   )r   mangle)r'   r_   argtypesr   r   s        r(   manglerCUDATargetContext.mangler   s    %%dx*-/ 	/r+   c	           	          [         R                  " UR                  SS9n	U R                  5       R	                  UR
                   S3U	UUS9n
U
R                  U5        U R                  XU	X4UU5      nX4$ )a;  
Adapt a code library ``codelib`` with the numba compiled CUDA kernel
with name ``fname`` and arguments ``argtypes`` for NVVM.
A new library is created with a wrapper function that can be used as
the kernel entry point for the given kernel.

Returns the new code library and the wrapper function.

Parameters:

codelib:       The CodeLibrary containing the device function to wrap
               in a kernel call.
fndesc:        The FunctionDescriptor of the source function.
debug:         Whether to compile with debug.
lineinfo:      Whether to emit line info.
nvvm_options:  Dict of NVVM options used when compiling the new library.
filename:      The source filename that the function is contained in.
linenum:       The source line that the function is on.
max_registers: The max_registers argument for the code library.
cudapyns_kernel_)
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   create_libraryr_   add_linking_librarygenerate_kernel_wrapper)r'   codelibfndescr/   lineinfor   filenamelinenumr   kernel_namelibrarywrappers               r(   prepare_cuda_kernel%CUDATargetContext.prepare_cuda_kernel   s    . &77!!h
 ,,.//7<<.0I;F=I>K 0 M 	##G,..w/4/68 r+   c                 "	  ^!^" UR                   nU R                  U5      n	[        U	R                  5      n
[        R
                  " [        R                  " 5       U
5      nU R                  S5      m"[        R
                  " [        R                  " S5      U R                  R                  [        R                  5      /U
-   5      n[        R                  " T"XR                  5      n[        R                   " UR"                  SS9n[        R                  " T"X5      m![        R$                  " T!R'                  S5      5      nU(       d  U(       aR  U=(       a    U(       + nU R)                  T"UU US9nUR*                  " T!X2R,                  X5        UR.                  " X5        U!U"4S jnU" S5      n/ n/ nS	 H7  nUR1                  U" S
U-  5      5        UR1                  U" SU-  5      5        M9     U	R3                  UT!R,                  5      nU R                  R5                  X[        R6                  UU5      u  nnU(       Gak  [8        R:                  " UUR<                  5         UR?                  5         SSS5        URA                  URC                  URD                  5      5         [        RF                  " URH                  RJ                  S5      nURM                  UUURN                  SS5      nURQ                  US5      n[R        RT                  " U5      nURA                  U5         [W        S	U5       H)  u  nnURY                  U5      n UR[                  U U5        M+     [W        S	U5       H)  u  nnUR]                  U5      n UR[                  U U5        M+     SSS5        SSS5        UR?                  5         [^        R`                  " T!5        URc                  T"5        U(       d  U(       a  WRd                  " 5         URe                  5         [f        Rh                  (       a  [j        Rl                  " UT"5        URo                  T!R"                  5      $ ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       N= f)z
Generate the kernel wrapper in the given ``library``.
The function being wrapped is described by ``fndesc``.
The wrapper function is returned.
zcuda.kernel.wrapper    r   r   r"   )modulefilepathcgctxdirectives_onlyc                    > TR                   U -   n[        R                  " T[        R                  " S5      U5      n[        R
                  " UR                  R                  S 5      Ul        U$ )Nr   )	r_   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)postfixr_   gvwrapfnwrapper_modules      r(   define_error_gvBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gv   sO    ;;(D,,^RZZ^-13B[[$?BNIr+   __errcode__xyzz	__tid%s__z__ctaid%s__N	monotonicr   )8r   get_arg_packerlistargument_typesr   FunctionTypeVoidTyper`   r   r   get_return_typer	   pyobjectFunctionr   r   r   r_   	IRBuilderappend_basic_blockrV   mark_subprogramargsmark_locationappendfrom_argumentscall_functionvoidr   	if_likelyis_okret_voidif_thennot_is_python_excr   r   r   cmpxchgcodeextract_valuer   SRegBuilderziptidstorectaidr   set_cuda_kerneladd_ir_modulefinalizer   	DUMP_LLVMr   	dump_llvmget_function)#r'   r   r   r   r/   r   r   r   r   arginfoargtyswrapfntyfntyfuncprefixedbuilderr   r   r   gv_excgv_tidgv_ctaidicallargsstatus_oldxchgchangedsregdimptrr=   r   r   s#                                    @@r(   r   )CUDATargetContext.generate_kernel_wrapper   s{    ??%%h/g,,-??2;;=&9++,ABrzz"~ $ > >u~~ NO!' () {{>41F1FG"44TYY8L^X@,,v88<=H&4u9On08-17F ' HI %%[[( ##G5	 !/AMM/+/:;OOOMA,=>?  ))'6;;?NN005::x;	 ""7FLL9  " : f.B.B!CDkk&++"5"5t<
 vsFKK'2KA!//a8 !,,W5__W-%(%7	S"hhsmc3/ &8 &)%9	S"jjoc3/ &: . E, 	V$n-H OOFN3##FKK00I :9  .- EDs2   &Q)BR -A3Q/ R 
Q,/
Q=	9R  
Rc           
         UR                   n[        UR                  SS95       Vs/ s H#  nU R                  [        R
                  U5      PM%     nn[        R                  " [        R                  " S5      [        U5      5      n[        R                  " Xv5      n[        R                  n	[        R                  " XHR                  SU	S9n
SU
l        SU
l        Xl        U R'                  UR(                  5      nU R+                  U5      nSUS	-
  R-                  5       -  U
l        [        R0                  " [        R                  " S5      5      nUR3                  XS
5      nU R5                  U5      " X5      nUR6                   Vs/ s H#  nU R                  [        R8                  U5      PM%     nnUR:                   Vs/ s H#  nU R                  [        R8                  U5      PM%     nnU R=                  XR?                  XR@                  R                  5      UUURB                  URD                  SS9  URG                  5       $ s  snf s  snf s  snf )Q
Unlike the parent version.  This returns a a pointer in the constant
addrspace.
A)order   _cudapy_cmem	addrspaceinternalT   r   genericN)datashapestridesitemsizeparentmeminfo)$r   itertobytesget_constantr	   byter   	ArrayTyper   lenr   r   ADDRSPACE_CONSTANTr   r   r   linkageglobal_constantr   get_data_typedtypeget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecast
make_arrayr  intpr  populate_arraybitcastr  r  r  	_getvalue)r'   r   arytyarrlmodr  	constvals
constarytyconstaryr  r   lldtyper*  ptrtygenptraryskshapekstridess                      r(   make_constant_array%CUDATargetContext.make_constant_array  s    ~~ #++C+01
1 ejj!,1 	 
 \\"**Q-Y@
;;z5++	((}}n3<>
!! $$U[[1##G,..00 rzz!}-&&r)< ooe$T3<?IIFIq$##EJJ2IF>AkkJkD%%ejj!4kJCoofhhmm&L"($,%(\\#**$(	 	 	* }}C
2 GJs   *I
	*I*Ic                    [         R                  " UR                  S5      S-   5      nSR                  S[        R
                  " U5      /5      nUR                  R                  U5      nUcB  [         R                  " XR                  U[        R                  S9nSUl        SUl        X5l        UR                  R                  R                   nUR#                  UR%                  [        R                  5      5      $ )r  zutf-8    $__conststring__r  r  T)r   make_bytearrayencodejoinr   mangle_identifierglobalsr9   r   r   r   r#  r$  r%  r   r   elementr0  
as_pointer)r'   modstringtextr_   r   chartys          r(   insert_const_string%CUDATargetContext.insert_const_string@  s    
 %%fmmG&<w&FGxx*(::6BD E [[__T":,,S))T7;7N7NPB#BJ!%B!N ((zz&++D,C,CDEEr+   c                     UR                   nU R                  X25      n[        R                  " [        R                  " S5      5      nUR                  XES5      $ )z
Insert a constant string in the constant addresspace and return a
generic i8 pointer to the data.

This function attempts to deduplicate.
r  r  )r   rP  r   r+  r   r,  )r'   r   rM  r4  r   	charptrtys         r(   insert_string_const_addrspace/CUDATargetContext.insert_string_const_addrspaceV  sF     ~~%%d3NN2::a=1	$$RI>>r+   c                     g)zRun O1 function passes
        NrA   r'   r   s     r(   optimize_function#CUDATargetContext.optimize_functionb  s     	r+   c                 .    [         R                  " U5      $ rM   )r   get_ufunc_info)r'   	ufunc_keys     r(   r[   CUDATargetContext.get_ufunc_infoo  s    $$Y//r+   )r]   rd   rQ   r   rM   )rB   rC   rD   rE   implement_powi_as_math_callstrict_alignmentrN   propertyrV   rZ   r`   re   r)   r   r   r   r   r   r   r   r   r?  rP  rT  rX  r[  rF   rG   rH   s   @r(   rJ   rJ   C   s    "&
 # #  
A!:0& ! !
 " " " " 35$ / +/" HZ1x)VF,
?0 0r+   rJ   c                       \ rS rSrSrg)r   is  rA   N)rB   rC   rD   rE   rF   rA   r+   r(   r   r   s  s    r+   r   c                   Z    \ rS rSrSrS rS r  SS jrS rS r	SS	 jr
S
 rS rS rSrg)CUDACABICallConviw  z
Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
function signature is:

    <Python return type> (<Python arguments>)

Exceptions are unsupported in this convention.
c                     g rM   rA   )r'   r   s     r(   _make_call_helper"CUDACABICallConv._make_call_helper  s     r+   c                 $    UR                  U5      $ rM   )ret)r'   r   retvals      r(   return_valueCUDACABICallConv.return_value  s    {{6""r+   Nc                     Sn[        U5      e)Nz7Python exceptions are unsupported in the CUDA C/C++ ABINotImplementedError)r'   r   excexc_argsloc	func_namemsgs          r(   return_user_exc CUDACABICallConv.return_user_exc  s    G!#&&r+   c                     Sn[        U5      e)Nz2Return status is unsupported in the CUDA C/C++ ABIrm  )r'   r   r  rs  s       r(   return_status_propagate(CUDACABICallConv.return_status_propagate  s    B!#&&r+   c                     U R                  U5      n[        UR                  5      n[        R                  " U R                  U5      U5      nU$ )z=
Get the LLVM IR Function type for *restype* and *argtypes*.
)_get_arg_packerr   r   r   r   r   )r'   restyper   r   r   s        r(   get_function_type"CUDACABICallConv.get_function_type  sD     &&x0../t33G<hGr+   c                     U(       a   eU R                  U5      nUR                  U R                  U5      U Vs/ s H  nSU-   PM
     sn5        gs  snf )z1
Set names and attributes of function arguments.
zarg.N)rz  assign_namesget_arguments)r'   fnr   fe_argtypesnoaliasr   as          r(   decorate_function"CUDACABICallConv.decorate_function  sP     {&&{3T//3267$Qfqj$7	97s   A
c                     UR                   $ )z0
Get the Python-level arguments of LLVM *func*.
)r   rW  s     r(   r  CUDACABICallConv.get_arguments  s     yyr+   c                     U R                  U5      nUR                  X5      nUR                  X'5      nSn	U R                  R	                  XU5      n
X4$ )z#
Call the Numba-compiled *callee*.
N)rz  as_argumentscallcontextget_returned_value)r'   r   calleerestyr   r   r   realargsr   r  outs              r(   r   CUDACABICallConv.call_function  sW     &&v.''6||F- ll--gdC{r+   c                 P    U R                   R                  U   R                  5       $ rM   )r  rQ   r   )r'   tys     r(   r    CUDACABICallConv.get_return_type  s     ||..r2BBDDr+   rA   )NNN)F)rB   rC   rD   rE   __doc__re  rj  rt  rw  r|  r  r  r   r   rF   rA   r+   r(   rc  rc  w  s?    
# @D"&'
'9Er+   rc  ))re	functoolsr   llvmlite.bindingbindingr   llvmliter   
numba.corer   r   r   r   r	   r
   r   numba.core.dispatcherr   numba.core.baser   numba.core.callconvr   r   r#   r   r   cudadrvr   
numba.cudar   r   r   numba.cuda.modelsr   r   compileIVALID_CHARSrJ   r   rc  rA   r+   r(   <module>r     s    	 %  ' ' ' , ' = '    1 1 /$F** $FT jjrtt,m0 m0`		? 	AE| AEr+   