
    sh>                        S SK Jr  S SKJr  S SKJrJrJrJrJ	r	J
r
  S SKJrJrJrJrJrJr  S SKJr  S SKJrJrJr  S SKJr  S SKJrJrJr  S S	KJr  S S
K J!r!  S SK"J#r#  S r$ " S S\5      r% " S S\5      r&S r'\" SSS9 " S S\5      5       r(\" SSS9 " S S\5      5       r) " S S\5      r*\   S&S j5       r+S r,\   S'S j5       r-   S(S jr.  S)S  jr/   S*S! jr0S" r1S# r2 " S$ S%\35      r4g)+    )ir)ConcreteTemplate)typestypingfuncdescconfigcompilersigutils)sanitize_compile_result_entriesCompilerBaseDefaultPassBuilderFlagsOptionCompileResult)global_compiler_lock)LoweringPassPassManagerregister_pass)NumbaInvalidConfigWarning)IRLegalizationNativeLoweringAnnotateTypes)warn)get_current_device)CUDACABICallConvc                 <    U c  g [        U [        5      (       d   eU $ N)
isinstancedict)xs    g/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/compiler.py_nvvm_options_typer"      s$    y !T""""    c                   4    \ rS rSr\" \SSS9r\" \SSS9rSr	g)	CUDAFlags   NzNVVM options)typedefaultdoczCompute Capability )
__name__
__module____qualname____firstlineno__r   r"   nvvm_optionstuplecompute_capability__static_attributes__r*   r#   r!   r%   r%      s+    L
   r#   r%   c                   $    \ rS rSr\S 5       rSrg)CUDACompileResult8   c                     [        U 5      $ r   )idselfs    r!   entry_pointCUDACompileResult.entry_point9   s    $xr#   r*   N)r+   r,   r-   r.   propertyr:   r2   r*   r#   r!   r4   r4   8   s     r#   r4   c                  .    [        U 5      n [        S0 U D6$ )Nr*   )r   r4   )entriess    r!   cuda_compile_resultr?   >   s    -g6G'w''r#   TF)mutates_CFGanalysis_onlyc                   $    \ rS rSrSrS rS rSrg)CUDABackendC   cuda_backendc                 0    [         R                  " U 5        g r   r   __init__r8   s    r!   rH   CUDABackend.__init__H       d#r#   c                 .   US   n[         R                  " UR                  /UR                  Q76 n[	        UR
                  UR                  UR                  R                  UR                  UR                  UR                  UUR                  S9Ul        g)z8
Back-end: Packages lowering output in a compile result
cr)typing_contexttarget_contexttyping_errortype_annotationlibrarycall_helper	signaturefndescT)r   rS   return_typeargsr?   	typingctx	targetctxstatusfail_reasonrP   rQ   rR   rT   rL   )r9   stateloweredrS   s       r!   run_passCUDABackend.run_passK   s{     +$$U%6%6DD	& ?? ??11!11MM++>>	
 r#   r*   N)r+   r,   r-   r.   _namerH   r]   r2   r*   r#   r!   rC   rC   C   s     E$r#   rC   c                   (    \ rS rSrSrSrS rS rSrg)CreateLibrary_   z
Create a CUDACodeLibrary for the NativeLowering pass to populate. The
NativeLowering pass will create a code library if none exists, but we need
to set it up with nvvm_options from the flags if they are present.
create_libraryc                 0    [         R                  " U 5        g r   rG   r8   s    r!   rH   CreateLibrary.__init__i   rJ   r#   c                     UR                   R                  5       nUR                  R                  nUR                  R
                  nUR                  X4S9Ul        UR                  R                  5         g)N)r/   T)	rX   codegenfunc_idfunc_qualnameflagsr/   rc   rQ   enable_object_caching)r9   r[   rg   namer/   s        r!   r]   CreateLibrary.run_passl   sZ    //))+}}**{{//..t.O++-r#   r*   N)	r+   r,   r-   r.   __doc__r_   rH   r]   r2   r*   r#   r!   ra   ra   _   s     E$r#   ra   c                        \ rS rSrS rS rSrg)CUDACompilerw   c                    [         n[        S5      nUR                  U R                  5      nUR                  R                  UR                  5        UR                  U R                  5      nUR                  R                  UR                  5        U R                  U R                  5      nUR                  R                  UR                  5        UR                  5         U/$ )Ncuda)	r   r   define_untyped_pipeliner[   passesextenddefine_typed_pipelinedefine_cuda_lowering_pipelinefinalize)r9   dpbpmuntyped_passestyped_passeslowering_passess         r!   define_pipelinesCUDACompiler.define_pipelinesx   s      44TZZ@
		../00<
		,,-<<TZZH
		//0
tr#   c                    [        S5      nUR                  [        S5        UR                  [        S5        UR                  [        S5        UR                  [
        S5        UR                  [        S5        UR                  5         U$ )Ncuda_loweringz$ensure IR is legal prior to loweringzannotate typeszcreate libraryznative loweringzcuda backend)r   add_passr   r   ra   r   rC   ry   )r9   r[   r{   s      r!   rx   *CUDACompiler.define_cuda_lowering_pipeline   sh    )
N:	<
M#34 	M#34
N$56
K0
	r#   r*   N)r+   r,   r-   r.   r   rx   r2   r*   r#   r!   rp   rp   w   s     r#   rp   Nc	                 2   Uc  [        S5      eSSKJn	  U	R                  n
U	R                  n[        5       nSUl        SUl        SUl        U(       d  U(       a  SUl	        U(       a  SUl
        U(       a  SUl        OSUl        U(       a  SUl        U(       a  SUl        U(       a  X|l        Xl        SSKJn  U" S	5         [$        R&                  " U
UU UUU0 [(        S
9nS S S 5        WR*                  nUR-                  5         U$ ! , (       d  f       N,= f)Nz#Compute Capability must be supplied   cuda_targetTpythonnumpyr   )target_overriders   )rW   rX   funcrV   rU   rj   localspipeline_class)
ValueError
descriptorr   rM   rN   r%   
no_compileno_cpython_wrapperno_cfunc_wrapper	debuginfodbg_directives_onlyerror_modelforceinlinefastmathr/   r1   numba.core.target_extensionr   r	   compile_extrarp   rQ   ry   )pyfuncrU   rV   debuglineinfoinliner   r/   ccr   rW   rX   rj   r   cresrQ   s                   r!   compile_cudar      s     
z>??'**I**IKEE#E!E $(!$# )! <		 %%	09+1+/2=,1-/5AC 
! llGK 
!	 s    D
Dc                    UR                   R                  UR                   S3UUS9nUR                  U5        UR                  nUR
                  n[        U 5      nUR                  Xv5      n	U R                  R                  UR
                  U5      n
U R                  S5      n[        R                  " XUR                  5      n[        R                  " XU5      n[        R                  " UR                  S5      5      nU R                  U5      nUR!                  XR"                  5      nU R                  R%                  XXvU5      u  nnUR'                  U5        UR)                  U5        UR+                  5         U$ )z
Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level.

The C ABI wrapper will have the same name as the source Python function.

_function_)
entry_namer/   zcuda.cabi.wrapper )rg   rc   rl   add_linking_libraryargtypesrestyper   get_function_type	call_convcreate_moduler   Functionllvm_func_name	IRBuilderappend_basic_blockget_arg_packerfrom_argumentsrV   call_functionretadd_ir_modulery   )contextlibrT   wrapper_function_namer/   rQ   r   r   c_call_convwrapfntyfntywrapper_moduler   wrapfnbuilderarginfocallargs_return_values                      r!   cabi_wrap_functionr      sR    kk((CHH:Z)@4I6B ) DG $ HnnG"7+K,,W?H..v~~xHD **+>?N;;~V-B-BCD
 [[3HIFll644R89G$$X.G%%g{{;H ''55w(4OA|KK.)Nr#   c                    US;  a  [        SU 35      eUS:X  a  U(       d  [        S5      eU
S;  a  [        SU
 35      eU(       a  U(       a  Sn[        [        U5      5        U
S:H  nU	=(       d
    [        5       n	UU(       a  S	OS
S.nU(       a  SUS'   [        R
                  " U5      u  pU=(       d    [        R                  n[        XXX5XS9nUR                  R                  nU(       a&  U(       d  U[        R                  :w  a  [        S5      eUR                  nU(       aH  UR                  nUS:X  a5  U	R!                  SU R"                  5      n[%        UUUR&                  UU5      nOQU R(                  nUR*                  nUR,                  nUR/                  UR                  UR&                  UX=UU5      u  nnU(       a  UR1                  US9nUU4$ UR3                  US9nUU4$ )a#  Compile a Python function to PTX or LTO-IR for a given set of argument
types.

:param pyfunc: The Python function to compile.
:param sig: The signature representing the function's input and output
            types. If this is a tuple of argument types without a return
            type, the inferred return type is returned by this function. If
            a signature including a return type is passed, the compiled code
            will include a cast from the inferred return type to the
            specified return type, and this function will return the
            specified return type.
:param debug: Whether to include debug info in the compiled code.
:type debug: bool
:param lineinfo: Whether to include a line mapping from the compiled code
                 to the source code. Usually this is used with optimized
                 code (since debug mode would automatically include this),
                 so we want debug info in the LLVM IR but only the line
                 mapping in the final output.
:type lineinfo: bool
:param device: Whether to compile a device function.
:type device: bool
:param fastmath: Whether to enable fast math flags (ftz=1, prec_sqrt=0,
                 prec_div=, and fma=1)
:type fastmath: bool
:param cc: Compute capability to compile for, as a tuple
           ``(MAJOR, MINOR)``. Defaults to ``(5, 0)``.
:type cc: tuple
:param opt: Enable optimizations. Defaults to ``True``.
:type opt: bool
:param abi: The ABI for a compiled function - either ``"numba"`` or
            ``"c"``. Note that the Numba ABI is not considered stable.
            The C ABI is only supported for device functions at present.
:type abi: str
:param abi_info: A dict of ABI-specific options. The ``"c"`` ABI supports
                 one option, ``"abi_name"``, for providing the wrapper
                 function's name. The ``"numba"`` ABI has no options.
:type abi_info: dict
:param output: Type of output to generate, either ``"ptx"`` or ``"ltoir"``.
:type output: str
:return: (code, resty): The compiled code and inferred return type
:rtype: tuple
)numbaczUnsupported ABI: r   z&The C ABI is not supported for kernels)ptxltoirzUnsupported output type: z{debug=True with opt=True (the default) is not supported by CUDA. This may result in a crash - set debug=False or opt=False.r      r   )r   optNzgen-lto)r   r   r   r/   r   z'CUDA kernel must have void return type.abi_name)r   )NotImplementedErrorr   r   r   r
   normalize_signaturer   CUDA_DEFAULT_PTX_CCr   rS   rU   r   void	TypeErrorrN   rQ   getr+   r   rT   __code__co_filenameco_firstlinenoprepare_cuda_kernel	get_ltoirget_asm_str)r   sigr   r   devicer   r   r   abiabi_infooutputmsgltor/   rV   rU   r   restytgtr   wrapper_namecodefilenamelinenumkernels                            r!   compiler      s   \ . !$5cU";<<
cz&!"JKK%%!$=fX"FGG2 	&s+,WC!46H qQL
 "&Y 44S9D		)v))BT!)%1:D NN&&EV 3ABB


Cll#:#<<
FOODL$S#t{{L%13C ##%%--dllDKK.6h.57V }}}# ; "%;r#   c
                 F    [        5       R                  n
[        XX#UXZXgXS9$ )zCompile a Python function to PTX or LTO-IR for a given signature for the
current device's compute capabilility. This calls :func:`compile` with an
appropriate ``cc`` value for the current device.	r   r   r   r   r   r   r   r   r   )r   r1   r   )r   r   r   r   r   r   r   r   r   r   r   s              r!   compile_for_current_devicer   j  s-     
		0	0B6ev$$5 5r#   c
                      [        XX#UXVXxU	SS9$ )zCompile a Python function to PTX for a given signature. See
:func:`compile`. The defaults for this function are to compile a kernel
with the Numba ABI, rather than :func:`compile`'s default of compiling a
device function with the C ABI.r   r   )r   )
r   r   r   r   r   r   r   r   r   r   s
             r!   compile_ptxr   v  s!     6ev$$U4 4r#   c	                 D    [        5       R                  n	[        XX#XEXXxS9
$ )z~Compile a Python function to PTX for a given signature for the current
device's compute capabilility. See :func:`compile_ptx`.)r   r   r   r   r   r   r   r   )r   r1   r   )
r   r   r   r   r   r   r   r   r   r   s
             r!   compile_ptx_for_current_devicer     s+    
 
		0	0Bv%$B3 3r#   c                 .    [        XU5      R                  $ r   ) declare_device_function_templatekeyrl   r   r   s      r!   declare_device_functionr     s    +D8DHHHr#   c                 $  ^^	 SSK Jn  UR                  nUR                  n[        R
                  " U/UQ76 m	[        U T	5      m " UU	4S jS[        5      n[        R                  " XUS9nUR                  TU5        UR                  TU5        U$ )Nr   r   c                   $   > \ rS rSr Y r Y/rSrg)Bdeclare_device_function_template.<locals>.device_function_templatei  r*   N)r+   r,   r-   r.   r   casesr2   )extfnr   s   r!   device_function_templater     s    r#   r   r   )r   r   rM   rN   r   rS   ExternFunctionr   r   ExternalFunctionDescriptorinsert_user_function)
rl   r   r   r   rW   rX   r   rT   r   r   s
           @@r!   r   r     s    '**I**I


7
.X
.C4%E #3  00X7F""5*BC""5&1##r#   c                       \ rS rSrS rSrg)r   i  c                     Xl         X l        g r   rl   r   )r9   rl   r   s      r!   rH   ExternFunction.__init__  s    	r#   r   N)r+   r,   r-   r.   rH   r2   r*   r#   r!   r   r     s    r#   r   )FFFFNN)	FFTFNTr   Nr   )FFTFTr   Nr   )FFFFNTr   N)FFFFTr   N)5llvmliter   numba.core.typing.templatesr   
numba.corer   r   r   r   r	   r
   numba.core.compilerr   r   r   r   r   r   numba.core.compiler_lockr   numba.core.compiler_machineryr   r   r   numba.core.errorsr   numba.core.typed_passesr   r   r   warningsr   numba.cuda.apir   numba.cuda.targetr   r"   r%   r4   r?   rC   ra   rp   r   r   r   r   r   r   r   r   objectr   r*   r#   r!   <module>r     sC    8 J J0 0 :G G 74 4  - .
 
: (
 4u5,  66 56L  7.< B BG<@7 7t)X =AAEg gT CH@D>C	5 BGIM4 GLEI9=3I$&V r#   