
    shD                     z   S r SSKrSSKrSSKrSSKJrJrJr  SSK	J
r
  SSKJr  \R                  r\R                  r\R                   r\S)S j5       rS*S	 jrS
 rS r\S+S j5       r\\R,                  SSS4S j5       r\\R,                  SSSS4S j5       r\\R,                  SS4S j5       r\\R,                  SSSSS4S j5       r\R6                  \S,S j5       5       rS rS rS rS-S jr S.S jr!S r"\S 5       r#\S 5       r$\S 5       r%\S 5       r&\S 5       r'\\R6                  S 5       5       r(\\R6                  S  5       5       r)S*S! jr*\RV                  r+S" r,S# r-S$ r.S% r/S/S& jr0S' r1\R6                  S( 5       r2\" \Rf                  5      r3\" \Rh                  5      r4\" \Rj                  5      r5g)0z%
API that are reported to numba.cuda
    N   )devicearraydevicesdriver)config)prepare_shape_strides_dtypeTc                 l   U R                  S5      nSU::  a  U R                  S5      nUb  [        S5      eU S   nU R                  S5      n[        R                  " U S   5      n[	        XVUS	S
9u  pVn[
        R                  " XVUR                  5      n[
        R                  " U S   S   5      n	[
        R                  " [        5       XUS9n
U R                  SS5      nUb8  [        U5      nU(       a%  [        R                  (       a  UR                  5         OSn[        R                   " XVXzUS9nU$ )a	  Create a DeviceNDArray from a cuda-array-interface description.
The ``owner`` is the owner of the underlying memory.
The resulting DeviceNDArray will acquire a reference from it.

If ``sync`` is ``True``, then the imported stream (if present) will be
synchronized.
versionr   maskNzMasked arrays are not supportedshapestridestypestrC)orderdatar   )sizeownerstream)r   r   dtypegpu_datar   )getNotImplementedErrornpr   r   r   memory_size_from_infoitemsizeget_devptr_for_active_ctxMemoryPointercurrent_contextexternal_streamr   CUDA_ARRAY_INTERFACE_SYNCsynchronizer   DeviceNDArray)descr   syncr
   r   r   r   r   r   devptrr   
stream_ptrr   das                 b/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/api.pyfrom_cuda_array_interfacer)      s    hhy!GG|xx%&GHHMEhhy!GHHT)_%E7S*EE''GD--d6l1o>F6E;D(D)J ,F44 		"	").*0
2B I    c                 `    [        U 5      (       d  [        S5      e[        U R                  XS9$ )aY  Create a DeviceNDArray from any object that implements
the :ref:`cuda array interface <cuda-array-interface>`.

A view of the underlying GPU buffer is created.  No copying of the data
is done.  The resulting DeviceNDArray will acquire a reference from `obj`.

If ``sync`` is ``True``, then the imported stream (if present) will be
synchronized.
z1*obj* doesn't implement the cuda array interface.)r   r$   )is_cuda_array	TypeErrorr)   __cuda_array_interface__)objr$   s     r(   as_cuda_arrayr0   ?   s3     KLL()E)E/2? 	?r*   c                     [        U S5      $ )zyTest if the object has defined the `__cuda_array_interface__` attribute.

Does not verify the validity of the interface.
r.   )hasattr)r/   s    r(   r,   r,   P   s    
 3233r*   c                      g)zoWhether 16-bit floats are supported.

float16 is always supported in current versions of Numba - returns True.
T r4   r*   r(   is_float16_supportedr5   X   s    
 r*   c                 l    Uc  [         R                  " XUSS9u  p4U$ U(       a  UR                  XS9  U$ )a  to_device(obj, stream=0, copy=True, to=None)

Allocate and transfer a numpy ndarray or structured scalar to the device.

To copy host->device a numpy array::

    ary = np.arange(10)
    d_ary = cuda.to_device(ary)

To enqueue the transfer to a stream::

    stream = cuda.stream()
    d_ary = cuda.to_device(ary, stream=stream)

The resulting ``d_ary`` is a ``DeviceNDArray``.

To copy device->host::

    hary = d_ary.copy_to_host()

To copy device->host to an existing array::

    ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
    d_ary.copy_to_host(ary)

To enqueue the transfer to a stream::

    hary = d_ary.copy_to_host(stream=stream)
T)r   copyuser_explicitr   )r   auto_devicecopy_to_device)r/   r   r7   tonews        r(   	to_devicer>   `   s@    > 
z))#48<>	
#-Ir*   r   c                 N    [        XUU5      u  pn[        R                  " XUUS9$ )zdevice_array(shape, dtype=np.float64, strides=None, order='C', stream=0)

Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
)r   r   r   r   )r   r   r"   r   r   r   r   r   s        r(   device_arrayrA      s4     88=?EE$$5,24 4r*   c                 :   [        XUU5      u  pn[        R                  " XUR                  5      n[	        5       R                  UUS9n[        R                  " XXUS9n[        R                  R                  U[        R                  S9n	U	R                  XtS9  U	$ )a  managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                 attach_global=True)

Allocate a np.ndarray with a buffer that is managed.
Similar to np.empty().

Managed memory is supported on Linux / x86 and PowerPC, and is considered
experimental on Windows and Linux / AArch64.

:param attach_global: A flag indicating whether to attach globally. Global
                      attachment implies that the memory is accessible from
                      any stream on any device. If ``False``, attachment is
                      *host*, and memory is only accessible by devices
                      with Compute Capability 6.0 and later.
)attach_globalr   r   r   r   buffertyper9   )r   r   r   r   r   memallocmanagedr   ndarrayviewr   ManagedNDArraydevice_setup)
r   r   r   r   r   rC   bytesizerE   nparymanagedviews
             r(   managed_arrayrP      s    $ 88=?EE++EENNKH..x=J / LFJJU5$&E**//%k.H.H/IKV3r*   c                     [        XUU5      u  pn[        R                  " XUR                  5      n[	        5       R                  U5      n[        R                  " XXUS9$ )zpinned_array(shape, dtype=np.float64, strides=None, order='C')

Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
(pagelocked).  Similar to :func:`np.empty() <numpy.empty>`.
rD   )r   r   r   r   r   memhostallocr   rI   )r   r   r   r   rM   rE   s         r(   pinned_arrayrS      s^     88=?EE++E,1NN<H++H5F::E%#% %r*   Fc                 :   [        XUU5      u  pn[        R                  " XUR                  5      n[	        5       R                  USS9n[        R                  " XXUS9n	[        R                  R                  U	[        R                  S9n
U
R                  XS9  U
$ )a  mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                portable=False, wc=False)

Allocate a mapped ndarray with a buffer that is pinned and mapped on
to the device. Similar to np.empty()

:param portable: a boolean flag to allow the allocated device memory to be
          usable in multiple devices.
:param wc: a boolean flag to enable writecombined allocation which is faster
    to write by the host and to read by the device, but slower to
    write by the host and slower to write by the device.
TmappedrD   rF   r9   )r   r   r   r   r   rR   r   rI   rJ   r   MappedNDArrayrL   )r   r   r   r   r   portablewcrM   rE   rN   
mappedviews              r(   mapped_arrayr[      s     88=?EE++EENNKH++HT+BFJJU5$&E[-F-FGJF2r*   c              #     #    [         R                  " U5      n[         R                  " U5      UR                  -  n[        R
                  (       a%  [        R                  R                  5       nXl        O[        R                  R                  " U 6 n[        R                  " SXeUS9nUR                  [        5       UX2S9v   UR                  5         g7f)a  
A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is
represented as a sequence of bytes (e.g. *bytes*, tuple of int)
and represent it as an array of the given *shape*, *strides* and *dtype*.
The *strides* can be omitted.  In that case, it is assumed to be a 1D
C contiguous array.

Yields a device array.

The IPC handle is closed automatically when context manager exits.
N)offset)r   r   r   )r   r   prodr   r   USE_NV_BINDINGbindingCUipcMemHandlereserveddrvapicu_ipc_mem_handle	IpcHandle
open_arrayr   close)handler   r   r   r]   r   driver_handle	ipchandles           r(   open_ipc_arrayrk      s      HHUOE775>ENN*D557!'77@  }6JI


0'.  = =OOs   CCc                  2    [        5       R                  5       $ )z Synchronize the current context.)r   r!   r4   r*   r(   r!   r!      s    ((**r*   c                    U R                   S   (       d$  U R                   S   (       d  U R                  S::  a  g[        U R                  5       Vs/ s H  oPM     nnUR	                  S S9  S/[        U R                  5      -  nU R                  R                  nU H  u  pVXCU'   X@R                  U   -  nM     [        U5      $ s  snf )zO
Given an array, compute strides for a new contiguous array of the same
shape.
C_CONTIGUOUSF_CONTIGUOUSr   Nc                     U S   $ )Nr   r4   )xs    r(   <lambda>0_contiguous_strides_like_array.<locals>.<lambda>  s    !A$r*   )keyr   )
flagsndim	enumerater   sortlenr   r   r   tuple)aryrq   
stridepermr   stridei_perm_s          r(   _contiguous_strides_like_arrayr      s     yy CIIn$=Q (46414J6OOO' cC$$GYYF	 ))F##   > 7s   Cc                 V    U R                   S   (       a  U R                   S   (       d  gg)Nro   rn   Fr   )ru   )r{   s    r(   _order_like_arrayr     s    
yy >)Br*   c                 n    [        U 5      n[        U 5      n[        U R                  U R                  UX1S9$ )zX
Call :func:`device_array() <numba.cuda.device_array>` with information from
the array.
r@   )r   r   rA   r   r   )r{   r   r   r   s       r(   device_array_liker   #  s6    
 -S1Gc"Eciisyy'#4 4r*   c           
      p    [        U 5      n[        U 5      n[        U R                  U R                  UXQX#S9$ )z\
Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
from the array.
)r   r   r   r   r   rX   rY   )r   r   r[   r   r   )r{   r   rX   rY   r   r   s         r(   mapped_array_liker   .  s:    
 -S1Gc"Eciisyy'#XN Nr*   c                 n    [        U 5      n[        U 5      n[        U R                  U R                  UUS9$ )z\
Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
from the array.
)r   r   r   r   )r   r   rS   r   r   )r{   r   r   s      r(   pinned_array_liker   9  s6    
 -S1Gc"Eciisyy'#% %r*   c                  2    [        5       R                  5       $ )zF
Create a CUDA stream that represents a command queue for the device.
)r   create_streamr4   r*   r(   r   r   E  s    
 **,,r*   c                  2    [        5       R                  5       $ )af  
Get the default CUDA stream. CUDA semantics in general are that the default
stream is either the legacy default stream or the per-thread default stream
depending on which CUDA APIs are in use. In Numba, the APIs for the legacy
default stream are always the ones in use, but an option to use APIs for
the per-thread default stream may be provided in future.
)r   get_default_streamr4   r*   r(   default_streamr   M  s     //11r*   c                  2    [        5       R                  5       $ )z%
Get the legacy default CUDA stream.
)r   get_legacy_default_streamr4   r*   r(   legacy_default_streamr   Y  s    
 6688r*   c                  2    [        5       R                  5       $ )z)
Get the per-thread default CUDA stream.
)r   get_per_thread_default_streamr4   r*   r(   per_thread_default_streamr   a  s    
 ::<<r*   c                 4    [        5       R                  U 5      $ )zCreate a Numba stream object for a stream allocated outside Numba.

:param ptr: Pointer to the external stream to wrap in a Numba Stream
:type ptr: int
)r   create_external_stream)ptrs    r(   r   r   i  s     33C88r*   c               '      #    / nU  HV  n[        5       R                  U[        R                  " U5      [        R                  " U5      SS9nUR                  U5        MX     Sv   g7f)zIA context manager for temporary pinning a sequence of host ndarrays.
    FrU   N)r   mempinr   host_pointerhost_memory_sizeappend)arylistpmlistr{   pms       r(   pinnedr   t  sa     
 F%%c6+>+>s+C&,&=&=c&B-2 & 4 	b	 
 
s   A$A&c               /   
  #    U(       a  SU;   d   S5       eUR                  SS5      n/ n/ nU  H|  n[        5       R                  U[        R                  " U5      [        R
                  " U5      SS9nUR                  U5        [        R                  " XVUS9nUR                  U5        M~      [        U5      S:X  a  US   v   OUv   U H  nUR                  5         M     g! U H  nUR                  5         M     f = f7f)	zKA context manager for temporarily mapping a sequence of host ndarrays.
    r   z Only accept 'stream' as keyword.r   TrU   )r   r   r   N)r   r   r   r   r   r   r   r   from_array_likery   free)r   kwsr   r   
devarylistr{   r   devarys           r(   rV   rV     s     
 h#oI'II%WWXq!FFJ%%c6+>+>s+C&,&=&=c&B-1 & 3 	b,,SfM&! 
z?aQ-
 BGGI &BGGI s   B-D0C% D%D  Dc                 4    [        5       R                  U S9nU$ )zg
Create a CUDA event. Timing data is only recorded by the event if it is
created with ``timing=True``.
)timing)r   create_event)r   evts     r(   eventr     s    
 

(
(
(
7CJr*   c                 F    [         R                  " U 5      nUR                  $ )z
Make the context associated with device *device_id* the current context.

Returns a Device instance.

Raises exception on error.
)r   get_contextdevice)	device_idcontexts     r(   select_devicer     s     !!),G>>r*   c                  *    [        5       R                  $ )z5Get current device associated with the current thread)r   r   r4   r*   r(   get_current_devicer     s    ###r*   c                  "    [         R                  $ )z%Return a list of all detected devices)r   gpusr4   r*   r(   list_devicesr     s    <<r*   c                  .    [         R                  " 5         g)z{
Explicitly clears all contexts in the current thread, and destroys all
contexts if the current thread is the main thread.
N)r   resetr4   r*   r(   rg   rg     s    
 MMOr*   c                 ,    [         R                  " XUS9$ )N)r   r7   )r   r:   )r{   r   r7   s      r(   _auto_devicer     s    ""3DAAr*   c                     [        5       n [        S[        U 5      -  5        SnU  GH  n/ nUR                  nUR                  nUR
                  nUR                  nUSSU-  4/-  nUSUR                  4/-  nUSUR                  4/-  nUSUR                  4/-  nUSU(       a  S	OS
4/-  n[        R                  S:X  a  USU(       a  SOS4/-  nUSU4/-  nUS:  a  SnOUS:  a  SnUS-  nOSnUS-  n[        SUR                  UR                  U4-  5        U H  u  p[        U	<S SU
< 35        M     GM     [        S5        [        SU[        U 5      4-  5        US:  $ )z
Detect supported CUDA hardware and print a summary of the detected hardware.

Returns a boolean indicating whether any supported devices were detected.
zFound %d CUDA devicesr   zCompute Capabilityz%d.%dzPCI Device IDz
PCI Bus IDUUIDWatchdogEnabledDisabledntzCompute ModeTCCWDDMzFP32/FP64 Performance Ratio)      z[NOT SUPPORTED: CC < 3.5])r   r   z[SUPPORTED (DEPRECATED)]r   z[SUPPORTED]zid %d    %20s %40sz>40z: zSummary:z	%d/%d devices are supported)r   printry   compute_capabilityKERNEL_EXEC_TIMEOUT
TCC_DRIVER%SINGLE_TO_DOUBLE_PRECISION_PERF_RATIOPCI_DEVICE_ID
PCI_BUS_IDuuidosnameid)devlistsupported_countdevattrscckernel_timeouttccfp32_to_fp64_ratiosupportrt   vals              r(   detectr     s    nG	
!CL
01O##00nn FF'2677?C$5$5677<0116388$%%:Ny
KLL77d?~u@AAE02DEFF;1G&[0Gq O#Gq O"cffchh%@@AHCS)* 1 6 
*	
)_c'l,K
KLQr*   c               #   x   #    [        5       R                  5          Sv   SSS5        g! , (       d  f       g= f7f)a2  
Temporarily disable memory deallocation.
Use this to prevent resource deallocation breaking asynchronous execution.

For example::

    with defer_cleanup():
        # all cleanup is deferred in here
        do_speed_critical_code()
    # cleanup can occur here

Note: this context manager can be nested.
N)r   defer_cleanupr4   r*   r(   r   r     s$      
		(	(	* 
+	*	*s   :)	:
7:)NT)T)r   TN)Nr   )r   )r   FF)r   T)6__doc__
contextlibr   numpyr   cudadrvr   r   r   
numba.corer   numba.cuda.api_utilr   require_contextr   r   r   r)   r0   r,   r5   r>   float64rA   rP   rS   r[   contextmanagerrk   r!   r   r   r   r   r   r   r   r   r   r   r   rV   r   event_elapsed_timer   r   r   rg   r   r   r   	profilingprofile_startprofile_stopr4   r*   r(   <module>r      sm  
  	  1 1  ; ))%%|| % %P?"4 $ $N  jj$c! 4 4 !zz4s1 $ :  jj$c % %  jj$c!E 2   8+
:4N% - - 2 2 9 9 = = 9 9 	
  	
   6 .. 
	$

B&R  $ F,,-	 4 45v223r*   