
    shV                       S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
r
SSKJr  SSKJrJr  SSKJrJrJrJrJrJrJrJrJr  SSKrSSKrSSKrSSKJrJr  SSK J!r!  SSK"J#r#J$r$J%r%  S	S
K&J'r'J(r(  S	SK)J*r*  S	SK)J+r+J,r,J-r-  SSK.J/r/J)r)J0r0J1r1  \%Rd                  r3\3(       a  SSK4J4r5  Sr6Sr7\Rp                  Rs                  S5      r:\Rv                  Rx                  r=\Rv                  R|                  r?\R                  /\=lA        \R                  /\?lA        S rB " S S\C5      rD " S S\C5      rE " S S\(5      rFS rGS rHS rISrJSrKS rLS rMS rNS  rO\N" 5       rP " S! S"\Q5      rR " S# S$\Q5      rS\R" 5       rTS% rU\U" 5       rV " S& S'\Q5      rWS( rX " S) S*\Q\S+9rY " S, S-\Y5      rZ " S. S/5      r[ " S0 S1\[\Z5      r\S	r]Sq^S2 r_S3 r` " S4 S5\a5      rb\b" 5       rb " S6 S7\Q5      rc\" S8S95      rd  " S: S;\Q5      reS< rfS= rgS> rhS? riS@ rjSA rkSB rlSC rmSD rn " SE SF\Q5      ro " SG SH\Q5      rp " SI SJ\Q5      rq " SK SL\Q5      rr " SM SN\r5      rs " SO SP\s5      rt " SQ SR\!R                  5      rv " SS ST\s5      rw " SU SV\Q5      rx " SW SX\x\!R                  5      ry " SY SZ\x\!R                  5      rz " S[ S\\Q5      r{ " S] S^\Q5      r|S_ r} " S` Sa\S+9r~ " Sb Sc\~5      r " Sd Se\~5      r\" Sf/ SgQ5      r " Sh Si\S+9r " Sj Sk\5      r " Sl Sm\5      r SSn jr\3(       aY  \5GR                  r\GR                  \GR                  \GR                  \GR                  \GR                  \GR                  So.rOK\/GR                  \/GR                  \/GR                  \/GR                  \/GR                  \/GR                  So.r " Sp Sq\S+9rSrr " Ss St\5      r " Su Sv\5      r " Sw Sx\5      rSy rSz rS{ rS| rS} rSS~ jrS rS rS rS rS rS rS rS rSS jrSS jrSS jrSS jrS rS r\GRN                  S 5       rS rg)a  
CUDA driver bridge implementation

NOTE:
The new driver implementation uses a *_PendingDeallocs* that help prevents a
crashing the system (particularly OSX) when the CUDA context is corrupted at
resource deallocation.  The old approach ties resource management directly
into the object destructor; thus, at corruption of the CUDA context,
subsequent deallocation could further corrupt the CUDA context and causes the
system to freeze in some cases.

    N)product)ABCMetaabstractmethod)	c_intbyrefc_size_tc_charc_char_p	addressofc_void_pc_floatc_uint)
namedtupledeque)mviewbuf)utils	serializeconfig   )CudaSupportErrorCudaDriverError)API_PROTOTYPES)cu_occupancy_b2d_sizecu_stream_callback_pyobjcu_uuid)enumsdrvapinvrtc_extrascuda)      linuxc                  l   [         R                  " [        5      n U R                  5       (       Gd  [	        [
        R                  5      R                  5       n[        [         US 5      n[        U[        5      (       d  [         R                  nU R                  U5        [
        R                  (       a\  [         R                  " [        R                  5      nSnUR!                  [         R"                  " US95        U R%                  U5        U $ U R%                  [         R&                  " 5       5        U $ )Nz;== CUDA [%(relativeCreated)d] %(levelname)5s -- %(message)s)fmt)logging	getLogger__name__hasHandlersstrr   CUDA_LOG_LEVELuppergetattr
isinstanceintCRITICALsetLevelStreamHandlersysstderrsetFormatter	Formatter
addHandlerNullHandler)loggerlvlhandlerr&   s       m/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/cudadrv/driver.pymake_loggerr>   ;   s    x(F&''(..0gsD)#s##""C  ++CJJ7GOC  !2!2s!;<g& M g1134M    c                       \ rS rSrSrg)DeadMemoryErrorS    Nr)   
__module____qualname____firstlineno____static_attributes__rC   r?   r=   rA   rA   S       r?   rA   c                       \ rS rSrSrg)LinkerErrorW   rC   NrD   rC   r?   r=   rK   rK   W   rI   r?   rK   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CudaAPIError[   c                 D   > Xl         X l        [        [        U ]  X5        g N)codemsgsuperrN   __init__)selfrR   rS   	__class__s      r=   rU   CudaAPIError.__init__\   s    	lD*45r?   c                 >    SU R                   < SU R                  < 3$ )N[z] rR   rS   rV   s    r=   __str__CudaAPIError.__str__a   s     IItxx00r?   r[   )r)   rE   rF   rG   rU   r]   rH   __classcell__rW   s   @r=   rN   rN   [   s    6
1 1r?   rN   c            	         [         R                  n U S:X  a
  [        5         [        R                  S:X  a  [
        R                  nS/nS/nOC[        R                  S:X  a  [
        R                  nS/nS/nO[
        R                  nSS	/nS
S/nU (       aX   [        R                  R                  U 5      n [        R                  R                  U 5      (       d  [        SU -  5      eU /nX4$ U[        X#5       VVs/ s H$  u  pV[        R                  R                  XV5      PM&     snn-   nX4$ ! [         a    [        SU -  5      ef = fs  snnf )N0win32z\windows\system32z
nvcuda.dlldarwinz/usr/local/cuda/libzlibcuda.dylibz/usr/libz
/usr/lib64z
libcuda.sozlibcuda.so.1z(NUMBA_CUDA_DRIVER %s is not a valid pathzoNUMBA_CUDA_DRIVER %s is not a valid file path.  Note it must be a filepath of the .so/.dll/.dylib or the driver)r   CUDA_DRIVER_raise_driver_not_foundr4   platformctypesWinDLLCDLLospathabspath
ValueErrorisfiler   join)envpathdlloaderdldirdlnames
candidatesxys          r=   locate_driver_and_loaderrx   e   s[     G#~! ||w==&'.		!;;&'"# ;;\*0	&ggoog.G ww~~g&& 9;BC D D Y
  ,3E,C E,CDA !#Q 2,C E E
   	&G$% & &	& Es   D1 =+E1E
c                 j   / n/ nU H  n U " U5      nXT4s  $    [        U5      (       a  [        5         g SR                  S U 5       5      n[        U5        g ! [          aO  nUR                  [        R                  R	                  U5      (       + 5        UR                  U5         S nAM  S nAff = f)N
c              3   8   #    U  H  n[        U5      v   M     g 7frQ   )r+   ).0es     r=   	<genexpr>load_driver.<locals>.<genexpr>   s     =+<a3q66+<s   )	OSErrorappendrk   rl   ro   allrf   rp   _raise_driver_error)rr   ru   path_not_existdriver_load_errorrl   dllr}   errmsgs           r=   load_driverr      s     N	4.C 9  >!=+<==F#  	(!!bggnnT&:":;$$Q''	(s   A
B2#AB--B2c                  8    [        5       u  p[        X5      u  p#U$ rQ   )rx   r   )rr   ru   r   rl   s       r=   find_driverr      s    35HH1ICJr?   z
CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
zM
Possible CUDA driver libraries are found but error occurred during load:
%s
c                       [        [        5      erQ   )r   DRIVER_NOT_FOUND_MSGrC   r?   r=   rf   rf      s    
/
00r?   c                 &    [        [        U -  5      erQ   )r   DRIVER_LOAD_ERROR_MSG)r}   s    r=   r   r      s    
014
55r?   c                      Sn [         R                  " 5       n[        [        5       H/  nUR	                  U 5      (       d  M  [        [        U5      nX!U'   M1     U$ )N
CUDA_ERROR)r   
UniqueDictdirr   
startswithr.   )prefixmapnamerR   s       r=   _build_reverse_error_mapr      sL    F



CE
??6""5$'DI  Jr?   c                  ,    [         R                  " 5       $ rQ   )rk   getpidrC   r?   r=   _getpidr      s    99;r?   c                       \ rS rSrSrSrS rS rS rS r	\
S 5       rS	 rSS
 jrS rS rS rS rS rSS jrS rS rS rS rS rS rSrg)Driver   z(
Driver API functions are lazily bound.
Nc                 ^    U R                   nUb  U$ [        R                  U 5      nXl         U$ rQ   )
_singletonobject__new__)clsobjs     r=   r   Driver.__new__   s-    nn?J..%C N
r?   c                 $   [         R                  " 5       U l        SU l        S U l        S U l         [        R                  (       a  Sn[        U5      e[        5       U l
        g ! [         a"  nSU l        UR                  U l         S nAg S nAff = f)NFzzCUDA is disabled due to setting NUMBA_DISABLE_CUDA=1 in the environment, or because CUDA is unsupported on 32-bit systems.T)r   r   devicesis_initializedinitialization_errorpidr   DISABLE_CUDAr   r   librS   )rV   rS   r}   s      r=   rU   Driver.__init__   s{    '')#$(!		."") 's++"}DH 	."&D()D%%	.s   1A# #
B-B

Bc                 Z   U R                   (       a  g [        5       qSU l          [        R                  S5        U R	                  S5        [        5       U l        U R                  5         g ! [         a5  nUR                   SUR                   S3nX l
        [        SU 35      eS nAff = f)NTinitr   z ()zError at driver init: )r   r>   _loggerinfocuInitr   r   rN   rS   rR   r   r   _initialize_extras)rV   r}   descriptions      r=   ensure_initializedDriver.ensure_initialized   s     -"	!LL KKN yDH!  	KUUG2affXQ/K(3%"%;K=#IJJ	Ks   &A+ +
B*50B%%B*c                    [         (       a  g [        R                  " S [        5      nU" [        R
                  5      nU" U R                  S5      5        [        R                  " [        [        R                  " [        R                  5      [        R                  " [        R                  5      [        R                  5      nU" [        R                  5      nSUl        U R                  SU5      nXPl        g )NcuIpcOpenMemHandlecall_cuIpcOpenMemHandle)USE_NV_BINDINGrh   	CFUNCTYPEr   r   set_cuIpcOpenMemHandle	_find_apir   POINTERr   cu_device_ptrcu_ipc_mem_handler   r   r)   _ctypes_wrap_fnr   )rV   	set_protor   
call_protor   	safe_calls         r=   r   Driver._initialize_extras  s    > $$T84	!*7+I+I!Jt~~.BCD%%e&,nnV5I5I&J&,nnV5M5M&N&,mm5
 #-W-L-L"M+D((()B)@B	 #,r?   c                 >    U R                  5         U R                  S L $ rQ   )r   r   r\   s    r=   is_availableDriver.is_available  s    !((D00r?   c                     U R                  5         U R                  b  [        SU R                  -  5      e[        (       a  U R	                  U5      $ U R                  U5      $ )NzError at driver init: 
%s:)r   r   r   r   _cuda_python_wrap_fnr   rV   fnames     r=   __getattr__Driver.__getattr__"  sa    !$$0"#@#'#<#<$= > > >,,U33''..r?   c                 P  ^ ^^ Tc3   [         T   nUS   nUSS  nT R                  T5      mUTl        UTl        UUU 4S jnUUU 4S jn[        R                  (       a  UnOUn[        R                  " T5      " U5      n	[        T TU	5        U	$ ! [         a    [        T5      ef = f)Nr   r   c                     > SR                  U  Vs/ s H  n[        U5      PM     sn5      n[        R                  STR                  U5        T" U 6 nTR                  TU5        g s  snf Nz, zcall driver api: %s(%s))rp   r+   r   debugr)   _check_ctypes_error)argsargargstrretcoder   libfnrV   s       r=   verbose_cuda_api_call5Driver._ctypes_wrap_fn.<locals>.verbose_cuda_api_call>  sW    YYD9DSCD9:FMM3U^^VLTlG$$UG4  :s   A&c                  t   > [         R                  STR                  5        T" U 6 nTR                  TU5        g Nzcall driver api: %s)r   r   r)   r   )r   r   r   r   rV   s     r=   safe_cuda_api_call2Driver._ctypes_wrap_fn.<locals>.safe_cuda_api_callD  s/    MM/@TlG$$UG4r?   )r   KeyErrorAttributeErrorr   restypeargtypesr   CUDA_LOG_API_ARGS	functoolswrapssetattr)
rV   r   r   protor   r   r   r   wrapperr   s
   ```       r=   r   Driver._ctypes_wrap_fn/  s    =,&u- AhGQRyH NN5)E#EM%EN	5	5
 ##+G(GOOE*73	eY'9  ,$U++,s   	B B%c                    ^ ^^ [        [        T5      mUUU 4S jnUUU 4S jn[        R                  (       a  UnOUn[        R
                  " T5      " U5      n[        T TU5        U$ )Nc                     > SR                  U  Vs/ s H  n[        U5      PM     sn5      n[        R                  STR                  U5        TR                  TT" U 6 5      $ s  snf r   )rp   r+   r   r   r)   _check_cuda_python_error)r   r   r   r   r   rV   s      r=   r   :Driver._cuda_python_wrap_fn.<locals>.verbose_cuda_api_callU  sU    YYD9DSCD9:FMM3U^^VL00tEE  :s   A#c                  n   > [         R                  STR                  5        TR                  TT" U 6 5      $ r   )r   r   r)   r   )r   r   r   rV   s    r=   r   7Driver._cuda_python_wrap_fn.<locals>.safe_cuda_api_callZ  s-    MM/@00tEEr?   )r.   bindingr   r   r   r   r   )rV   r   r   r   r   r   r   s   ``    @r=   r   Driver._cuda_python_wrap_fnR  sU    '	F
	F ##+G(GOOE*73	eY'r?   c                    ^ [         R                  (       a  [        (       d  SnOSnU H  n [        U R                  T U 35      s  $    U4S jn[        U TU5        U$ ! [
         a     MD  f = f)N)_v2_ptds_v2_ptsz_ptds_ptsz_v2 )r   r   c                      > [        ST 35      e)NzDriver missing function: )r   )r   kwsr   s     r=   absent_function)Driver._find_api.<locals>.absent_functionx  s    !$=eW"EFFr?   )r   CUDA_PER_THREAD_DEFAULT_STREAMr   r.   r   r   r   )rV   r   variantsvariantr   s    `   r=   r   Driver._find_apig  ss     00LH"HGtxxE77))<==  	G 	e_- " s   A  
A.-A.c                     U R                   bO  [        5       U R                   :w  a6  Sn[        R                  U[        5       U R                   5        [	        S5      eg g )Nz0pid %s forked from pid %s after CUDA driver initzCUDA initialized before forking)r   r   r   criticalr   )rV   rS   s     r=   _detect_forkDriver._detect_fork~  sJ    88GI$9DCS')TXX6!"CDD %:r?   c                     U[         R                  :w  ad  [        R                  US5      nSU< SU< 3n[        R                  U5        U[         R                  :X  a  U R                  5         [        X$5      eg )NUNKNOWN_CUDA_ERRORCall to  results in )	r   CUDA_SUCCESS	ERROR_MAPgetr   errorCUDA_ERROR_NOT_INITIALIZEDr  rN   )rV   r   r   errnamerS   s        r=   r   Driver._check_ctypes_error  sa    e(((mmG-ABG05w?CMM#%:::!!#w,, )r?   c                 B   US   nUSS  n[        U5      S:X  a  US   nU[        R                  R                  :w  ab  SU< SUR                  < 3n[
        R                  U5        U[        R                  R                  :X  a  U R                  5         [        X55      eU$ )Nr   r   r  r  )
lenr   CUresultr	  r   r   r  r  r  rN   )rV   r   returnedr   retvalrS   s         r=   r   Driver._check_cuda_python_error  s    1+!"v;!AYFg&&33305w||DCMM#'**EEE!!#w,,r?   c                     U R                   R                  U5      nUc  [        U5      nX R                   U'   [        R                  " U5      $ rQ   )r   r  Deviceweakrefproxy)rV   devnumdevs      r=   
get_deviceDriver.get_device  s?    llv&;.C#&LL }}S!!r?   c                     [         (       a  U R                  5       $ [        5       nU R                  [        U5      5        UR                  $ rQ   )r   cuDeviceGetCountr   r   value)rV   counts     r=   get_device_countDriver.get_device_count  s9    >((**eEl+{{r?   c                 H    [        U R                  R                  5       5      $ )z)Returns a list of active devices
        )listr   valuesr\   s    r=   list_devicesDriver.list_devices  s     DLL'')**r?   c                 f    U R                   R                  5        H  nUR                  5         M     g)zReset all devices
        N)r   r&  reset)rV   r  s     r=   r*  Driver.reset  s$     <<&&(CIIK )r?   c                 @   U R                  5        nUR                  bf  [        (       a  [        R	                  5       sSSS5        $ [
        R                  " 5       n[        R	                  [        U5      5        UsSSS5        $  SSS5        g! , (       d  f       g= f)z^Pop the active CUDA context and return the handle.
If no CUDA context is active, return None.
N)get_active_contextr  r   drivercuCtxPopCurrentr   
cu_contextr   )rV   acpoppeds      r=   pop_active_contextDriver.pop_active_context  sq     $$&"yy$!>!113 '&
 $..0F**5=9! '&$ '&&s   ,B4B
Bc                     [        5       $ )z3Returns an instance of ``_ActiveContext``.
        )_ActiveContextr\   s    r=   r-  Driver.get_active_context  s     r?   c                     [         (       a  [        R                  5       nOK[        R                  " S5      n[        R                  [        R
                  " U5      5        UR                  nUS-  nXS-  -
  S-  nX44$ )z=
Returns the CUDA Runtime version as a tuple (major, minor).
r   i  
   )r   r.  cuDriverGetVersionrh   r   r   r   )rV   versiondvmajorminors        r=   get_versionDriver.get_version  sh     >//1GaB%%fll2&67hhG 4DL)b0~r?   )r   r   r   r   r   r   rQ   r   )r)   rE   rF   rG   __doc__r   r   rU   r   r   propertyr   r   r   r   r   r  r   r   r  r"  r'  r*  r3  r-  r?  rH   rC   r?   r=   r   r      s~     J. "*,* 1 1/!F*.E-"+
" 
r?   r   c                   P    \ rS rSrSr\R                  " 5       rS rS r	S r
\
rSrg)r6  i  zAn contextmanager object to cache active context to reduce dependency
on querying the CUDA driver API.

Once entering the context, it is assumed that the active CUDA context is
not changed until the context is exited.
c                    Sn[        U R                  S5      (       a  U R                  R                  u  p#O[        (       a&  [        R                  5       n[        U5      S:X  a  S nOI[        R                  " S5      n[        R                  [        U5      5        UR                  (       a  UOS nUc  S nO|[        (       a  [        [        R                  5       5      nO?[        R                  " 5       n[        R                  [        U5      5        UR                  nX#4U R                  l        SnXl        X l        X0l        U $ )NF
ctx_devnumr   T)hasattr
_tls_cacherF  r   r.  cuCtxGetCurrentr0   r   r0  r   r   cuCtxGetDevice	cu_device_is_topcontext_handler  )rV   is_tophctxr  hdevices        r=   	__enter___ActiveContext.__enter__  s    4??L11??55LD& ~--/t9>D((+&&uT{3#zztt|!> !6!6!89F$..0G))%.9$]]F.2^*"r?   c                 T    U R                   (       a  [        U R                  S5        g g )NrF  )rL  delattrrH  )rV   exc_typeexc_valexc_tbs       r=   __exit___ActiveContext.__exit__  s    <<DOO\2 r?   c                     U R                   SL$ )zAReturns True is there's a valid and active CUDA context.
        N)rM  r\   s    r=   __bool___ActiveContext.__bool__  s     ""$..r?   )rL  rM  r  N)r)   rE   rF   rG   rB  	threadinglocalrH  rQ  rX  r[  __nonzero__rH   rC   r?   r=   r6  r6    s,     "J D3/
 Kr?   r6  c                      Sn [         R                  " 5       n[        [        5       H9  nUR	                  U 5      (       d  M  [        [        U5      X[        U 5      S  '   M;     U$ )NCU_DEVICE_ATTRIBUTE_)r   r   r   r   r   r.   r  )r   r   r   s      r=   _build_reverse_device_attrsrb    sS    #F



CE
??6""&-eT&:CS[\"#  Jr?   c                   t    \ rS rSrSr\S 5       rS rS rS r	S r
S rS	 rS
 rS rS rS r\S 5       rSrg)r  i  z}
The device object owns the CUDA contexts.  This is owned by the driver
object.  User should not construct devices directly.
c                     [        [        R                  5       5       H0  n[        R                  U5      nUR	                  5       U:X  d  M.  Us  $    SR                  U5      n[        U5      e)zWCreate Device object from device identity created by
``Device.get_device_identity()``.
zKNo device of {} is found. Target device may not be visible in this process.)ranger.  r"  r  get_device_identityformatRuntimeError)rV   identitydeviddr   s        r=   from_identityDevice.from_identity$  sd    
 62245E!!%(A$$&(2 6DfX  v&&r?   c           
         [         (       a'  [        R                  U5      nX l        [	        U5      nO;[        5       n[        R                  [        U5      U5        UR                  nX0l        SU SU 3nX:w  a  [        U5      e0 U l	        U R                  U R                  4U l        Sn[         (       a@  [        R                  XPR                  5      nUR                  S5      R                  S5      nO:[         U-  " 5       n[        R                  XeU R                  5        UR                  nXpl        [         (       a5  [        R%                  U R                  5      n['        UR(                  5      n	OG[+        5       n[        R%                  [        U5      U R                  5        ['        [)        U5      5      n	Sn
U
S-  nU
S-  nU
S	-  nS
U SU SU SU SU 3
nX-  U l        S U l        g )NzDriver returned device z instead of    utf-8 z%02x         zGPU--)r   r.  cuDeviceGetidr0   r   r   r   rh  
attributesCOMPUTE_CAPABILITY_MAJORCOMPUTE_CAPABILITY_MINORcompute_capabilitycuDeviceGetNamedecoderstripr	   r   cuDeviceGetUuidtuplebytesr   uuidprimary_context)rV   r  result
got_devnumrS   bufszbufr   r  	uuid_valsbb2b4b6r&   s                  r=   rU   Device.__init__4  s   >''/FGVJWFuV}f5J G'
|<xHs## $(#@#@#'#@#@#B >((8C::g&--d3DE>$C""3tww799D	 >))$''2Ddjj)I9D""5;8eDk*IUUURD"Qrd!B4q-O	#r?   c                 J    U R                   U R                  U R                  S.$ )N)pci_domain_id
pci_bus_idpci_device_id)PCI_DOMAIN_ID
PCI_BUS_IDPCI_DEVICE_IDr\   s    r=   rf  Device.get_device_identityh  s%    !////!//
 	
r?   c                 8    SU R                   U R                  4-  $ )Nz<CUDA device %d '%s'>)rw  r   r\   s    r=   __repr__Device.__repr__o  s    &$''499)===r?   c                 r   [         (       a=  [        [        R                  SU 35      n[        R                  X R                  5      nOI [        U   n[        5       n[        R                  [        U5      X R                  5        UR                  n[        XU5        U$ ! [         a    [        U5      ef = f)zRead attributes lazily
        ra  )r   r.   r   CUdevice_attributer.  cuDeviceGetAttributerw  DEVICE_ATTRIBUTESr   r   r   r   r   r   )rV   attrrR   r   r  s        r=   r   Device.__getattr__r  s     >7551$8:D//gg>E+(. WF''ftWWELLEE"  +$T**+s   
	B   B6c                 ,    [        U R                  5      $ rQ   )hashrw  r\   s    r=   __hash__Device.__hash__  s    DGG}r?   c                 `    [        U[        5      (       a  U R                  UR                  :H  $ gNF)r/   r  rw  rV   others     r=   __eq__Device.__eq__  s%    eV$$77ehh&&r?   c                     X:X  + $ rQ   rC   r  s     r=   __ne__Device.__ne__  s    ""r?   c                 l   U R                   b  U R                   $ [        U 5        [        (       a   [        R	                  U R
                  5      nO>[        R                  " 5       n[        R	                  [        U5      U R
                  5        [        [        R                  " U 5      U5      nX l         U$ )zW
Returns the primary context for the device.
Note: it is not pushed to the CPU thread.
)r  met_requirement_for_devicer   r.  cuDevicePrimaryCtxRetainrw  r   r0  r   Contextr  r  )rV   rO  ctxs      r=   get_primary_contextDevice.get_primary_context  s    
 +'''"4(>22477;D$$&D++E$KAgmmD)40"
r?   c                 t    U R                   (       a'  [        R                  U R                  5        SU l         gg)z?
Release reference to primary context if it has been retained.
N)r  r.  cuDevicePrimaryCtxReleaserw  r\   s    r=   release_primary_contextDevice.release_primary_context  s,     ,,TWW5#'D   r?   c                      U R                   b  U R                   R                  5         U R                  5         [        R	                  U R
                  5        g ! [        R	                  U R
                  5        f = frQ   )r  r*  r  r.  cuDevicePrimaryCtxResetrw  r\   s    r=   r*  Device.reset  sX    	4##/$$**,((* **4773F**4773s   7A !A:c                      U R                   S:  $ )N)r#   r"   )r{  r\   s    r=   supports_float16Device.supports_float16  s    &&&00r?   )rx  r{  rw  r   r  r  N)r)   rE   rF   rG   rB  classmethodrl  rU   rf  r  r   r  r  r  r  r  r*  rC  r  rH   rC   r?   r=   r  r    sb     ' '2$h
>(
#((4 1 1r?   r  c                 X    U R                   [        :  a  [        U < S[        < 35      eg )Nz has compute capability < )r{  MIN_REQUIRED_CCr   )devices    r=   r  r    s,      ?2 & 9 : 	: 3r?   c                       \ rS rSrSrS r\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       r\S	 5       r\S
 5       r\S 5       r\\S 5       5       rSrg)BaseCUDAMemoryManageri  zAAbstract base class for External Memory Management (EMM) Plugins.c                 R    SU;  a  [        S5      eUR                  S5      U l        g )Ncontextz!Memory manager requires a context)rh  popr  )rV   r   kwargss      r=   rU   BaseCUDAMemoryManager.__init__  s&    F"BCCzz),r?   c                     g)z
Allocate on-device memory in the current context.

:param size: Size of allocation in bytes
:type size: int
:return: A memory pointer instance that owns the allocated memory
:rtype: :class:`MemoryPointer`
NrC   )rV   sizes     r=   memallocBaseCUDAMemoryManager.memalloc      r?   c                     g)a  
Allocate pinned host memory.

:param size: Size of the allocation in bytes
:type size: int
:param mapped: Whether the allocated memory should be mapped into the
               CUDA address space.
:type mapped: bool
:param portable: Whether the memory will be considered pinned by all
                 contexts, and not just the calling context.
:type portable: bool
:param wc: Whether to allocate the memory as write-combined.
:type wc: bool
:return: A memory pointer instance that owns the allocated memory. The
         return type depends on whether the region was mapped into
         device memory.
:rtype: :class:`MappedMemory` or :class:`PinnedMemory`
NrC   )rV   r  mappedportablewcs        r=   memhostalloc"BaseCUDAMemoryManager.memhostalloc  r  r?   c                     g)a  
Pin a region of host memory that is already allocated.

:param owner: The object that owns the memory.
:param pointer: The pointer to the beginning of the region to pin.
:type pointer: int
:param size: The size of the region in bytes.
:type size: int
:param mapped: Whether the region should also be mapped into device
               memory.
:type mapped: bool
:return: A memory pointer instance that refers to the allocated
         memory.
:rtype: :class:`MappedMemory` or :class:`PinnedMemory`
NrC   rV   ownerpointerr  r  s        r=   mempinBaseCUDAMemoryManager.mempin  r  r?   c                     g)zd
Perform any initialization required for the EMM plugin instance to be
ready to use.

:return: None
NrC   r\   s    r=   
initialize BaseCUDAMemoryManager.initialize  r  r?   c                     g)z
Return an IPC handle from a GPU allocation.

:param memory: Memory for which the IPC handle should be created.
:type memory: :class:`MemoryPointer`
:return: IPC handle for the allocation
:rtype: :class:`IpcHandle`
NrC   rV   memorys     r=   get_ipc_handle$BaseCUDAMemoryManager.get_ipc_handle  r  r?   c                     g)z
Returns ``(free, total)`` memory in bytes in the context. May raise
:class:`NotImplementedError`, if returning such information is not
practical (e.g. for a pool allocator).

:return: Memory info
:rtype: :class:`MemoryInfo`
NrC   r\   s    r=   get_memory_info%BaseCUDAMemoryManager.get_memory_info  r  r?   c                     g)z@
Clears up all memory allocated in this context.

:return: None
NrC   r\   s    r=   r*  BaseCUDAMemoryManager.reset  r  r?   c                     g)z~
Returns a context manager that ensures the implementation of deferred
cleanup whilst it is active.

:return: Context manager
NrC   r\   s    r=   defer_cleanup#BaseCUDAMemoryManager.defer_cleanup"  r  r?   c                     g)z
Returns an integer specifying the version of the EMM Plugin interface
supported by the plugin implementation. Should always return 1 for
implementations of this version of the specification.
NrC   r\   s    r=   interface_version'BaseCUDAMemoryManager.interface_version+  r  r?   r  N)r)   rE   rF   rG   rB  rU   r   r  r  r  r  r  r  r*  r  rC  r  rH   rC   r?   r=   r  r    s    K-
    (  "             r?   r  )	metaclassc                   z   ^  \ rS rSrSrU 4S jrS r  SS jrSS jrS r	S r
\R                  S	 5       rS
rU =r$ )HostOnlyCUDAMemoryManageri5  a)  Base class for External Memory Management (EMM) Plugins that only
implement on-device allocation. A subclass need not implement the
``memhostalloc`` and ``mempin`` methods.

This class also implements ``reset`` and ``defer_cleanup`` (see
:class:`numba.cuda.BaseCUDAMemoryManager`) for its own internal state
management. If an EMM Plugin based on this class also implements these
methods, then its implementations of these must also call the method from
``super()`` to give ``HostOnlyCUDAMemoryManager`` an opportunity to do the
necessary work for the host allocations it is managing.

This class does not implement ``interface_version``, as it will always be
consistent with the version of Numba in which it is implemented. An EMM
Plugin subclassing this class should implement ``interface_version``
instead.
c                 x   > [         TU ]  " U0 UD6  [        R                  " 5       U l        [        5       U l        g rQ   )rT   rU   r   r   allocations_PendingDeallocsdeallocationsrV   r   r  rW   s      r=   rU   "HostOnlyCUDAMemoryManager.__init__G  s1    $)&) ++--/r?   c                     U" 5       $ ! [          ar  n[        (       a  [        R                  R                  nO[
        R                  nUR                  U:X  a&  U R                  R                  5         U" 5       s SnA$ e SnAff = f)z
Attempt allocation by calling *allocator*.  If an out-of-memory error
is raised, the pending deallocations are flushed and the allocation
is retried.  If it fails in the second attempt, the error is reraised.
N)	rN   r   r   r  CUDA_ERROR_OUT_OF_MEMORYr   rR   r  clear)rV   	allocatorr}   oom_codes       r=   _attempt_allocation-HostOnlyCUDAMemoryManager._attempt_allocationL  si    	; 	~"++DD 99vv!""((* {"	s!   	 
BA&B 9B?B  Bc                 f  ^^
^ Sm
U(       a  T
[         R                  -  m
U(       a  T
[         R                  -  m
U(       a  T
[         R                  -  m
[        (       a*  U
U4S jnU(       a  U R                  U5      mOU" 5       mTnO>[        5       mU
UU4S jnU(       a  U R                  U5        OU" 5         TR                  n[        U TUTU5      n[        R                  " U R                  5      nU(       a*  [        UTTUS9n	XR                  U'   U	R                  5       $ [        UTTUS9$ )zImplements the allocation of pinned host memory.

It is recommended that this method is not overridden by EMM Plugin
implementations - instead, use the :class:`BaseCUDAMemoryManager`.
r   c                  0   > [         R                  TT 5      $ rQ   )r.  cuMemHostAlloc)flagsr  s   r=   r  9HostOnlyCUDAMemoryManager.memhostalloc.<locals>.allocators  s    ,,T599r?   c                  F   > [         R                  [        T5      TT 5        g rQ   )r.  r  r   r   r  r  s   r=   r  r    s    %%eGndEBr?   	finalizer)r   CU_MEMHOSTALLOC_DEVICEMAPCU_MEMHOSTALLOC_PORTABLECU_MEMHOSTALLOC_WRITECOMBINEDr   r  r   r   _hostalloc_finalizerr  r  r  MappedMemoryr  ownPinnedMemory)rV   r  r  r  r  r  	alloc_keyr  r  memr   r  s    `        @@r=   r  &HostOnlyCUDAMemoryManager.memhostallocc  s     U444EU333EU888E>: 229=#+IjGC ((3I(w	4P	mmDLL)sGTYGC*-Y'779WdiHHr?   c                   ^^^
 [        T[        5      (       a  [        (       d  [        T5      m[        (       a  TnOTR                  nSm
U(       a  T
[
        R                  -  m
U
UU4S jnU(       a  U R                  U5        OU" 5         [        U TXT5      n[        R                  " U R                  5      nU(       a+  [        UTTUUS9n	XR                  U'   U	R                  5       $ [        UTTUUS9$ )zImplements the pinning of host memory.

It is recommended that this method is not overridden by EMM Plugin
implementations - instead, use the :class:`BaseCUDAMemoryManager`.
r   c                  4   > [         R                  TTT 5        g rQ   )r.  cuMemHostRegisterr  s   r=   r  3HostOnlyCUDAMemoryManager.mempin.<locals>.allocator  s    $$WdE:r?   )r  r  )r/   r0   r   r   r   r   CU_MEMHOSTREGISTER_DEVICEMAPr  _pin_finalizerr  r  r  r
  r  r  r  )rV   r  r  r  r  r  r  r  r  r  r   s     ``      @r=   r   HostOnlyCUDAMemoryManager.mempin  s     gs##NNw'G>II
 U777E	; $$Y/K"4)D	mmDLL)sGT)24C*-Y'779Wd%*35 5r?   c                 x  ^^^ [         (       a  UU4S jnU R                  U5      mTnO:[        R                  " 5       mUUU4S jnU R                  U5        TR                  n[        U TUT5      n[        R                  " U R                  5      n[        UTTUS9nXpR                  U'   UR                  5       $ )Nc                     > [         R                  n T(       a  U R                  R                  nOU R                  R                  n[
        R                  TU5      $ rQ   )r   CUmemAttach_flagsCU_MEM_ATTACH_GLOBALr   CU_MEM_ATTACH_HOSTr.  cuMemAllocManaged)ma_flagsr   attach_globalr  s     r=   r  <HostOnlyCUDAMemoryManager.memallocmanaged.<locals>.allocator  sF    "44 $99??E$77==E//e<<r?   c                     > [        5       n T(       a  [        R                  n O[        R                  n [        R                  [        T5      TU 5        g rQ   )r   r   r  r  r.  r  r   )r   r  ptrr  s    r=   r  r    s9     !66E!44E((sT5Ar?   r  )r   r  r   r   r   _alloc_finalizerr  r  r  ManagedMemoryr  r  )	rV   r  r  r  r  r  r  r  r!  s	    ``     @r=   memallocmanaged)HostOnlyCUDAMemoryManager.memallocmanaged  s    >= **95CI &&(CB $$Y/		I$T3	4@	mmDLL)Cdi@&)#wwyr?   c                 l    U R                   R                  5         U R                  R                  5         g)zClears up all host memory (mapped and/or pinned) in the current
context.

EMM Plugins that override this method must call ``super().reset()`` to
ensure that host allocations are also cleaned up.N)r  r  r  r\   s    r=   r*  HostOnlyCUDAMemoryManager.reset  s(     	   "r?   c              #   |   #    U R                   R                  5          Sv   SSS5        g! , (       d  f       g= f7f)a   Returns a context manager that disables cleanup of mapped or pinned
host memory in the current context whilst it is active.

EMM Plugins that override this method must obtain the context manager
from this method before yielding to ensure that cleanup of host
allocations is also deferred.N)r  disabler\   s    r=   r  'HostOnlyCUDAMemoryManager.defer_cleanup  s'      '') *))s   <+	<
9<r  r  FFFF)r)   rE   rF   rG   rB  rU   r  r  r  r$  r*  
contextlibcontextmanagerr  rH   r_   r`   s   @r=   r  r  5  sL    "0
. 9>.I`(5T$L#  r?   r  c                       \ rS rSrSrS rSrg)GetIpcHandleMixini  zLA class that provides a default implementation of ``get_ipc_handle()``.
    c                    [        U5      u  p#[        (       a7  [        R                  U5      n[	        UR
                  5      [	        U5      -
  nOM[        R                  " 5       n[        R                  [        U5      U5        UR
                  R                  U-
  nU R                  R                  R                  5       n[        XUR                  UUS9$ )a3  Open an IPC memory handle by using ``cuMemGetAddressRange`` to
determine the base pointer of the allocation. An IPC handle of type
``cu_ipc_mem_handle`` is constructed and initialized with
``cuIpcGetMemHandle``. A :class:`numba.cuda.IpcHandle` is returned,
populated with the underlying ``ipc_mem_handle``.
)offset)device_extentsr   r.  cuIpcGetMemHandler0   handler   r   r   r   r  r  rf  	IpcHandler  )rV   r  baseend	ipchandler3  source_infos          r=   r   GetIpcHandleMixin.get_ipc_handle  s     #6*	>006I'#d)3F002I$$U9%5t<]]((4/Fll))==?FKK &( 	(r?   rC   N)r)   rE   rF   rG   rB  r  rH   rC   r?   r=   r1  r1    s    (r?   r1  c                   :    \ rS rSrSrS rS rS r\S 5       r	Sr
g)	NumbaCUDAMemoryManageri  zInternal on-device memory management for Numba. This is implemented using
the EMM Plugin interface, but is not part of the public API.c                     U R                   R                  [        :X  a*  U R                  5       R                  U R                   l        g g rQ   )r  memory_capacity_SizeNotSetr  totalr\   s    r=   r  !NumbaCUDAMemoryManager.initialize  s:     --<151E1E1G1M1MD. =r?   c                 r  ^^ [         (       a  U4S jnU R                  U5      mTnO9[        R                  " 5       mUU4S jnU R                  U5        TR                  n[        U TUT5      n[        R                  " U R                  5      n[        UTTUS9nX`R                  U'   UR                  5       $ )Nc                  .   > [         R                  T 5      $ rQ   )r.  
cuMemAlloc)r  s   r=   r  2NumbaCUDAMemoryManager.memalloc.<locals>.allocator  s    ((..r?   c                  D   > [         R                  [        T 5      T5        g rQ   )r.  rF  r   )r!  r  s   r=   r  rG  %  s    !!%*d3r?   r  )r   r  r   r   r   r"  r  r  r  AutoFreePointerr  r  )rV   r  r  r  r  r  r  r!  s    `     @r=   r  NumbaCUDAMemoryManager.memalloc  s    >/ **95CI&&(C4 $$Y/		I$T3	4@	mmDLL)c3	B&)#wwyr?   c                     [         (       a  [        R                  5       u  pOT[        5       n[        5       n[        R                  [	        U5      [	        U5      5        UR
                  nUR
                  n[        XS9$ )N)freerB  )r   r.  cuMemGetInfor   r   r   
MemoryInfo)rV   rL  rB  s      r=   r  &NumbaCUDAMemoryManager.get_memory_info1  s\    > --/KD%:DJEdU5\:::DKKEt11r?   c                     [         $ rQ   ) _SUPPORTED_EMM_INTERFACE_VERSIONr\   s    r=   r  (NumbaCUDAMemoryManager.interface_version=  s    //r?   rC   N)r)   rE   rF   rG   rB  r  r  r  rC  r  rH   rC   r?   r=   r>  r>    s,    DN,
2 0 0r?   r>  c                     [         (       a  g [        R                  S:X  a  [        q g  [        R
                  " [        R                  5      n [        U R                  5        g ! [         a    [        S[        R                  -  5      ef = f)Ndefaultz$Failed to use memory manager from %s)
_memory_managerr   CUDA_MEMORY_MANAGERr>  	importlibimport_moduleset_memory_manager_numba_memory_manager	Exceptionrh  )
mgr_modules    r=   _ensure_memory_managerr]  G  sx     !!Y.07,,V-G-GH
:;;< 7A!556 7 	77s   9A# #'B
c                 l    U " SS9nUR                   nU[        :w  a  SU[        4-  n[        U5      eU qg)a"  Configure Numba to use an External Memory Management (EMM) Plugin. If
the EMM Plugin version does not match one supported by this version of
Numba, a RuntimeError will be raised.

:param mm_plugin: The class implementing the EMM Plugin.
:type mm_plugin: BaseCUDAMemoryManager
:return: None
Nr  z9EMM Plugin interface has version %d - version %d required)r  rQ  rh  rU  )	mm_plugindummyiverrs       r=   rY  rY  Y  sE     d#E		 	 B	--I5673Or?   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rA  in  z;
Dummy object for _PendingDeallocs when *size* is not set.
c                 $   > [         TU ]  U S5      $ Nr   )rT   r   )r   r   r  rW   s      r=   r   _SizeNotSet.__new__s  s    wsA&&r?   c                     g)N?rC   r\   s    r=   r]   _SizeNotSet.__str__v  s    r?   rC   )	r)   rE   rF   rG   rB  r   r]   rH   r_   r`   s   @r=   rA  rA  n  s    ' r?   rA  c                       \ rS rSrSr\4S jr\S 5       r\4S jr	S r
\R                  S 5       r\S 5       rS	 rS
rg)r  i}  z
Pending deallocations of a context (or device since we are using the primary
context). The capacity defaults to being unset (_SizeNotSet) but can be
modified later once the driver is initialized and the total memory capacity
known.
c                 J    [        5       U l        SU l        SU l        Xl        g re  )r   _cons_disable_count_sizer@  )rV   capacitys     r=   rU   _PendingDeallocs.__init__  s     W

'r?   c                 N    [        U R                  [        R                  -  5      $ rQ   )r0   r@  r   CUDA_DEALLOCS_RATIOr\   s    r=   _max_pending_bytes#_PendingDeallocs._max_pending_bytes  s    4''&*D*DDEEr?   c                 `   [         R                  SUR                  U5        U R                  R	                  XU45        U =R
                  [        U5      -  sl        [        U R                  5      [        R                  :  d  U R
                  U R                  :  a  U R                  5         gg)a/  
Add a pending deallocation.

The *dtor* arg is the destructor function that takes an argument,
*handle*.  It is used as ``dtor(handle)``.  The *size* arg is the
byte size of the resource added.  It is an optional argument.  Some
resources (e.g. CUModule) has an unknown memory footprint on the device.
z add pending dealloc: %s %s bytesN)r   r   r)   rl  r   rn  r0   r  r   CUDA_DEALLOCS_COUNTrs  r  rV   dtorr6  r  s       r=   add_item_PendingDeallocs.add_item  sx     	7M

4./

c$i


Of888

T444JJL 5r?   c                 
   U R                   (       dr  U R                  (       aY  U R                  R                  5       u  pn[        R	                  SUR
                  U5        U" U5        U R                  (       a  MY  SU l        gg)zP
Flush any pending deallocations unless it is disabled.
Do nothing if disabled.
zdealloc: %s %s bytesr   N)is_disabledrl  popleftr   r   r)   rn  rw  s       r=   r  _PendingDeallocs.clear  s`    
 **'+zz'9'9';$t3T]]DIV *** DJ  r?   c              #      #    U =R                   S-  sl          Sv   U =R                   S-  sl         U R                   S:  d   eg! U =R                   S-  sl         U R                   S:  d   ef = f7f)z[
Context manager to temporarily disable flushing pending deallocation.
This can be nested.
r   Nr   rm  r\   s    r=   r)  _PendingDeallocs.disable  sm      	q 	,1$&&!+++ 1$&&!+++s   A1A (A1)A..A1c                      U R                   S:  $ re  r  r\   s    r=   r|  _PendingDeallocs.is_disabled  s    ""Q&&r?   c                 ,    [        U R                  5      $ )z*
Returns number of pending deallocations.
)r  rl  r\   s    r=   __len___PendingDeallocs.__len__  s     4::r?   )rl  rm  rn  r@  N)r)   rE   rF   rG   rB  rA  rU   rC  rs  ry  r  r.  r/  r)  r|  r  rH   rC   r?   r=   r  r  }  si     !, ( F F +6  
 
, 
, ' 'r?   r  rN  z
free,totalc                   "   \ rS rSrSrS rS rS r S'S jrS r	S	 r
 S'S
 jrS rS rS rS rS rS rS(S jrS)S jrS*S jrS rS rS+S jrS rS rS rS rS rS rS rS rS r S(S  jr!S! r"\#RH                  S" 5       r%S# r&S$ r'S% r(S&r)g),r  i  zg
This object wraps a CUDA Context resource.

Contexts should not be constructed directly by user code.
c                     Xl         X l        [        R                  " 5       U l        [        5       U l        [        5         [        U S9U l	        [        R                  " 5       U l
        0 U l        g )Nr  )r  r6  r   r   r  r  r  r]  rU  memory_managermodulesextras)rV   r  r6  s      r=   rU   Context.__init__  sR     ++--/ -d;'')r?   c                     [         R                  SU R                  R                  5        U R                  R                  5         U R                  R                  5         U R                  R                  5         g)z/
Clean up all owned resources in this context.
zreset context of device %sN)	r   r   r  rw  r  r*  r  r  r  r\   s    r=   r*  Context.reset  sO    
 	14;;>>B!!#  "r?   c                 6    U R                   R                  5       $ )z>Returns (free, total) memory in bytes in the context.
        )r  r  r\   s    r=   r  Context.get_memory_info  s     ""2244r?   Nc                 \    XX44n[         (       a  U R                  " U6 $ U R                  " U6 $ )zReturn occupancy of a function.
:param func: kernel for which occupancy is calculated
:param blocksize: block size the kernel is intended to be launched with
:param memsize: per-block dynamic shared memory usage intended, in bytes
)r   -_cuda_python_active_blocks_per_multiprocessor(_ctypes_active_blocks_per_multiprocessor)rV   func	blocksizememsizer   r   s         r=   $get_active_blocks_per_multiprocessor,Context.get_active_blocks_per_multiprocessor  s5     0>EEtLL@@$GGr?   c                     UR                   X#/nU(       d  [        R                  " U6 $ UR                  U5        [        R                  " U6 $ rQ   )r6  r.  +cuOccupancyMaxActiveBlocksPerMultiprocessorr   4cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)rV   r  r  r  r   pss         r=   r  5Context._cuda_python_active_blocks_per_multiprocessor  sA    kk9.EErJJ
		%JJBOOr?   c                     [        5       n[        U5      UR                  X#4nU(       d  [        R                  " U6   UR                  $ [        R
                  " U6   UR                  $ rQ   )r   r   r6  r.  r  r  r   )rV   r  r  r  r   r  r   s          r=   r  0Context._ctypes_active_blocks_per_multiprocessor  sT    ft{{I?>>E || GGN||r?   c                 ^    XX4U4n[         (       a  U R                  " U6 $ U R                  " U6 $ )a"  Suggest a launch configuration with reasonable occupancy.
:param func: kernel for which occupancy is calculated
:param b2d_func: function that calculates how much per-block dynamic
                 shared memory 'func' uses based on the block size.
                 Can also be the address of a C function.
                 Use `0` to pass `NULL` to the underlying CUDA API.
:param memsize: per-block dynamic shared memory usage intended, in bytes
:param blocksizelimit: maximum block size the kernel is designed to
                       handle
)r   %_cuda_python_max_potential_block_size _ctypes_max_potential_block_size)rV   r  b2d_funcr  blocksizelimitr   r   s          r=   get_max_potential_block_size$Context.get_max_potential_block_size  s7     ?>==tDD88$??r?   c                 4   [        5       n[        5       n[        U5      n[        U5      [        U5      UR                  UX4/n	U(       d  [        R
                  " U	6   O$U	R                  U5        [        R                  " U	6   UR                  UR                  4$ rQ   )	r   r   r   r6  r.   cuOccupancyMaxPotentialBlockSizer   )cuOccupancyMaxPotentialBlockSizeWithFlagsr   )
rV   r  r  r  r  r   gridsizer  b2d_cbr   s
             r=   r  (Context._ctypes_max_potential_block_size&  sz    7G	&x0hy!14;;) 33T:KK<<dC	00r?   c                 :   [         R                  " [        [        5      " U5      n[        R                  USS9n[        R                  " U5      nUR                  XU/n	U(       d  [        R                  " U	6 $ U	R                  U5        [        R                  " U	6 $ )Nlittle	byteorder)rh   r   r   r   r0   
from_bytesr   CUoccupancyB2DSizer6  r.  r  r   r  )
rV   r  r  r  r  r   r  r!  driver_b2d_cbr   s
             r=   r  -Context._cuda_python_max_potential_block_size6  s|    !!(E28<nnVxn82237]^D::DAAKKCCTJJr?   c                 8    U R                   R                  5         g)zGInitialize the context for use.
It's safe to be called multiple times.
N)r  r  r\   s    r=   prepare_for_useContext.prepare_for_useC  s     	&&(r?   c                 b    [         R                  U R                  5        U R                  5         g)z0
Pushes this context on the current CPU Thread.
N)r.  cuCtxPushCurrentr6  r  r\   s    r=   pushContext.pushI  s"     	,r?   c                     [         R                  5       n[        (       a%  [        U5      [        U R                  5      :X  d   egUR
                  U R                  R
                  :X  d   eg)z
Pops this context off the current CPU thread. Note that this context
must be at the top of the context stack, otherwise an error will occur.
N)r.  r3  r   r0   r6  r   )rV   r2  s     r=   r  Context.popP  sN    
 **,>v;#dkk"2222<<4;;#4#4444r?   c                 8    U R                   R                  U5      $ rQ   )r  r  )rV   bytesizes     r=   r  Context.memalloc[  s    ""++H55r?   c                 8    U R                   R                  X5      $ rQ   )r  r$  )rV   r  r  s      r=   r$  Context.memallocmanaged^  s    ""228KKr?   c                 :    U R                   R                  XX45      $ rQ   )r  r  )rV   r  r  r  r  s        r=   r  Context.memhostalloca  s    ""//(OOr?   c                     U(       a3  U R                   R                  (       d  [        SU R                   -  5      eU R                  R	                  XX45      $ )Nz%s cannot map host memory)r  CAN_MAP_HOST_MEMORYr   r  r  r  s        r=   r  Context.mempind  s?    $++99!"="KLL""))%$GGr?   c                 d    [         (       d  [        S5      eU R                  R                  U5      $ )z/
Returns an *IpcHandle* from a GPU allocation.
zOS does not support CUDA IPC)SUPPORTS_IPCr   r  r  r  s     r=   r  Context.get_ipc_handlei  s,     |899""11&99r?   c                     Sn[         (       a  [        R                  X5      nO4[        R                  " 5       n[        R                  [        U5      X5        [        [        R                  " U 5      UUS9$ )Nr   )r  r  r  )	r   r.  r   r   r   r   MemoryPointerr  r  )rV   r6  r  r   dptrs        r=   open_ipc_handleContext.open_ipc_handleq  s]    >,,V;D'')D%%eDk6A W]]4%8$"&( 	(r?   c                 H    US:X  d   S5       e[         R                  X5        g)zLEnable peer access between the current context and the peer context
        r   z$*flags* is reserved and MUST be zeroN)r.  cuCtxEnablePeerAccess)rV   peer_contextr   s      r=   enable_peer_accessContext.enable_peer_access~  s$     zAAAz$$\9r?   c                 ,   [         (       aA  [        R                  " U5      n[        R	                  U R
                  R                  U5      nO>[        5       n[        R	                  [        U5      U R
                  R                  U5        [        U5      $ )zcReturns a bool indicating whether the peer access between the
current and peer device is possible.
)
r   r   CUdevicer.  cuDeviceCanAccessPeerr  rw  r   r   bool)rV   peer_devicecan_access_peers      r=   r  Context.can_access_peer  sp     >!**;7K$::4;;>>;FHO $gO(()?)-G O$$r?   c                     [        U[        5      (       a  UR                  S5      n[        (       a  UnO[	        U5      nU R                  U5      $ Nutf8)r/   r+   encoder   r
   create_module_image)rV   ptximages      r=   create_module_ptxContext.create_module_ptx  sA    c3**V$C>ESME''..r?   c                     [        X5      n[        (       a  UR                  nOUR                  R                  nX R                  U'   [
        R                  " U5      $ rQ   )load_module_imager   r6  r   r  r  r  )rV   r  modulekeys       r=   r  Context.create_module_image  sD    "4/>--C--%%C"S}}V$$r?   c                 z    [         (       a  UR                  nOUR                  R                  nU R                  U	 g rQ   )r   r6  r   r  )rV   r  r  s      r=   unload_moduleContext.unload_module  s+    >--C--%%CLLr?   c                     [         (       a  [        R                  " [        5      nO$[        R
                  " [        R                  5      n[        [        R                  " U 5      US 5      $ rQ   )	r   r   CUstreamCU_STREAM_DEFAULTr   	cu_streamStreamr  r  rV   r6  s     r=   get_default_streamContext.get_default_stream  sF    >%%&78F%%f&>&>?FgmmD)6488r?   c                     [         (       a%  [        R                  " [        R                  5      nO$[        R
                  " [        R                  5      n[        [        R                  " U 5      US 5      $ rQ   )	r   r   r  CU_STREAM_LEGACYr   r  r  r  r  r  s     r=   get_legacy_default_stream!Context.get_legacy_default_stream  sK    >%%g&>&>?F%%f&=&=>FgmmD)6488r?   c                     [         (       a%  [        R                  " [        R                  5      nO$[        R
                  " [        R                  5      n[        [        R                  " U 5      US 5      $ rQ   )	r   r   r  CU_STREAM_PER_THREADr   r  r  r  r  r  s     r=   get_per_thread_default_stream%Context.get_per_thread_default_stream  sK    >%%g&B&BCF%%f&A&ABFgmmD)6488r?   c                 ^   [         (       a:  [        R                  R                  R                  n[
        R                  U5      nO4[        R                  " 5       n[
        R                  [        U5      S5        [        [        R                  " U 5      U[        U R                  U5      5      $ re  )r   r   CUstream_flagsr  r   r.  cuStreamCreater   r  r   r  r  r  _stream_finalizerr  )rV   r   r6  s      r=   create_streamContext.create_stream  s~    >
 **<<BBE**51F%%'F!!%-3gmmD)6'(:(:FCE 	Er?   c                     [        U[        5      (       d  [        S5      e[        (       a  [        R
                  " U5      nO[        R                  " U5      n[        [        R                  " U 5      US SS9$ )Nz&ptr for external stream must be an intT)external)r/   r0   	TypeErrorr   r   r  r   r  r  r  r  )rV   r!  r6  s      r=   create_external_streamContext.create_external_stream  s_    #s##DEE>%%c*F%%c*FgmmD)64#% 	%r?   c                 J   SnU(       d  U[         R                  -  n[        (       a  [        R	                  U5      nO4[
        R                  " 5       n[        R	                  [        U5      U5        [        [        R                  " U 5      U[        U R                  U5      S9$ Nr   r  )r   CU_EVENT_DISABLE_TIMINGr   r.  cuEventCreater   cu_eventr   Eventr  r  _event_finalizerr  )rV   timingr   r6  s       r=   create_eventContext.create_event  s{    U222E>))%0F__&F  v6W]]4(&/0B0BFKM 	Mr?   c                 ,    [         R                  5         g rQ   )r.  cuCtxSynchronizer\   s    r=   synchronizeContext.synchronize  s    !r?   c              #      #    U R                   R                  5          U R                  R                  5          S v   S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f7frQ   )r  r  r  r)  r\   s    r=   r  Context.defer_cleanup  sH       ..0##++- . 10-- 10s2   A0AAA	A0
A	A
A-)A0c                 L    SU R                   U R                  R                  4-  $ )Nz<CUDA context %s of device %d>)r6  r  rw  r\   s    r=   r  Context.__repr__  s    /4;;2OOOr?   c                 j    [        U[        5      (       a  U R                  UR                  :H  $ [        $ rQ   )r/   r  r6  NotImplementedr  s     r=   r  Context.__eq__  s(    eW%%;;%,,..!!r?   c                 .    U R                  U5      (       + $ rQ   )r  r  s     r=   r  Context.__ne__  s    ;;u%%%r?   )r  r  r  r  r6  r  r  rQ   )Tr,  r-  rA  )*r)   rE   rF   rG   rB  rU   r*  r  r  r  r  r  r  r  r  r  r  r  r$  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r.  r/  r  r  r  r  rH   rC   r?   r=   r  r    s    		#5 48HP
 <@@$1 K)	56LPH
:(:%/%999E%
M"  
P"&r?   r  c                 D    [         (       a  [        X5      $ [        X5      $ )
image must be a pointer
)r   load_module_image_cuda_pythonload_module_image_ctypes)r  r  s     r=   r  r    s     ~,W<<'77r?   c                    [         R                  n[        U-  " 5       n[        U-  " 5       n[        R                  [        U5      [        R                  [        U5      [        R                  [        U5      [        R                  [        U5      [        R                  [        [         R                  5      0n[        R                  [        U5      -  " UR                  5       6 n[        [        U5      -  " UR!                  5       6 n[        R"                  " 5       n [$        R'                  [)        U5      U[        U5      Xg5        UR,                  n[3        [4        R6                  " U 5      X[9        X5      5      $ ! [*         a9  n	SUR,                  R/                  S5      -  n
[+        U	R0                  U
5      eS n	A	ff = f)NcuModuleLoadDataEx error:
%sr  )r   CUDA_LOG_SIZEr	   r   CU_JIT_INFO_LOG_BUFFERr   !CU_JIT_INFO_LOG_BUFFER_SIZE_BYTESr   CU_JIT_ERROR_LOG_BUFFER"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTESCU_JIT_LOG_VERBOSECUDA_VERBOSE_JIT_LOGr   cu_jit_optionr  keysr&  	cu_moduler.  cuModuleLoadDataExr   rN   r   r}  rR   CtypesModuler  r  _module_finalizer)r  r  logszjitinfo	jiterrorsoptionsoption_keysoption_valsr6  r}   rS   info_logs               r=   r"  r"    sX     E~ G%"I 	$$i&8//%%%y';00(5/  (6+F+F"GG ''#g,6HKc'l*W^^-=>KF(!!%-G"-	< }}Hg.)':< <  (-	0F0Fv0NN1663''(s    *F   
G
4F>>Gc           
         [         R                  n[        U5      n[        U5      n[        R                  nUR
                  UUR                  UUR                  UUR                  UUR                  [         R                  0nUR                  5        Vs/ s H  owPM     nnUR                  5        V	s/ s H  oPM     n
n	 [        R                  U[        U5      UU
5      nUR#                  S5      n['        [(        R*                  " U 5      X[-        X5      5      $ s  snf s  sn	f ! [          a1  nUR#                  S5      nSU-  n[!        UR$                  U5      eSnAff = f)r   rp  r$  N)r   r%  	bytearrayr   CUjit_optionr&  r'  r(  r)  r*  r+  r-  r&  r.  r/  r  rN   r}  rR   CudaPythonModuler  r  r1  )r  r  r2  r3  r4  
jit_optionr5  kr6  vr7  r6  r}   
err_stringrS   r8  s                   r=   r!  r!  $  sF      EG% I%%J))744e**I55u%%v'B'BG &lln-n1nK-%nn./.1.K/(**5#g,+68 ~~g&HGMM'2F-g>@ @ ./
  (%%g.
-
:1663''(s$   D4D#!D( (
E#2,EE#c                 T   ^^^^^ U R                   mU R                  mUUUUU4S jnU$ )Nc                  \   > T(       a  TT 	 TR                  [        R                  TT5        g rQ   )ry  r.  	cuMemFree)r  r  r  r!  r  s   r=   core_alloc_finalizer.<locals>.coreK  s&    I&v//d;r?   r+  )r  r!  r  r  rD  r  r  s    ``` @@r=   r"  r"  G  s*     ,,K"00M< <
 Kr?   c                 r   ^^^^^^ U R                   mU R                  mT(       d  [        mUUUUUU4S jnU$ )aC  
Finalize page-locked host memory allocated by `context.memhostalloc`.

This memory is managed by CUDA, and finalization entails deallocation. The
issues noted in `_pin_finalizer` are not relevant in this case, and the
finalization is placed in the `context.deallocations` queue along with
finalization of device objects.

c                  j   > T(       a
  T(       a  TT 	 TR                  [        R                  TT5        g rQ   )ry  r.  cuMemFreeHost)r  r  r  r  r!  r  s   r=   rD  "_hostalloc_finalizer.<locals>.coreb  s(    kI&v33S$?r?   )r  r  rA  )r  r!  r  r  r  rD  r  r  s    ```` @@r=   r	  r	  S  s6     !,,K"00M@ @
 Kr?   c                 8   ^^^^ U R                   mUUUU4S jnU$ )a  
Finalize temporary page-locking of host memory by `context.mempin`.

This applies to memory not otherwise managed by CUDA. Page-locking can
be requested multiple times on the same memory, and must therefore be
lifted as soon as finalization is requested, otherwise subsequent calls to
`mempin` may fail with `CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED`, leading
to unexpected behavior for the context managers `cuda.{pinned,mapped}`.
This function therefore carries out finalization immediately, bypassing the
`context.deallocations` queue.

c                  R   > T(       a
  T(       a  TT 	 [         R                  T5        g rQ   )r.  cuMemHostUnregister)r  r  r  r!  s   r=   rD  _pin_finalizer.<locals>.corey  s    kI&""3'r?   )r  )r  r!  r  r  rD  r  s    ``` @r=   r  r  j  s!     !,,K( (
 Kr?   c                    ^ ^ U U4S jnU$ )Nc                  F   > T R                  [        R                  T5        g rQ   )ry  r.  cuEventDestroydeallocsr6  s   r=   rD  _event_finalizer.<locals>.core  s    &//8r?   rC   rR  r6  rD  s   `` r=   r  r    s    9 Kr?   c                    ^ ^ U U4S jnU$ )Nc                  F   > T R                  [        R                  T5        g rQ   )ry  r.  cuStreamDestroyrQ  s   r=   rD  _stream_finalizer.<locals>.core  s    &00&9r?   rC   rT  s   `` r=   r   r     s    : Kr?   c                    ^^^^ U R                   mU R                  m[        (       a  TmOTR                  mUUUU4S jnU$ )Nc                  \   >^ [         R                  mUUU4S jn TR                  U T5        g )Nc                 X   > T" 5       (       d  TT;  d   e[         R                  U 5        g rQ   )r.  cuModuleUnload)r6  r  r  shutting_downs    r=   module_unload6_module_finalizer.<locals>.core.<locals>.module_unload  s'     !??c&888!!&)r?   )r   r]  ry  )r^  r]  deallocr6  r  r  s    @r=   rD  _module_finalizer.<locals>.core  s%    ++	* 	/r?   )r  r  r   r   )r  r6  rD  r`  r  r  s    ` @@@r=   r1  r1    s;    ##GooG~ll
0 
0 Kr?   c                   *    \ rS rSrSrS rS rS rSrg)_CudaIpcImpli  zbImplementation of GPU IPC using CUDA driver API.
This requires the devices to be peer accessible.
c                     UR                   U l         UR                  U l        UR                  U l        UR                  U l        S U l        g rQ   )r8  r6  r  r3  _opened_mem)rV   parents     r=   rU   _CudaIpcImpl.__init__  s6    KK	mmKK	mmr?   c                 &   U R                   b  [        S5      eU R                  b  [        S5      eUR                  U R                  U R
                  U R                  -   5      nX l        UR                  5       R                  U R
                  5      $ )D
Import the IPC memory and returns a raw CUDA memory pointer object
z'opening IpcHandle from original processIpcHandle is already opened)	r8  rn   re  r  r6  r3  r  r  view)rV   r  r  s      r=   open_CudaIpcImpl.open  sv     99 FGG':;;%%dkk4;;3JK wwy~~dkk**r?   c                     U R                   c  [        S5      e[        R                  U R                   R                  5        S U l         g NzIpcHandle not opened)re  rn   r.  cuIpcCloseMemHandler6  r\   s    r=   close_CudaIpcImpl.close  s<    #344""4#3#3#:#:;r?   )re  r8  r6  r3  r  N	r)   rE   rF   rG   rB  rU   rl  rq  rH   rC   r?   r=   rc  rc    s     +" r?   rc  c                   *    \ rS rSrSrS rS rS rSrg)_StagedIpcImpli  z~Implementation of GPU IPC using custom staging logic to workaround
CUDA IPC limitation on peer accessibility between devices.
c                     Xl         UR                  U l        UR                  U l        UR                  U l        X l        g rQ   )rf  r8  r6  r  r;  )rV   rf  r;  s      r=   rU   _StagedIpcImpl.__init__  s.    KK	mmKK	&r?   c                 L   SSK Jn  [        R                  U R                  5      n[
        (       a  [        UR                  5      nOUR                  n[        U R                  S9nUR                  U      UR                  UR                  R                  5       5      nS S S 5        UR                  U R                  5      n[!        UWU R                  5        UR                  U      UR#                  5         S S S 5        U$ ! , (       d  f       Nj= f! , (       d  f       U$ = f)Nr   r    )rf  )numbar!   r  rl  r;  r   r0   rw  rc  rf  gpusrl  r   get_contextr  r  device_to_devicerq  )rV   r  r!   srcdev	srcdev_idimpl
source_ptrnewmems           r=   rl  _StagedIpcImpl.open  s    %%d&6&67>FIII		I4;;/YYy!4<<#;#;#=>J " !!$)), 	TYY7 YYy!JJL "  "! "! s   5*D(D
D
D#c                     g rQ   rC   r\   s    r=   rq  _StagedIpcImpl.close  s    r?   )r8  r6  rf  r  r;  Nrs  rC   r?   r=   ru  ru    s    '4r?   ru  c                   f    \ rS rSrSrSS jrS rS rS rS r	S	 r
SS
 jrS rS r\S 5       rSrg)r7  i  a7  
CUDA IPC handle. Serialization of the CUDA IPC handle object is implemented
here.

:param base: A reference to the original allocation to keep it alive
:type base: MemoryPointer
:param handle: The CUDA IPC handle, as a ctypes array of bytes.
:param size: Size of the original allocation
:type size: int
:param source_info: The identity of the device on which the IPC handle was
                    opened.
:type source_info: dict
:param offset: The offset into the underlying allocation of the memory
               referred to by this IPC handle.
:type offset: int
Nc                 N    Xl         X l        X0l        X@l        S U l        XPl        g rQ   )r8  r6  r  r;  _implr3  )rV   r8  r6  r  r;  r3  s         r=   rU   IpcHandle.__init__  s#    		&
r?   c                 4    U R                   c  [        S5      eg )Nz#IPC handle doesn't have source info)r;  rh  r\   s    r=   _sentry_source_infoIpcHandle._sentry_source_info  s    #DEE $r?   c                     U R                  5         U R                  UR                  R                  5       :X  a  g[        R                  U R                  5      nUR                  UR                  5      $ )zTReturns a bool indicating whether the active context can peer
access the IPC handle
T)r  r;  r  rf  r  rl  r  rw  )rV   r  source_devices      r=   r  IpcHandle.can_access_peer  s[     	  "w~~AACC,,T-=-=>&&}'7'788r?   c                     U R                  5         U R                  b  [        S5      e[        X R                  5      U l        U R                  R                  U5      $ )zCOpen the IPC by allowing staging on the host memory first.
        rj  )r  r  rn   ru  r;  rl  rV   r  s     r=   open_stagedIpcHandle.open_staged  sK     	  "::!:;;#D*:*:;
zzw''r?   c                     U R                   b  [        S5      e[        U 5      U l         U R                   R                  U5      $ )ri  rj  )r  rn   rc  rl  r  s     r=   open_directIpcHandle.open_direct'  s9     ::!:;;!$'
zzw''r?   c                     U R                   b  U R                  U5      (       a  U R                  nOU R                  nU" U5      $ )ay  Open the IPC handle and import the memory for usage in the given
context.  Returns a raw CUDA memory pointer object.

This is enhanced over CUDA IPC that it will work regardless of whether
the source device is peer-accessible by the destination device.
If the devices are peer-accessible, it uses .open_direct().
If the devices are not peer-accessible, it uses .open_staged().
)r;  r  r  r  )rV   r  fns      r=   rl  IpcHandle.open1  s@     #t';';G'D'D!!B!!B'{r?   c                 n    SSK Jn  Uc  UR                  nU R                  U5      nUR	                  X$X6S9$ )z3
Similar to `.open()` but returns an device array.
r   )devicearray)shapestridesdtypegpu_data)r   r  itemsizerl  DeviceNDArray)rV   r  r  r  r  r  r  s          r=   
open_arrayIpcHandle.open_array@  sD     	" ?nnGyy!((u/4 ) E 	Er?   c                 v    U R                   c  [        S5      eU R                   R                  5         S U l         g ro  )r  rn   rq  r\   s    r=   rq  IpcHandle.closeN  s/    ::344


r?   c                     [         (       a  U R                  R                  nO[        U R                  5      nU R                  UU R
                  U R                  U R                  4n[        R                  U4$ rQ   )
r   r6  reservedr  rW   r  r;  r3  r   _rebuild_reduction)rV   preprocessed_handler   s      r=   
__reduce__IpcHandle.__reduce__T  sa    >"&++"6"6"'"4NNIIKK
 ,,d33r?   c                     [         (       a  [        R                  " 5       nXl        O[        R
                  " U6 nU " S XRX4S9$ )N)r8  r6  r  r;  r3  )r   r   CUipcMemHandler  r   r   )r   
handle_aryr  r;  r3  r6  s         r=   _rebuildIpcHandle._rebuildc  s>    >++-F(O--z:FV*; 	;r?   )r  r8  r6  r3  r  r;  re  rQ   )r)   rE   rF   rG   rB  rU   r  r  r  r  rl  r  rq  r  r  r  rH   rC   r?   r=   r7  r7    sJ     F9	((E4 ; ;r?   r7  c                   v    \ rS rSrSrSrSS jr\S 5       rS r	S r
SS	 jrSS
 jr\S 5       r\S 5       rSrg)r  in  a?  A memory pointer that owns a buffer, with an optional finalizer. Memory
pointers provide reference counting, and instances are initialized with a
reference count of 1.

The base ``MemoryPointer`` class does not use the
reference count for managing the buffer lifetime. Instead, the buffer
lifetime is tied to the memory pointer instance's lifetime:

- When the instance is deleted, the finalizer will be called.
- When the reference count drops to 0, no action is taken.

Subclasses of ``MemoryPointer`` may modify these semantics, for example to
tie the buffer lifetime to the reference count, so that the buffer is freed
when there are no more references.

:param context: The context in which the pointer was allocated.
:type context: Context
:param pointer: The address of the buffer.
:type pointer: ctypes.c_void_p
:param size: The size of the allocation in bytes.
:type size: int
:param owner: The owner is sometimes set by the internals of this class, or
              used for Numba's internal memory management. It should not be
              provided by an external user of the ``MemoryPointer`` class
              (e.g. from within an EMM Plugin); the default of `None`
              should always suffice.
:type owner: NoneType
:param finalizer: A function that is called when the buffer is to be freed.
:type finalizer: function
TNc                     Xl         X l        X0l        X0l        US LU l        SU l        U R                  U l        X@l        Ub  [        R                  " X5      U l
        g g Nr   )r  device_pointerr  _cuda_memsize_
is_managedrefctr6  _ownerr  finalize
_finalizerrV   r  r  r  r  r  s         r=   rU   MemoryPointer.__init__  sZ    %	"#4/
)) %..t?DO !r?   c                 8    U R                   c  U $ U R                   $ rQ   )r  r\   s    r=   r  MemoryPointer.owner  s    {{*t;;r?   c                 @    [        [        R                  " U 5      5      $ rQ   )OwnedPointerr  r  r\   s    r=   r  MemoryPointer.own  s    GMM$/00r?   c                     U R                   (       aT  U R                  R                  (       d  [        S5      eU R                  5         U R                  R                  (       a   egg)z(
Forces the device memory to the trash.
zFreeing dead memoryN)r  r  aliverh  r\   s    r=   rL  MemoryPointer.free  sI     ????(("#899OO,,,,,	 r?   c                     Uc  U R                   OUnU(       a,  [        R                  U R                  XUR                  5        g [        R                  U R                  X5        g rQ   )r  r.  cuMemsetD8Asyncr  r6  
cuMemsetD8)rV   byter!  streams       r=   memsetMemoryPointer.memset  sK    "]		""4#6#6#)==2 d114?r?   c                 n   Uc  U R                   U-
  nOX!-
  nU R                  (       d  US:w  a  [        S5      eU nOU R                  U-   nUS:  a  [        S5      e[        (       aI  [        R
                  " 5       n[        R                  R                  UR                  5       5      nXWl
        O[        R                  " U5      n[        U R                  XcU R                  S9n[        U R                  [        [        45      (       a*  [        [         R"                  " U R                  5      U5      $ U$ )Nr   z non-empty slice into empty slicezsize cannot be negative)r  )r  device_pointer_valuerh  r   r   CUdeviceptrr   r   from_addressgetPtrr   r  r  r  r/   r  r  r  )rV   startstopr  rk  r8  r  
ctypes_ptrs           r=   rk  MemoryPointer.view  s    <99u$D<D ((qy"#EFFD ,,u4Dax"#<==~!--/#11>>w~~?OP
#'  ..t4 wDJJODdjj=,"?@@djj 94@@ Kr?   c                     U R                   $ rQ   )r  r\   s    r=   device_ctypes_pointer#MemoryPointer.device_ctypes_pointer  s    """r?   c                     [         (       a  [        U R                  5      =(       d    S $ U R                  R                  $ rQ   )r   r0   r  r   r\   s    r=   r  "MemoryPointer.device_pointer_value  s/    >t**+3t3&&,,,r?   )	r  r  r  r  r  r6  r  r  r  NNre  rQ   )r)   rE   rF   rG   rB  __cuda_memory__rU   rC  r  r  rL  r  rk  r  r  rH   rC   r?   r=   r  r  n  se    < O@ < <1-@> # # - -r?   r  c                   ,   ^  \ rS rSrSrU 4S jrSrU =r$ )rI  i  a  Modifies the ownership semantic of the MemoryPointer so that the
instance lifetime is directly tied to the number of references.

When the reference count reaches zero, the finalizer is invoked.

Constructor arguments are the same as for :class:`MemoryPointer`.
c                 X   > [         [        U ]
  " U0 UD6  U =R                  S-  sl        g r  )rT   rI  rU   r  r  s      r=   rU   AutoFreePointer.__init__  s&    ot-t>v> 	

a
r?   rC   )r)   rE   rF   rG   rB  rU   rH   r_   r`   s   @r=   rI  rI    s     r?   rI  c                   :   ^  \ rS rSrSrSrSU 4S jjrS rSrU =r	$ )r
  i  a  A memory pointer that refers to a buffer on the host that is mapped into
device memory.

:param context: The context in which the pointer was mapped.
:type context: Context
:param pointer: The address of the buffer.
:type pointer: ctypes.c_void_p
:param size: The size of the buffer in bytes.
:type size: int
:param owner: The owner is sometimes set by the internals of this class, or
              used for Numba's internal memory management. It should not be
              provided by an external user of the ``MappedMemory`` class
              (e.g. from within an EMM Plugin); the default of `None`
              should always suffice.
:type owner: NoneType
:param finalizer: A function that is called when the buffer is to be freed.
:type finalizer: function
Tc                   > X@l         X l        [        (       a(  [        R	                  US5      nU R                  U l        OP[        R                  " 5       n[        R	                  [        U5      US5        U R                  R                  U l        X`l
        [        [        U ]7  XUUS9  U R                  U l        U R                  U l        g r	  )ownedhost_pointerr   r.  cuMemHostGetDevicePointer_bufptr_r   r   r   r   r  rT   r
  rU   r6  r  _buflen_rV   r  r  r  r  r  devptrrW   s          r=   rU   MappedMemory.__init__  s    
#>55gqAF --DM))+F,,U6]GQG --33DM$lD*7D5> 	+ 	@'' 		r?   c                 @    [        [        R                  " U 5      5      $ rQ   )MappedOwnedPointerr  r  r\   s    r=   r  MappedMemory.own  s    !'--"566r?   )r  r  r  r6  r  r  r  
r)   rE   rF   rG   rB  r  rU   r  rH   r_   r`   s   @r=   r
  r
    s    & O"(7 7r?   r
  c                   (    \ rS rSrSrSS jrS rSrg)r  i  a  A pointer to a pinned buffer on the host.

:param context: The context in which the pointer was mapped.
:type context: Context
:param owner: The object owning the memory. For EMM plugin implementation,
              this ca
:param pointer: The address of the buffer.
:type pointer: ctypes.c_void_p
:param size: The size of the buffer in bytes.
:type size: int
:param owner: An object owning the buffer that has been pinned. For EMM
              plugin implementation, the default of ``None`` suffices for
              memory allocated in ``memhostalloc`` - for ``mempin``, it
              should be the owner passed in to the ``mempin`` method.
:param finalizer: A function that is called when the buffer is to be freed.
:type finalizer: function
Nc                 .   Xl         X@l        X0l        X l        US LU l        U R                  U l        U R                  U l        [        (       a  U R                  U l        OU R                  R                  U l        Ub  [        R                  " X5        g g rQ   )r  r  r  r  r  r6  r  r   r  r   r  r  r  s         r=   rU   PinnedMemory.__init__0  s{    
	##4/'' 		> --DM --33DM T- !r?   c                     U $ rQ   rC   r\   s    r=   r  PinnedMemory.ownB  s    r?   )r  r  r  r6  r  r  r  r  r  )r)   rE   rF   rG   rB  rU   r  rH   rC   r?   r=   r  r    s    $.$r?   r  c                   :   ^  \ rS rSrSrSrSU 4S jjrS rSrU =r	$ )r#  iF  a	  A memory pointer that refers to a managed memory buffer (can be accessed
on both host and device).

:param context: The context in which the pointer was mapped.
:type context: Context
:param pointer: The address of the buffer.
:type pointer: ctypes.c_void_p
:param size: The size of the buffer in bytes.
:type size: int
:param owner: The owner is sometimes set by the internals of this class, or
              used for Numba's internal memory management. It should not be
              provided by an external user of the ``ManagedMemory`` class
              (e.g. from within an EMM Plugin); the default of `None`
              should always suffice.
:type owner: NoneType
:param finalizer: A function that is called when the buffer is to be freed.
:type finalizer: function
Tc                    > X@l         Un[        TU ]	  XX5S9  U R                  U l        [
        (       a  U R                  U l        g U R                  R                  U l        g )Nr  )	r  rT   rU   r  r  r   r  r  r   r  s          r=   rU   ManagedMemory.__init__\  sQ    
$D 		> //DM //55DMr?   c                 @    [        [        R                  " U 5      5      $ rQ   )ManagedOwnedPointerr  r  r\   s    r=   r  ManagedMemory.ownh  s    "7==#677r?   )r  r  r  r  r  r`   s   @r=   r#  r#  F  s    & O
68 8r?   r#  c                   $    \ rS rSrSS jrS rSrg)r  il  Nc                    ^ Xl         Uc  U R                   U l        OUR                  (       a   eX l        U R                   mU4S jnU R                   =R                  S-  sl        [        R
                  " X5        g )Nc                     >  T =R                   S-  sl         T R                   S:  d   eT R                   S:X  a  T R                  5         g g ! [         a     g f = f)Nr   r   )r  rL  ReferenceError)r  s   r=   deref$OwnedPointer.__init__.<locals>.derefx  sP    		Q	yyA~%~99>HHJ "! s   AA 
AAr   )_mem_viewr  r  r  r  )rV   memptrrk  r  r  s       @r=   rU   OwnedPointer.__init__m  sZ    	<DJ&&Jii	 			1%r?   c                 .    [        U R                  U5      $ )z$Proxy MemoryPointer methods
        )r.   r  r   s     r=   r   OwnedPointer.__getattr__  s     tzz5))r?   )r  r  rQ   )r)   rE   rF   rG   rU   r   rH   rC   r?   r=   r  r  l  s    &0*r?   r  c                       \ rS rSrSrg)r  i  rC   NrD   rC   r?   r=   r  r    rI   r?   r  c                       \ rS rSrSrg)r  i  rC   NrD   rC   r?   r=   r  r    rI   r?   r  c                       \ rS rSrSS jrS rS rS r\R                  S 5       r
SS jr\\S	 5       5       rS
\R                   R"                  4S jrSrg)r  i  c                 \    Xl         X l        X@l        Ub  [        R                  " X5        g g rQ   )r  r6  r  r  r  )rV   r  r6  r  r  s        r=   rU   Stream.__init__  s*      T- !r?   c                     [         (       a  [        U R                  5      $ U R                  R                  =(       d    [        R
                  $ rQ   )r   r0   r6  r   r   r  r\   s    r=   __int__Stream.__int__  s3    >t{{## ;;$$@(@(@@r?   c                    [         (       aG  [        S[        R                  S[        R                  S0n[        U R                  5      =(       d    SnO_[        R                  S[        R                  S[        R                  S0nU R                  R                  =(       d    [        R                  nX!;   a  X   U R                  -  $ U R                  (       a  SX R                  4-  $ SX R                  4-  $ )Nz<Default CUDA stream on %s>z"<Legacy default CUDA stream on %s>z&<Per-thread default CUDA stream on %s>r   z<External CUDA stream %d on %s>z<CUDA stream %d on %s>)r   r  r   r  r  r0   r6  r   r   r  r  )rV   default_streamsr!  s      r=   r  Stream.__repr__  s    >!#@((8,,<O dkk"'aC ((*G'')M++<	O ++##?v'?'?C!"'$,,66]]4\\7JJJ+sLL.AAAr?   c                 B    [         R                  U R                  5        g)za
Wait for all commands in this stream to execute. This will commit any
pending memory transfers.
N)r.  cuStreamSynchronizer6  r\   s    r=   r  Stream.synchronize  s    
 	""4;;/r?   c              #   4   #    U v   U R                  5         g7f)z
A context manager that waits for all commands in this stream to execute
and commits any pending memory transfers upon exiting the context.
N)r  r\   s    r=   auto_synchronizeStream.auto_synchronize  s      
s   Nc                    XU4n[        U5        [        (       a@  [        R                  U R                  SS9n[
        R                  " U5      n[        U5      nOU R                  n[        R                  U R                  XSS5        g)a  
Add a callback to a compute stream.
The user provided function is called from a driver thread once all
preceding stream operations are complete.

Callback functions are called from a CUDA driver thread, not from
the thread that invoked `add_callback`. No CUDA API functions may
be called from within the callback function.

The duration of a callback function should be kept short, as the
callback will block later work in the stream and may block other
callbacks from being executed.

Note: The driver function underlying this method is marked for
eventual deprecation and may be replaced in a future CUDA release.

:param callback: Callback function with arguments (stream, status, arg).
:param arg: Optional user data to be passed to the callback function.
r  r  r   N)
_py_increfr   r0   r  _stream_callbackr   CUstreamCallbackrw  r.  cuStreamAddCallbackr6  )rV   callbackr   datar!  stream_callbacks         r=   add_callbackStream.add_callback  sm    ( $4>..!6!6(.KC%66s;Od8D"33O""4;;qIr?   c                      Uu  p4nU" X1U5        [        U5        g ! [          a#  n[        R                  " SU 35         S nAN4S nAff = f! [        U5        f = f)NzException in stream callback: )r[  warningswarn
_py_decref)r6  statusr  r  r  r   r}   s          r=   r  Stream._stream_callback  s[    	$(!FcVS) t  	@MM:1#>??	@ ts%    
A	AA A		A Areturnc                    ^ ^^ [         R                  " 5       mTR                  5       nU 4S jmUU4S jnT R                  X!5        U$ )z
Return an awaitable that resolves once all preceding stream operations
are complete. The result of the awaitable is the current stream.
c                    > U R                  5       (       a  g US:X  a  U R                  T5        g U R                  [        SU 35      5        g )Nr   zStream error )done
set_resultset_exceptionr[  )futurer"  rV   s     r=   resolver#Stream.async_done.<locals>.resolver  s@    {{}}1!!$'$$Yvh/G%HIr?   c                 *   > TR                  TX!5        g rQ   )call_soon_threadsafe)r  r"  r*  loopr+  s      r=   r  #Stream.async_done.<locals>.callback	  s    %%h?r?   )asyncioget_running_loopcreate_futurer  )rV   r*  r  r/  r+  s   `  @@r=   
async_doneStream.async_done  sB    
 '')##%	J	@ 	(+r?   )r  r  r6  r-  rQ   )r)   rE   rF   rG   rU   r	  r  r  r.  r/  r  r  staticmethodr   r  r1  futuresFuturer4  rH   rC   r?   r=   r  r    sj    .AB40  J>   GOO22 r?   r  c                   D    \ rS rSrS
S jrS rSS jrS rSS jrS r	S	r
g)r  i	  Nc                 P    Xl         X l        Ub  [        R                  " X5        g g rQ   )r  r6  r  r  )rV   r  r6  r  s       r=   rU   Event.__init__	  s%     T- !r?   c                      [         R                  U R                  5        g! [         a)  nUR                  [
        R                  :X  a   SnAge SnAff = f)za
Returns True if all work before the most recent record has completed;
otherwise, returns False.
TNF)r.  cuEventQueryr6  rN   rR   r   CUDA_ERROR_NOT_READY)rV   r}   s     r=   queryEvent.query	  sH    
	,   	vv333		s   " 
AAAAc                     [         (       a*  U(       a  UR                  O[        R                  " S5      nOU(       a  UR                  OSn[        R                  U R                  U5        g)z
Set the record point of the event to the current point in the given
stream.

The event will be considered to have occurred when all work that was
queued in the stream at the time of the call to ``record()`` has been
completed.
r   N)r   r6  r   r  r.  cuEventRecord)rV   r  hstreams      r=   recordEvent.record#	  sB     >'-fmm73C3CA3FG'-fmm1GT[['2r?   c                 B    [         R                  U R                  5        g)z>
Synchronize the host thread for the completion of the event.
N)r.  cuEventSynchronizer6  r\   s    r=   r  Event.synchronize2	  s     	!!$++.r?   c                     [         (       a*  U(       a  UR                  O[        R                  " S5      nOU(       a  UR                  OSnSn[        R                  X R                  U5        g)zJ
All future works submitted to stream will wait util the event completes.
r   N)r   r6  r   r  r.  cuStreamWaitEvent)rV   r  rC  r   s       r=   wait
Event.wait8	  sG     >'-fmm73C3CA3FG'-fmm1G  ++u=r?   c                     [        X5      $ rQ   )event_elapsed_time)rV   evtends     r=   elapsed_timeEvent.elapsed_timeC	  s    !$//r?   )r  r6  rQ   rA  )r)   rE   rF   rG   rU   r?  rD  r  rK  rP  rH   rC   r?   r=   r  r  	  s     .3/	>0r?   r  c                     [         (       a*  [        R                  U R                  UR                  5      $ [	        5       n[        R                  [        U5      U R                  UR                  5        UR                  $ )z>
Compute the elapsed time between two events in milliseconds.
)r   r.  cuEventElapsedTimer6  r   r   r   )evtstartrO  msecs      r=   rN  rN  G	  sS     ~((&--HHy!!%+xNzzr?   c                   H    \ rS rSrSrS	S jrS r\S 5       r\S 5       r	Sr
g)
ModuleiS	  zAbstract base class for modulesNc                 f    Xl         X l        X0l        Ub  [        R                  " X5      U l        g g rQ   )r  r6  r8  r  r  r  )rV   r  r6  r8  r  s        r=   rU   Module.__init__V	  s.      %..t?DO !r?   c                 :    U R                   R                  U 5        g)z#Unload this module from the contextN)r  r  r\   s    r=   unloadModule.unload]	  s    ""4(r?   c                     g)z:Returns a Function object encapsulating the named functionNrC   rV   r   s     r=   get_functionModule.get_functiona	  r  r?   c                     g)z4Return a MemoryPointer referring to the named symbolNrC   r^  s     r=   get_global_symbolModule.get_global_symbole	  r  r?   )r  r  r6  r8  rQ   )r)   rE   rF   rG   rB  rU   r[  r   r_  rb  rH   rC   r?   r=   rW  rW  S	  s<    )@) I I C Cr?   rW  c                        \ rS rSrS rS rSrg)r0  ij	  c                     [         R                  " 5       n[        R                  [	        U5      U R
                  UR                  S5      5        [        [        R                  " U 5      X!5      $ r  )
r   cu_functionr.  cuModuleGetFunctionr   r6  r  CtypesFunctionr  r  rV   r   r6  s      r=   r_  CtypesModule.get_functionl	  sL    ##%""5=$++#';;v#6	8gmmD16@@r?   c                     [         R                  " 5       n[         R                  " 5       n[        R	                  [        U5      [        U5      U R                  UR                  S5      5        [        U R                  X#5      UR                  4$ r  )r   r   r   r.  cuModuleGetGlobalr   r6  r  r  r  r   rV   r   r!  r  s       r=   rb  CtypesModule.get_global_symbolr	  sb    ""$   sU4[$++!%V!4	6T\\35tzzAAr?   rC   Nr)   rE   rF   rG   r_  rb  rH   rC   r?   r=   r0  r0  j	  s    ABr?   r0  c                        \ rS rSrS rS rSrg)r<  iz	  c                     [         R                  U R                  UR                  S5      5      n[	        [
        R                  " U 5      X!5      $ r  )r.  rg  r6  r  CudaPythonFunctionr  r  ri  s      r=   r_  CudaPythonModule.get_function|	  s7    ++DKKV9LM!'--"5vDDr?   c                     [         R                  U R                  UR                  S5      5      u  p#[	        U R
                  X#5      U4$ r  )r.  rl  r6  r  r  r  rm  s       r=   rb  "CudaPythonModule.get_global_symbol	  s9    ,,T[[$++f:MN	T\\35t;;r?   rC   Nro  rC   r?   r=   r<  r<  z	  s    E<r?   r<  FuncAttr)regssharedr^  const
maxthreadsc                   x    \ rS rSrSrSrSrSrS rS r	\
S 5       r\  SS j5       r\S 5       r\S	 5       rS
rg)Functioni	  )r   r   r   r   c                 R    Xl         X l        X0l        U R                  5       U l        g rQ   )r  r6  r   read_func_attr_allattrs)rV   r  r6  r   s       r=   rU   Function.__init__	  s!    	,,.
r?   c                      SU R                   -  $ )Nz<CUDA function %s>)r   r\   s    r=   r  Function.__repr__	  s    #dii//r?   c                 B    U R                   R                  R                  $ rQ   )r  r  r  r\   s    r=   r  Function.device	  s    {{"")))r?   c                     g)z.Set the cache configuration for this function.NrC   )rV   prefer_equalprefer_cacheprefer_shareds       r=   cache_configFunction.cache_config	  r  r?   c                     g)z0Return the value of the attribute with given ID.NrC   rV   attrids     r=   read_func_attrFunction.read_func_attr	  r  r?   c                     g)zHReturn a FuncAttr object with the values of various function
attributes.NrC   r\   s    r=   r~  Function.read_func_attr_all	  r  r?   )r  r6  r  r   Nr,  )r)   rE   rF   rG   griddimblockdimr  	sharedmemrU   r  rC  r  r   r  r  r~  rH   rC   r?   r=   r|  r|  	  sv    GHFI/0 * * <A#(= = ? ?  r?   r|  c                   .    \ rS rSr  SS jrS rS rSrg)rh  i	  c                    U=(       d    U=(       a    UnU(       a  [         R                  nO@U(       a  [         R                  nO(U(       a  [         R                  nO[         R                  n[
        R                  U R                  U5        g rQ   )r   CU_FUNC_CACHE_PREFER_EQUALCU_FUNC_CACHE_PREFER_L1CU_FUNC_CACHE_PREFER_SHAREDCU_FUNC_CACHE_PREFER_NONEr.  cuFuncSetCacheConfigr6  )rV   r  r  r  flags        r=   r  CtypesFunction.cache_config	  sZ    #G(F33D00D44D22D##DKK6r?   c                     [        5       n[        R                  [        U5      XR                  5        UR
                  $ rQ   )r   r.  cuFuncGetAttributer   r6  r   )rV   r  r  s      r=   r  CtypesFunction.read_func_attr	  s+    !!%-E||r?   c                 N   U R                  [        R                  5      nU R                  [        R                  5      nU R                  [        R                  5      nU R                  [        R
                  5      nU R                  [        R                  5      n[        XX4US9$ N)rw  ry  r^  rx  rz  )r  r   CU_FUNC_ATTRIBUTE_NUM_REGS"CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES"CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES#CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES'CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCKrv  )rV   nregscmemlmemsmemmaxtpbs         r=   r~  !CtypesFunction.read_func_attr_all	  s    ##E$D$DE""5#K#KL""5#K#KL""5#L#LM$$99;Ud#)+ 	+r?   rC   Nr,  r)   rE   rF   rG   r  r  r~  rH   rC   r?   r=   rh  rh  	  s    <A#(7
+r?   rh  c                   .    \ rS rSr  SS jrS rS rSrg)rr  i	  c                    U=(       d    U=(       a    Un[         R                  nU(       a  UR                  nO4U(       a  UR                  nO U(       a  UR                  nOUR
                  n[        R                  U R                  U5        g rQ   )	r   CUfunction_attributer  r  r  r  r.  r  r6  )rV   r  r  r  r  r  s         r=   r  CudaPythonFunction.cache_config	  se    #G(F++22D//D33D11D##DKK6r?   c                 @    [         R                  XR                  5      $ rQ   )r.  r  r6  r  s     r=   r  !CudaPythonFunction.read_func_attr	  s    ((==r?   c                 F   [         R                  nU R                  UR                  5      nU R                  UR                  5      nU R                  UR
                  5      nU R                  UR                  5      nU R                  UR                  5      n[        X#XEUS9$ r  )	r   r  r  r  r  r  r  r  rv  )rV   r  r  r  r  r  r  s          r=   r~  %CudaPythonFunction.read_func_attr_all	  s    ++##D$C$CD""4#J#JK""4#J#JK""4#K#KL$$88:Ud#)+ 	+r?   rC   Nr,  r  rC   r?   r=   rr  rr  	  s    <A#(7>	+r?   rr  c                 *   U	 Vs/ s H  n[        U5      PM     nn[        [        U5      -  " U6 n[        (       a  [        U5      nSnOUnS nU
(       a  [        R                  U XUXEUUUU5
        g [        R                  U XUXEUUUUU5        g s  snf re  )r   r   r  r   r.  cuLaunchCooperativeKernelcuLaunchKernel)cufunc_handlegxgygzbxbybzr  rC  r   cooperativer   
param_ptrsparamsparams_for_launchextras                   r=   launch_kernelr  	  s     -11DS)C.DJ1Z(:6F~%f-"(()+)+)2)0):	< 	m b b'%/#	%% 2s   B)or  ar   cubinfatbinc                       \ rS rSrSr\SS j5       r\S 5       r\	\S 5       5       r
\	\S 5       5       r\S 5       rS	 r\S
 5       rS rS r\S 5       rSrg)Linkeri!
  zAbstract base class for linkersNc                     [         R                  (       a  [        XU5      $ [        (       a  [	        XU5      $ [        XU5      $ rQ   )r   'CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY	MVCLinkerr   CudaPythonLinkerCtypesLinker)r   max_registerslineinfoccs       r=   new
Linker.new$
  s7    99]b99^#MR@@<<r?   c                     SU l         g r  lto)rV   r  r  r  s       r=   rU   Linker.__init__-
  s     r?   c                     g)z.Return the info log from the linker invocationNrC   r\   s    r=   r8  Linker.info_log3
  r  r?   c                     g)z/Return the error log from the linker invocationNrC   r\   s    r=   	error_logLinker.error_log8
  r  r?   c                     g)z&Add PTX source in a string to the linkNrC   )rV   r  r   s      r=   add_ptxLinker.add_ptx=
  r  r?   c                    [         R                  5        n[         R                  UR                  5      nUR                  nSSS5        [
        R                  " XW5      u  pg[        R                  (       a4  [        SU-  R                  SS5      5        [        U5        [        S5        [        R                  R                  U5      S   S-   nU R                  UR                  5       U5        g! , (       d  f       N= f)zcAdd CUDA source in a string to the link. The name of the source
file should be specified in `name`.NzASSEMBLY %sP   ru  zP================================================================================r   z.ptx)r.  r-  r  r  r{  r   compiler   DUMP_ASSEMBLYprintcenterrk   rl   splitextr  r  )	rV   cur   r1  r  r  r  logptx_names	            r=   add_cuLinker.add_cuA
  s     &&(B##BII.C''B ) ==2.=4'//C89#J(O 77##D)!,v5SZZ\8, )(s   ,C11
C?c                     g)z Add code from a file to the linkNrC   )rV   rl   kinds      r=   add_fileLinker.add_fileS
  r  r?   c                     [        US5       nUR                  5       nS S S 5        U R                  W[        R                  R                  U5      5        g ! , (       d  f       N>= f)Nrb)rl  readr  rk   rl   basename)rV   rl   fr  s       r=   add_cu_fileLinker.add_cu_fileW
  sD    $B B((./ s   A
A$c                    [         R                  R                  U5      S   SS nUS:X  a  [        S5      eUS:X  a  U R	                  U5        g[
        R                  US5      nUc  [        SU 35      eU R                  X5        g)z=Add a file to the link, guessing its type from its extension.r   Nr   z-Don't know how to link file with no extensionr  z,Don't know how to link file with extension .)rk   rl   r  rh  r  FILE_EXTENSION_MAPr  r  )rV   rl   extr  s       r=   add_file_guess_extLinker.add_file_guess_ext\
  s    ggt$Q'+"9NOOD[T"%))#t4D|" $''*e$- . .MM$%r?   c                     g)zComplete the link. Returns (cubin, size)

cubin is a pointer to a internal buffer of cubin owned by the linker;
thus, it should be loaded before the linker is destroyed.
NrC   r\   s    r=   completeLinker.completej
  r  r?   r  r   FN)r)   rE   rF   rG   rB  r  r  r   rU   rC  r8  r  r  r  r  r  r  r   rH   rC   r?   r=   r  r  !
  s    )= =  
 =  = >  > 5 5-$ / /0
&  r?   r  zYMinor version compatibility requires ptxcompiler and cubinlinker packages to be availablec                   f   ^  \ rS rSrSrS
U 4S jjr\S 5       r\S 5       rSS jr	S r
S rS	rU =r$ )r  iy
  zS
Linker supporting Minor Version Compatibility, backed by the cubinlinker
package.
c                 j  >  SSK Jn  Uc  [	        S5      e[
        T	U ]  XU5        SUS   S-  US   -    3nSUS/nU(       a  S	U 3nUR                  U5        U(       a  UR                  S
5        [        U5      U l	        U" SU 35      U l
        g ! [         a  n[        [        5      UeS nAff = f)Nr   )CubinLinkerzEMVCLinker requires Compute Capability to be specified, but cc is Nonesm_r9  r   z
--gpu-namez-cz--maxrregcount=z--generate-line-infoz--arch=)cubinlinkerr  ImportError_MVC_ERROR_MESSAGErh  rT   rU   r   r  ptx_compile_options_linker)
rV   r  r  r  r  rb  archptx_compile_optsr   rW   s
            r=   rU   MVCLinker.__init__~
  s    	;/ :  ; < < 	"5RURZ"Q%'()($5#M?3C##C(##$:;#()9#: "WTF#34%  	;01s:	;s   B 
B2B--B2c                 .    U R                   R                  $ rQ   )r  r8  r\   s    r=   r8  MVCLinker.info_log
  s    ||$$$r?   c                 .    U R                   R                  $ rQ   )r  r  r\   s    r=   r  MVCLinker.error_log
  s    ||%%%r?   c                     SSK Jn  SSKJn  U" UR                  5       U R                  5      n U R                  R                  UR                  U5        g ! [         a  n[	        [
        5      UeS nAff = f! U a  n[        UeS nAff = f)Nr   )compile_ptxCubinLinkerError)ptxcompilerr  r  r  r  r	  r}  r
  r  	add_cubincompiled_programrK   )rV   r  r   r  r  rb  compile_resultr}   s           r=   r  MVCLinker.add_ptx
  s    	;/4 %SZZ\43K3KL	%LL"">#B#BDI	  	;01s:	;
   	%1$	%s.   A &A9 
A6!A11A69B?BBc                     SSK Jn   [	        US5       nUR                  5       nS S S 5        [        R                  " U5      R                  nU[        S   :X  a  U R                  R                  nOlU[        S   :X  a  U R                  R                  nOHU[        S   :X  a  [        SU 35      eU[        S	   :X  a  U R                  WU5      $ [        SU 35      e U" WU5        g ! [         a  n[        [        5      UeS nAff = f! , (       d  f       N= f! [         a    [        U S35      ef = f! U a  n	[        U	eS n	A	ff = f)
Nr   r  r  
 not foundr  r  r  zDon't know how to link r  )r  r  r  r	  rl  r  FileNotFoundErrorrK   pathlibPathr   r  r  r  
add_fatbinr  )
rV   rl   r  r  rb  r  r  r   r  r}   s
             r=   r  MVCLinker.add_file
  s>   	;4	3dD!Qvvx "
 ||D!&&%g..''B'11((B',, 7v>??'..<<d++ 7v>??	%tTN-  	;01s:	; "!  	3j122	3"   	%1$	%sV   C) D DD 	D8 )
D3DD
DD D D58E
>EE
c                      SSK Jn   U R                  R                  5       $ ! [         a  n[        [        5      UeS nAff = f! U a  n[        UeS nAff = f)Nr   r  )r  r  r  r	  r  r   rK   )rV   r  rb  r}   s       r=   r   MVCLinker.complete
  s[    	;4	%<<((**	  	;01s:	;
   	%1$	%s)   # A 
A=AAAA)r  r
  )NFNz<cudapy-ptx>r)   rE   rF   rG   rB  rU   rC  r8  r  r  r  r   rH   r_   r`   s   @r=   r  r  y
  sI    5. % % & &
%%:	% 	%r?   r  c                   f   ^  \ rS rSrSrS
U 4S jjr\S 5       r\S 5       rSS jr	S r
S rS	rU =r$ )r  i
  )
Links for current device if no CC given
c                 j  > [         TU ]  XU5        [        R                  n[        U-  " 5       n[        U-  " 5       n[
        R                  [        U5      [
        R                  [        U5      [
        R                  [        U5      [
        R                  [        U5      [
        R                  [        S5      0nU(       a  [        U5      U[
        R                  '   U(       a  [        S5      U[
        R                  '   Uc  SU[
        R                  '   O*US   S-  US   -   n[        U5      U[
        R                   '   [#        UR%                  5       5      n	[#        UR'                  5       5      n
[(        R*                  [-        U	5      -  " U	6 n[        [-        U
5      -  " U
6 n[(        R.                  " 5       =U l        n[2        R5                  [-        U	5      X[7        U R0                  5      5        [8        R:                  " U [2        R<                  U5        XPl        X`l         XVX/U l!        g )Nr   r   r9  )"rT   rU   r   r%  r	   r   r&  r   r'  r   r(  r)  r*  CU_JIT_MAX_REGISTERSCU_JIT_GENERATE_LINE_INFOCU_JIT_TARGET_FROM_CUCONTEXTCU_JIT_TARGETr%  r-  r&  r   r,  r  cu_link_stater6  r.  cuLinkCreater   r  r  cuLinkDestroylinker_info_buflinker_errors_buf_keep_alive)rV   r  r  r  r2  
linkerinfolinkererrorsr5  cc_valraw_keys
raw_valuesr6  r7  r6  rW   s                 r=   rU   CtypesLinker.__init__
  s   "5$$un'
) (()J*?33Xe_))9\+B44huo$$hqk
 2:=2IGE../7?{GE334::;GE667URZ"Q%'F+3F+;GE''(''..*+
++c(m;hG#j/1J?%3355fCM;!$++.	0 	v33V<)!-&kOr?   c                 L    U R                   R                  R                  S5      $ r  )r1  r   r}  r\   s    r=   r8  CtypesLinker.info_log  s    ##))0088r?   c                 L    U R                   R                  R                  S5      $ r  )r2  r   r}  r\   s    r=   r  CtypesLinker.error_log  s    %%++226::r?   c           
      R   [        U5      n[        UR                  S5      5      nU =R                  X4/-  sl         [        R	                  U R
                  [        R                  U[        U5      USS S 5        g ! [         a!  n[        U< SU R                  < 35      eS nAff = fNr  r   rz   )r
   r  r3  r.  cuLinkAddDatar6  r   CU_JIT_INPUT_PTXr  rN   rK   r  )rV   r  r   ptxbufnamebufr}   s         r=   r  CtypesLinker.add_ptx	  s    #4;;v./V--	>  e.D.D!'S7AtTK 	>!T^^<==	>s   =A; ;
B&B!!B&c                 ^   [        UR                  S5      5      nU R                  R                  U5         [        R                  U R                  X#SS S 5        g ! [         aG  nUR                  [        R                  :X  a  U S3nOU< SU R                  < 3n[        U5      eS nAff = fNr  r   r  rz   )r
   r  r3  r   r.  cuLinkAddFiler6  rN   rR   r   CUDA_ERROR_FILE_NOT_FOUNDr  rK   rV   rl   r  pathbufr}   rS   s         r=   r  CtypesLinker.add_file  s    4;;v./(	#  dQdK 	#vv888j)"#T^^4c""	#s   #A 
B,%AB''B,c                    [        S5      n[        S5      n [        R                  U R                  [        U5      [        U5      5        UR                  nUS:  d   S5       eU R                  S S 2	 [        R                  " U[        R                  " [        R                  5      5      n[        [         R"                  R%                  XB4S95      $ ! [         a!  n[        U< SU R                  < 35      eS nAff = f)Nr   rz   "linker returned a zero sized cubinr  )r   r   r.  cuLinkCompleter6  r   rN   rK   r  r   r3  rh   castr   r	   r  np	ctypeslibas_arrayrV   	cubin_bufr  r}   	cubin_ptrs        r=   r   CtypesLinker.complete   s    QK	{	>!!$++uY/?tM zzax===xQ KK	6>>&--+HI	R\\**9G*DEE  	>!T^^<==	>s   3C 
C>C99C>r3  r6  r2  r1  r  r%  r&  r`   s   @r=   r  r  
  sM    )PV 9 9 ; ;>#F Fr?   r  c                   f   ^  \ rS rSrSrS
U 4S jjr\S 5       r\S 5       rSS jr	S r
S rS	rU =r$ )r  i2  r(  c           
      .  > [         TU ]  XU5        [        R                  n[	        U5      n[	        U5      n[
        R                  nUR                  UUR                  UUR                  UUR                  UUR                  S0nU(       a  XUR                  '   U(       a  SXR                  '   Uc  SXR                  '   O9US   S-  US   -   n	[        [
        R                   SU	 35      n
XUR"                  '   [%        UR'                  5       5      n[%        UR)                  5       5      n[*        R-                  [/        U5      X5      U l        [2        R4                  " U [*        R6                  U R0                  5        XPl        X`l        XVX/U l        g )Nr   r   r9  CU_TARGET_COMPUTE_)rT   rU   r   r%  r:  r   r;  r&  r'  r(  r)  r*  r*  r+  r,  r.   CUjit_targetr-  r%  r-  r&  r.  r/  r  r6  r  r  r0  r1  r2  r3  )rV   r  r  r  r2  r4  r5  r=  r5  r6  cc_enumr7  r8  rW   s                r=   rU   CudaPythonLinker.__init__6  sa   "5$$u%
 '))
 --z88%..995))1
 7DJ334<=G889:?@G;;<URZ"Q%'Fg22 26(;=G07J,,-''..*+
))#h-Nv33T[[A)!-&hKr?   c                 8    U R                   R                  S5      $ r  )r1  r}  r\   s    r=   r8  CudaPythonLinker.info_log`  s    ##**622r?   c                 8    U R                   R                  S5      $ r  )r2  r}  r\   s    r=   r  CudaPythonLinker.error_logd  s    %%,,V44r?   c           
      @   UR                  S5      nU =R                  X/-  sl         [        R                  R                  n[
        R                  U R                  XA[        U5      US/ / 5        g ! [         a!  n[        U< SU R                  < 35      eS nAff = fr?  )r  r3  r   CUjitInputTyperA  r.  r@  r6  r  rN   rK   r  )rV   r  r   rC  	input_ptxr}   s         r=   r  CudaPythonLinker.add_ptxh  s    ++f%SN*	>..??I  ic#h!(!R5 	>!T^^<==	>s   AA2 2
B<BBc                 `   UR                  S5      nU R                  R                  U5         [        R	                  U R
                  X#S/ / 5        g ! [         aQ  nUR                  [        R                  R                  :X  a  U S3nOU< SU R                  < 3n[        U5      eS nAff = frF  )r  r3  r   r.  rG  r6  rN   rR   r   r  rH  r  rK   rI  s         r=   r  CudaPythonLinker.add_filer  s    ++f%(	#  dQBG 	#vv))CCCj)"#T^^4c""	#s   #A 
B-AB((B-c                     [         R                  U R                  5      u  pUS:  d   S5       eU R                  S S 2	 [        R                  " U[        R                  " [        R                  5      5      n[        [        R                  R                  XB4S95      $ ! [         a!  n[	        U< SU R
                  < 35      eS nAff = f)Nrz   r   rM  rN  )r.  rO  r6  rN   rK   r  r3  rh   rP  r   r	   r  rQ  rR  rS  rT  s        r=   r   CudaPythonLinker.complete  s    	>$33DKK@OI ax===xQKK	6>>&--+HI	R\\**9G*DEE  	>!T^^<==	>s   !B 
C
)CC
rX  r  r%  r&  r`   s   @r=   r  r  2  sM    (LT 3 3 5 5>#
F 
Fr?   r  c                    U S:w  a  [         (       aG  [        R                  nUR                  n[        R                  " U 5      n[
        R                  X#5      $ [        R                  " 5       n[        R                  n[
        R                  [        U5      X 5        U$ [         (       a  [        R                  " 5       $ [        R                  " 5       $ )zRQuery the device pointer usable in the current context from an arbitrary
pointer.
r   )r   r   CUpointer_attribute#CU_POINTER_ATTRIBUTE_DEVICE_POINTERr  r.  cuPointerGetAttributer   r   r   r   )r!  	ptr_attrsr  ptrobjr  s        r=   get_devptr_for_active_ctxrq    s     ax>33I@@D((-F//==))+F<<D((vBM>&&((''))r?   c                 n   [        U 5      n[        (       a;  [        R                  U5      u  p#U[        R
                  " [        U5      U-   5      4$ [        R                  " 5       n[        5       n[        R                  [        U5      [        U5      U5        UR                  UR                  p2X"U-   4$ )zFind the extents (half open begin and end pointer) of the underlying
device memory allocation.

NOTE: it always returns the extents of the allocation but the extents
of the device memory view that can be a subsection of the entire allocation.
)r  r   r.  cuMemGetAddressRanger   r  r0   r   r   r   r   r   )devmemr  sns       r=   r4  r4    s     #6*F~**62'%%c!fqj111  "J##E!HeAh?ww1a%xr?   c                     [        U SS5      nUc:  [        U 5      u  p#[        (       a  [        U5      [        U5      -
  nOX2-
  nXl        US:  d   SR                  U5      5       eU$ )zCheck the memory size of the device memory.
The result is cached in the device memory object.
It may query the driver for the memory size of the device memory allocation.
r  Nr   z{} length array)r.   r4  r   r0   r  rg  )rt  szru  r}   s       r=   device_memory_sizery    si    
 
)4	0B	zf%>Q#a&BB "70%,,R007Ir?   c                 P    [        U SS5      nUSL=(       a    UR                  S;   $ )z?Returns True if the obj.dtype is datetime64 or timedelta64
    r  NMm)r.   char)r   r  s     r=   _is_datetime_dtyper}    s+     C$'E3t!33r?   c                 d    [        U 5      (       a  U R                  [        R                  5      n U $ )zVWorkaround for numpy#4983: buffer protocol doesn't support
datetime64 or timedelta64.
)r}  rk  rQ  int64r   s    r=   _workaround_for_datetimer    s&     #hhrxx Jr?   c                     [        U [        5      (       a  U $ SnU(       d,  [        U [        R                  5      =(       d    [	        U 5      n[        U 5      n [        R                  " XU5      $ )zGet host pointer from an obj.

If `readonly` is False, the buffer must be writable.

NOTE: The underlying data pointer from the host data buffer is used and
it should not be changed until the operation which can be asynchronous
completes.
F)r/   r0   rQ  voidr}  r  r   memoryview_get_buffer)r   readonlyforcewritables      r=   r  r    sV     #s
M"30K4Fs4K
"3
'C))#hGGr?   c                 D    [        U 5      n [        R                  " U 5      $ )zHReturns (start, end) the start and end pointer of the array (half open).)r  r   memoryview_get_extentsr  s    r=   host_memory_extentsr    s    
"3
'C**3//r?   c                     [        U 5      [        U5      :X  d   S5       e[        U 5      n[        R                  " XX25      u  pEXT-
  $ )zWGet the byte size of a contiguous memory buffer given the shape, strides
and itemsize.
z# dim mismatch)r  r   memoryview_get_extents_info)r  r  r  ndimru  r}   s         r=   memory_size_from_infor    sD     u:W%7'77%u:D//ODA5Lr?   c                 <    [        U 5      u  pX!:  d   S5       eX!-
  $ )zGet the size of the memoryzmemory extend of negative size)r  )r   ru  r}   s      r=   host_memory_sizer    s%    s#DA633365Lr?   c                 Z    [         (       a  U R                  $ [        U 5      R                  $ )z$Get the device pointer as an integer)r   r  r   r  s    r=   r  r    s$    ~((($S)///r?   c                 L    U c  [        S5      $ [        U 5        U R                  $ )z,Get the ctypes object for the device pointerr   )r   require_device_memoryr  r  s    r=   r  r    s%    
{{#$$$r?   c                     [        U SS5      $ )aK  All CUDA memory object is recognized as an instance with the attribute
"__cuda_memory__" defined and its value evaluated to True.

All CUDA memory object should also define an attribute named
"device_pointer" which value is an int object carrying the pointer
value of the device memory address.  This is not tested in this method.
r  F)r.   r  s    r=   is_device_memoryr    s     3)511r?   c                 :    [        U 5      (       d  [        S5      eg)z9A sentry for methods that accept CUDA memory object.
    zNot a CUDA memory object.N)r  r[  r  s    r=   r  r    s      C  344 !r?   c                 @    [        U S/ 5      nUR                  U5        g)zAdd dependencies to the device memory.

Mainly used for creating structures that points to other device memory,
so that the referees are not GC and released.
	_depends_N)r.   extend)rt  objsdepsets      r=   device_memory_dependsr  &  s     V["-F
MM$r?   c                     / nU(       aC  [        U[        5      (       d   e[        R                  nUR	                  UR
                  5        O[        R                  nU" [        U 5      [        USS9U/UQ76   g)
NOTE: The underlying data pointer from the host data buffer is used and
it should not be changed until the operation which can be asynchronous
completes.
T)r  N)	r/   r  r.  cuMemcpyHtoDAsyncr   r6  cuMemcpyHtoDr  r  dstsrcr  r  varargsr  s         r=   host_to_devicer  0  sb     G&&))))%%v}}%  ~cLt<dMWMr?   c                     / nU(       aC  [        U[        5      (       d   e[        R                  nUR	                  UR
                  5        O[        R                  nU" [        U 5      [        U5      U/UQ76   gr  N)	r/   r  r.  cuMemcpyDtoHAsyncr   r6  cuMemcpyDtoHr  r  r  s         r=   device_to_hostr  B  s`     G&&))))%%v}}%  |C.-t>g>r?   c                     / nU(       aC  [        U[        5      (       d   e[        R                  nUR	                  UR
                  5        O[        R                  nU" [        U 5      [        U5      U/UQ76   gr  )r/   r  r.  cuMemcpyDtoDAsyncr   r6  cuMemcpyDtoDr  r  s         r=   r|  r|  T  s`     G&&))))%%v}}%  ~cN3/@@r?   c                     / nU(       aC  [        U[        5      (       d   e[        R                  nUR	                  UR
                  5        O[        R                  nU" [        U 5      X/UQ76   g)zMemset on the device.
If stream is not zero, asynchronous mode is used.

dst: device memory
val: byte value to be written
size: number of byte to be written
stream: a CUDA stream
N)r/   r  r.  r  r   r6  r  r  )r  valr  r  r  r  s         r=   device_memsetr  f  sY     G&&))))##v}}%~cC00r?   c                  ,    [         R                  5         g)z3
Enable profile collection in the current context.
N)r.  cuProfilerStartrC   r?   r=   profile_startr  {  s     r?   c                  ,    [         R                  5         g)z4
Disable profile collection in the current context.
N)r.  cuProfilerStoprC   r?   r=   profile_stopr    s     r?   c               #   <   #    [        5         Sv   [        5         g7f)zQ
Context manager that enables profiling on entry and disables profiling on
exit.
N)r  r  rC   r?   r=   	profilingr    s      O	Ns   c                  *    [         R                  5       $ )z8
Return the driver version as a tuple of (major, minor)
)r.  r?  rC   r?   r=   r?  r?    s     r?   r-  rA  )rB  r4   rk   rh   r  r   r  r'   r]  r1  r  	itertoolsr   abcr   r   r   r   r   r	   r
   r   r   r   r   r.  rW  numpyrQ  collectionsr   r   ry  r   
numba.corer   r   r   r  r   r   r   r   r   r   r   numba.cuda.cudadrvr   r   r   CUDA_USE_NVIDIA_BINDINGr   r!   r   r  r  rg   r   r  	pythonapi	Py_DecRefr!  	Py_IncRefr  	py_objectr   r>   rh  rA   rK   rN   rx   r   r   r   r   rf   r   r   r   r
  r   r   r6  r.  rb  r  r  r  r  r  r1  r>  rQ  rU  r]  rY  r0   rA  r  rN  r  r  r"  r!  r"  r	  r  r  r   r1  rc  ru  r7  r  rI  r
  MemAllocr  r#  r  r  r  r  r  rN  rW  r0  r<  rv  r|  rh  rr  r  rd  jittyCU_JIT_INPUT_OBJECTrA  CU_JIT_INPUT_LIBRARYCU_JIT_INPUT_CUBINCU_JIT_INPUT_FATBINARYr  r  r	  r  r  r  rq  r4  ry  r}  r  r  r  r  r  r  r  r  r  r  r  r  r|  r  r  r  r/  r  r?  rC   r?   r=   <module>r     s    	          '/ / /    )  / / 4 " L L < <//$ ||&&w/ ''
''
''(
 ''(
 0	l 		, 	1? 1( V$0  16 %&	BV BJ4V 4n 
 01 Y1V Y1x:qFg qh@ 5 @F( (2.0.0I .0b $%  7$ *	# 	 mBv BJ l3
	i&f i&X	8<> @F	..0! 6 ! H'V 'Tv; v;ro-F o-dm +7? +7\&8$$ &R#8O #8L*6 *>	x'8'8 		,(9(9 	wV wt70F 70t	Cw C.B6 B <v < j #1 2 D+X +>+ +J $ %F ""E&&%%'')))).. &&%%''))))..Ow Of V% V%r]F6 ]F@WFv WFz*,&"4H(00%25N$?$A$1*   r?   