
    sh                         S r SSKrSSKJrJr  SSKJr  \" S5       " S S\5      5       r\S:X  a  \R                  " 5         gg)	a  
Matrix multiplication example via `cuda.jit`.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella

Contents in this file are referenced from the sphinx-generated docs.
"magictoken" is used for markers as beginning and ending of example text.
    N)CUDATestCaseskip_on_cudasim)captured_stdoutz4cudasim doesn't support cuda import at non-top-levelc                   >   ^  \ rS rSrSrU 4S jrU 4S jrS rSrU =r	$ )
TestMatMul   zc
Text matrix multiplication using simple, shared memory/square, and shared
memory/nonsquare cases.
c                 t   > [        5       U l        U R                  R                  5         [        TU ]  5         g N)r   _captured_stdout	__enter__supersetUpself	__class__s    }/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/numba/cuda/tests/doc_examples/test_matmul.pyr   TestMatMul.setUp   s)     / 1'')    c                 \   > U R                   R                  S S S 5        [        TU ]  5         g r
   )r   __exit__r   tearDownr   s    r   r   TestMatMul.tearDown   s&    &&tT48r   c                 x  ^^^ SSK JmJm  SSKnSSKnTR
                  U4S j5       nUR                  S5      R                  SS/5      nUR                  SS/5      nUR                  SS/5      nTR                  U5      nTR                  U5      nTR                  U5      n	Sn
UR                  UR                  S   U
S   -  5      nUR                  UR                  S   U
S   -  5      nX4nX=U
4   " XxU	5        U	R                  5       n[        U5        [        XE-  5        SmTR
                  UUU4S	 j5       nUR                  S5      R                  SS/5      nUR                  SS/5      nUR                  SS/5      nTR                  U5      nTR                  U5      nTR                  U5      n	TT4n
UR                  UR                  S   U
S   -  5      nUR                  UR                  S   U
S   -  5      nX4nXU
4   " XxU	5        U	R                  5       n[        U5        [        XE-  5        S
nU R                  UR!                  XdU-  :H  5      US9  UR                  S5      R                  SS/5      nUR                  SS/5      nUR                  SS/5      nTR                  U5      nTR                  U5      nTR                  U5      n	TT4n
[#        UR                  S   UR                  S   5      n[#        UR                  S   UR                  S   5      nUR                  UU
S   -  5      nUR                  UU
S   -  5      nX4nXU
4   " XxU	5        U	R                  5       n[        U5        [        XE-  5        SnU R                  UR!                  XdU-  :H  5      US9  g)z/Test of matrix multiplication on various cases.r   )cudafloat32Nc                    > TR                  S5      u  p4X2R                  S   :  aI  XBR                  S   :  a6  Sn[        U R                  S   5       H  nXPX64   XU4   -  -  nM     XRX44'   ggg)z2Perform square matrix multiplication of C = A * B.   r              N)gridshaperange)ABCijtmpkr   s          r   matmul)TestMatMul.test_ex_matmul.<locals>.matmul)   sr     99Q<DA771:~!ggaj.qwwqz*AQT7Q!tW,,C +!$	 #1~r         )r,   r,   r   c                 *  > TR                   R                  TT4TS9nTR                   R                  TT4TS9nTR                  S5      u  pVTR                  R                  nTR                  R
                  nTR                  R                  n	T" S5      n
[        U	5       H  nSX8U4'   SXHU4'   X`R                  S   :  a'  X{T-  -   U R                  S   :  a  XX{T-  -   4   X8U4'   XQR                  S   :  a(  XT-  -   UR                  S   :  a  XUT-  -   U4   XHU4'   TR                  5         [        T5       H  nXX4   XLU4   -  -  n
M     TR                  5         M     XbR                  S   :  a  XRR                  S   :  a  XXe4'   ggg)z
Perform matrix multiplication of C = A * B using CUDA shared memory.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella
)r!   dtyper   r   r   r   N)
sharedarrayr    	threadIdxxygridDimr"   r!   syncthreads)r#   r$   r%   sAsBr3   r4   txtybpgr(   r&   r'   TPBr   r   s                r   fast_matmul.TestMatMul.test_ex_matmul.<locals>.fast_matmulN   s    ""#s7"CB""#s7"CB99Q<DA!!B!!B,,..C "+C3Zr6
r6
wwqz>rG|qwwqz&A!"bs7l?!3B2vJwwqz>rG|qwwqz&A!"C<?!3B2vJ   " sAbe9rR%y00C $   "#  $ 771:~!ggaj.!$ #1~r   z5fast_matmul incorrect for shared memory, square case.)msgs            z9fast_matmul incorrect for shared memory, non-square case.)numbar   r   numpymathjitarangereshapeoneszeros	to_deviceceilr!   copy_to_hostprint
assertTrueallmax)r   nprF   r*   x_hy_hz_hx_dy_dz_dthreadsperblockblockspergrid_xblockspergrid_yblockspergridr=   r?   
grid_y_max
grid_x_maxr<   r   r   s                     @@@r   test_ex_matmulTestMatMul.test_ex_matmul    s    	( 
	 
	 iim##QF+ggq!fohh1vnnS!nnS!nnS!"))CIIaL?13E$EF))CIIaL?13E$EF(:o-.s= c
ci 	'	 
'	V iim##QF+ggq!fohh1vnnS!nnS!nnS!*))CIIaL?13E$EF))CIIaL?13E$EF(:?23CcB c
ci FsCi/0c: iin$$aW-ggr1ghh1vnnS!nnS!nnS!*1syy|4
1syy|4
))J1C$CD))J1C$CD(:?23CcB c
ci JsCi/0c:r   )r   )
__name__
__module____qualname____firstlineno____doc__r   r   r`   __static_attributes____classcell__)r   s   @r   r   r      s    

I; I;r   r   __main__)
rf   unittestnumba.cuda.testingr   r   numba.tests.supportr   r   rb   main r   r   <module>ro      sS     < / GHZ; Z; IZ;z zMMO r   