
    sh                        S SK r S SKJr  S SKrS SKJs  Jr  S SKJ	r	  SSK
Jr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJrJrJrJrJr  SSKJrJrJr  SSKJrJ r J!r!  SSK"J#r#J$r$      SS\S\%\   S\%\   4S jjr&S\&l'        S r(g)    N)Optional)mm_args   )ir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_templateuse_max_autotune)opsVxwbc           
         U R                  5       n[        U5      S:  a  [        U SUS   /5      n [        U5      n[        5       (       d   eU V	s/ s H&  oc  U	O[        R
                  R                  U	5      PM(     nn	/ n
[        U [        US   SS/5      US9Gt pp[        U V	s/ s H  oS LPM     sn	SS [        U5       Vs0 s H  oU _M     snS9nU /UQnUR                  U V	s/ s H	  oc  M  U	PM     sn	5        [        R                  " U
UU40 UD6  [        U
5      S:w  d   e[        SU
UU5      nUR                  R                  n[        U5       Vs/ s H"  n[        R                   " UU["        U4/5      PM$     nn[        R$                  " US   R'                  5       S	9Ul        UUl        [        U5       Vs/ s H%  n[        R,                  R/                  UU   5      PM'     nn[        U5      S:  a@  [        U5       H1  n[        UU   / US S QUU   R                  5       S   P75      UU'   M3     U$ s  sn	f s  sn	f s  snf s  sn	f s  snf s  snf )
N   r   r   layoutT)has_biastrans_wepilogue_creatoract_mappinggrouped_gemm)device)get_sizelenr   r   r   ExternKernelrealize_inputr   r   dictrangeextendr   add_choicesr   dataMultiOutputlistMultiOutputLayout
get_devicer"   outputsr   create)r   r   r   attrscalars	algorithmr"   x_sizenum_gemmbiaschoices_numkwargsinput_nodesresulttemplate_bufgemm_idxreturn_bufsreturn_tensorss                       t/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/_inductor/mkldnn_lowerings.pygrouped_gemm_loweringrI       sw    ZZ\F
6{QR$%1vHSTUST42??#@#@#F	FSTAU"$Gq'!A$A"7GQ/01qtd"q1',X7!V7	F 'q'K??@&& 	 w<1&	F ;;##L h'H 	v|tX.>-?@'   ..k!n6O6O6QRL&LCH?CRxK12?   6{QhH'+x(G&"+G~h7@@B2FG(N8$ (
 [ 	V 2 8 @"s*   -I(I
I+I5I)I,ITc            !        ^^^^^^ [         R                  R                  (       GaZ  SSKJm  [        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                   R                  S9m[         R                  R                  R"                  [         R                  R                  R$                  [         R                  R                  R&                  [         R                  R                  R                  [(        R*                  R,                  [         R                  R                  R.                  /n [1        [         R                  R                  R"                  5      S[2        S[2        S	[2        4U4S
 jj5       n[1        [         R                  R                  R"                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R$                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R                  5       S3S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R                  R                  5       S3S[2        S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R&                  5      S[2        S[2        S	[2        4U4S jj5       n[1        [(        R*                  R,                  5      S[2        S[2        S[2        S[2        S[2        S[2        S[2        S[4        S[6        [8           S[8        S[8        S[8        S[4        S [4        S![4        S"[4        4 U4S# jj5       n[1        [         R                  R                  R.                  S S$9S[2        S%[2        S&[2        S'[2        S	[2        4
U4S( jj5       n[1        [         R                  R                  R.                  R                  S S$9[1        [         R                  R                  R.                  R:                  S S$9S[2        S%[2        S&[2        S'[2        S)[2        S	[2        4U4S* jj5       5       n	[1        [         R                  R                  R                  S S$9 S3S[2        S%[2        S&[2        S'[2        S	[2        4
U4S+ jjj5       n
[1        [         R                  R                  R                  R                  S S$9[1        [         R                  R                  R                  R:                  S S$9 S3S[2        S%[2        S&[2        S'[2        S,[2        S	[2        4U4S- jjj5       5       n[         R                  R<                  (       a  [        [         R                  R>                  R@                  S.STRB                  R                  S9mU RE                  [         R                  R>                  R@                  5        [1        [         R                  R>                  R@                  5      S S/.S[2        S0[2        S1[2        S[F        [2           4U4S2 jjj5       n[I        U 5        g g )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiser   weightr=   c
                 t   > [         R                  " T
R                  R                  U UUUUUUUUU	5
      5      $ N)r   r7   ConvolutionUnary)r   rN   r=   paddingstridedilationgroupsr8   r9   r:   rK   s             rH   convolution_unary5register_onednn_fusion_ops.<locals>.convolution_unary   sJ     ##**11     otherc                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rP   )r   r7   ConvolutionBinaryr   rY   rN   r=   rR   rS   rT   rU   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmrK   s                rH   convolution_binary6register_onednn_fusion_ops.<locals>.convolution_binary   sS      ##++22 !# rX   c                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rP   )r   r7   ConvolutionBinaryInplacer\   s                rH   convolution_binary_inplace>register_onednn_fusion_ops.<locals>.convolution_binary_inplace   sS      ##2299 !# rX   r   r   c                 l  >^^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n Ub  [        R                  R                  U5      n/ n[        5       (       at  [        USS/5      n	[        X	US9Gt pp	[        X`U	5      (       aH  UUU4S jn[        US LSTS:X  a  S OUS	9nUb  / S
QUS'   [        R                  " UUUc  X/OXU/40 UD6  [        U5      S:X  d  [        5       (       a>  [        TTTS9nUc  S US'   UR                  TR                  " Uc  X/OXU/U40 UD65        UR!                  5       ["        R$                  R&                  ;   d   eSS 0n[)        SUUc  X/OXU/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )Nr   r    r   r   r!   c                    > [        U TTTS9$ )Nr9   r:   r	   )bufr:   r8   r9   s    rH   r%   Jregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creator   s    8w)  rX   Tnoner#   r$   r%   )r   r   r   input_indices)r8   r9   r:   Bc                 X    [         R                  R                  U R                  5          $ rP   r   graph	constantsget_namer   s    rH   <lambda>Bregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>      QWW..qzz|<rX   linear_unaryinput_gen_fnsr)   r*   r   r   r+   r,   r   r   r   r   r-   r   r0   r   appendbindru   r   rs   rt   r   )r   r   r   r8   r9   r:   r"   r;   r>   transposed_wr?   r%   rA   r|   rC   aten_mkldnn_linear_unarys      ```         rH   rz   0register_onednn_fusion_ops.<locals>.linear_unary   s    ZZ\F6{QR,-}OO11!4*,G!!&q1a&1.5af.U+A(LAA
 "!"$ $15EUF
 }2;/#//"#)! !	 7|q $9$;$;4IN9"&F3K,11"#)! ! ::<177#4#4444<M /)!+F 6{Qf&Ks&KV__5Fr5J&KLMrX   yc                   >^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n TR                  5       n[        U5      S:  a  [        TSUS   /5      mUb  [        R                  R                  U5      n/ n[        5       (       at  [        USS/5      n	[        X	TUS9Gt pp	m[        XPU	5      (       aF  UU4S jn[        US LSUS9nUc  / S	QO/ S
QUS'   [        R                  " UUUc  U TU/OU TX#/40 UD6  [        U5      S:X  d  [        5       (       a?  [        TS9nUc  S US'   UR                  TR                  " Uc  U TU/OU TX#/U40 UD65        UR!                  5       ["        R$                  R&                  ;   d   eSS 0n[)        SUUc  U TU/OU TX#/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )Nr   r    r   r   r!   c                    > [        U TTS9$ )N)rY   r	   )rk   r8   r   s    rH   r%   Kregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator?  s    8d!LLrX   Trn   )r   r   r   )   r   r   r   ro   )r8   rp   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   Cregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>[  ry   rX   linear_binaryr{   r}   )r   r   r   r   r8   r"   r;   y_sizer>   r   r?   r%   rA   r|   rC   aten_mkldnn_linear_binarys    `  `          rH   r   1register_onednn_fusion_ops.<locals>.linear_binary*  s    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G!!&q1a&118Qv2.AQ )LAAM "!"$ $)9F
 <=9i,F?+#//%&YAq	Q1L !	 7|q $9$;$;49"&F3K-22%&YAq	Q1L ! ::<177#4#4444<M /YAq	Q1L+F 6{Qf&Ks&KV__5Fr5J&KLMrX   c                 v   > [         R                  " TR                  R                  U UUUUUUUUU	U
5      5      $ rP   )r   r7   ConvolutionTransposeUnary)r   rN   r=   rR   output_paddingrS   rT   rU   r8   r9   r:   rK   s              rH   convolution_transpose_unary?register_onednn_fusion_ops.<locals>.convolution_transpose_unaryh  sM     ##33::" rX   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    > [         R                  " [        R                  TR                  R                  U UUUUUUUUU	U
UUUUU5      5      $ rP   )pytreetree_mapr   r7   MkldnnRnnLayer)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   s                   rH   mkldnn_rnn_layer4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  sc    & ??  ((//!! rX   )type_promotion_kindpacked_weightw_scalew_zpc                   > [        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUU5      5      $ )Ndtypex_scalenamex_zp)typefloatr   rs   add_tensor_constanttorchtensorfloat32intint32r   r7   QConvPointWisePT2E)r   r   r   r   r   r   r=   rS   rR   rT   rU   o_inv_scaleo_zero_pointoutput_dtyper8   r9   r:   rK   s                    rH   qconvolution_unary6register_onednn_fusion_ops.<locals>.qconvolution_unary  s    * =E)))gg11WEMM: 2 G :$$$77..T5F / D ##,,33!  # rX   accumc                   > [        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9nUS:X  au  U[
        R                  [
        R                  4;   aQ  UR                  5       [
        R                  [
        R                  4;   a  UR                  5       U:w  a  [        Xn5      n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUUUUUUU5      5      $ )Nr   r   r   r   sum)r   r   r   rs   r   r   r   r   r   r   bfloat16	get_dtyper   r   r7   QConvPointWiseBinaryPT2E)r   r   r   r   r   r   r   r=   rS   rR   rT   rU   r   r   r   accum_scaleaccum_zpr]   alphar_   r`   unary_algorithmmrK   s                         rH   qconvolution_binary7register_onednn_fusion_ops.<locals>.qconvolution_binary  sD   > =E)))gg11WEMM: 2 G :$$$77..T5F / D
 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !$- rX   c                   >^^^^^^^	^
^^^^ UR                  5       [        R                  L d   S5       eU R                  5       n[	        U5      S:  a  [        U SUS   /5      n [        T[        R                  5      (       dU  [        T5      [        :X  d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR!                  5         [#        S TR                  5        5       5      (       a  [        T/ 5      m[	        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R$                  S9SS9m[        T[        R                  5      (       dU  [        T5      [&        :X  d   e[        R                  R                  [        R                  " T[        R$                  S9SS9mOTR!                  5         TR)                  5       S:X  d   S5       eUc?  [        R                  R                  [        R                  " S
[        R$                  S9SS9nTR!                  5         UR!                  5         UR                  5       [        R$                  :w  a  [        [        R*                  R-                  U5      [        R.                  5      (       a  [        R                  R0                  UR3                  5          R5                  [        R$                  5      n[        R                  R                  [        R                  " U[        R$                  S9UR3                  5       S9nTc  S OTR                  5       m/ n[7        5       (       Ga  [9        XUT	S9Gt npn[        [        R*                  R-                  U5      [        R.                  5      (       Ga  [        R:                  " [        R<                  " [        R                  R0                  UR3                  5          5      [        R                  R0                  UR3                  5          5      (       Ga'  [?        XU5      (       Ga  [        R                  R0                  UR3                  5          RA                  5       n[        RB                  " UR5                  [        R                  5      S
S9n[        R                  R                  UUR3                  5       S-   S9mUU
UUUUU	UUUUU4S jnU R                  5       [        RD                  [        R                  4;   d   e[F        RH                  " UUTc  U TTUTU/OU TTUTUT/TS LUTc  / SQO/ SQS9  [	        U5      S
:X  d  [K        5       (       aK  [M        TTT	T
TTS9nTc  S US'   URO                  TRP                  " Tc  U TTUTU4OU TTUTUT4U40 UD65        UR3                  5       [        R                  R0                  ;   d   eS S S S S.n[        [        R*                  R-                  T5      [        R.                  5      (       a  S US'   [        [        R*                  R-                  T5      [        R.                  5      (       a  S US'   [S        SUTc  U TTUTU/OU TTUTUT/UUS 9n[	        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )!Nz2Only int8 weights are supported by oneDNN qlinear.r   r    r   r   r   c              3   *   #    U  H	  oS :H  v   M     g7fr   N .0dims     rH   	<genexpr>Dregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>P       >+=Cax+=   r   r   x_scale must be 0D or 1Dr   r   r   z(x_zp is incompatible with oneDNN qlinearr   r"   	out_dtyper   _BMatrixCompensc                 "  >^^^^^	^
^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUU
UUU4S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   aw  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       T[        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc                   > T" U 5      n[         R                  " U[        R                  5      nU S   4nT" S5      nT" S5      nT" U5      nT" U5      n[         R                  " [         R                  " UU5      U5      n[         R
                  " U[         R                  " [         R                  " [         R                  " UU5      U5      U5      5      nT	b}  T" U5      nT
[        R                  [        R                  4;   d   eT
[        R                  :X  a%  [         R                  " U[        R                  5      n[         R                  " Xx5      nU$ )Nr    r   r   r   r   r   mulsubr   r   )indexinputweight_compens_index_x_scale_x_zp_w_scale_weight_compotemp_biasr=   
bias_dtypebias_loaderinput_loaderw_scale_loaderweight_compens_loaderx_scale_loaderx_zp_loaders            rH   inner_fn]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn  s'   $0$7E %(LL$FE49"I<0'5b'9H$/OE'56J'KH,ABV,WM $'77 #$)$,!" !)$D $'77 $ #$'GG(+,4,4)* ).%& %2	!"$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt';#'KrX   r(   r   r   rangesrm   rj   c                 B   > T" U 5      n[         R                  " UT5      $ rP   r   r   r   r   output_cast_loaderr   s     rH   inner_fn_cast_output_to_bf16qregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16      (:5(A'*||E<'H HrX   r   _create_constantsc                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " UT
5      $ Ng      ?r   r      i   r   r   r   rounduint8minimummaximumr   r   scale
zero_pointr   	inv_scalevalqminqmaxclampedr   r   requant_input_loaders            rH   inner_fn_requanteregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant   s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||G\'J JrX   r
  r  r   r   r   r  int8make_loaderr   	Pointwiser5   r)   r
   get_device_or_errorloweringr   	functoolspartialr   r   )input_bufferr   
output_bufr   r  r   r   r   r   r  r   r   r   r   r:   r8   r=   r   o_scaler   r   r9   r   weight_compensr   r   s        @@@@@@@@@rH   r%   Kregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator  s   +!MM!NN!KK!JJ	0      (4'?'?'A0>0J0J0L-)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K3( 3(j &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0K" *,'1'E'E'G&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)rX   )r   r   r   r         )   r   r   r   r   r"  r#  r#   r%   ro   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmr=   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   Cregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>A  ry   rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r,  B  ry   rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r,  C  ry   rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r,  D  ry   rX   )r   r"  r#  r$  c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r,  K      QWW->->qzz|-LrX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r,  P  r1  rX   qlinear_unaryr{   )*r   r   r  r)   r*   r   
isinstancer   r   r   r   r   rs   r   r   r   realizeallr   r   	get_numelInputsKernelunwrap_storage_for_inputConstantBufferrt   ru   tor   r   equal
zeros_liker   to_denser   r  r   r0   r   r-   r~   r   r   )r   r   r   r   r   r   r=   r  r   r   r8   r9   r:   r"   r;   w_zp_tensorr>   r?   W_tensorweight_compens_tensorr%   rA   r|   rC   r   r   aten_mkldnn_qlinear_unarys    `` ` ```````           @@rH   r3  1register_onednn_fusion_ops.<locals>.qlinear_unary1  s   " !**,

: D: ZZ\F6{QR,-gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|
 ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  >>#q(T*TT(
 | ww22LL%++6V 3  OOLLN~~5;;.:88>!!4 4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G!!/6V|0,F} @@F))  (():):4==?)KL))$--/:  ,F}EE ww001G1G1IJSSUH,1IIhkk%++6NTU,V)%&WW%@%@-*3358II &A &N
C* C* C*J ;;=U[[%**,EEEE#//< GT='4H$wdS!%T!1)9< '92 7|q $9$;$;!(&2!-!%!(&/ <%)F6N-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M 88A!! 
 $Ma 88>!!  $Ma .< GT='4@$wdK+F 6{Qf&Ks&KV__5Fr5J&KLMrX   x2c                   >^^^^^^^	^
^^^^^^  U R                  5       nTR                  5       n[        U5      [        U5      :X  d   e[        U5      S:  a(  US:X  a"  [        U SUS   /5      n [        TSUS   /5      m[        T[        R
                  5      (       dU  [        T5      [        :X  d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR                  5         [        S TR                  5        5       5      (       a  [        T/ 5      m[        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R                   S9SS9mUc?  [        R                  R                  [        R                  " S
[        R                   S9SS9n[        T[        R
                  5      (       dU  [        T5      ["        :X  d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOTR                  5         TR                  5         UR                  5         UR%                  5       [        R                   :w  a  [        [        R&                  R)                  U5      [        R*                  5      (       a  [        R                  R,                  UR/                  5          R1                  [        R                   5      n[        R                  R                  [        R                  " U[        R                   S9UR/                  5       S9nUS:X  a  T
[        R                  [        R2                  4;   aS  TR%                  5       [        R                  [        R2                  4;   a!  TR%                  5       T
:w  a  [5        TT
5      mOTR%                  5       T
:X  d   S5       eTR%                  5       m Tb  TR%                  5       OS m/ n[7        5       (       Ga4  US:X  Ga-  [9        XTUT
S9Gt nnpm[        [        R&                  R)                  T5      [        R*                  5      (       Ga  [        TR;                  5       R<                  5      S
:X  Ga  [        [        R&                  R)                  U5      [        R*                  5      (       Gaz  [        R>                  " [        R@                  " [        R                  R,                  UR/                  5          5      [        R                  R,                  UR/                  5          5      (       a  [C        UX5      (       a  [        R                  R,                  UR/                  5          nURE                  5       n[        RF                  " UR1                  [        R                  5      S
S9n[        R                  R                  UUR/                  5       S-   S9mUUUU	U
UUUUUUU UU4S jn[H        RJ                  " UUTc	  U TTUTUT/O	U TTUTUTT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [M        5       (       aQ  [O        TT	T
UUUUTTTS9
nTc  S US'   URQ                  T!RR                  " Tc	  U TTUTUT4O	U TTUTUTT4U40 UD65        UR/                  5       [        R                  R,                  ;   d   eS S S S.nTb  S US'   [U        SUTc	  U TTUTUT/O	U TTUTUTT/UUS9n[        U5      S:  a+  US:X  a%  [        U/ US S QUR                  5       S   P75      nU$ ) Nr   r   r    r   r   r   c              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     rH   r   Eregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r   r   r   r   r   r   r   zCdtype of accum for qlinear post op sum should be the same as outputr   r   r   c                 d  >^^^^^	^
^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUU
UUUUU4
S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   a  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       [         R                  [        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc                   >
 T" U 5      nT" U 5      nT" S5      nT" S5      n[         R                  " U[        R                  5      nU S   4nT" U5      nT" U5      n[         R                  " [         R                  " UU5      U5      n[         R
                  " U[         R                  " [         R                  " [         R                  " UU5      U5      U5      5      nT
b}  T" U5      n	T[        R                  [        R                  4;   d   eT[        R                  :X  a%  [         R                  " U	[        R                  5      n	[         R                  " X5      nT[        R                  [        R                  4;   d   eT[        R                  :X  a%  [         R                  " U[        R                  5      n[         R                  " X5      nU$ )Nr   r    r   )r   r   _x2r   r   r   r   _weight_compensr   r   r=   r   r   r   r   r   x2_dtype	x2_loaderr   r   s             rH   r   ^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn  s~   $0$7E"+E"2C'5b'9H$/OE %(LL$FE49"I<0'56J'KH.C 4/O $'77 #$)$,!" !)$D $'77 $ #$'GG(+,4,4)* ).%& %4	!"$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'KrX   r   rm   rj   c                 B   > T" U 5      n[         R                  " UT5      $ rP   r   r   s     rH   r   rregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16=  r   rX   r   r   c                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " U[         R                  5      $ r  r  r	  s            rH   r  fregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requantL  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||GU[['I IrX   r  r  )r  r   r  r   r  r   r   r   r   r  r   r   rM  r   r   r=   r   r  r   r   r   r_   r`   r   r   rD  rL  r   r   s        @@@@@@@@@@rH   r%   Lregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator  s   +!MM!NN!KK!JJ	0      (4'?'?'A$&NN$4	0>0J0J0L-)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K5( 5(n &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*:	*J (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0J" *,'1'E'E'G&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)rX   )r   r   r   r   r"  r#  r$  )   r   r   r   r   r"  r#  r$  r%  )
r&  r'  r   other_scaleother_zpbinary_post_opr^   unary_post_opunary_post_op_argsunary_post_op_algorithmr=   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   Dregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  ry   rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r\    ry   rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r\    ry   rX   )r   r"  r#  c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   r\    r1  rX   rT  qlinear_binaryr{   )+r)   r*   r   r4  r   r   r   r   r   rs   r   r   r   r   r5  r6  r   r   r   r8  r9  r:  rt   ru   r;  r   r   r   r   
get_layoutsizer<  r=  r   r>  r   r   r0   r   r-   r~   r   r   )"r   r   r   r   r   r   rD  r=   r  r   r   x2_scalex2_zpr]   r   r_   r`   r   r"   r;   x2_sizer?  r>   r?   r@  rA  r%   rA   r|   rC   r   r   rL  aten_mkldnn_qlinear_binarys"    `` ` `````    ```            @@@rH   r`  2register_onednn_fusion_ops.<locals>.qlinear_binary_  su   6 ZZ\FkkmGv;#g,...6{Q;%#7R,-"r72;/0gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|ww22LL%++6V 3  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4 4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7<<>\9 ]9 ||~H-1-=)4J*,G ""{e';3:b<40FAb @@F))  DOO-223q8"@@F))  (():):4==?)KL))$--/:  .faGG ww001G1G1IJH'002H,1IIhkk%++6NTU,V)%&WW%@%@-*3358II &A &N
J* J* J*X $//< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;$;!(&2!- ("#.!&",'4,< <%)F6N.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La . < GT='4D$wb$O+F 6{Q;%#7f&Ks&KV__5Fr5J&KLMrX   zmkl::_mkl_linearr!   packed_worig_wc          	      <  > / n[        5       (       aH  [        USS/5      n[        XUS9Gt pp[        XPU5      (       a  [        R
                  " UUXU/SSS/S9  [        U5      S:X  d  [        5       (       a#  UR                  TR                  XU4US US95        UR                  5       [        R                  R                  ;   d   eUR                  5       [        R                  R                  ;   d   eS S	 S
.n	[        SUXU/UU	S9n
Ub  [        X5      n
U
$ )Nr   r   r!   Tr   )r$   ro   )rp   
batch_sizec                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   Gregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>      !2!21::<!@rX   c                 X    [         R                  R                  U R                  5          $ rP   rr   rv   s    rH   rw   rm    rn  rX   )r   r   packed_linearr{   )r   r   r   r   r   r0   r*   r   r~   r   ru   r   rs   rt   r   r   )r   rh  ri  r   rk  r"   r>   r   r?   r|   rC   aten_mkl_linears              rH   mkl_packed_linear5register_onednn_fusion_ops.<locals>.mkl_packed_linear  s7    /1#%%#*6Aq6#:L293/Q -VEE'33#"&1$(+,a& w<1$(=(?(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@! %>#&)"/% = ^FrX   rP   )%r   _C_has_mkldnn rK   r   r   mkldnn_linear_pointwiseLinearUnaryr7   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv2d_pointwiser   r   boolr3   r   binary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr~   r   r   )cpu_needs_realized_inputsrV   rb   rf   rz   r   r   r   r   r   r3  r`  rr  rq  r   r   rf  rB  rK   s                @@@@@@rH   register_onednn_fusion_opsr  c   s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II..%
! 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> ?	?	?	 ?	 
??	B 
599++==DD	EQU;	;	&;	+4;	9B;	 
F;	z 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++==SW	X1	1	 %	1	
 1	 1	 1	 
Y1	f 
II..554

 
II..<<RV

F	F	 %	F	
 F	 F	 F	 F	



F	P 
599++==SW	X k	k	 %	k	
 k	 k	 k	 
Yk	Z	 
II..554

 
II..<<RV

, '~	~	 %	~	
 ~	 ~	 ~	 ~	



~	@
 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 00#0 "0 I&	0 :0d 	"";<rX   )NNNN))r  typingr   r   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   rv  r   codegen.cpp_gemm_templater   !codegen.cpp_grouped_gemm_templater   codegen.cpp_utilsr
   r   r  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   r   virtualizedr   r   r3   rI   _inductor_lowering_functionr  r   rX   rH   <module>r     s       $ $ 4  6 E 8    
 R Q  
==I= I=@ 59  1@rX   