
    sh                        S SK r S SKJr  S SKrS SKJr  S SKJrJr  S SK	J
r
Jr  \" SS5      r\R                  \R                  \R                  \R                   \R"                  /r\R&                  \R(                  /r\ V s0 s H9  o \R-                  U 5      R.                  \R-                  U 5      R0                  4_M;     sn r\R5                  \ V s0 s HE  o \" \R9                  U 5      R.                  5      \" \R9                  U 5      R0                  5      4_MG     sn 5        S r\R=                  S	5        \
" \S
S5      S\R>                  S\ S\S\S\S\RB                  S\R>                  4S j5       r"\
" \S
S5      S\R>                  S\ S\S\S\S\RB                  S\R>                  4S j5       r#\R=                  S5        \
" \SS5      S\R>                  S\R>                  S\R>                  S\S\S\RB                  S\R>                  4S j5       r$\
" \SS5      S\R>                  S\R>                  S\R>                  S\S\S\RB                  S\R>                  4S j5       r%\R=                  S5        \
" \SS5      S\R>                  S\R>                  S\R>                  S\R>                  S\R>                  S\RB                  S\R>                  4S j5       r&\
" \SS5      S\R>                  S\R>                  S\R>                  S\R>                  S\R>                  S\RB                  S\R>                  4S j5       r'\R=                  S5        \
" \SS5      SS .S\R>                  S\ S\S\S\S\RB                  S!\\RB                     S\R>                  4S" jj5       r(\
" \SS5      SS .S\R>                  S\R>                  S\R>                  S\S\S\RB                  S!\\RB                     S\R>                  4S# jj5       r)\R=                  S$5        \
" \S%S5      SS .S\R>                  S\R>                  S\R>                  S\S\S\RB                  S!\\RB                     S\R>                  4S& jj5       r*\
" \S%S5      SS .S\R>                  S\R>                  S\R>                  S\S\S\RB                  S!\\RB                     S\R>                  4S' jj5       r+\R=                  S(5        \
" \S)S5      SS .S\R>                  S\R>                  S\R>                  S\R>                  S\R>                  S\RB                  S!\\RB                     S\R>                  4S* jj5       r,\
" \S)S5      SS .S!\\RB                     S\R>                  4S+ jj5       r-\R=                  S,5        \
" \S-S5      S\R>                  S.\S/\S0\ S\RB                  S\.\R>                  \R>                  4   4S1 j5       r/\R=                  S25        \
" \S3S5      S\R>                  S.\S/\S0\ S\RB                  S\.\R>                  \R>                  4   4S4 j5       r0\
" \S-S5      S\R>                  S\S\S0\ S\RB                  S\.\R>                  \R>                  4   4S5 j5       r1\
" \S3S5      S\R>                  S\S\S0\ S\RB                  S\.\R>                  \R>                  4   4S6 j5       r2S7 r3\R=                  S85        \
" \S9S5      S\R>                  S:\R>                  S;\R>                  S<\S\S\S\RB                  S\R>                  4S= j5       r4\
" \S9S5      S\R>                  S:\R>                  S;\R>                  S<\S\S\S\RB                  S\R>                  4S> j5       r5\R=                  S?5        \
" \S@S5      SS .S\R>                  S:\R>                  S;\\R>                     S<\S\S\S\RB                  S!\\RB                     S\R>                  4SA jj5       r6\
" \S@S5      SS .S\R>                  S:\R>                  S;\\R>                     S<\S\S\S\RB                  S!\\RB                     S\R>                  4SB jj5       r7\R=                  SC5        \
" \SDS5      S\R>                  S\RB                  S\.\R>                  \R>                  4   4SE j5       r8\
" \SDS5      S\R>                  S\RB                  S\.\R>                  \R>                  4   4SF j5       r9\R=                  SG5        \
" \SHSI5      S\R>                  S\RB                  S\.\R>                  \R>                  4   4SJ j5       r:\R=                  SK5        \
" \SLS5      S\R>                  S\RB                  S\.\R>                  \R>                  4   4SM j5       r;\
" \SLS5      S\R>                  S\RB                  S\.\R>                  \R>                  4   4SN j5       r<SO r=\R=                  SP5        \
" \SQS5      S\R>                  S:\R>                  S;\R>                  S\S\S\RB                  4SR j5       r>\
" \SQS5      S\R>                  S:\R>                  S;\R>                  S\S\S\RB                  4SS j5       r?\R=                  ST5        \
" \SUS5      \R                  4S\R>                  S:\R>                  S;\R>                  S\S\S\RB                  SV\RB                  4SW jj5       rA\
" \SUS5      \R                  4S\R>                  S:\R>                  S;\R>                  S\S\S\RB                  SV\RB                  4SX jj5       rB\R=                  SY5        \
" \SZS5       SnS\R>                  S:\R>                  S;\R>                  S\S\S\RB                  4S\ jj5       rC\
" \SZS5       SnS\R>                  S:\R>                  S;\R>                  S\S\S\RB                  4S] jj5       rD\R=                  S^5        \
" \S_S5      S[\R                  4S`\R>                  S:\R>                  S;\\R>                     S\S\S\RB                  Sa\SV\RB                  4Sb jj5       rE\R=                  Sc5         " Sd Se\R                  R                  5      rH\
" \SfSg5      S\R>                  S:\R>                  S;\R>                  S<\S\S\S\R>                  4Sh j5       rI\
" \SfS5      S\R>                  S:\R>                  S;\R>                  S<\S\S\S\R>                  4Si j5       rJ\R=                  Sj5        \
" \SkS5      S\R>                  S\RB                  S\R>                  4Sl j5       rK\
" \SkS5      S\R>                  S\RB                  S\R>                  4Sm j5       rLgs  sn f s  sn f )o    N)Optional)_unsqueeze_multiple)determine_qparamsvalidate_qmin_qmax)implLibraryquantized_decomposedDEFc                     U[         ;  a  [        SU 35      e[         U   u  p4X:  d   SU SU  35       eX::  d   SU SU 35       eg )NzUnsupported dtype: z9quant_min out of bound for dtype, quant_min_lower_bound: z quant_min: z9quant_max out of bound for dtype, quant_max_upper_bound: z quant_max: )_DTYPE_TO_QVALUE_BOUNDS
ValueError)	quant_min	quant_maxdtypequant_min_lower_boundquant_max_upper_bounds        x/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/torch/ao/quantization/fx/_decomposed.py_quant_min_max_bounds_checkr      s    ++.ug6773J53Q0- 	""7!8YK	Q-
 - 	""7!8YK	Q-    zxquantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tensorCompositeExplicitAutogradinputscale
zero_pointr   r   r   returnc                    U R                   [        R                  [        R                  4;   a  U R	                  [        R
                  5      n U R                   [        R
                  :X  d   SU R                    35       e[        X4U5        SU-  n[        R                  " [        R                  " X-  5      U-   X45      R	                  U5      $ )a  Affine quantization for the Tensor using the same quantization parameters to map
from floating point to quantized values

Args:
   input (torch.Tensor): original float32 or bfloat16 Tensor
   scale (float): quantization parameter for affine quantization
   zero_point (int): quantization parameter for affine quantization
   quant_min (int): minimum quantized value for output Tensor
   quant_max (int): maximum quantized value for output Tensor
   dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

Returns:
   Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
   are not stored in the Tensor, we are storing them in function arguments instead
<Expecting input to have dtype torch.float32, but got dtype:       ?)	r   torchfloat16bfloat16tofloat32r   clampround)r   r   r   r   r   r   	inv_scales          r   r   r   1   s    0 {{u}}enn55'u}}$T	Eekk]ST$	e<eI;;E%&3Ybir   Metac                 (   U R                   [        R                  [        R                  4;   a  U R	                  [        R
                  5      n U R                   [        R
                  :X  d   SU R                    35       e[        R                  " XS9$ )Nr   r   )r   r   r    r!   r"   r#   
empty_liker   r   r   r   r   r   s         r   quantize_per_tensor_metar,   V   sm     {{u}}enn55'u}}$T	Eekk]ST$E//r   zquantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensorc                    UR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       e[        XR                  5       UR                  5       X4U5      $ zAffine quantization for the Tensor using the same quantization parameters to map
from floating point to quantized values
Same as `quantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
scalar values
   >Expecting zero_point tensor to be one element, but received : 9Expecting scale tensor to be one element, but received : numelr   itemr+   s         r   quantize_per_tensor_tensorr5   m   s    " 	a]	G
HXHXHZG[\] 	S	B5;;=/RSzz|Z__.	e r   c                    U R                   [        R                  [        R                  4;   a  U R	                  [        R
                  5      n UR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       eU R                   [        R
                  :X  d   SU R                    35       e[        R                  " XS9$ )Nr/   r0   r1   r   r)   )r   r   r    r!   r"   r#   r3   r*   r+   s         r   quantize_per_tensor_tensor_metar7      s     {{u}}enn55'a]	G
HXHXHZG[\] 	S	B5;;=/RS 	u}}$T	Eekk]ST$E//r   zquantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype) -> Tensorzquantize_per_tensor.tensor2c                 B   UR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       e[        U UR                  5       UR                  5       UR                  5       UR                  5       U5      $ r.   r2   r+   s         r   quantize_per_tensor_tensor2r9      s    " 	a]	G
HXHXHZG[\] 	S	B5;;=/RS

 r   c                     [        XX#XE5      $ N)r7   r+   s         r    quantize_per_tensor_tensor2_metar<      s     +jY r   zdequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_tensor	out_dtyper?   c                    U R                   U:X  d   SU SU R                    35       eUc  [        R                  nU[        ;   a  U R	                  U5      U-
  U-  $ [        SU 35      e)a  Affine dequantization for the Tensor using the same quantization parameters to map
from quantized values to floating point values

Args:
   input (torch.Tensor): Tensor with dtype matching `dtype` argument,
   e.g. (`torch.uint8`), it is a per tensor quantized Tensor if combined with
   quantization parameters in the argument of this function (scale/zero_point)

   scale (float): quantization parameter for affine quantization

   zero_point (int): quantization parameter for affine quantization

   quant_min (int): minimum quantized value for input Tensor (not used in computation,
   reserved for pattern matching)

   quant_max (int): maximum quantized value for input Tensor (not used in computation,
   reserved for pattern matching)

   dtype (torch.dtype): dtype for input Tensor (not used in computation,
   reserved for pattern matching)

   out_dtype (torch.dtype?): optional dtype for output Tensor

Returns:
   dequantized float32 Tensor
Expecting input to have dtype: z
, but got ,Unsupported dtype in dequantize_per_tensor: )r   r   r#   r   r"   r   r   r   r   r   r   r   r?   s          r   r=   r=      sz    L 	uH	(z%++GHMM	'' #j0E99GwOPPr   c                P    Uc  [         R                  n[         R                  " XS9$ Nr)   )r   r#   r*   rC   s          r   dequantize_per_tensor_metarF     s$     MM	E33r   zdequantize_per_tensor.tensor(Tensor input, Tensor scale, Tensor zero_point, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensorc          
         UR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       e[        U UR                  5       UR                  5       UUUUS9$ zAffine dequantization for the Tensor using the same quantization parameters to map
from quantized values to floating point values
Same as `dequantize_per_tensor` but scale and zero_point are Scalar Tensor instead of
scalar values
r/   r0   r1   r>   r3   r=   r4   rC   s          r   dequantize_per_tensor_tensorrJ   '  s    * 	a]	G
HXHXHZG[\] 	S	B5;;=/RS 

 r   c                d   Uc  [         R                  nUR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       eU R                  U:X  d
   SU 35       eU[        ;   a  [         R
                  " XS9$ [        SU 35      e)Nr/   r0   r1   rA   r)   rB   )r   r#   r3   r   r   r*   r   rC   s          r   !dequantize_per_tensor_tensor_metarL   L  s     MM	a]	G
HXHXHZG[\] 	S	B5;;=/RS;;%J#B5'!JJ''77GwOPPr   zdequantize_per_tensor.tensor2(Tensor input, Tensor scale, Tensor zero_point, Tensor quant_min, Tensor quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensorzdequantize_per_tensor.tensor2c          
      @   UR                  5       S:X  d   SUR                  5        35       eUR                  5       S:X  d   SUR                  5        35       e[        U UR                  5       UR                  5       UR                  5       UR                  5       UUS9$ rH   rI   rC   s          r   dequantize_per_tensor_tensor2rN   m  s    * 	a]	G
HXHXHZG[\] 	S	B5;;=/RS 

 r   c          
          [        XX#XEUS9$ )Nr>   )rL   rC   s          r   "dequantize_per_tensor_tensor2_metarP     s     -jY r   zrchoose_qparams.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams.tensorqminqmaxepsc           
         U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       eU[
        ;   d   S[
        R                  5        SU 35       e[        X5        [        R                  " U 5      u  pV[        UUUUU[        R                  " U/5      SS9$ )3  Given an input Tensor, derive the per tensor affine quantization parameter
(scale and zero_point) for target quantized Tensor from the Tensor

Args:
   input (torch.Tensor): floating point input Tensor
   quant_min (int): minimum quantized value for target quantized Tensor
   quant_max (int): maximum quantized value for target quantized Tensor
   dtype (torch.dtype): dtype for target quantized Tensor

Returns:
   scale (float): quantization parameter for the target quantized Tensor
   zero_point (int): quantization parameter for the target quantized Tensor
CExpecting input to have dtype torch.float32/16/b16, but got dtype: $Expecting target dtype to be one of , but got: F)has_customized_qrange)r   r   r#   r    r!   r   keysr   aminmaxr   Tensorr   rQ   rR   rS   r   min_valmax_vals          r   choose_qparams_tensorr`     s    " ;;  [ 
MU[[MZ	[  	((a	-.E.J.J.L-M[Y^X_`a(t"}}U+GcU# r   z|choose_qparams_symmetric.tensor(Tensor input, int quant_min, int quant_max, float eps, ScalarType dtype) -> (Tensor, Tensor)zchoose_qparams_symmetric.tensorc                    U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       eU[
        ;   d   S[
        R                  5        SU 35       e[        X5        [        R                  " U 5      u  pV[        UUUUU[        R                  " U/5      S[        R                  S9$ )rU   rV   rW   rX   F)rY   qscheme)r   r   r#   r    r!   r   rZ   r   r[   r   r\   per_tensor_symmetricr]   s          r   choose_qparams_symmetric_tensorrd     s    * ;;  [ 
MU[[MZ	[  	((a	-.E.J.J.L-M[Y^X_`a(t"}}U+GcU#**	 	r   c                    U R                   [        R                  [        R                  [        R                  4;   d   SU R                    35       eX:  d   SU SU 35       e[        R
                  " S[        R                  U R                  S9[        R
                  " S[        R                  U R                  S94$ )NrV   zKExpecting quant_min to be smaller than quant_max but received min:         z max: r/   r   device)	r   r   r#   r    r!   emptydoublerg   int64r   r   r   rS   r   s        r   choose_qparams_tensor_metarl     s     ;;  [ 
MU[[MZ	[  	&
		6)&& ;;qU\\BEKK	U\\E  r   c                     [         R                  " S[         R                  U R                  S9[         R                  " S[         R                  U R                  S94$ )Nr/   rf   )r   rh   ri   rg   rj   rk   s        r   $choose_qparams_symmetric_tensor_metarn     sA     ;;qU\\BEKK	U\\E  r   c                     [        [        U R                  5       5      5      nSX!'   XS'   U R                  [	        U5      5      nX24$ )Nr   )listrangedimpermutetuple)xaxisnew_axis_listys       r   _permute_to_axis_zerory     sB    quuw(MM!			%&'Ar   zquantize_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_channelscaleszero_pointsrv   c                    U R                   [        R                  [        R                  4;   a  U R	                  [        R
                  5      n U R                   [        R
                  :X  d   SU R                    35       eX0R                  5       :  d   SU R                  5        35       e[        XEU5        [        X5      u  pS/U R                  5       -  nUR                  S   US'   UR                  U5      nUR                  U5      n[        R                  " [        R                  " U SU-  -  5      U-   XE5      n	U	R                  [        U5      5      n
U
R	                  U5      $ )a<  Affine per channel quantization for the Tensor using the same quantization
parameters for each channel/axis to map from floating point to quantized values

Args:
   input (torch.Tensor): original float32 or bfloat16 Tensor
   scales (torch.Tensor): a list of scale quantization parameter for
   affine quantization, one per channel
   zero_point (torch.Tensor): a list of zero_point quantization parameter for
   affine quantization, one per channel
   quant_min (int): minimum quantized value for output Tensor
   quant_max (int): maximum quantized value for output Tensor
   dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

Returns:
   Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
   are not stored in the Tensor, we are storing them in function arguments instead
r   Expecting axis to be < r/   r   r   )r   r   r    r!   r"   r#   rr   r   ry   shapeviewr$   r%   rs   rt   )r   r{   r|   rv   r   r   r   permute_axis_list	new_shaperesouts              r   rz   rz   ,  s'   6 {{u}}enn55'u}}$T	Eekk]ST$))+F!8FF	e<4UAEeiik!I<<?IaL[[#F""9-K
++ES6\*+k99C ++e-.
/C66%=r   c                    U R                   [        R                  [        R                  4;   a  U R	                  [        R
                  5      n U R                   [        R
                  :X  d   SU R                    35       eX0R                  5       :  d   SU R                  5        35       e[        XEU5        [        R                  " XS9$ )Nr   r~   r)   )	r   r   r    r!   r"   r#   rr   r   r*   )r   r{   r|   rv   r   r   r   s          r   quantize_per_channel_metar   \  s     {{u}}enn55'u}}$T	Eekk]ST$))+F!8FF	e<E//r   zdequantize_per_channel(Tensor input, Tensor scales, Tensor? zero_points, int axis, int quant_min, int quant_max, ScalarType dtype, *, ScalarType? out_dtype=None) -> Tensordequantize_per_channelc                   U R                   U:X  d   SU SU R                    35       eUc  [        R                  nX0R                  5       :  d   SU R                  5        35       e[	        XEU5        [        X5      u  pS/U R                  5       -  n	UR                  S   U	S'   UR                  U	5      nUb  XR                  U	5      -
  U-  n
OX-  n
U
R                  U5      n
U
R                  [        U5      5      nU$ )aO  Affine per channel dequantization for the Tensor using the same quantization
parameters for each channel/axis to map from quantized values to floating point values

Args:
   input (torch.Tensor): Tensor with dtype matching `dtype` argument,
   e.g. (`torch.uint8`), it is a per channel quantized Tensor if combined with
   quantization parameter in the argument of this function (scales/zero_points/axis)

   scales (torch.Tensor): a list of scale quantization parameter for
   affine quantization, one per channel

   zero_points (torch.Tensor): a list of zero_point quantization parameter for
   affine quantization, one per channel

   quant_min (int): minimum quantized value for output Tensor (not used in computation,
   reserved for pattern matching)

   quant_max (int): maximum quantized value for output Tensor (not used in computation,
   reserved for pattern matching)

   dtype (torch.dtype): requested dtype for output Tensor (not used in computation,
   reserved for pattern matching)

   out_dtype (torch.dtype?): optional dtype for output Tensor

Returns:
   dequantized float32 Tensor
Expecting input to have dtype , but got dtype: r~   r/   r   )r   r   r#   rr   r   ry   r   r   r"   rs   rt   )r   r{   r|   rv   r   r   r   r?   r   r   r   r   s               r   r   r   z  s    R 	uN	'w.?}MNMM	))+F!8FF	e<4UAEeiik!I<<?IaL[[#F''	22f<n
&&
C
++e-.
/CJr   c                   U R                   U:X  d   SU SU R                    35       eUc  [        R                  nX0R                  5       :  d   SU R                  5        35       e[	        XEU5        [        R
                  " XS9$ )Nr   r   r~   r)   )r   r   r#   rr   r   r*   )r   r{   r|   rv   r   r   r   r?   s           r   dequantize_per_channel_metar     s     	uN	'w.?}MNMM	))+F!8FF	e<E33r   zLchoose_qparams_per_token(Tensor input, ScalarType dtype) -> (Tensor, Tensor)choose_qparams_per_tokenc                 h   U R                  5       R                  SSS9nUR                  [        R                  :X  a  UR                  5       nU[        R                  :X  a  SnSUS-
  -  S-
  nO[        SU 35      eUR                  SS	9R                  U5      n[        R                  " U5      nX%4$ )
  Choose quantization parameters for per token quantization. This means for a N dimension Tensor
(M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
every N elements with the same quantization parameter. The dimension for scales/zero_points
will be (M1 * M2 ... * Mn)

Args:
   input (torch.Tensor): original float32/float16 Tensor
   dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor

Returns:
    scales and zero_points, both float32 Tensors
Trr   keepdim      r/   z/unsupported dtype in choose_qparams_per_token: gh㈵>min)absamaxr   r   r    floatint8	Exceptionr$   div
zeros_like)r   r   r{   n_bitsr   r|   s         r   r   r     s    , YY["d3F||u}}$LLN 	 

&1*%)	=eWE
 	
 \\d\#''	2F""6*Kr   c                     [        U R                  S S 5      S/-   n[        R                  " U[        R                  U R
                  S9[        R                  " U[        R                  U R
                  S94$ Nr   r/   rf   rp   r   r   rh   ri   rg   rj   r   r   sizes      r   choose_qparams_per_token_metar     ]     CR !QC'D;;t5<<Eu{{EKKH  r   z]_choose_qparams_per_token_asymmetric_impl(Tensor input, ScalarType dtype) -> (Tensor, Tensor))_choose_qparams_per_token_asymmetric_implCompositeImplicitAutogradc                    Su  p#[         R                  " U SSS9n[         R                  " U SSS9n[         R                  " U[         R                  " U5      5      n[         R
                  " U[         R                  " U5      5      n[         R                  " [         R                  5      R                  nXv-
  [        X2-
  5      -  n	U	R                  US9n	Xi-  n
Xy-  nX*-   nX;-   n[         R                  " X-   S:  X*-
  X;-
  5      n[         R                  " XU5      R                  5       nU	R                  [         R                  5      UR                  [         R                  5      4$ )r   )i   r   Tr   r   r   )r   aminr   r   r   maxfinfor#   rS   r   r$   wherer%   r"   float64rj   )r   r   rQ   rR   r^   r_   min_val_negmax_val_posrS   r   descaled_mindescaled_maxzero_point_from_min_errorzero_point_from_max_errorr   s                  r   r   r     s,   , JDjjB5GjjB5G))GU%5%5g%>?K))GU%5%5g%>?K
++emm
$
(
(C &%*<<EKKCK E &L&L $ 3 $ 3!=AJ
 Zt4::<J88EMM"JMM%++$>>>r   zWchoose_qparams_per_token_asymmetric(Tensor input, ScalarType dtype) -> (Tensor, Tensor)#choose_qparams_per_token_asymmetricc                     [        X5      $ r;   )r   r   r   s     r   r   r   E  s     5UBBr   c                     [        U R                  S S 5      S/-   n[        R                  " U[        R                  U R
                  S9[        R                  " U[        R                  U R
                  S94$ r   r   r   s      r   (choose_qparams_per_token_asymmetric_metar   Q  r   r   c                    [         R                  " [        U R                  5       5      S S 5      nX1R	                  5       :X  d   SU SUR                  5        35       eX2R	                  5       :X  d   SU SUR                  5        35       eg )Nr   znum_tokens: z	 scales: z zero_points: )mathprodrp   r   r3   )r   r{   r|   
num_tokenss       r   !_per_token_quant_qparam_dim_checkr   `  s    4

-cr23Jlln$;	j\6;;=/:;$ 	''))E	j\0@0@0B/CDE)r   z}quantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype) -> Tensorquantize_per_tokenc                     [        X4U5        [        XU5        U R                  SU-  5      R                  U5      R	                  5       R                  X45      R                  U5      n U $ )a  Per token quantization for the Tensor using the quantization parameters to map
from floating point to quantized values. This means for a N dimension Tensor
(M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
every N elements with the same quantization parameter. The dimension for scales/zero_points
will be (M1 * M2 ... * Mn)

Args:
   input (torch.Tensor): original float32 or bfloat16 Tensor
   scales (float32 torch.Tensor): quantization parameter for per token affine quantization
   zero_points (int32 torch.Tensor): quantization parameter for per token affine quantization
   quant_min (int): minimum quantized value for output Tensor
   quant_max (int): maximum quantized value for output Tensor
   dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

Returns:
   Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
   are not stored in the Tensor, we are storing them in function arguments instead
r   )r   r   muladdr%   r$   r"   r   r{   r|   r   r   r   s         r   r   r   p  sX    6  	e<%e[A		#,	[			y	$	E 
 Lr   c                 B    [        X4U5        [        R                  " XS9$ rE   r   r   r*   r   s         r   quantize_per_token_metar     s      	e<E//r   zdequantize_per_token(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, ScalarType output_dtype) -> Tensordequantize_per_tokenoutput_dtypec                 4    X-
  n X-  n U R                  U5      $ )a  Per token dequantization for the Tensor using the quantization parameters to map
from floating point to quantized values. This means for a N dimension Tensor
(M1, M2, ...Mn, N), we calculate scales/zero_points for each N elements and quantize
every N elements with the same quantization parameter. The dimension for scales/zero_points
will be (M1 * M2 ... * Mn)

Args:
   input (torch.Tensor): quantized Tensor (uint8, int8 etc.)
   scales (float64 torch.Tensor): quantization parameter for per token affine quantization
   zero_points (int64 torch.Tensor): quantization parameter for per token affine quantization
   quant_min (int): minimum quantized value for input Tensor
   quant_max (int): maximum quantized value for input Tensor
   dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
   output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

Returns:
   dequantized Tensor with dtype `output_dtype`
)r"   r   r{   r|   r   r   r   r   s          r   r   r     s"    8 ENE88L!!r   c                 B    [        X4U5        [        R                  " XS9$ rE   r   r   s          r   dequantize_per_token_metar     s      	e<E66r   zquantize_per_channel_group(Tensor input, Tensor scales, Tensor zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size) -> Tensorquantize_per_channel_group   c                 V   US:  d   eX`R                   S   :  a"  UR                   S   S:X  a  U R                   S   nU R                   S   U-  S:X  d   eU R                  5       S:X  d   eU R                  SU5      n[        R                  " U5      R                  5       S:X  d   eUR                  SS5      nUR                  SS5      nUR                  SU-  5      R                  U5      R                  5       R                  X45      R                  U5      R                  U 5      nU$ )Nr/   r   r   r   r   )r   rr   reshaper   isnansumr   r   r%   clamp_r"   
reshape_as)	r   r{   r|   r   r   r   
group_sizeto_quant
input_int8s	            r   r   r     s    >>KKO#R(8A(=[[_
;;r?Z'1,,,99;! }}R,H;;x $$&!+++^^B"F%%b!,K 	S6\"	[					%	E	E	  r   c                     US:  d   eX`R                   S   :  a"  UR                   S   S:X  a  U R                   S   nU R                   S   U-  S:X  d   eU R                  5       S:X  d   e[        R                  " XS9$ )a  Groupwise quantization within each channel for an 2-d Tensor using the quantization parameters
to map from floating point to quantized values. This means for each row of a 2-d Tensor
(M, N), we calculate scales/zero_points for each `group_size` elements
and quantize every `group_size` elements with the same quantization parameter.
The dimension for scales/zero_points will be (M * ceil(N, group_size),)

Args:
   input (torch.Tensor): original float32 or bfloat16 Tensor
   scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
   zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
   quant_min (int): minimum quantized value for output Tensor
   quant_max (int): maximum quantized value for output Tensor
   dtype (torch.dtype): requested dtype (e.g. torch.uint8) for output Tensor

Returns:
   Tensor with requested dtype (e.g. torch.uint8), note the quantization parameters
   are not stored in the Tensor, we are storing them in function arguments instead
r/   r   r   r   r)   )r   rr   r   r*   )r   r{   r|   r   r   r   r   s          r   quantize_per_channel_group_metar   	  s{    8 >>KKO#R(8A(=[[_
;;r?Z'1,,,99;!E//r   zdequantize_per_channel_group(Tensor input, Tensor scales, Tensor? zero_points, int quant_min, int quant_max, ScalarType dtype, int group_size, ScalarType output_dtype) -> Tensordequantize_per_channel_groupw_int8r   c                 &   US:  d   eX`R                   S   :  a"  UR                   S   S:X  a  U R                   S   nU R                   S   U-  S:X  d   eU R                  5       S:X  d   eU R                  SU5      nUR                  SS5      nUb  UR                  SS5      n	O.[        R                  " / [        R
                  UR                  S9n	UR                  U	5      R                  U5      R                  U 5      R                  U5      n
U
$ )a  Groupwise dequantization within each channel for an 2-d Tensor using the quantization parameters
to map from floating point to quantized values. This means for each row of a 2-d Tensor
(M, N), we calculate scales/zero_points for each `group_size` elements
and quantize every `group_size` elements with the same quantization parameter.
The dimension for scales/zero_points will be (M * ceil(N, group_size),)

Args:
   input (torch.Tensor): quantized Tensor (uint8/int8 etc.)
   scales (float32 torch.Tensor): quantization parameter for per channel group affine quantization
   zero_points (int32 torch.Tensor): quantization parameter for per channel group affine quantization
   quant_min (int): minimum quantized value for input Tensor
   quant_max (int): maximum quantized value for input Tensor
   dtype (torch.dtype): dtype (e.g. torch.uint8) for input Tensor
   output_dtype (torch.dtype): dtype (e.g. torch.float32) for output Tensor

Returns:
   dequantized Tensor with dtype `output_dtype`
r/   r   r   r   rf   )r   rr   r   r   zerosint32rg   subr   r   r"   )r   r{   r|   r   r   r   r   r   w_int8_groupedzpw_dqs              r   r   r   5  s    D >>LL$$b)9Q)>\\"%
<<j(A---::<1^^B
3N^^B"F  Q'[[5;;v}}Eb!%%f-88@CCLQDKr   zyfake_quant_per_channel(Tensor input, Tensor scales, Tensor zero_points, int axis, int quant_min, int quant_max) -> Tensorc                   4    \ rS rSr\S 5       r\S 5       rSrg)FakeQuantPerChannelin  c                     UR                   [        R                  :w  a  UR                  [        R                  5      nUR                   [        R                  :w  a  UR                  [        R                  5      nUR                   [        R                  :X  d   SUR                    35       eXAR                  5       :  d   SUR                  5        35       e[        [        SU5      5      [        [        US-   UR                  5      5      -   n[        X'5      n[        X75      n	[        R                  " USU-  -  5      U	-   n
[        R                  " XU5      U	-
  U-  n[        R                  " X:  X:*  5      nU R                  U5        U$ )Nr   r~   r   r/   r   )r   r   r#   r"   r   rr   rp   rq   ndimr   r%   r$   logical_andsave_for_backward)ctxr   r{   r|   rv   r   r   broadcast_dimsunsqueeze_scalesunsqueeze_zero_pointstempr   masks                r   forwardFakeQuantPerChannel.forwardo  sC   <<5==(YYu}}-F+%..5KKK5==(	XI%++W	X(iik!J%<UYY[M#JJ!eAtn-U4!8UZZ5P0QQ.vF 3K P{{5C*:$:;<?TTKK36KK   $"3t7HJd#
r   c                 2    U R                   u  nX-  S S S S S 4$ r;   )saved_tensors)r   gyr   s      r   backwardFakeQuantPerChannel.backward  s$    ##y$dD$66r    N)__name__
__module____qualname____firstlineno__staticmethodr   r   __static_attributes__r   r   r   r   r   n  s(     * 7 7r   r   fake_quant_per_channelAutogradc                 0    [         R                  XX#XE5      $ r;   )r   applyr   r{   r|   rv   r   r   s         r   r   r     s     $${) r   c                 .    [         R                  " U 5      $ r;   r   r*   r  s         r   fake_quant_per_channel_metar    s     E""r   zFconvert_element_type.no_fuse(Tensor input, ScalarType dtype) -> Tensorzconvert_element_type.no_fusec                 h    [         R                  R                  R                  R	                  X5      $ r;   )r   opsprimsconvert_element_typedefaultr   s     r   r  r    s#     99??//77EEr   c                 *    [         R                  " XS9$ rE   r  r   s     r   convert_element_type_metar    s    E//r   )r   )Mr   typingr   r   torch._refsr   torch.ao.quantization.utilsr   r   torch.libraryr   r   quantized_decomposed_libuint8r   uint16int16r   _INTEGER_DTYPESfloat8_e5m2float8_e4m3fn_FLOAT_DTYPESiinfor   r   r   updateintr   r   definer\   r   r   r   r,   r5   r7   r9   r<   r=   rF   rJ   rL   rN   rP   rt   r`   rd   rl   rn   ry   rz   r   r   r   r   r   r   r   r   r   r   r   r#   r   r   r   r   r   autogradFunctionr   r   r  r  r  )ks   0r   <module>r     s      + M '
 ##95A ;;

ELL%++u{{S""E$7$78 :I9HAAEKKN..//    DQRMqU[[^	 #ekk!n&8&8"9::MR    @  57RS!<<!! ! 	!
 ! ;;! \\! T!H  5v>0<<00 0 	0
 0 ;;0 \\0 ?0    @ :<W<<<<  	
  ;; \\0  <fE0<<0<<0 0 	0
 0 ;;0 \\0 F0.   F ;=X<<<<  ||	
 || ;; \\:  =vF
<<
<<
 
 ||	

 ||
 ;;
 \\
 G
"   _  79TU (,/Q<</Q/Q /Q 	/Q
 /Q ;;/Q $/Q \\/Q V/Qd  7@ (,4<<4<<4 4 	4
 4 ;;4 $4 \\4 A4   _ " (,<<<<  	
  ;; $ \\
@  >G (,Q<<Q<<Q Q 	Q
 Q ;;Q $Q \\Q HQ4   e # (,<<<<  ||	
 || ;; $ \\
@  ?H (, $ \\ I   7  79TU$<<$"$*-$49$BG++$
5<<%&$ V$N   7 %
$<<$"$*-$49$BG++$
5<<%&$
$N  7@<<$'47>CLQKK
5<<%& A"  A6J<<$'47>CLQKK
5<<%& K   @  68ST,<<,LL, , 	,
 , , ;;, \\, U,^  6?0<<0LL0 0 	0
 0 0 ;;0 \\0 @0.   _  8:UV (,;<<;LL; %,,'; 	;
 ; ; ;;; $; \\; W;|  8&A (,4<<4LL4 %,,'4 	4
 4 4 ;;4 $4 \\4 B4*   R
 
 << ;;  5<<%& 
 F 

<<;; 5<<%&
   c
 /
(?<<(?;;(? 5<<%&(?
(?V   ]
 )
C<<C;;C 5<<%&C
C )

<<;; 5<<%&
E   @  46QR#<<#LL# # 	#
 # ;;# S#L  4f=	0<<	0LL	0 	0 		0
 	0 ;;	0 >	0   Y  68ST !&"<<"LL" " 	"
 " ;;" ++" U"B  6? !&7<<7LL7 7 	7
 7 ;;7 ++7 @7   A :<W !<<!LL! ! 	!
 ! ;;!!H  <fE "0<<"0LL"0 "0 	"0
 "0 ;;"0 F"0J   Z "  %+LL+LL+ %,,'+ 	+
 + ;;+ + +++
+\   .7%..11 7:  8*E
<<
LL
 
 	

 
 
 \\
 F
  8&A#<<#LL# # 	#
 # # \\# B#   L
 "
F FU[[ FU\\ F
F  >G0U\\ 0%++ 0%,, 0 H0E% Ss   A w(Aw-