
    sh}                        S SK JrJrJr  S SKJrJrJrJrJ	r	J
r
JrJr  S SKrS SKrS SKJs  Jr  S SKJr  S SKJr  SSKJr  SSKJrJr  SS	KJr  \(       a  SS
KJr  \R@                  " 5        S1SSS\S\S\\\\!   4   4S jj5       r"\" SS9 " S S5      5       r#\" SS9 " S S5      5       r$ " S S5      r% " S S\%5      r& " S S5      r' " S S\'5      r( " S S 5      r) " S! S"\)5      r* " S# S$\)5      r+ " S% S&5      r, " S' S(\,5      r- " S) S*\,5      r. " S+ S,\,5      r/ " S- S.5      r0\R@                  " 5       \#" 5       4SSS\S/\#S\\$\\$   4   4S0 jj5       r1g)2    )	dataclassfieldreplace)TYPE_CHECKINGDictIterableListOptionalSequenceTupleUnionN)Tensor)Categorical   )CHUNK_LENGTH)	Tokenizerget_tokenizer)compression_ratio)Whispermodelr   mel	tokenizerreturnc                 ~   Uc  [        U R                  U R                  S9nUR                  b  UR                  UR
                  ;  a  [        S5      eUR                  S:H  nU(       a  UR                  S5      nUR                  SS U R                  R                  U R                  R                  4:w  a  U R                  U5      nUR                  S   n[        R                  " UR                   //U-  5      R#                  UR$                  5      nU R'                  XQ5      SS2S4   n[        R(                  " UR                  S   [        R*                  S9nS	U[-        UR.                  5      '   [0        R2                  * USS2U4'   UR5                  SS
9nUR7                  SS
9R9                  5       n	[;        U5       V
VVs/ s HJ  n
[=        UR.                  UR>                  5       VVs0 s H  u  pXX4   RA                  5       _M     snnPML     nnn
nU(       a
  US   nUS   nX4$ s  snnf s  snnn
f )aG  
Detect the spoken language in the audio, and return them as list of strings, along with the ids
of the most probable language tokens and the probability distribution over all language tokens.
This is performed outside the main decode loop in order to not interfere with kv-caching.

Returns
-------
language_tokens : Tensor, shape = (n_audio,)
    ids of the most probable language tokens, which appears after the startoftranscript token.
language_probs : List[Dict[str, float]], length = n_audio
    list of dictionaries containing the probability distribution over all languages.
N)num_languageszCThis model doesn't have language tokens so it can't perform lang id   r   )dtypeFdim)!r   is_multilingualr   languagelanguage_tokensot_sequence
ValueErrorndim	unsqueezeshapedimsn_audio_ctxn_audio_stateencodertorchtensorsottodevicelogitsonesboollistall_language_tokensnpinfargmaxsoftmaxcpurangezipall_language_codesitem)r   r   r   singlen_audioxr3   masklanguage_tokenslanguage_token_probsijclanguage_probss                 d/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/whisper/decoding.pydetect_languagerL      s
     !!!1D1D
	 	"##9+A+AAQ
 	
 XX]FmmA yy~%**00%**2J2JKKmmC  iilGy}}o&0144SZZ@A\\!!!Q$'F ::fll2&ejj9D05Di++	,-vvgF1d7Ommm+O!>>b>1557 w
  A I999;W;WX	
X AD)..00X	
     )!,'***	
s   	)H82 H2H82H8T)frozenc                   T   \ rS rSr% Sr\\S'   Sr\\   \S'   Sr	\
\S'   Sr\\   \S'   Sr\\   \S	'   Sr\\   \S
'   Sr\\
   \S'   Sr\\
   \S'   Sr\\\\\   4      \S'   Sr\\\\\   4      \S'   Sr\\\\\   4      \S'   Sr\\S'   Sr\\S'   Sr\\
   \S'   Sr\\S'   Srg)DecodingOptionsP   
transcribetaskNr#   g        temperature
sample_lenbest_of	beam_sizepatiencelength_penaltypromptprefixz-1suppress_tokensTsuppress_blankFwithout_timestamps      ?max_initial_timestampfp16 )__name__
__module____qualname____firstlineno__rR   str__annotations__r#   r
   rS   floatrT   intrU   rV   rW   rX   rY   r   r	   rZ   r[   r   r\   r5   r]   r_   r`   __static_attributes__ra       rK   rO   rO   P   s     D# #Hhsm" K $J$!GXc]!#Ix}# $Hhuo$ '+NHUO* /3FHU3S	>*+2.2FHU3S	>*+2 <@OXeC#$678?ND  %$-08E?0 D$rk   rO   c                       \ rS rSr% \\S'   \\S'   Sr\\	\\
4      \S'   \" \S9r\\   \S'   Sr\\S	'   \R$                  r\
\S
'   \R$                  r\
\S'   \R$                  r\
\S'   \R$                  r\
\S'   Srg)DecodingResultu   audio_featuresr#   NrJ   )default_factorytokens textavg_logprobno_speech_probrS   r   ra   )rb   rc   rd   re   r   rg   rf   rJ   r
   r   rh   r   r6   rq   r	   ri   rs   r8   nanrt   ru   rS   r   rj   ra   rk   rK   rm   rm   u   sz    M15NHT#u*-.5d3FDI3D#NKFFNE"K!vvu%rk   rm   c                   >    \ rS rSrS\S\S\4S jrS
S jrS
S jrS	rg)	Inference   rq   ro   r   c                     [         e)zAPerform a forward pass on the decoder and return per-token logitsNotImplementedErrorselfrq   ro   s      rK   r3   Inference.logits       !!rk   Nc                     [         e)z9Update the key-value cache according to the updated beamsr{   )r~   source_indicess     rK   rearrange_kv_cacheInference.rearrange_kv_cache   r   rk   c                     g)z:Clean up any resources or hooks after decoding is finishedNra   r~   s    rK   cleanup_cachingInference.cleanup_caching   s    rk   ra   )r   N)	rb   rc   rd   re   r   r3   r   r   rj   ra   rk   rK   rx   rx      s&    "V "V " ""rk   rx   c                   H    \ rS rSrSSS\4S jrS\S\S\4S	 jrS
 rS r	Sr
g)PyTorchInference   r   r   initial_token_lengthc                 j   Xl         X l        0 U l        / U l        U R                   R                  R
                   Vs/ s H  o3R                  R                  PM     nnU R                   R                  R
                   Vs/ s H  o3R                  R                  PM     nnXE-   U l	        g s  snf s  snf N)
r   r   kv_cachehooksdecoderblocksattnkeyvalue
kv_modules)r~   r   r   blockkey_modulesvalue_moduless         rK   __init__PyTorchInference.__init__   s     %
$8!
37::3E3E3L3LM3L%zz~~3LM7;zz7I7I7P7PQ7Pe))7PQ%5 NQs   B+ B0rq   ro   r   c                 
   U R                   (       d'  U R                  R                  5       u  U l         U l        UR                  S   U R
                  :  a  US S 2SS 24   nU R                  R                  XU R                   S9$ )Nr   )r   )r   r   install_kv_cache_hooksr   r)   r   r   r}   s      rK   r3   PyTorchInference.logits   sh    }}(,

(I(I(K%DM4:<<d777ArsF^Fzz!!&4==!QQrk   c                 f    U R                    H  nUR                  5         M     0 U l        / U l         g r   )r   remover   )r~   hooks     rK   r    PyTorchInference.cleanup_caching   s)    JJDKKM  
rk   c                     U[        [        [        U5      5      5      :w  aA  U R                   H0  nU R                  U   U   R                  5       U R                  U'   M2     g g r   )r6   r=   lenr   r   detach)r~   r   modules      rK   r   #PyTorchInference.rearrange_kv_cache   sQ    T%N(;"<==//(,f(=n(M(T(T(Vf% * >rk   )r   r   r   r   r   N)rb   rc   rd   re   ri   r   r   r3   r   r   rj   ra   rk   rK   r   r      s>    6i 6s 6RV RV R RWrk   r   c                   H    \ rS rSrS\\\      S\\\      S\\   4S jrSr	g)SequenceRanker   rq   sum_logprobsr   c                     [         e)z
Given a list of groups of samples and their cumulative log probabilities,
return the indices of the samples in each group to select as the final result
r{   r~   rq   r   s      rK   rankSequenceRanker.rank   s
     "!rk   ra   N)
rb   rc   rd   re   r	   r   rh   ri   r   rj   ra   rk   rK   r   r      s5    "4<("8<T%[8I"	c"rk   r   c                   V    \ rS rSrSrS\\   4S jrS\\\	      S\\\      4S jr
Srg	)
MaximumLikelihoodRanker   z
Select the sample with the highest log probabilities, penalized using either
a simple length normalization or Google NMT paper's length penalty
rX   c                     Xl         g r   rX   )r~   rX   s     rK   r    MaximumLikelihoodRanker.__init__   s    ,rk   rq   r   c           
         ^  U 4S jnU VVs/ s H  oD Vs/ s H  n[        U5      PM     snPM      nnn[        X&5       VVs/ s H!  u  px[        R                  " U" Xx5      5      PM#     snn$ s  snf s  snnf s  snnf )Nc                    > / n[        X5       H=  u  p4TR                  c  UnOSU-   S-  TR                  -  nUR                  X5-  5        M?     U$ )N      )r>   rX   append)logprobslengthsresultlogproblengthpenaltyr~   s         rK   scores,MaximumLikelihoodRanker.rank.<locals>.scores   s[    F#&x#9&&.$G !"F
a/D4G4GGGg/0 $: Mrk   )r   r>   r8   r:   )	r~   rq   r   r   str   pls	   `        rK   r   MaximumLikelihoodRanker.rank   sg    		 1771A&AqCFA&7474NO4NDA		&,'4NOO '7Os   	A4A/A4(A:/A4r   N)rb   rc   rd   re   __doc__r
   rh   r   r	   r   r   rj   ra   rk   rK   r   r      s?    
-x -P4V- PT$u+=N Prk   r   c            
       v    \ rS rSrS rS\S\S\S\\\4   4S jrS\S\S\\	\	\      \
\
\      4   4S jrS	rg
)TokenDecoder   c                     g)z=Initialize any stateful variables for decoding a new sequenceNra   r   s    rK   resetTokenDecoder.reset   s    rk   rq   r3   r   r   c                     [         e)a  Specify how to select the next token, based on the current trace and logits

Parameters
----------
tokens : Tensor, shape = (n_batch, current_sequence_length)
    all tokens in the context so far, including the prefix and sot_sequence tokens

logits : Tensor, shape = (n_batch, vocab_size)
    per-token logits of the probability distribution at the current step

sum_logprobs : Tensor, shape = (n_batch)
    cumulative log probabilities for each sequence

Returns
-------
tokens : Tensor, shape = (n_batch, current_sequence_length + 1)
    the tokens, appended with the selected next token

completed : bool
    True if all sequences has reached the end of text

r{   )r~   rq   r3   r   s       rK   updateTokenDecoder.update   s
    2 "!rk   c                     [         e)a^  Finalize search and return the final candidate sequences

Parameters
----------
tokens : Tensor, shape = (n_audio, n_group, current_sequence_length)
    all tokens in the context so far, including the prefix and sot_sequence

sum_logprobs : Tensor, shape = (n_audio, n_group)
    cumulative log probabilities for each sequence

Returns
-------
tokens : Sequence[Sequence[Tensor]], length = n_audio
    sequence of Tensors containing candidate token sequences, for each audio input

sum_logprobs : List[List[float]], length = n_audio
    sequence of cumulative log probabilities corresponding to the above

r{   r   s      rK   finalizeTokenDecoder.finalize   s
    , "!rk   ra   N)rb   rc   rd   re   r   r   r   r5   r   r   r	   rh   r   rj   ra   rk   rK   r   r      sp    L""&,"<B"	vt|	"6"",2"	x()4U+<<	="rk   r   c            
       \    \ rS rSrS\S\4S jrS\S\S\S\\\	4   4S	 jr
S\S\4S
 jrSrg)GreedyDecoderi  rS   eotc                     Xl         X l        g r   )rS   r   )r~   rS   r   s      rK   r   GreedyDecoder.__init__  s    &rk   rq   r3   r   r   c                     U R                   S:X  a  UR                  SS9nO#[        X R                   -  S9R                  5       n[        R
                  " UR                  5       SS9nU[        R                  " UR                  S   5      U4   nX6US S 2S4   U R                  :g  -  -  nU R                  XAS S 2S4   U R                  :H  '   [        R                  " XS S 2S 4   /SS9nUS S 2S4   U R                  :H  R                  5       nX4$ )Nr   r   r    )r3   )rS   r:   r   sampleFlog_softmaxrh   r.   aranger)   r   catall)r~   rq   r3   r   next_tokensr   current_logprobs	completeds           rK   r   GreedyDecoder.update  s     q  --B-/K%V6F6F-FGNNPK==R8#ELL1B$C[$PQF1b5MTXX,EFF151b5MTXX-.F4$89rBArE]dhh.335	  rk   c                 d    [         R                  " USU R                  S9nXR                  5       4$ )N)r   r   )r   )r   padr   tolistr   s      rK   r   GreedyDecoder.finalize'  s*    vvTXX6**,,,rk   )r   rS   N)rb   rc   rd   re   rh   ri   r   r   r   r5   r   r   rj   ra   rk   rK   r   r     sV    E  !!&,!<B!	vt|	!$-v -V -rk   r   c            
       v    \ rS rSr SS\S\S\S\\   4S jjrS r	S	\
S
\
S\
S\\
\4   4S jrS\
S\
4S jrSrg)BeamSearchDecoderi-  NrV   r   	inferencerW   c                     Xl         X l        X0l        U=(       d    SU l        [	        XR                  -  5      U l        S U l        U R
                  S:  d   SU SU S35       eg )Nr^   r   zInvalid beam size (z) or patience ())rV   r   r   rW   roundmax_candidatesfinished_sequences)r~   rV   r   r   rW   s        rK   r   BeamSearchDecoder.__init__.  sk     #" C#(]])B#C"& !#	G ?8*AF	G#rk   c                     S U l         g r   )r   r   s    rK   r   BeamSearchDecoder.reset@  s
    "&rk   rq   r3   r   r   c                   ^  UR                   S   T R                  -  S:w  a%  [        UR                    ST R                   S35      eUR                   S   T R                  -  nT R                  c!  [	        U5       Vs/ s H  n0 PM     snT l        [
        R                  " UR                  5       SS9n/ / / pn[	        U5       GHL  n
0 0 0 pn[	        T R                  5       H  nU
T R                  -  U-   nX   R                  5       n[        Xo   R                  T R                  S-   5      6  HA  u  nnX?   U-   R                  5       n[        UUR                  5       /-   5      nUUU'   XU'   MC     M     Sn[        XR                  SS9 Hm  nUS   T R                  :X  a
  UU   UU'   M   UU   U[!        U5      '   UR#                  U5        UR#                  UU   5        US-  nUT R                  :X  d  Mm    O   U	R#                  U5        GMO     [$        R&                  " XqR(                  S	9nT R*                  R-                  U5        [!        T R                  5      [!        U	5      :X  d   e[        T R                  U	5       HF  u  nn[        UUR                  SS9 H'  n[!        U5      T R.                  :  a    M:  UU   UU'   M)     MH     [1        U 4S
 jT R                   5       5      nUU4$ s  snf )Nr   z[0] % z != 0r   r    r   T)r   reverser2   c              3   T   >#    U  H  n[        U5      TR                  :  v   M     g 7fr   )r   r   ).0	sequencesr~   s     rK   	<genexpr>+BeamSearchDecoder.update.<locals>.<genexpr>z  s'      
4	 	Nd1114s   %()r)   rV   r&   r   r=   r   r   rh   r   r>   topkr@   tuplesortedgetr   r   r   r.   r/   r2   r   r   r   r   )r~   rq   r3   r   rB   _r   r   r   r   rG   r   sourcesfinishedrH   idxrZ   r   tokennew_logprobsequencesavedpreviously_finishednewly_finishedseqr   s   `                         rK   r   BeamSearchDecoder.updateC  s    <<?T^^+q0~VDNN3C5IJJ,,q/T^^3""*38>&B>ar>&BD#==R8:<b"%7wA(*BXF 4>>*$..(1,++-&)8=+=+=dnnq>P+Q&RNGU#/#4w#>"D"D"FK$Vuzz|n%<=H'2F8$(+H%	 'S + E"6zz4HB<488+)/)9HX&5;H5EL[!12&&x0"))'(*;<QJE. I %%h/7  : k--@)).9 4**+s3E/FFFF36##%74
/ n.2D2DdS*+t/B/BB+9#+>#C( T4
  
!44
 
	 y  g 'Cs   =K5preceding_tokensc           
         UR                  5       n[        U R                  5       H  u  p4[        U5      U R                  :  d  M   [        [        R                  " X#   5      5      S S S2    Hb  nXU4   R                  5       U R                  /-   nX#   U   R                  5       U[        U5      '   [        U5      U R                  :  d  Ma    M     M     U R                   VVs/ s H8  nUR                  5        Vs/ s H  n[        R                  " U5      PM     snPM:     nnnU R                   Vs/ s H  n[        UR                  5       5      PM     nnX4$ s  snf s  snnf s  snf )Nr   )r<   	enumerater   r   rV   r6   r8   argsortr   r   r@   r   keysr.   r/   values)	r~   r  r   rG   r   rH   r  r
  rq   s	            rK   r   BeamSearchDecoder.finalize  sB   #'')%d&=&=>LAI/bjj9:4R4@A/15<<>$((KH1=1C1H1H1JIeHo.9~7	 A	 ? "44&
4	 +4..*:;*:3U\\#*:;4 	 &

 7;6M6M+
6MD!!#$6M 	 +
 ## <&
+
s   &E#> EE#6#E)E#)rV   r   r   r   r   rW   r   )rb   rc   rd   re   ri   rx   r
   rh   r   r   r   r   r5   r   r   rj   ra   rk   rK   r   r   -  s     %)GG G 	G
 5/G$';!;!&,;!<B;!	vt|	;!z$ $v $rk   r   c                   *    \ rS rSrS\S\SS4S jrSrg)LogitFilteri  r3   rq   r   Nc                     [         e)aR  Apply any filtering or masking to logits in-place

Parameters
----------
logits : Tensor, shape = (n_batch, vocab_size)
    per-token logits of the probability distribution at the current step

tokens : Tensor, shape = (n_batch, current_sequence_length)
    all tokens in the context so far, including the prefix and sot_sequence tokens

r{   r~   r3   rq   s      rK   applyLogitFilter.apply  s
     "!rk   ra   )rb   rc   rd   re   r   r  rj   ra   rk   rK   r  r    s    "F "F "t "rk   r  c                   8    \ rS rSrS\S\4S jrS\S\4S jrSr	g	)
SuppressBlanki  r   sample_beginc                     Xl         X l        g r   )r   r  )r~   r   r  s      rK   r   SuppressBlank.__init__  s    "(rk   r3   rq   c                     UR                   S   U R                  :X  aJ  [        R                  * US S 2U R                  R                  S5      U R                  R                  /-   4'   g g )Nr    )r)   r  r8   r9   r   encoder   r  s      rK   r  SuppressBlank.apply  sS    <<?d///LNFF7F1dnn++C0DNN4F4F3GGGH 0rk   )r  r   N)
rb   rc   rd   re   r   ri   r   r   r  rj   ra   rk   rK   r  r    s+    )) )3 )SF SF Srk   r  c                   :    \ rS rSrS\\   4S jrS\S\4S jrSr	g)	SuppressTokensi  r[   c                 $    [        U5      U l        g r   )r6   r[   )r~   r[   s     rK   r   SuppressTokens.__init__  s    #O4rk   r3   rq   c                 H    [         R                  * US S 2U R                  4'   g r   )r8   r9   r[   r  s      rK   r  SuppressTokens.apply  s    +-66'q$&&&'rk   )r[   N)
rb   rc   rd   re   r   ri   r   r   r  rj   ra   rk   rK   r#  r#    s%    5 52F 2F 2rk   r#  c                   B    \ rS rSrS\S\S\\   4S jrS\S\4S jr	S	r
g
)ApplyTimestampRulesi  r   r  max_initial_timestamp_indexc                 (    Xl         X l        X0l        g r   )r   r  r*  )r~   r   r  r*  s       rK   r   ApplyTimestampRules.__init__  s     #(+F(rk   r3   rq   c                    U R                   R                  b,  [        R                  * US S 2U R                   R                  4'   [	        UR
                  S   5       GH  nX#U R                  S 24   nUR                  5        Vs/ s H  oUPM     nn[        U5      S:  =(       a    US   U R                   R                  :  n[        U5      S:  =(       d    US   U R                   R                  :  nU(       a^  U(       a,  [        R                  * XU R                   R                  S 24'   O+[        R                  * XS U R                   R                  24'   UUR                  U R                   R                  5         n	U	R                  5       S:  d  GM;  U(       a  U(       d  U	S   n
OU	S   S-   n
[        R                  * XU R                   R                  U
24'   GM     UR
                  S   U R                  :X  a{  [        R                  * US S 2S U R                   R                  24'   U R                  b@  U R                   R                  U R                  -   n[        R                  * US S 2US-   S 24'   [        R                  " UR!                  5       SS9n[	        UR
                  S   5       H  nXU R                   R                  S 24   R#                  SS9nXS U R                   R                  24   R%                  5       nX:  d  M]  [        R                  * XS U R                   R                  24'   M     g s  snf )Nr   r   r   r   r   r    )r   no_timestampsr8   r9   r=   r)   r  r   r   timestamp_beginr   genumelr*  r   r   rh   	logsumexpmax)r~   r3   rq   ksampled_tokensr   r
  last_was_timestamppenultimate_was_timestamp
timestampstimestamp_lastlast_allowedr   timestamp_logprobmax_text_token_logprobs                  rK   r  ApplyTimestampRules.apply  s   >>''379vvgF1dnn2223 v||A'A#t'8'8':$:;N,3356515C6CAK#b'T^^-K-K"K  C1IB4>>+I+I I & ",CE66'Fdnn<<>>?79vvgF2 2 2223'!!$.."@"@AJ !A% &.G%/^N%/^a%7NNPffW$..88>IIJ5 (8 <<?d///;=66'F1666667 //;NN22T5U5UU  24q,*,,- ==R8v||A'A (DNN,J,J,L)L M W W !X ! &.1Q4>>3Q3Q1Q.Q%R%V%V%X" 9?Avvg:DNN::::; (O 7s   M)r*  r  r   N)rb   rc   rd   re   r   ri   r
   r   r   r  rj   ra   rk   rK   r)  r)    sA    GG G &.c]	G5FF 5FF 5Frk   r)  c                      \ rS rSr% \\S'   \\S'   \\S'   \\	   \S'   SSS\
4S	 jrS\
S
\
4S jrS
\\   4S jrS
\\   4S jrS\4S jrS\S\4S jrS\S\4S jr\R,                  " 5       S\S
\\   4S j5       rSrg)DecodingTaski  r   sequence_rankerr   logit_filtersr   r   optionsc                    Xl         UR                  =(       d    Sn[        UR                  UR                  UUR
                  S9nX@l        U R                  U5      U l        UR                  =(       d    UR                  =(       d    SU l        UR                  R                  U l        UR                  =(       d    UR                  R                  S-  U l        UR                   U l        U R                  R"                  (       a  UR$                  U l        U R'                  5       U l        [+        U R(                  5      U l        U R(                  R/                  UR0                  5      U l        [5        U[+        U R(                  5      5      U l        [9        UR:                  5      U l        UR                  b<  [?        UR                  UR@                  U R6                  URB                  5      U l"        O%[G        URH                  UR@                  5      U l"        / U l%        U R                  RL                  (       a9  U RJ                  RO                  [Q        U R                  U R,                  5      5        U R                  RR                  (       a2  U RJ                  RO                  [U        U RW                  5       5      5        UR"                  (       d  [X        UR                  RZ                  -  nS nUR\                  (       a"  [_        U R                  R\                  U-  5      nU RJ                  RO                  [a        X@R,                  U5      5        g g )Nen)r   r#   rR   r   r   )1r   r#   r   r"   r   rR   r   _verify_optionsrB  rV   rU   n_groupr*   
n_text_ctxn_ctxrT   r%   r]   #sot_sequence_including_notimestamps_get_initial_tokensinitial_tokensr   r  indexr0   	sot_indexr   r   r   rX   r@  r   r   rW   r   r   rS   rA  r\   r   r  r[   r#  _get_suppress_tokensr   r+   r_   r   r)  )r~   r   rB  r#   r   	precisionr*  s          rK   r   DecodingTask.__init__  sp   
##+t!!!--	
	 %.(,(<(<W(E#--EEA**//
&11OUZZ5J5Ja5O(1(>(><<** ) M MD*.*B*B*D!$T%8%8!9"1177	F *%T5H5H1IJ  7w7M7MN (,!!9==$..'BRBRDL ))<)<immLDL  <<&&%%mDNNDDUDU&VW<<''%%nT5N5N5P&QR))$uzz'='==I*.',,.3LL66B/+ %%#002M *rk   r   c                 L   UR                   b  UR                  b  [        S5      eUR                  S:X  a  UR                  b  [        S5      eUR                  b  UR                   c  [        S5      eUR
                  b$  SUR
                  s=::  a  S::  d  O  [        S5      eU$ )Nz-beam_size and best_of can't be given togetherr   z4best_of with greedy sampling (T=0) is not compatiblez'patience requires beam_size to be givenr   z8length_penalty (alpha) should be a value between 0 and 1)rV   rU   r&   rS   rW   rX   )r~   rB  s     rK   rE  DecodingTask._verify_options<  s    (W__-HLMM!#* !WXX'G,=,=,EFGG!!-'',1,WXXrk   c                    [        U R                  5      nU R                  R                  =n(       au  [	        U[
        5      (       a,  U R                  R                  SUR                  5       -   5      OUnU R                  b!  U R                  S-  U R                  -
  nX4* S  nX-   nU R                  R                  =n(       as  [	        U[
        5      (       a,  U R                  R                  SUR                  5       -   5      OUnU R                  R                  /X`R                  S-  S-
  * S  -   U-   n[        U5      $ )Nr  r   r   )r6   r%   rB  rZ   
isinstancerf   r   r   striprT   rH  rY   sot_prevr   )r~   rq   rZ   prefix_tokensmax_prefix_lenrY   prompt_tokenss          rK   rJ   DecodingTask._get_initial_tokensK  s(   d''(\\(((6( fc** %%cFLLN&:; 
 *!%q4??!B -o.> ?+F\\(((6( fc** %%cFLLN&:;  (()**/A"5 6 89:  V}rk   c                 R   U R                   R                  n[        U[        5      (       a*  UR	                  S5       Vs/ s H  n[        U5      PM     nnSU;   a?  U Vs/ s H  o"S:  d  M
  UPM     nnUR                  U R                  R                  5        O1Ub  [        U5      S:X  a  / nO[        U[        5      (       d   S5       eUR                  U R                  R                  U R                  R                  U R                  R                  U R                  R                  U R                  R                  /5        U R                  R                   b%  UR#                  U R                  R                   5        [%        ['        [)        U5      5      5      $ s  snf s  snf )N,r   r   zsuppress_tokens must be a list)rB  r[   rT  rf   splitri   extendr   non_speech_tokensr   r6   rQ   	translater0   rV  sot_lm	no_speechr   r   r   set)r~   r[   r   s      rK   rN  !DecodingTask._get_suppress_tokensg  sJ   ,,66os++/>/D/DS/IJ/I!s1v/IOJ *9D/Q!Vq/OD""4>>#C#CD$O(<(A Oot44V6VV4))((""''%%	
 >>##/""4>>#;#;<VC0122/ K Es   F 	F$-F$r   c                    U R                   R                  (       a  UR                  5       nUR                  SS  U R                  R
                  R                  U R                  R
                  R                  4:X  a  UnOU R                  R                  U5      nUR                  U R                   R                  (       a  [        R                  O[        R                  :w  a  [        SUR                   35      $ U$ )Nr   z'audio_features has an incorrect dtype: )rB  r`   halfr)   r   r*   r+   r,   r-   r   r.   float16float32	TypeError)r~   r   ro   s      rK   _get_audio_features DecodingTask._get_audio_features  s    <<((*C99RS>JJOO''JJOO))
 

 !N!ZZ//4N!\\..EMMEMM
 9.:N:N9OP  rk   ro   rq   c                    U R                   R                  /UR                  S   -  nS nU R                   R                  b  U R                   R                  S:X  av  U R                  R                  XR                  5      u  pTU Vs/ s H  n[        XfR                  S9PM     nnU R                   R                  c  XRS S 2U R                  S-   4'   X44$ s  snf )Nr   lang_id)r   r   )
rB  r#   r)   rR   r   rL   r   r3  r   rM  )r~   ro   rq   	languages
lang_probslang_tokensprobss          rK   _detect_languageDecodingTask._detect_language  s    \\**+n.B.B1.EE	
<<  (DLL,=,=,J&*jj&@&@'#K AKK
uU		2
IK||$$,0;q$..1,,-$$	 Ls   Cc                 @   UR                   S   n[        R                  " X1R                  S9n[        R
                  /U-  n [        U R                  5       GH  nU R                  R                  X!5      nUS:X  ap  U R                  R                  bY  US S 2U R                  4   R                  5       R                  SS9nUS S 2U R                  R                  4   R                  5       nUS S 2S4   nU R                    H  n	U	R#                  Xr5        M     U R$                  R'                  X'U5      u  p*U
(       d   UR                   S   U R(                  :  d  GM    O   U R                  R+                  5         X$U4$ ! U R                  R+                  5         f = f)Nr   r   r   r    )r)   r.   zerosr2   r8   rv   r=   rT   r   r3   r   rb  rM  rh   r;   r   rA  r  r   r   rH  r   )r~   ro   rq   n_batchr   no_speech_probsrG   r3   probs_at_sotlogit_filterr   s              rK   
_main_loopDecodingTask._main_loop  sY   ,,q/${{7;P;PQ66(W,	-4??+..vF Ft~~77C#)!T^^*;#<#B#B#D#L#LQS#L#TL&21dnn6N6N3N&O&V&V&XO  2 %)$6$6L &&v6 %7 %)LL$7$7$U!R 04:: =) ,, NN**,_44 NN**,s   DF F Fc                 V   U R                   R                  5         U R                  nUR                  S   nU R	                  U5      n[
        R                  " U R                  /5      R                  US5      nU R                  XE5      u  pgU R                  R                  S:X  a,  [        XFU5       VV	V
s/ s H  u  pn
[        XU
S9PM     sn
n	n$ UR                  U R                  SS9R!                  UR"                  5      nU R%                  XE5      u  p[nUS S U R                  2   nUS S U R                  2   nUR                  S   ['        U5      s=:X  a  U:X  d   e   eUR)                  X0R                  S5      nUR)                  X0R                  5      nU R                   R+                  X[5      u  p[U VVs/ s H?  nU Vs/ s H.  oU R,                  XR.                  :H  R1                  5       S    PM0     snPMA     nnnU R2                  R5                  X[5      n[        X5       VVs/ s H  u  noU   R7                  5       PM     nnnU Vs/ s H!  oR9                  U5      R;                  5       PM#     nn[        X5       VVs/ s H  u  nnUU   PM     nnn[        X[5       VVs/ s H  u  nnU['        U5      S-   -  PM     nnnUUUUUU4n['        [=        [?        [&        U5      5      5      S:w  a%  [A        S[C        [?        [&        U5      5       35      e[        U6  VV	VVVVs/ s H6  u  npnnn[        UU	UUUUU R                  RD                  [G        U5      S	9PM8     snnnnn	n$ s  sn
n	nf s  snf s  snnf s  snnf s  snf s  snnf s  snnf s  snnnnn	nf )
Nr   r   rm  )ro   r#   rJ   r    r   )r   r   zinconsistent result lengths: )ro   r#   rq   rs   rt   ru   rS   r   )$r   r   r   r)   rj  r.   r/   rK  repeatrr  rB  rR   r>   rm   repeat_interleaverF  r1   r2   rz  r   reshaper   r  r   nonzeror@  r   r   decoderU  rc  mapRuntimeErrorr6   rS   r   )r~   r   r   rB   ro   rq   rn  rJ   featuresr#   rq  r   rw  r   r   selectedrG   textslpavg_logprobsfieldsrs   rt   ru   s                           rK   runDecodingTask.run  s   #~~	yy|!%!9!9#!>t':':&;<CCGQO %)$9$9.$Q!	<<	)
 25"~2	2-H #+u2	  ))$,,A)>AA.BWBWX 150W-o (4<<8)/T\\/:##A&#o*>I'IIIIIr:#++G\\B  $||44VJ &
 STTRSQt  A$6#?#?#A$#GHRST 	 &
 '',,VB=@=R"S=RTQQ4;;==R"SAGHA,,Q/557H8;H8S$T8Suq"RU8S$T+.v+D%
+D%!RB#a&1*+D 	 %

 
 s3sF#$%*!>tCVDT?U>VWXX RUR
 
RMh+~ '!'- LL44"3D"9	R
 	
g4 U&
 #TH$T%

s<   5M8=
N5M?<N/N
(NN0N3=N!?N)r   r   rK  rA  r   rH  rF  rB  r  rT   r@  rM  r%   r   N)rb   rc   rd   re   rx   rg   r   r   r	   r  rO   r   rE  r   ri   rJ  rN  r   rj  rr  rz  r.   no_gradrm   r  rj   ra   rk   rK   r?  r?    s    ##$$8i 8/ 8t ? U3Z 83eCj 3:v ,%v %v %5 5 5@ ]]_L
v L
$~"6 L
 L
rk   r?  rB  c                     UR                   S:H  =n(       a  UR                  S5      nU(       a  [        U40 UD6n[        X5      R	                  U5      nU(       a  US   $ U$ )a  
Performs decoding of 30-second audio segment(s), provided as Mel spectrogram(s).

Parameters
----------
model: Whisper
    the Whisper model instance

mel: torch.Tensor, shape = (80, 3000) or (*, 80, 3000)
    A tensor containing the Mel spectrogram(s)

options: DecodingOptions
    A dataclass that contains all necessary options for decoding 30-second segments

Returns
-------
result: Union[DecodingResult, List[DecodingResult]]
    The result(s) of decoding contained in `DecodingResult` dataclass instance(s)
r   r   )r'   r(   r   r?  r  )r   r   rB  kwargsrA   r   s         rK   r  r    s]    4 QvmmA',V,%)--c2F6!9*F*rk   r   )2dataclassesr   r   r   typingr   r   r   r	   r
   r   r   r   numpyr8   r.   torch.nn.functionalnn
functionalr   r   torch.distributionsr   audior   r   r   r   utilsr   r   r   r  dictrL   rO   rm   rx   r   r   r   r   r   r   r  r  r#  r)  r?  r  ra   rk   rK   <module>r     s   1 1 X X X      +  / $ :>:+:+!:+.7:+
64::+ :+z $! ! !H $	& 	& 	&  Wy  WF" "Pn P45" 5"p-L -:g$ g$T" " SK S2[ 2@F+ @FFY
 Y
x   /0!+!+	!+ !+
 >4//0!+ !+rk   