
    shQ                        S SK r S SKJr  S SKJrJr  S SKJrJr  S SK	r
S SKrS SKJs  Jr  SSKJr  SrSrS	rS
r\\-  r\" \\5      r\S-  r\" \\5      r\" \\5      r\4S\S\4S jjr\4SS.S\S\4S jjjr\" SS9S\S\R>                  4S j5       r    SS\\\
RB                  \R>                  4   S\S\S\\\\RD                  4      4S jjr#g)    N)	lru_cache)CalledProcessErrorrun)OptionalUnion   )	exact_divi>  i           filesrc                 z   SSSSSU SSSS	S
SS[        U5      S/n [        USSS9R                  n[        R                  " U[        R                  5      R                  5       R                  [        R                  5      S-  $ ! [         a,  n[	        SUR
                  R                  5        35      UeSnAff = f)a  
Open an audio file and read as mono waveform, resampling as necessary

Parameters
----------
file: str
    The audio file to open

sr: int
    The sample rate to resample the audio if necessary

Returns
-------
A NumPy array containing the audio waveform, in float32 dtype.
ffmpegz-nostdinz-threads0z-iz-fs16lez-ac1z-acodec	pcm_s16lez-ar-T)capture_outputcheckzFailed to load audio: Ng      @)strr   stdoutr   RuntimeErrorstderrdecodenp
frombufferint16flattenastypefloat32)r   r   cmdoutes        a/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/whisper/audio.py
load_audior'      s    * 	Cdgs;s2w
CP#d$7>> ==bhh'//188DwNN  P3AHHOO4E3FGHaOPs   B 
B:'B55B:)axislengthr)   c          	         [         R                  " U 5      (       a  U R                  U   U:  a,  U R                  U[         R                  " XR
                  S9S9n U R                  U   U:  a\  S/U R                  -  nSXR                  U   -
  4X2'   [        R                  " XSSS2    VVs/ s H  oD  H  oUPM     M     snn5      n U $ U R                  U   U:  a  U R                  [        U5      US9n U R                  U   U:  a;  S/U R                  -  nSXR                  U   -
  4X2'   [        R                  " X5      n U $ s  snnf )zG
Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
)device)dimindex)r   r   r   Nr(   )indicesr)   )torch	is_tensorshapeindex_selectaranger,   ndimFpadtakeranger   )arrayr*   r)   
pad_widthssizesr7   s         r&   pad_or_trimr=   A   s:    u;;tv%&&VLL I ' E ;;tv% EJJ.J !6KK,=#=>JEE%dd3C!U3C%u#u#3C!UVE L ;;tv%JJuV}4J@E;;tv% EJJ.J !6KK,=#=>JFF5-EL "Vs   .E)maxsizen_melsreturnc                 \   US;   d
   SU 35       e[         R                  R                  [         R                  R                  [        5      SS5      n[
        R                  " USS9 n[        R                  " USU 3   5      R                  U 5      sSSS5        $ ! , (       d  f       g= f)	aD  
load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
Allows decoupling librosa dependency; saved using:

    np.savez_compressed(
        "mel_filters.npz",
        mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
        mel_128=librosa.filters.mel(sr=16000, n_fft=400, n_mels=128),
    )
>   P      zUnsupported n_mels: assetszmel_filters.npzF)allow_picklemel_N)
ospathjoindirname__file__r   loadr0   
from_numpyto)r,   r?   filters_pathfs       r&   mel_filtersrQ   [   s     Y?"6vh ??77<< 98EVWL	E	2aD/ 2366v> 
3	2	2s   (+B
B+audiopaddingr,   c                    [         R                  " U 5      (       d6  [        U [        5      (       a  [	        U 5      n [         R
                  " U 5      n Ub  U R                  U5      n US:  a  [        R                  " U SU45      n [         R                  " [        5      R                  U R                  5      n[         R                  " U [        [        USS9nUSSS24   R                  5       S-  n[        U R                  U5      nXv-  n[         R                   " USS	9R#                  5       n	[         R$                  " XR'                  5       S
-
  5      n	U	S-   S-  n	U	$ )a=  
Compute the log-Mel spectrogram of

Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
    The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz

n_mels: int
    The number of Mel-frequency filters, only 80 and 128 are supported

padding: int
    Number of zero samples to pad to the right

device: Optional[Union[str, torch.device]]
    If given, the audio tensor is moved to this device before STFT

Returns
-------
torch.Tensor, shape = (n_mels, n_frames)
    A Tensor that contains the Mel spectrogram
Nr   T)windowreturn_complex.r(   r   g|=)ming       @g      @)r0   r1   
isinstancer   r'   rM   rN   r6   r7   hann_windowN_FFTr,   stft
HOP_LENGTHabsrQ   clamplog10maximummax)
rR   r?   rS   r,   rU   r[   
magnitudesfiltersmel_speclog_specs
             r&   log_mel_spectrogramrf   n   s   8 ??5!!eS!!u%E  ' {ea\*u%((6F::eUJvdSDc3B3h##%*J%,,/G#H{{8/557H}}X||~';<H3#%HO    )rB   r   N)$rG   	functoolsr   
subprocessr   r   typingr   r   numpyr   r0   torch.nn.functionalnn
functionalr6   utilsr	   SAMPLE_RATErZ   r\   CHUNK_LENGTH	N_SAMPLESN_FRAMESN_SAMPLES_PER_TOKENFRAMES_PER_SECONDTOKENS_PER_SECONDr   intr'   r=   TensorrQ   ndarrayr,   rf    rg   r&   <module>r{      s2   	  . "      
;&	Y
+ 1n k:6 k+>?  %0 %OS %Oc %OP &/ r s  4 4? ? ? ?( 15	/bjj%,,./// / U3,-.	/rg   