
    sh                         S SK r S SKrS SKr0 SS_SS_SS_SS	_S
S_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_rS$S\4S  jjrS\4S! jr " S" S#5      rg)%    Nu   œoeu   ŒOE   øo   ØO   æae   ÆAE   ßssu   ẞSSu   đdu   ĐD   ð   Ð   þth   Þu   łlu   ŁLsc                 d   ^ SR                  U4S j[        R                  " SU 5       5       5      $ )z
Replace any other markers, symbols, and punctuations with a space,
and drop any diacritics (category 'Mn' and some manual mappings)
 c              3      >#    U  H]  nUT;   a  UOOU[         ;   a	  [         U   O<[        R                  " U5      S :X  a  SO [        R                  " U5      S   S;   a  SOUv   M_     g7f)Mnr   r   MSP N)ADDITIONAL_DIACRITICSunicodedatacategory).0ckeeps     m/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/whisper/normalizers/basic.py	<genexpr>0remove_symbols_and_diacritics.<locals>.<genexpr>    su       2A Dy  -- &a( #++A.$6  + 4 4Q 7 :e C
 2s   A%A(NFKDjoinr!   	normalize)r   r%   s    `r&   remove_symbols_and_diacriticsr-      s2    
 77  &&vq1      c                 \    SR                  S [        R                  " SU 5       5       5      $ )zS
Replace any other markers, symbols, punctuations with a space, keeping diacritics
r   c              3   d   #    U  H&  n[         R                  " U5      S    S;   a  SOUv   M(     g7f)r   r   r   N)r!   r"   )r#   r$   s     r&   r'   !remove_symbols.<locals>.<genexpr>6   s3      1A ##A&q)U291s   .0NFKCr*   )r   s    r&   remove_symbolsr3   2   s/     77 &&vq1  r.   c                   8    \ rS rSrS	S\S\4S jjrS\4S jrSrg)
BasicTextNormalizer<   remove_diacriticssplit_lettersc                 @    U(       a  [         O[        U l        X l        g )N)r-   r3   cleanr8   )selfr7   r8   s      r&   __init__BasicTextNormalizer.__init__=   s    ->)N 	
 +r.   r   c                    UR                  5       n[        R                  " SSU5      n[        R                  " SSU5      nU R                  U5      R                  5       nU R                  (       a5  SR                  [        R                  " SU[        R                  5      5      n[        R                  " SSU5      nU$ )Nz[<\[][^>\]]*[>\]]r   z\(([^)]+?)\)r   z\Xz\s+)	lowerresubr:   r8   r+   regexfindallU)r;   r   s     r&   __call__BasicTextNormalizer.__call__C   s    GGIFF'Q/FF?B*JJqM!ua9:AFFC
 r.   )r:   r8   N)FF)	__name__
__module____qualname____firstlineno__boolr<   strrE   __static_attributes__ r.   r&   r5   r5   <   s!    +$ +t +# r.   r5   )r   )r@   r!   rB   r    rL   r-   r3   r5   rN   r.   r&   <module>rO      s    	  $$ 	# 	#	
 	$ 	$ 	$ 
4 	# 	# 	# 	# 	$ 	$ 	#  	#! (S .c  r.   