
    sh@                        S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJ r   S SKJ!r!  S SK"J#r#  S SK"J$r$  S SK"J%r%  S SK&J'r'  Sr( S SK)J*r*  Sr+ S SK-r.Sr/\ R`                  " S5      r1Sr2\Rf                  " S5      r4Sr51 Skr6S \74S!\74S"\74S#\84S$\94S%\84S&.r:S'S(S)S*S+.r;\%" S,5      r<\%" S-5      r= " S. S/5      r> " S0 S1\?5      r@ " S2 S3\A5      rB " S4 S5\?5      rC " S6 S7\?5      rD " S8 S9\?5      rES: rF\S\S; j5       rGS< rHS= rIS> rJS? rK\S@ 5       rLS]SA jrM   S^SC jrNS_SD\9SE\O4SF jjrP    S`SG\ \9   SH\!\9   SI\7SJ\7SE\O4
SK jjrQ      SaSL jrRSM rSSN rTSO rU\HSbSP j5       rV\HSQ 5       rWSSBS \>R                  S 4SR jrY     ScSS jrZ    SdST jr[SSBS \>R                  S 4SU jr\S\SV jr]SSBS \>R                  S S4SW jr^SXSBS \>R                  S 4SY jr_SZ r`\aS[:X  a  \b" \`" 5       5      eg! \, a    Sr+ GNf = f! \, a    Sr/ GNf = f)e    N)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)NamedTemporaryFile)sleep)List)Optional)InvalidVersion)parse)Version)Image	tesseract)ndarrayTFpytesseractzutf-8z	^[a-z_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez.tessedit_create_boxfile=1 batch.nochop makeboxztessedit_create_alto=1ztessedit_create_hocr=1ztessedit_create_tsv=1)boxxmlhocrtsvz3.05z4.1.0c                   $    \ rS rSrSrSrSrSrSrg)OutputV   bytesz
data.framedictstring N)	__name__
__module____qualname____firstlineno__BYTES	DATAFRAMEDICTSTRING__static_attributes__r8       k/Users/tiagomarins/Projetos/claudeai/copy_bank/venv/lib/python3.13/site-packages/pytesseract/pytesseract.pyr3   r3   V   s    EIDFrB   r3   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PandasNotSupported]   c                 $   > [         TU ]  S5        g )NzMissing pandas packagesuper__init__self	__class__s    rC   rJ   PandasNotSupported.__init__^   s    12rB   r8   r9   r:   r;   r<   rJ   rA   __classcell__rM   s   @rC   rE   rE   ]   s    3 3rB   rE   c                       \ rS rSrS rSrg)TesseractErrorb   c                 ,    Xl         X l        X4U l        g N)statusmessageargs)rL   rW   rX   s      rC   rJ   TesseractError.__init__c   s    %	rB   )rY   rX   rW   N)r9   r:   r;   r<   rJ   rA   r8   rB   rC   rS   rS   b   s    &rB   rS   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )TesseractNotFoundErrori   c                 2   > [         TU ]  [         S35        g )NzQ is not installed or it's not in your PATH. See README file for more information.)rI   rJ   tesseract_cmdrK   s    rC   rJ   TesseractNotFoundError.__init__j   s    o 5 6	
rB   r8   rO   rQ   s   @rC   r\   r\   i   s    
 
rB   r\   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )TSVNotSupportedq   c                 $   > [         TU ]  S5        g )Nz4TSV output not supported. Tesseract >= 3.05 requiredrH   rK   s    rC   rJ   TSVNotSupported.__init__r   s    B	
rB   r8   rO   rQ   s   @rC   rb   rb   q       
 
rB   rb   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )ALTONotSupportedx   c                 $   > [         TU ]  S5        g )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredrH   rK   s    rC   rJ   ALTONotSupported.__init__y   s    D	
rB   r8   rO   rQ   s   @rC   rh   rh   x   rf   rB   rh   c                     U R                  5          U R                  S5        U R                  5         Xl        g ! [         a    [        S5         N.[         a     N9f = f! U R                  5         Xl        f = f)N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcodes     rC   rr   rr      sd    "Q 	!  a  	!s-   : AA  	AA  AA   A8c              #     #     U(       de  U R                  5       S   v    U R                  R                  5         U R                  R                  5         U R                  R                  5         g  U R                  US9u  p#Uv    U R                  R                  5         U R                  R                  5         U R                  R                  5         g ! [
        R                   a    [        U S5        [        S5      ef = f! U R                  R                  5         U R                  R                  5         U R                  R                  5         f = f7f)Nrm   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredrr   RuntimeError)procseconds_error_strings       rC   timeout_managerr      s    ""$Q'' 	

	<"..w.?OA
 	

 (( 	<rN:;;	< 	

s6   ED AE1C AE,DD AEEc                 B   ^ ^ [        T 5      U U4S j5       mTTl        T$ )Nc                     > UR                  SS5      (       a  TR                  TL a  T" U 0 UD6Tl        TR                  $ )NcachedF)pop_result)rY   kwargsfuncwrappers     rC   r   run_once.<locals>.wrapper   s:    zz(E**goo.H"D3F3GOrB   )r   r   )r   r   s   `@rC   run_oncer      s'    
4[ 
 GONrB   c                     SR                  S U R                  [        5      R                  5        5       5      R	                  5       $ )N c              3   $   #    U  H  ov   M     g 7frV   r8   .0lines     rC   	<genexpr>get_errors.<locals>.<genexpr>   s      KKs   )joindecodeDEFAULT_ENCODING
splitlinesstrip)r   s    rC   
get_errorsr      s<    88 %,,-=>IIK egrB   c                     [        U (       a  U  S3OU 5       H  n [        U5        M     g! [         a   nUR                  [        :w  a  e  SnAM9  SnAff = f)z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenamees      rC   cleanupr      sO    YYKq/IF	8 G  	ww&  !	s   ,
AAAc                    [         (       a+  [        U [        5      (       a  [        R                  " U 5      n [        U [        R                  5      (       d  [        S5      eU R                  (       d  SOU R                  nU[        ;  a  [        S5      eSU R                  5       ;   aJ  [        R                  " [        U R                  S5      nUR                  U SU R                  S5      5        Un Xl        X4$ )NzUnsupported image objectr    zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrp   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgrounds      rC   preparer      s    :eW55&eU[[))233"\\u||I))788
ennYYx_E
(8(8(=>LrB   c           	   #     #     [        SSS9 n[        U [        5      (       aJ  UR                  [	        [        [        U 5      5      5      4v    S S S 5        [        UR                  5        g [        U 5      u  pUR                   S[         U 3nU R                  X0R                  S9  UR                  U4v   S S S 5        [        WR                  5        g ! , (       d  f       N$= f! [        WR                  5        f = f7f)Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r
   saver   )r   fr   input_file_names       rC   r   r      s     
wu=%%%ffhx'@AAA > 	  'u~E!"vhykBOJJ||J<&&/)) > 	 >= 	sG   D
C* ACC* D.AC;C* D
C'#C* *DDc                    [         R                  [         R                  S [        S.n[        [         S5      (       aV  [         R                  " 5       US'   US   =R
                  [         R                  -  sl        [         R                  US   l        U (       a  [         R                  US'   U$ [         R                  US'   U$ )N)rz   r}   startupinfoenvSTARTUPINFOr   r|   )
r~   PIPEr	   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutr   s     rC   subprocess_argsr      s    
 //	F z=)) * 6 6 8}}%%)H)HH%,6,>,>})%??x M &--xMrB    c                    / n[         R                  S:X  + nU(       a  US:w  a  USS[        U5      4-  nU[        X4-  nUb  USU4-  nU(       a  U[        R
                  " XHS9-  nUR                  5        H  n	U	S;  d  M  UR                  U	5        M     [        R                  SU5         [        R                  " U40 [        5       D6n
[!        X5       nU
R"                  (       a  [%        U
R"                  ['        U5      5      e S S S 5        g ! [         a$  nUR                  [        :w  a  e [        5       eS nAff = f! , (       d  f       g = f)	Nwin32r   nicez-n-l)posix>   r.   osdr1   r/   z%r)sysplatformr   r_   shlexsplitappendLOGGERdebugr~   Popenr   r   r   r   r\   r   rs   rS   r   )input_filenameoutput_filename_baser   langconfigr   rw   cmd_argsnot_windows
_extensionr   r   r   s                rC   run_tesseractr      s*    H||w/KtqyVT3t9--EEHT4L EKK::oo'
99OOJ' ( LLx +>O,=> 
	'<?? *\2JKK  
(	'  +77f(**	+ 
(	's$   /D 1E
ED==E
Er   return_bytesc                     [        U S5       nU(       a  UR                  5       sS S S 5        $ UR                  5       R                  [        5      sS S S 5        $ ! , (       d  f       g = f)Nrb)openreadr   r   )r   r   output_files      rC   _read_outputr     sK    	h	##% 
	 !(()9: 
		s   A"A
A(
extensionsr   r   rw   c           	      r   SR                  S U 5       5      R                  5       nU(       a  SU 3nOSn[        U 5       u  pxUUSR                  U5      UUUUS.n	[        S	0 U	D6  U V
s/ s H$  n
[	        U	S    [
         U
 3U
S;   a  SOU5      PM&     sn
sS S S 5        $ s  sn
f ! , (       d  f       g = f)
Nr   c              3   N   #    U  H  n[         R                  US 5      v   M     g7f)r   N)EXTENTION_TO_CONFIGget)r   r   s     rC   r   .run_and_get_multiple_output.<locals>.<genexpr>.  s$      @J9	2..
s   #%z-c r   r   r   r   r   r   r   rw   r   >   pdfr0   Tr8   )r   r   r   r   r   r
   )r   r   r   r   rw   r   r   r   r   r   r   s              rC   run_and_get_multiple_outputr   &  s     XX @J eg  vh	e3,$-*-
 	 (

 (		 0126(9+F!_4, (
 

 
s   +B(,+B#B(#B((
B6c           	          [        U 5       u  pxUUUUUUUS.n	[        S0 U	D6  [        U	S    [         U 3U5      sS S S 5        $ ! , (       d  f       g = f)Nr   r   r8   )r   r   r   r
   )
r   r   r   r   r   rw   r   r   r   r   s
             rC   run_and_get_outputr   L  se     
e3,$-"
 	,-.vhykB
 
s   0A
Ac                 L   0 nU R                  5       R                  S5       Vs/ s H  oDR                  U5      PM     nn[        U5      S:  a  U$ UR                  S5      n[        U5      n[        US   5      U:  a  US   R	                  S5        US:  a  X'-  n[        U5       H_  u  p[        5       X9'   U HH  n[        U5      U::  a  M  X:w  a   [        [        XH   5      5      n
OXH   n
X9   R	                  U
5        MJ     Ma     U$ s  snf ! [         a    XH   n
 N2f = f)N
   r   rx   r   )
r   r   lenr   r   	enumeratelistintfloat
ValueError)r1   cell_delimiterstr_col_idxresultrowrowsheaderlengthiheadvals              rC   file_to_dictr  g  s   F141B1B41HI1H#IIn%1HDI
4y1}XXa[F[F
48}v 	RQV$vC3x1}!eCFm,C fL$  %  M= J. " !&C!s   DDD#"D#c                     U[         L a  U R                  5       $ U[        L a   [        U 5        gg! [         a     gf = f)NTF)r   isdigitr   r   )r
  _types     rC   is_validr    sD    |{{}~	#J   		s   1 
>>c           	         S U R                  S5       5        Vs0 s H]  n[        U5      S:X  d  M  [        US   [        US      S   5      (       d  M7  [        US      S   [        US      S   " US   5      _M_     sn$ s  snf )Nc              3   B   #    U  H  oR                  S 5      v   M     g7f): N)r   r   s     rC   r   osd_to_dict.<locals>.<genexpr>  s     @::d##s   r   r   rm   r   )r   r   r  OSD_KEYS)r   kvs     rC   osd_to_dictr    s     A		$@@Br7a< 	6$RUHRUOA,>? 	6AHRUOA.r!u55@  s   BB*Bc                 2   [         S/nU (       a  U[        R                  " U 5      -  n [        R                  " U[        R
                  [        R                  S9nUR                  S;  a
  [        5       e/ nUR                  (       av  UR                  R                  [        5      R                  [        5       H@  nUR                  5       n[        R!                  U5      (       d  M/  UR#                  U5        MB     U$ ! [         a    [        5       ef = f)Nz--list-langs)r|   r}   )r   rm   )r_   r   r   r~   runr   STDOUTr   r\   rs   r|   r   r   r   r   LANG_PATTERNmatchr   )r   r   r  	languagesr   r   s         rC   get_languagesr    s    ~.HEKK'''??$$
 &$&&I}}MM(()9:@@ID::<D!!$''  & J
   '$&&'s   2D Dc                      [         R                  " [        S/[         R                  [        [         R
                  S9n U R                  [        5      nUR                  [        R                  SS 5      R                  S5      tp#UR                  S5      tp# [        U5      nU[        :  d   e U$ ! [         a    [        5       ef = f! [         ["        4 a    [%        SU S35      ef = f)	z1
Returns Version object of the Tesseract version
z	--version)r}   r   rz   
   Nr   -zInvalid tesseract version: "")r~   check_outputr_   r  r	   r   r   r\   r   r   lstripr7   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionr   versions        rC   get_tesseract_versionr-    s    
'((K($$$$	
 -- 01K!(()9)9"#)>?II#NOK!++C0OKH$//// N  '$&&' N+ H7}AFGGHs   =B3 C 3C C+c                    ^ U SXX5/m[         R                  U4S j[         R                  U4S j[         R                  U4S j0U   " 5       $ )zK
Returns the result of a Tesseract OCR run on the provided image to string
txtc                     > [        T S/-   6 $ NTr   rY   s   rC   <lambda>!image_to_string.<locals>.<lambda>      044&=BrB   c                     > S[        T 6 0$ )Ntextr2  r3  s   rC   r4  r5    s    f&8$&?@rB   c                     > [        T 6 $ rV   r2  r3  s   rC   r4  r5        148rB   )r3   r=   r?   r@   r   r   r   r   output_typerw   rY   s         @rC   image_to_stringr=    sL     5$6D 	B@8 	  rB   c                 x    US;  a  [        SU 35      eUS:X  a  SUR                  5        3nXXX5S/n[        U6 $ )zM
Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
>   r   r0   zUnsupported extension: r0   z-c tessedit_create_hocr=1 T)r   r   r   )r   r   r   r   r   rw   rY   s          rC   image_to_pdf_or_hocrr?    sS     '29+>??F-flln-=>dD4@Dt$$rB   c                 |    [        SS9[        :  a
  [        5       eSUR                  5        3nU SXX4S/n[	        U6 $ )zM
Returns the result of a Tesseract OCR run on the provided image to ALTO XML
Tr   z-c tessedit_create_alto=1 r/   )r-  TESSERACT_ALTO_VERSIONrh   r   r   )r   r   r   r   rw   rY   s         rC   image_to_alto_xmlrC    sI     D),BB  )&,,.)9:F5$t<Dt$$rB   c                    ^ UR                  5        S3nU SXX5/m[        R                  U4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )zJ
Returns string containing recognized characters and their box boundaries
z2 -c tessedit_create_boxfile=1 batch.nochop makeboxr.   c                     > [        T S/-   6 $ r1  r2  r3  s   rC   r4   image_to_boxes.<locals>.<lambda>)  r6  rB   c                  0   > [        S[        T 6  3SS5      $ )Nz char left bottom right top page
r   r   r  r   r3  s   rC   r4  rF  *  s!    \/0BD0I/JK
rB   c                     > [        T 6 $ rV   r2  r3  s   rC   r4  rF  /  r:  rB   r   r3   r=   r?   r@   r;  s         @rC   image_to_boxesrK    si     <<>
LM  5$6D 	B 

 	8   rB   c                     [         (       d
  [        5       e[        SS.n UR                  U5        [        R                  " [        [        U 6 5      40 UD6$ ! [        [
        4 a     N8f = f)N	)quotingsep)
pandas_installedrE   r   updaterp   r   pdread_csvr   r   )rY   r   r   s      rC   get_pandas_outputrT  3  sf     ""#D1Ff ;;w1489DVDD z" s   A A*)A*c           	      $  ^^ [        SS9[        :  a
  [        5       eSUR                  5        3nU SXX5/m[        R
                  U4S j[        R                  UU4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )	zh
Returns string containing box boundaries, confidences,
and other information. Requires Tesseract 3.05+
TrA  z-c tessedit_create_tsv=1 r1   c                     > [        T S/-   6 $ r1  r2  r3  s   rC   r4  image_to_data.<locals>.<lambda>U  r6  rB   c                  $   > [        T S/-   T5      $ r1  )rT  )rY   pandas_configs   rC   r4  rW  V  s    "3D6M#
rB   c                  *   > [        [        T 6 SS5      $ )NrM  rx   rH  r3  s   rC   r4  rW  Z  s    \*<d*CT2NrB   c                     > [        T 6 $ rV   r2  r3  s   rC   r4  rW  [  r:  rB   )	r-  r&  rb   r   r3   r=   r>   r?   r@   )r   r   r   r   r<  rw   rY  rY   s         `@rC   image_to_datar\  @  s     D),AA((89F5$6D 	B 
 	N8   rB   r   c                    ^ SUR                  5        3nU SXX5/m[        R                  U4S j[        R                  U4S j[        R                  U4S j0U   " 5       $ )zF
Returns string containing the orientation and script detection (OSD)
z--psm 0 r   c                     > [        T S/-   6 $ r1  r2  r3  s   rC   r4  image_to_osd.<locals>.<lambda>n  r6  rB   c                  &   > [        [        T 6 5      $ rV   )r  r   r3  s   rC   r4  r_  o  s    [);T)BCrB   c                     > [        T 6 $ rV   r2  r3  s   rC   r4  r_  p  r:  rB   rJ  r;  s         @rC   image_to_osdrb  _  s_     '(F5$6D 	BC8 	  rB   c                     [        [        R                  5      S:X  a  [        R                  S   S pOs[        [        R                  5      S:X  a=  [        R                  S   S:X  a&  [        R                  S   [        R                  S   pO[        S[        R                  S9  g [
        R                  " U 5       n[        [        X!S95        S S S 5        g ! , (       d  f       g = f! [         a.  n[        [        U5       S	3[        R                  S9   S nAgS nAf[         a:  n[        [        U5      R                   S
U 3[        R                  S9   S nAgS nAff = f)Nr   rm      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r  )r   r   argvprintr}   r   r   r=  r\   r   r   typer9   )r   r   imgr   s       rC   mainrk  t  s    
388}!d$	SXX!	t 3!chhqk$9

KZZ!S/#12 "!!! Qm#**- a!!""QC(szz:sB   'C* =CC* 
C'#C* 'C* *
E$4$DE$*0EE$__main__rV   )T)r   r   r   )F)Nr   r   F)r   Nr   r   r   F)r   )Nr   r   r   r   )Nr   r   r   )cloggingrer   r7   r~   r   
contextlibr   csvr   r   r   	functoolsr   globr   ior   osr	   r
   r   r   os.pathr   r   r   tempfiler   timer   typingr   r   packaging.versionr   r   r   PILr   r_   numpyr   r   ModuleNotFoundErrorpandasrR  rP  	getLoggerr   r   compiler  r   r   r   r   r   r  r   r&  rB  r3   EnvironmentErrorrE   r   rS   r\   rb   rh   rr   r   r   r   r   r   r   r   r   boolr   r   r   r  r  r  r  r-  r@   r=  r?  rC  rK  rT  r\  rb  rk  r9   r(  r8   rB   rC   <module>r     sJ    	    
 %             '    , # %  O 
		=	) zz+&  $,c2o159o'/ <#$"	     )  3) 3
&\ &
- 

& 

' 

"  $*  : 	
&LR;3 ;d ; #
S	#
 3-#
 	#

 #
 #
P 		

6 F 
 
8 
 
: 
	
, 
	
%2 
	
%, 
	
6
E 
	
B 
	
*( z
TV
 E  O  s$   H< #I <IIII