
    :'a!                        	 d dl Zn# e$ r d dlZY nw xY wd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ  ee	          d
edefd            Z ee	          d
edefd            Z ee	          d
edee         fd            Z ee	          d
edefd            Z d
edefdZ! ee	          d
edefd            Z" ee	          d
edefd            Z# ee	          d
edefd            Z$ ee	          d
edefd            Z%d
edefdZ& ee	          d
edefd            Z' ee	          d
edefd            Z( ee	          d
edefd            Z) ee	          d
edefd            Z* ee	          d
edefd            Z+ e e,e          	          dedefd            Z-d1de.de/dee         fd Z0 ed!	          d"edefd#            Z1de.de	ee         e.f         fd$Z2d%edefd&Z3d2d(ed)edefd*Z4d+edee         fd,Z5d-ed.ede6fd/Z7d-ed.edefd0Z8dS )3    N)IncrementalDecoder)findall)OptionalTupleUnionListSet)MultibyteIncrementalDecoder)aliases)	lru_cache)UNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDRE_POSSIBLE_ENCODING_INDICATIONENCODING_MARKSUTF8_MAXIMAL_ALLOCATIONIANA_SUPPORTED_SIMILAR)maxsize	characterreturnc                 x    	 t          j        |           }n# t          $ r Y dS w xY wd|v pd|v pd|v pd|v pd|v S )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXunicodedataname
ValueErrorr   descriptions     \/mounts/lovelace/software/anaconda3/lib/python3.11/site-packages/charset_normalizer/utils.pyis_accentuatedr      s    !&y11   uu;&  n,+*E  n[fIf  njz  J  kJ  n  N_  cn  Nn  n    
%%c                     t          j        |           }|s| S |                    d          }t          t	          |d         d                    S )N r      )r   decompositionsplitchrint)r   
decomposedcodess      r   remove_accentr)      s\    *955J S!!E!H	
 	
      c                 f    t          |           }t          j                    D ]\  }}||v r|c S dS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranges       r   unicode_ranger1   ,   sO    
 	NNM!8!>!@!@  
II%% & 4r*   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFLATINr   r   s     r   is_latinr4   :   sF    !&y11   uuk!!r   c                 V    	 |                      d           n# t          $ r Y dS w xY wdS )NasciiFT)encodeUnicodeEncodeErrorr   s    r   is_asciir:   C   sE    !!!!   uu4s    
&&c                 d    t          j        |           }d|v rdS t          |           }|dS d|v S )NPTFPunctuationr   categoryr1   r   character_categorycharacter_ranges      r   is_punctuationrC   J   sG    $-i88
   t#I..OuO++r*   c                 l    t          j        |           }d|v sd|v rdS t          |           }|dS d|v S )NSNTFFormsr>   r@   s      r   	is_symbolrH   Y   sR    $-i88
   C+=$=$=t#I..Ouo%%r*   c                 f    |                                  s| dv rdS t          j        |           }d|v S )N)u   ｜+,;<>TZ)isspacer   r?   r   rA   s     r   is_separatorrR   h   sC     i+KKKt$-i88$$$r*   c                 V    |                                  |                                 k    S N)islowerisupperr9   s    r   is_case_variablerW   r   s%    )"3"3"5"555r*   c                 6    t          j        |           }d|k    S )NCo)r   r?   rQ   s     r   is_private_use_onlyrZ   w   s    $-i88%%%r*   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFCJKr   r   character_names     r   is_cjkr_   }   sH    $))44   uu N""r   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHIRAGANAr   r]   s     r   is_hiraganarb      H    $))44   uu ''r   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFKATAKANAr   r]   s     r   is_katakanarf      rc   r   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFHANGULr   r]   s     r   	is_hangulri      sH    $))44   uu ~%%r   c                 X    	 t          j        |           }n# t          $ r Y dS w xY wd|v S )NFTHAIr   r]   s     r   is_thairl      sH    $))44   uu ^##r   r/   c                 (    t           D ]	}|| v r dS 
dS )NTF)r   )r/   keywords     r   is_unicode_range_secondaryro      s/    2  j  44 ! 5r*      sequencesearch_zonec                    t          | t                    st          t          |           }t	          t
          | d||k    r|n|                             dd                    }t          |          dk    rdS |D ][}|                                                    dd          }t          j
                    D ]\  }}||k    r|c c S ||k    r|c c S \dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr6   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   decodelowerreplacer   r-   )rq   rr   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r   any_specified_encodingr      s    h&& (mmG'DW33''DELLW]eLff G
 7||qt% % %/5577??SII-4]__ 	% 	%)NM!333$$$$$$ 222$$$$$$ 3	% 4r*      r   c                     | dv p>t          t          j        d                    |                     j        t
                    S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r
   )r   s    r   is_multi_byte_encodingr      sL    
 zz   I 5 5d ; ;<<O#  r*   c                     t           D ]I}t           |         }t          |t                    r|g}|D ]}|                     |          r||fc c S  JdS )z9
    Identify and extract SIG/BOM in given sequence.
    )Nr*   )r   ry   rz   
startswith)rq   iana_encodingmarksmarks       r   identify_sig_or_bomr      s    
 ( + +}-eU## 	GE 	+ 	+D""4(( +$d******+	+ 9r*   r   c                 
    | dvS )N>   r   r    )r   s    r   should_strip_sig_or_bomr      s     444r*   Tcp_namestrictc                     |                                                      dd          } t          j                    D ]\  }}| |k    s| |k    r|c S |r"t	          d                    |                     | S )Nrw   rx   z Unable to retrieve IANA for '{}')r~   r   r   r-   r   r   )r   r   r   r   s       r   	iana_namer      s    mmoo%%c3//G)0 ! !%n$$=(@(@     )A  M;BB7KKLLLNr*   decoded_sequencec                     t                      }| D ])}t          |          }||                    |           *t          |          S rT   )setr1   addlist)r   rangesr   rB   s       r   
range_scanr     s\    UUF% 
 
	'	22"

	
 	
 	
 	
 <<r*   iana_name_aiana_name_bc                    t          |           st          |          rdS t          j        d                    |                     j        }t          j        d                    |                    j        } |d          } |d          }d}t          dd          D ]C}t          |g          }|                    |          |                    |          k    r|dz  }D|dz  S )	Ng        r   rt   ru   r            )r   r   r   r   r   rangerz   r}   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r   cp_similarityr     s    k** .D[.Q.Q r'(=(=k(J(JKK^I'(=(=k(J(JKK^I9H%%%D9H%%%D1c]] ' 'qc

;;}%%])C)CCC!Q&! 3&&r*   c                 2    | t           v o|t           |          v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r   is_cp_similarr   (  s!    
 00g[DZ[fDg5ggr*   )rp   )T)9unicodedata2r   ImportErrorcodecsr   rer   typingr   r   r   r   r	   r   _multibytecodecr
   encodings.aliasesr   	functoolsr   charset_normalizer.constantr   r   r   r   r   r   strboolr   r)   r1   r4   r:   rC   rH   rR   rW   rZ   r_   rb   rf   ri   rl   r|   ro   rz   r&   r   r   r   r   r   r   floatr   r   r   r*   r   <module>r      s`  &&&&&    & % % % % %       4 4 4 4 4 4 4 4 4 4 4 4 4 4     7 7 7 7 7 7 % % % % % %      e e e e e e e e e e e e e e e e *+++nc nd n n n ,+n *+++S S    ,+ *+++
S 
Xc] 
 
 
 ,+
 *+++" " " " " ,+"      *+++,c ,d , , , ,+, *+++& & & & & ,+& *+++%C %D % % % ,+% *+++6 6 6 6 6 ,+6&3 &4 & & & & *+++#c #d # # # ,+# *+++(3 (4 ( ( ( ,+( *+++(3 (4 ( ( ( ,+( *+++& & & & & ,+& *+++$s $t $ $ $ ,+$ 33.//0003 4    10 U  QT    : 3     % E(3-2F,G    $53 54 5 5 5 5
 
s 
D 
C 
 
 
 
 c     's ' ' ' ' ' '*hs h h h h h h h hs    	