
    :'a0                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ  G d d          Z G d d          Ze
eef         Ze	e         Z G d d          Z eZ!dS )    N)aliases)sha256)dumps)OptionalListTupleSet)Counter)subcompile)TOO_BIG_SEQUENCE)
mess_ratio)	iana_nameis_multi_byte_encodingunicode_rangec                      e Zd Z	 d)dededededddee         fd	Zd
efdZ	d
efdZ
ed
efd            Zed
efd            Zed
efd            Zd
efdZd
efdZd*dZed
efd            Zed
ee         fd            Zed
efd            Zed
efd            Zed
ee         fd            Zed
efd            Zed
efd            Zed
efd            Zed
efd            Zed
efd            Zed
efd            Zed
ed          fd            Zed
efd             Z ed
ee         fd!            Z!ed
ee         fd"            Z"d+d#Z#d+d$Z$d,d&ed
efd'Z%ed
efd(            Z&dS )-CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          ]/mounts/lovelace/software/anaconda3/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s[      ) /#-#%'"# $&    returnc                     t          |t                    sGt          d                    t	          |j                  t	          | j                                      | j        |j        k    o| j        |j        k    S )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr(   others     r)   __eq__zCharsetMatch.__eq__(   sl    %.. 	xDKKCPUP_L`L`befjftbubuvvwww}.X43CuGX3XXr+   c                     t          |t                    st          t          | j        |j        z
            }|dk     r| j        |j        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?)r.   r   
ValueErrorabschaos	coherence)r(   r6   chaos_differences      r)   __lt__zCharsetMatch.__lt__-   s^     %.. 	tzEK788 d"">EO33zEK''r+   c                 p    t          j        dt                     t          t	          |           d          S )z
        Check once again chaos in decoded text, except this time, with full content.
        Use with caution, this can be very slow.
        Notice: Will be removed in 3.0
        z=chaos_secondary_pass is deprecated and will be removed in 3.0g      ?)warningswarnDeprecationWarningr   r1   r(   s    r)   chaos_secondary_passz!CharsetMatch.chaos_secondary_pass<   s6     	UWijjjII
 
 	
r+   c                 :    t          j        dt                     dS )zy
        Coherence ratio on the first non-latin language detected if ANY.
        Notice: Will be removed in 3.0
        z<coherence_non_latin is deprecated and will be removed in 3.0r   )r@   rA   rB   rC   s    r)   coherence_non_latinz CharsetMatch.coherence_non_latinI   s     	TVhiiirr+   c                     t          j        dt                     t          d          }t	          |dt          |                                                     }t          |                                          S )z_
        Word counter instance on decoded text.
        Notice: Will be removed in 3.0
        z2w_counter is deprecated and will be removed in 3.0z[0-9\W\n\r\t]+ )	r@   rA   rB   
re_compiler   r1   lowerr
   split)r(   not_printable_patternstring_printable_onlys      r)   	w_counterzCharsetMatch.w_counterR   se     	JL^___ *+< = = #$93D		@Q@Q R R,2244555r+   c                 ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r'   r1   r   r   rC   s    r)   __str__zCharsetMatch.__str__^   s)    <t}dnhGGDL|r+   c                 B    d                     | j        | j                  S )Nz<CharsetMatch '{}' bytes({})>)r0   r3   r4   rC   s    r)   __repr__zCharsetMatch.__repr__d   s    .55dmTEUVVVr+   r6   c                     t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r.   r   r9   r0   r2   r'   r#   appendr5   s     r)   add_submatchzCharsetMatch.add_submatchg   sa    %.. 	t%4--ZaabgbqrrsssE"""""r+   c                     | j         S N)r   rC   s    r)   r3   zCharsetMatch.encodingn   s
    ~r+   c                     g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr3   rU   )r(   also_known_asups       r)   encoding_aliaseszCharsetMatch.encoding_aliasesr   sn    
 MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r+   c                     | j         S rX   r!   rC   s    r)   bomzCharsetMatch.bom       ##r+   c                     | j         S rX   r`   rC   s    r)   byte_order_markzCharsetMatch.byte_order_mark   rb   r+   c                 $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                     g | ]
}|d          S )r    ).0es     r)   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>   s    ...!...r+   r    rC   s    r)   r   zCharsetMatch.languages   s     /.do....r+   c                    | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )mb_encoding_languagesencoding_languageszLatin BasedUnknown)r    could_be_from_charsetcharset_normalizer.cdro   rp   r   r3   len)r(   ro   rp   r   s       r)   languagezCharsetMatch.language   s      	  $444 y XWWWWWWW@VW[Wd@e@e  M--dm<<<k}k}  C  L  lM  lMI9~~""my&@&@ yQ<q!!$$r+   c                     | j         S rX   )r   rC   s    r)   r;   zCharsetMatch.chaos   s    $$r+   c                 :    | j         sdS | j         d         d         S )Nr   r      rk   rC   s    r)   r<   zCharsetMatch.coherence   s#     	2q!!$$r+   c                 4    t          | j        dz  d          S Nd      )ndigits)roundr;   rC   s    r)   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r+   c                 4    t          | j        dz  d          S rz   )r~   r<   rC   s    r)   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r+   c                     | j         S )z+
        Original untouched bytes.
        )r   rC   s    r)   rawzCharsetMatch.raw   s    
 }r+   c                     | j         S rX   )r#   rC   s    r)   submatchzCharsetMatch.submatch   s
    |r+   c                 2    t          | j                  dk    S )Nr   )rt   r#   rC   s    r)   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r+   c                    | j         | j         S t                      }t          |           D ]5}t          |          }|r"|                    t          |                     6t          t          |                    | _         | j         S rX   )r"   setr1   r   addsortedlist)r(   detected_ranges	characterdetected_ranges       r)   	alphabetszCharsetMatch.alphabets   s    +''%%T 	 	I*955N ##!),,    &d?&;&;<<##r+   c                 6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                     g | ]	}|j         
S rg   )r3   )rh   ms     r)   rj   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr+   )r   r#   rC   s    r)   rr   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr+   c                     | S z>
        Kept for BC reasons. Will be removed in 3.0.
        rg   rC   s    r)   firstzCharsetMatch.first   	     r+   c                     | S r   rg   rC   s    r)   bestzCharsetMatch.best   r   r+   utf_8r3   c                     | j         | j         |k    r/|| _         t          |                               |d          | _        | j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        Nreplace)r&   r1   encoder%   )r(   r3   s     r)   outputzCharsetMatch.output   sI    
  (D,AX,M,M$,D!#&t99#3#3Hi#H#HD ##r+   c                 h    t          |                                                                           S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrC   s    r)   r4   zCharsetMatch.fingerprint   s&    
 dkkmm$$..000r+   rX   )r6   r   r,   N)r,   r   )r   )'__name__
__module____qualname__bytesr1   floatboolr   r*   r7   r>   propertyrD   rF   r
   rN   rQ   rS   rV   r3   r   r^   ra   rd   r   ru   r;   r<   r   r   r   r   r   r   rr   r   r   r   r4   rg   r+   r)   r   r      sH        .2' '' "' #	'
 !' *' &c]' ' ' '2Yt Y Y Y Y
(t ( ( ( ( 

e 

 

 

 X

 U    X 	67 	6 	6 	6 X	6    W# W W W W# # # # #    X 
$s) 
 
 
 X
 $T $ $ $ X$ $ $ $ $ X$ /49 / / / X/ %# % % % X%. %u % % % X% %5 % % % X%
 2u 2 2 2 X2 65 6 6 6 X6 U    X $~.    X %d % % % X% $49 $ $ $ X$ EtCy E E E XE      	$ 	$s 	$ 	$ 	$ 	$ 	$ 1S 1 1 1 X1 1 1r+   r   c                       e Zd ZdZddee         fdZd ZdefdZde	fdZ
d	eddfd
Zded         fdZded         fdZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 6    |rt          |          ng | _        d S rX   )r   _results)r(   r   s     r)   r*   zCharsetMatches.__init__  s    +2:wr+   c              #   &   K   | j         D ]}|V  d S rX   r   )r(   results     r)   __iter__zCharsetMatches.__iter__  s,      m 	 	FLLLL	 	r+   r,   c                     t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r.   intr   r1   r   rr   KeyError)r(   itemr   s      r)   __getitem__zCharsetMatches.__getitem__
  sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r+   c                 *    t          | j                  S rX   )rt   r   rC   s    r)   __len__zCharsetMatches.__len__  s    4=!!!r+   r   c                    t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k    rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r.   r   r9   r0   r1   r2   rt   r   r   r   r4   r;   rV   rU   r   )r(   r   matchs      r)   rU   zCharsetMatches.append  s    
 $-- 	jLSSTWX\XfTgTghhiiitx==,,,  $(888U[DJ=V=V&&t,,,FFT"""t}--r+   r   c                 .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rC   s    r)   r   zCharsetMatches.best+  s      } 	4}Qr+   c                 *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rC   s    r)   r   zCharsetMatches.first3  s     yy{{r+   rX   )r   r   r   __doc__r   r   r*   r   r   r   r   rU   r   r   r   rg   r+   r)   r   r      s         ; ;\ 2 ; ; ; ;  <    " " " " ".< .D . . . .  h~.        x/      r+   r   c                       e Zd Zdededee         dee         dedee         deded	ed
ee         defdZe	d             Z
defdZdS )CliDetectionResultpathr3   r^   alternative_encodingsru   r   r   r;   r<   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S rX   )r   r   r3   r^   r   ru   r   r   r;   r<   r   )r(   r   r3   r^   r   ru   r   r   r;   r<   r   r   s               r)   r*   zCliDetectionResult.__init__@  sZ    	(  0%:" ",
"(r+   c                     | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r3   r^   r   ru   r   r   r;   r<   r   r   r   rC   s    r)   __dict__zCliDetectionResult.__dict__M  sO     I $ 5%)%?"1Z - -
 
 	
r+   r,   c                 0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rC   s    r)   to_jsonzCliDetectionResult.to_json]  s$    M
 
 
 	
r+   N)r   r   r   r1   r   r   r   r   r*   r   r   r   rg   r+   r)   r   r   >  s        )S )C )49 )eijmen )z} )  KO  PS  KT )  fj )  sx )  EJ )  Zb  cf  Zg )  w{ ) ) ) ) 
 
 X

 
 
 
 
 
 
r+   r   )"r@   encodings.aliasesr   hashlibr   jsonr   typingr   r   r   r	   collectionsr
   rer   r   rI   charset_normalizer.constantr   charset_normalizer.mdr   charset_normalizer.utilsr   r   r   r   r   r1   r   CoherenceMatchr   r   CharsetNormalizerMatchrg   r+   r)   <module>r      s    % % % % % %             - - - - - - - - - - - -       ) ) ) ) ) ) ) ) 8 8 8 8 8 8 , , , , , , U U U U U U U U U Um1 m1 m1 m1 m1 m1 m1 m1`9 9 9 9 9 9 9 9x sEz"' $
 $
 $
 $
 $
 $
 $
 $
N &   r+   