
    wOg0                         d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZmZmZ  G d	 d
          Z G d d          Zeeef         Zee         Z G d d          ZdS )    )aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                      e Zd Z	 	 d&dededededddee         d	ee         fd
Zde	defdZ
de	defdZedefd            ZdefdZdefdZd'dZedefd            Zedee         fd            Zedefd            Zedefd            Zedee         fd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zedefd            Zeded          fd            Zedefd            Zedee         fd             Zedee         fd!            Z d(d#edefd$Z!edefd%            Z"dS ))CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadpreemptive_declarationc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r   s           S/var/www/AppWebDeiv/myenv/lib/python3.11/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   se      '.'6,5%348+-,/"04/3&56L$$$    otherreturnc                     t          |t                    s/t          |t                    rt          |          | j        k    S dS | j        |j        k    o| j        |j        k    S )NF)
isinstancer   strr   encodingfingerprintr,   r0   s     r-   __eq__zCharsetMatch.__eq__(   s_    %.. 	%%% 9 ''4=885}.X43CuGX3XXr/   c                    t          |t                    st          t          | j        |j        z
            }t          | j        |j        z
            }|dk     r|dk    r| j        |j        k    S |dk     rC|dk    r=t          | j                  t          k    r| j        |j        k     S | j	        |j	        k    S | j        |j        k     S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r3   r   
ValueErrorabschaos	coherencelenr    r   multi_byte_usage)r,   r0   chaos_differencecoherence_differences       r-   __lt__zCharsetMatch.__lt__/   s     %.. 	"%dj5;&>"?"?&)$.5?*J&K&K d""';d'B'B>EO33$$)=)E)E 4=!!%555zEK//(5+AAAzEK''r/   c                 j    dt          t          |                     t          | j                  z  z
  S )Ng      ?)r>   r4   rawr,   s    r-   r?   zCharsetMatch.multi_byte_usageE   s&    c#d))nns48}}455r/   c                 ^    | j          t          | j        | j        d          | _         | j         S )Nstrict)r*   r4   r    r!   rE   s    r-   __str__zCharsetMatch.__str__I   s)    <t}dnhGGDL|r/   c                 B    d                     | j        | j                  S )Nz<CharsetMatch '{}' bytes({})>)formatr5   r6   rE   s    r-   __repr__zCharsetMatch.__repr__O   s    .55dmTEUVVVr/   c                     t          |t                    r|| k    r't          d                    |j                            d |_        | j                            |           d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r3   r   r:   rJ   	__class__r*   r&   appendr7   s     r-   add_submatchzCharsetMatch.add_submatchR   sk    %.. 	%4--MTTO    E"""""r/   c                     | j         S N)r!   rE   s    r-   r5   zCharsetMatch.encoding]   s
    ~r/   c                     g }t          j                    D ]F\  }}| j        |k    r|                    |           &| j        |k    r|                    |           G|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr5   rN   )r,   also_known_asups       r-   encoding_aliaseszCharsetMatch.encoding_aliasesa   sn    
 $&MOO 	( 	(DAq}!!$$Q''''!##$$Q'''r/   c                     | j         S rQ   r$   rE   s    r-   bomzCharsetMatch.bomn       ##r/   c                     | j         S rQ   rY   rE   s    r-   byte_order_markzCharsetMatch.byte_order_markr   r[   r/   c                 $    d | j         D             S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                     g | ]
}|d          S )r    ).0es     r-   
<listcomp>z*CharsetMatch.languages.<locals>.<listcomp>|   s    ...!...r/   r#   rE   s    r-   r   zCharsetMatch.languagesv   s     /.do....r/   c                    | j         shd| j        v rdS ddlm}m} t          | j                  r || j                  n || j                  }t          |          dk    sd|v rdS |d         S | j         d         d         S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r#   could_be_from_charsetcharset_normalizer.cdrh   ri   r   r5   r>   )r,   rh   ri   r   s       r-   languagezCharsetMatch.language~   s      	  $444 y XWWWWWWW *$-887%%dm444''66  9~~""my&@&@ yQ<q!!$$r/   c                     | j         S rQ   )r"   rE   s    r-   r<   zCharsetMatch.chaos   s    $$r/   c                 :    | j         sdS | j         d         d         S )Nr   r   r   rd   rE   s    r-   r=   zCharsetMatch.coherence   s#     	3q!!$$r/   c                 4    t          | j        dz  d          S Nd      )ndigits)roundr<   rE   s    r-   percent_chaoszCharsetMatch.percent_chaos   s    TZ#%q1111r/   c                 4    t          | j        dz  d          S rq   )ru   r=   rE   s    r-   percent_coherencezCharsetMatch.percent_coherence   s    T^c)15555r/   c                     | j         S )z+
        Original untouched bytes.
        )r    rE   s    r-   rD   zCharsetMatch.raw   s    
 }r/   c                     | j         S rQ   )r&   rE   s    r-   submatchzCharsetMatch.submatch   s
    |r/   c                 2    t          | j                  dk    S Nr   )r>   r&   rE   s    r-   has_submatchzCharsetMatch.has_submatch   s    4<  1$$r/   c                     | j         | j         S d t          |           D             }t          t          d |D                                 | _         | j         S )Nc                 ,    g | ]}t          |          S r`   )r   )ra   chars     r-   rc   z*CharsetMatch.alphabets.<locals>.<listcomp>   s-     0
 0
 0
$(M$0
 0
 0
r/   c                     h | ]}||S r`   r`   )ra   rs     r-   	<setcomp>z)CharsetMatch.alphabets.<locals>.<setcomp>   s    +L+L+L!!+LA+L+L+Lr/   )r%   r4   sortedlist)r,   detected_rangess     r-   	alphabetszCharsetMatch.alphabets   sj    +''0
 0
,/II0
 0
 0
  &d+L+L+L+L+L&M&MNN##r/   c                 6    | j         gd | j        D             z   S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                     g | ]	}|j         
S r`   )r5   )ra   ms     r-   rc   z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>   s    "D"D"D!1:"D"D"Dr/   )r!   r&   rE   s    r-   rk   z"CharsetMatch.could_be_from_charset   s%     "D"Dt|"D"D"DDDr/   utf_8r5   c                 :     j          j         |k    r| _         t                     } j        J j                                        dvr/t	          t
           fd|dd         d          }||dd         z   }|                    |d           _         j        S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8r   c                     | j         |                                 d         |                                 d                                      |                                 d         t	          j                            S )Nr   r   )stringspanreplacegroupsr   r)   )r   r,   s    r-   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sW    ahqvvxx{QVVXXa['@AII

1y1F'G'G  r/   i    r   r   )r)   r4   r+   lowerr   r   encoder(   )r,   r5   decoded_stringpatched_headers   `   r-   outputzCharsetMatch.output   s    
  (D,AX,M,M$,D! YYN,80668812 2 "%3    #5D5)" " "0.2G!G#1#8#89#M#MD ##r/   c                 h    t          |                                                                           S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrE   s    r-   r6   zCharsetMatch.fingerprint   s&    
 dkkmm$$..000r/   )NN)r0   r   r1   N)r   )#__name__
__module____qualname__bytesr4   floatboolr   r.   objectr8   rB   propertyr?   rH   rK   rO   r5   r
   rW   rZ   r]   r   rm   r<   r=   rv   rx   rD   r{   r~   r   rk   r   r6   r`   r/   r-   r   r      s        *.04M MM M 	M
 M &M "#M !)M M M M8YF Yt Y Y Y Y(F (t ( ( ( (, 6% 6 6 6 X6    W# W W W W	# 	# 	# 	# #    X 
$s) 
 
 
 X
 $T $ $ $ X$ $ $ $ $ X$ /49 / / / X/ %# % % % X%6 %u % % % X% %5 % % % X%
 2u 2 2 2 X2 65 6 6 6 X6 U    X $~.    X %d % % % X% 	$49 	$ 	$ 	$ X	$ EtCy E E E XE$ $s $ $ $ $ $8 1S 1 1 1 X1 1 1r/   r   c                       e Zd ZdZddeee                  fdZdee         fdZ	de
eef         defdZdefd	Zdefd
ZdeddfdZded         fdZded         fdZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 6    |rt          |          ng | _        d S rQ   )r   _results)r,   r   s     r-   r.   zCharsetMatches.__init__   s    ?F,NF7OOOBr/   r1   c              #   $   K   | j         E d {V  d S rQ   r   rE   s    r-   __iter__zCharsetMatches.__iter__   s&      =         r/   itemc                     t          |t                    r| j        |         S t          |t                    r't	          |d          }| j        D ]}||j        v r|c S t          )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r3   intr   r4   r   rk   KeyError)r,   r   results      r-   __getitem__zCharsetMatches.__getitem__   sv    
 dC   	'=&&dC   	"T5))D- " "6777!MMM 8r/   c                 *    t          | j                  S rQ   r>   r   rE   s    r-   __len__zCharsetMatches.__len__  s    4=!!!r/   c                 2    t          | j                  dk    S r}   r   rE   s    r-   __bool__zCharsetMatches.__bool__  s    4=!!A%%r/   c                    t          |t                    s4t          d                    t	          |j                                      t          |j                  t          k     rB| j	        D ]:}|j
        |j
        k    r(|j        |j        k    r|                    |            dS ;| j	                            |           t          | j	                  | _	        dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r3   r   r:   rJ   r4   rM   r>   rD   r   r   r6   r<   rO   rN   r   )r,   r   matchs      r-   rN   zCharsetMatches.append  s    
 $-- 	?FF''    tx==+++  $(888U[DJ=V=V&&t,,,FFT"""t}--r/   r   c                 .    | j         sdS | j         d         S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rE   s    r-   bestzCharsetMatches.best(  s      } 	4}Qr/   c                 *    |                                  S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rE   s    r-   firstzCharsetMatches.first0  s     yy{{r/   rQ   )r   r   r   __doc__r   r
   r   r.   r	   r   r   r   r4   r   r   r   r   rN   r   r   r`   r/   r-   r   r      s0        
O Ol); < O O O O!(<0 ! ! ! !c3h L    " " " " "&$ & & & &.< .D . . . .( h~.        x/      r/   r   c                       e Zd Zdedee         dee         dee         dedee         deded	ed
ee         defdZe	de
eef         fd            ZdefdZdS )CliDetectionResultpathr5   rW   alternative_encodingsrm   r   r   r<   r=   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        d S rQ   )r   r   r5   rW   r   rm   r   r   r<   r=   r   )r,   r   r5   rW   r   rm   r   r   r<   r=   r   r   s               r-   r.   zCliDetectionResult.__init__<  s\     	+7'/+;0E"%$-$2!
 )".r/   r1   c                     | j         | j        | j        | j        | j        | j        | j        | j        | j        | j	        | j
        dS )Nr   r5   rW   r   rm   r   r   r<   r=   r   r   r   rE   s    r-   __dict__zCliDetectionResult.__dict__V  sO     I $ 5%)%?"1Z - -
 
 	
r/   c                 0    t          | j        dd          S )NT   )ensure_asciiindent)r   r   rE   s    r-   to_jsonzCliDetectionResult.to_jsonf  s    T]a@@@@r/   N)r   r   r   r4   r   r
   r   r   r.   r   r   r   r   r   r`   r/   r-   r   r   ;  s        // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ / / / /4 
$sCx. 
 
 
 X
A A A A A A Ar/   r   N)encodings.aliasesr   hashlibr   jsonr   rer   typingr   r   r	   r
   r   r   r   constantr   r   utilsr   r   r   r   r   r4   r   CoherenceMatchr   r   r`   r/   r-   <module>r      sr   % % % % % %                   D D D D D D D D D D D D D D D D D D G G G G G G G G C C C C C C C C C Cf1 f1 f1 f1 f1 f1 f1 f1R@ @ @ @ @ @ @ @F sEz"' ,A ,A ,A ,A ,A ,A ,A ,A ,A ,Ar/   