
    n9d[<                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZ  G d de          Z	 G d de          Z
 G d d	e          ZdS )
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                       e Zd ZdZd Zej                            dg d          d             Zd Z	d Z
d Zd	 Zd
 Zd ZdS )TestUnicodeDammitz"Standalone tests of UnicodeDammit.c                 B    d}t          |          }|j        |k    sJ d S )Nu   I'm already Unicode! ☃)r   unicode_markup)selfmarkupdammits      5lib/python3.11/site-packages/bs4/tests/test_dammit.pytest_unicode_inputz$TestUnicodeDammit.test_unicode_input   s.    3v&&$......    z smart_quotes_to,expect_converted))Nu   ‘’“”)xmlz &#x2018;&#x2019;&#x201C;&#x201D;)htmlz&lsquo;&rsquo;&ldquo;&rdquo;)asciiz''""c                 p    d}t          |dg|          j        }|d                    |          k    sJ dS )zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s   <foo></foo>windows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r
   format)r   r   expect_convertedr   	converteds        r   test_smart_quotes_toz&TestUnicodeDammit.test_smart_quotes_to   sX     0!n-=+
 
 
  	 O223CDDDDDDDDr   c                     d}t          |          }|j                                        dk    sJ |j        dk    sJ d S )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr
   )r   utf8r   s      r   test_detect_utf8z"TestUnicodeDammit.test_detect_utf8&   sO    1t$$'--//7::::$(DDDDDDDr   c                     d}t          |dg          }|j                                        dk    sJ |j        dk    sJ d S )N   
iso-8859-8u   םולשr   r   hebrewr   s      r   test_convert_hebrewz%TestUnicodeDammit.test_convert_hebrew,   sS    $v~66'--//<????$(BBBBBBBr   c                     d}t          |          }|j                                        dk    sJ |j                            d          |k    sJ d S )Ns   ケータイ Watchr   )r   r   r    r
   encode)r   utf_8r   s      r   /test_dont_see_smart_quotes_where_there_are_nonezATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_none2   s\    Iu%%'--//7::::$++G44======r   c                     d                     d          }t          |dg          }|j                                        dk    sJ d S )N   Räksmörgåsr   r%   r*   r   r   r    r   	utf8_datar   s      r    test_ignore_inappropriate_codecsz2TestUnicodeDammit.test_ignore_inappropriate_codecs8   sL    #**733	y<.99'--//7::::::r   c                     d                     d          }dD ]2}t          ||g          }|j                                        dk    sJ 3d S )Nr.   r   )z.utf8z...z
utF---16.!r/   )r   r1   bad_encodingr   s       r   test_ignore_invalid_codecsz,TestUnicodeDammit.test_ignore_invalid_codecs=   sb    #**733	: 	? 	?L"9|n==F+1133w>>>>>	? 	?r   c                     d                     d          }t          |dg          }|j                                        dk    sJ t          |ddg          }|j        d k    sJ d S )Nr.   r   )exclude_encodingsr   r/   r0   s      r   test_exclude_encodingsz(TestUnicodeDammit.test_exclude_encodingsC   s    #**733	 yWIFFF'--//>AAAA '>)BD D D'4//////r   N)__name__
__module____qualname____doc__r   pytestmarkparametrizer   r"   r(   r,   r2   r5   r8    r   r   r   r      s        ,,/ / /
 [*	
 	
 	
 E E EE E EC C C> > >; ; ;
? ? ?0 0 0 0 0r   r   c                   D    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )TestEncodingDetectorc                 X    t          d          }t          |j                  }d|v sJ d S )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodings)r   detectedrE   s      r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterzeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterS   s;    #9; ;+,,	.);;;;;;r   c                 L    dD ] }t          |d          }d|j        k    sJ !d S )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r   r   datar   s      r    test_detect_html5_style_meta_tagz5TestEncodingDetector.test_detect_html5_style_meta_tagY   sH    4 	8 	8D
 #4666Fv777777	8 	8r   c                    d}t           j        j        }t          j        t          j                   	 d }|t           j        _        t          |          }d|j        k    sJ d|j        v sJ t          |d          }|j        sJ 	 t          j        t          j
                   |t           j        _        d S # t          j        t          j
                   |t           j        _        w xY w)NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                     d S Nr@   )strs    r   noopzETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopy   s    tr   Tu   �zhtml.parser)bs4r   chardet_dammitloggingdisableWARNINGr   contains_replacement_charactersr
   r   NOTSET)r   docchardetrR   r   soups         r   "test_last_ditch_entity_replacementz7TestEncodingDetector.test_last_ditch_entity_replacementc   s     2 *+(((	0  (,CJ%"3''F6AAAAAv44444 m44D77777OGN+++(/CJ%%% OGN+++(/CJ%////s   AB9 91C*c                 \    d}t          |          }d|j        k    sJ d|j        k    sJ d S )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r
   r   rK   s      r   test_byte_order_mark_removedz1TestEncodingDetector.test_byte_order_mark_removed   sB    Mt$$ 55555V5555555r   c                 L   d}t          |          }t          |dg          }d|j        k    sJ t          |dg          }d|j        k    sJ dgd |j        D             k    sJ d}t          |dgd	g
          }d	|j        k    sJ dd	gd |j        D             k    sJ d S )Nr_   zutf-16)r   r   )user_encodingsr`   c                     g | ]
}|d          S r   r@   .0xs     r   
<listcomp>zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>   s    EEE!EEEr   r$   r%   )r   rc   c                     g | ]
}|d          S re   r@   rf   s     r   ri   zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>   s    *P*P*PA1Q4*P*P*Pr   r   r   tried_encodings)r   rL   r   beforeafterr'   s         r   )test_known_definite_versus_user_encodingsz>TestEncodingDetector.test_known_definite_versus_user_encodings   s    
 Nt$$ txjIII633333
 dG9===U44444|EEf.DEEEEEEE %v	/;n> > > v77777&*P*P9O*P*P*PPPPPPPr   c                     d}t          |dgdgdg          }d|j        k    sJ g dd |j        D             k    sJ d S )Nr$   	shift-jisr   r%   )r   override_encodingsrc   )rq   r   r%   c                     g | ]
}|d          S re   r@   rf   s     r   ri   zKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>   s    222aQqT222r   rk   r&   s      r   "test_deprecated_override_encodingsz7TestEncodingDetector.test_deprecated_override_encodings   s     %&1] 'y(>	
 
 
 v77777 4332261222
 
 
 
 
 
r   c                 T   d                     d          }d                     d          }||z   |z   }t          j        t                    5  |                    d           d d d            n# 1 swxY w Y   t          j        |          }d|                    d          k    sJ d S )Nu	   ☃☃☃r!   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r*   r=   raisesUnicodeDecodeErrordecoder   	detwingle)r   r!   rv   rZ   fixeds        r   test_detwinglez#TestEncodingDetector.test_detwingle   s    !))&11./5vn/E/E 	
 \!D( ]-.. 	 	JJv	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ',,<V@T@TTTTTTTs   A..A25A2c                     dD ]J}|                     d          }|                    d          sJ t          j        |          }||k    sJ Kd S )N)u   œu   ₓu   ðr!      )r*   endswithr   rz   )r   tricky_unicode_charinputoutputs       r   +test_detwingle_ignores_multibyte_charactersz@TestEncodingDetector.test_detwingle_ignores_multibyte_characters   sj    
$ 	# 	#
 (..v66E>>'*****",U33FU?????	# 	#r   c                    d}|                     d          }d}|                     d          }t          j        } ||d          J d ||d          k    sJ d ||d          k    sJ d ||          k    sJ d ||          k    sJ d	d
z  } |||z             J  |||z             J  |||z   dd          dk    sJ  ||d          dk    sJ  |d	|z   d          dk    sJ  |d|z   d          J d S )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>FrI   r   Tz
iso-8859-1    i  )rJ   search_entire_document)r      a)r*   r   find_declared_encoding)r   html_unicode
html_bytesxml_unicode	xml_bytesmspacers          r   test_find_declared_encodingz0TestEncodingDetector.test_find_declared_encoding   s    J!((11
C&&w//	3qu---555!!L$7777777!!J5555555qq~~----qq||++++ q*$%%---q)#$$,,,
 Afz!4MMM    q4888LHHHHq	!$???<OOOOq	!$???GGGGGr   N)r9   r:   r;   rG   rM   r]   ra   ro   rt   r|   r   r   r@   r   r   rB   rB   Q   s        < < <8 8 8!0 !0 !0F6 6 6Q Q Q>
 
 
$U U U.# # #(H (H (H (H (Hr   rB   c                       e Zd ZdZd Zej                            dddg          d             Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd ZdS )TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c                     t           | _        d S rP   )r   subr   s    r   setup_methodz#TestEntitySubstitution.setup_method  s    %r   zoriginal,substituted)u   foo∀☃õbaru   foo&forall;☃&otilde;bar)u   ‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;c                 F    | j                             |          |k    sJ d S rP   r   substitute_html)r   originalsubstituteds      r   test_substitute_htmlz+TestEntitySubstitution.test_substitute_html  s+     x''11[@@@@@@r   c                 n    dD ]1\  }}d}||z  }||z  }| j                             |          |k    sJ 2d S )N)
)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjr   )z&gt;>)z&lt;<)z&amp;&z3 %s 4r   )r   entityutemplaterawwith_entitiess         r   test_html5_entityz(TestEntitySubstitution.test_html5_entity%  sc    
 	B 	BIFA0  HQ,C$v-M8++C00MAAAAA7	B 	Br   c                     d}d}| j                             |          |k    sJ d}d}| j                             |          |k    sJ d S )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsr   )r   rL   r   s      r   )test_html5_entity_with_variation_selectorz@TestEntitySubstitution.test_html5_entity_with_variation_selectorC  s`     (*x''--7777-+x''--777777r   c                 L    d}| j                             |d          |k    sJ d S )NWelcome to "my bar"Fr   substitute_xmlr   ss     r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsez`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseO  s0    !x&&q%00A555555r   c                     | j                             dd          dk    sJ | j                             dd          dk    sJ d S )NWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   r   s    r   6test_xml_attribute_quoting_normally_uses_double_quoteszMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesS  sN    x&&y$77;FFFFx&&{D99^KKKKKKr   c                 L    d}| j                             |d          dk    sJ d S )Nr   Tz'Welcome to "my bar"'r   r   s     r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quoteszfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesW  s1    !x&&q$//3LLLLLLLr   c                 L    d}| j                             |d          dk    sJ d S )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   s     r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quoteszyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes[  s1    %x&&q$//3XXXXXXXr   c                 J    d}| j                             |          |k    sJ d S )Nr   r   )r   quoteds     r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quotedzSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quoted_  s.    *x&&v..&888888r   c                 F    | j                             d          dk    sJ d S )Nzfoo<bar>zfoo&lt;bar&gt;r   r   s    r   'test_xml_quoting_handles_angle_bracketsz>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsc  s*    x&&z226FFFFFFFr   c                 F    | j                             d          dk    sJ d S )NzAT&TzAT&amp;Tr   r   s    r   #test_xml_quoting_handles_ampersandsz:TestEntitySubstitution.test_xml_quoting_handles_ampersandsf  s)    x&&v..*<<<<<<r   c                 F    | j                             d          dk    sJ d S )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   r   s    r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityz\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entityi  s*    x&&}559NNNNNNNr   c                 F    | j                             d          dk    sJ d S )Nr   z&Aacute;T&amp;T)r   "substitute_xml_containing_entitiesr   s    r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityz[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityl  s*    x::=IIM^^^^^^^r   c                 J    d}| j                             |          |k    sJ dS )z:There's no need to do this except inside attribute values.zBob's "bar"Nr   )r   texts     r    test_quotes_not_html_substitutedz7TestEntitySubstitution.test_quotes_not_html_substitutedo  s.    x''--555555r   N)r9   r:   r;   r<   r   r=   r>   r?   r   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r     s8       ;;& & & [2
 C		
 A A AB B B<
8 
8 
86 6 6L L LM M MY Y Y9 9 9G G G= = =O O O_ _ _6 6 6 6 6r   r   )r=   rU   rS   r   
bs4.dammitr   r   r   objectr   rB   r   r@   r   r   <module>r      s     



               C0 C0 C0 C0 C0 C0 C0 C0J{H {H {H {H {H6 {H {H {H|c6 c6 c6 c6 c6V c6 c6 c6 c6 c6r   