
    n9dJ                     d   d Z dgZddlZddlZddlmZmZmZmZm	Z	 ddl
mZmZ ddlZddlmZmZ ddl
mZmZmZmZ 	 ddlmZ d	Zn# e$ rZdd
lmZ dZY dZ[ndZ[ww xY w G d de	          Z G d dej                  Z G d de          Z  G d dej!                  Z" G d de"          Z#dS )MITHTML5TreeBuilder    N)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc                   B    e Zd ZdZdZeeeegZdZ		 d	dZ
d Zd Zd ZdS )
r   a  Use html5lib to build a tree.

    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.

    * You can't use a SoupStrainer to parse only part of a document.
    html5libTNc              #      K   || _         |rt          j        dd           t          j        |           |d d dfV  d S )NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.   
stacklevelF)user_specified_encodingwarningswarnr   warn_if_markup_looks_like_xml)selfmarkupr   document_declared_encodingexclude_encodingss        5lib/python3.11/site-packages/bs4/builder/_html5lib.pyprepare_markupzHTML5TreeBuilder.prepare_markup@   sg       (?$
  	M|    	<VDDDtT5))))))    c                    | j         j        t          j        dd           t	          j        | j                  }|| j        _        t                      }t          |t                    st          r| j        |d<   n
| j        |d<    |j        |fi |}t          |t                    rd |_        n:|j        j        j        d         }t          |t                    s|j        }||_        d | j        _        d S )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r   )treeoverride_encodingencodingr   )soup
parse_onlyr   r   r   
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamcharEncodingname)r   r   r/   extra_kwargsdocr5   s         r"   feedzHTML5TreeBuilder.feedU   s   9+M U    $$*ABBB)/&vv&#&& 	H H484P011+/+GZ(fl622\22 fc"" 	6 %)C!! & 0 7 DQ G/55 ; %6$:!$5C!)-&&&r$   c                 R    t          || j        | j                  | _        | j        S )N)store_line_numbers)TreeBuilderForHtml5libr*   r>   r.   )r   namespaceHTMLElementss     r"   r-   z#HTML5TreeBuilder.create_treebuildert   s1    "8!49#6#
 #
 #
 &&r$   c                     d|z  S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html> )r   fragments     r"   test_fragment_to_documentz*HTML5TreeBuilder.test_fragment_to_document{   s    :XEEr$   )NN)__name__
__module____qualname____doc__NAMEr   r   r   featuresTRACKS_LINE_NUMBERSr#   r<   r-   rD   rB   r$   r"   r   r   *   s          Dj&$/H  KO* * * **. . .>' ' 'F F F F Fr$   c                   Z     e Zd Z	 	 d fd	Zd Zd Zd Zd Zd Zd	 Z	d
 Z
d Zd Z xZS )r?   NTc                     |r|| _         nddlm}  |	 dd|i|| _         t          t          |                               |           d | _        || _        d S )Nr   BeautifulSoup html.parserr>   )rP   rQ   )r*   bs4rO   superr?   __init__r/   r>   )r   r@   r*   r>   kwargsrO   	__class__s         r"   rT   zTreeBuilderForHtml5lib.__init__   s     		DII)))))) &! 6H DI 	$d++445JKKK "4r$   c                 j    | j                                          t          | j         | j         d           S N)r*   resetElementr   s    r"   documentClassz$TreeBuilderForHtml5lib.documentClass   s*    	ty$)T222r$   c                     |d         }|d         }|d         }t          j        |||          }| j                            |           d S )Nr9   publicIdsystemId)r   for_name_and_idsr*   object_was_parsed)r   tokenr9   r^   r_   doctypes         r"   insertDoctypez$TreeBuilderForHtml5lib.insertDoctype   sO    V}$$*48DD	##G,,,,,r$   c                     i }| j         r:| j        r3| j         j        j                                        \  }}||d<   |dz
  |d<    | j        j        ||fi |}t          || j        |          S )N
sourceline   	sourcepos)r/   r>   r6   r7   positionr*   new_tagrZ   )r   r9   	namespacerU   rf   rh   tags          r"   elementClassz#TreeBuilderForHtml5lib.elementClass   s    ; 	.42 	. %)K$9$@$I$I$K$K!J	#-F< "+A+F;dii::6::sDIy111r$   c                 F    t          t          |          | j                  S rX   )TextNoder   r*   )r   datas     r"   commentClassz#TreeBuilderForHtml5lib.commentClass   s    ty111r$   c                 ~    ddl m}  |dd          | _        d| j        _        t	          | j        | j        d           S )Nr   rN   rP   rQ   z[document_fragment])rR   rO   r*   r9   rZ   )r   rO   s     r"   fragmentClassz$TreeBuilderForHtml5lib.fragmentClass   sH    %%%%%% "M"m44	.	ty$)T222r$   c                 D    | j                             |j                   d S rX   )r*   appendelementr   nodes     r"   appendChildz"TreeBuilderForHtml5lib.appendChild   s     	&&&&&r$   c                     | j         S rX   )r*   r[   s    r"   getDocumentz"TreeBuilderForHtml5lib.getDocument   s
    yr$   c                 J    t           j                            |           j        S rX   )treebuilder_baseTreeBuildergetFragmentrv   r[   s    r"   r   z"TreeBuilderForHtml5lib.getFragment   s    +77==EEr$   c                     ddl m g t          j        d          dfd	 |d           d                              S )Nr   rN   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c                 v   t          | 	          r	 t          | t                    r
                    |           }|r|                    d          }|j        dk    rj|                    d          pd}|                    d          p|                    d          pd}                    dd|z  d|d	|d
|d	           d S                     dd|z  d|d           d S                     dd|z  d           d S t          | t                    r!                    dd|z  d| d           d S t          | t                    r!                    dd|z  d| d           d S | j        rt          | j                 d| j
        }n| j
        }                    dd|z  d|d           | j        rg }t          | j                                                  D ]w\  }}t          |t                    rt          |j                 d|j
        }t          |t                    rd                    |          }|                    ||f           xt!          |          D ])\  }}                    dd|dz   z  |d|d           *|dz  }| j        D ]} ||           d S )Nrg      rP   r   r&   | z
<!DOCTYPE z "z" "z">>z<!DOCTYPE >z<!-- z -->"<z=")r1   r   matchgroup	lastindexru   r   r   rk   r   r9   attrslistitemsr
   joinsortedchildren)rv   indentmr9   r^   r_   
attributesvaluechildrO   
doctype_rervserializeElements            r"   r   z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement   s5   '=11 '7++ %4$$W-- 
B771::D{Q#$771::#3#$771::#A#Ar			#&<<<xxx#K L L L L L 			fddd"KLLLLLIII#,,,@AAAAAGW-- 4			cFlllGGGDEEEEEG_55 4			vwww?@@@@@$ (&.w/@&A&A&A&-ll4DD #<D			sV|||TTT:;;;= 
T!#J'+GM,?,?,A,A'B'B 9 9e%d,?@@ S.6t~.F.F.F		#RD%eT22 4$'HHUOOE"))4-8888'-j'9'9 T Te			#!2D2DdddEEE"RSSSS!$- 4 4E$$UF33334 4r$   
)r   )rR   rO   recompiler   )r   rv   rO   r   r   r   s     @@@@r"   testSerializerz%TreeBuilderForHtml5lib.testSerializer   s    %%%%%%Z [\\
(	4 (	4 (	4 (	4 (	4 (	4 (	4 (	4 (	4R 	!$$$yy}}r$   )NT)rE   rF   rG   rT   r\   rd   rm   rq   rs   ry   r{   r   r   __classcell__)rV   s   @r"   r?   r?      s        37$(5 5 5 5 5 5,3 3 3- - -2 2 22 2 23 3 3' ' '  F F F0 0 0 0 0 0 0r$   r?   c                   >    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	S )
AttrListc                 P    || _         t          | j         j                  | _        d S rX   )rv   r0   r   )r   rv   s     r"   rT   zAttrList.__init__   s!    $,,--


r$   c                 r    t          | j                                                                                  S rX   )r   r   r   __iter__r[   s    r"   r   zAttrList.__iter__   s*    DJ$$&&''00222r$   c                    | j         j        pi }||                    dg           v s0| j         j        |v rK||                    | j         j        g           v r)t	          |t
                    st          j        |          }|| j         |<   d S )N*)rv   cdata_list_attributesgetr9   r1   r   r   findall)r   r9   r   	list_attrs       r"   __setitem__zAttrList.__setitem__   s     L6<"	IMM#r****!Y..IMM$,*;R@@@@ eT** 8(077"Tr$   c                 N    t          | j                                                  S rX   )r   r   r   r[   s    r"   r   zAttrList.items
  s    DJ$$&&'''r$   c                 N    t          | j                                                  S rX   r   r   keysr[   s    r"   r   zAttrList.keys  s    DJOO%%&&&r$   c                 *    t          | j                  S rX   )lenr   r[   s    r"   __len__zAttrList.__len__  s    4:r$   c                     | j         |         S rX   )r   r   r9   s     r"   __getitem__zAttrList.__getitem__  s    z$r$   c                 R    |t          | j                                                  v S rX   r   r   s     r"   __contains__zAttrList.__contains__  s!    tDJOO--....r$   N)rE   rF   rG   rT   r   r   r   r   r   r   r   rB   r$   r"   r   r      s        . . .3 3 3# # #( ( (' ' '       / / / / /r$   r   c                       e Zd Zd Zd Zd Zd Z eee          ZddZ	d Z
d Zd	 Zd
 Zd Zd Z ee          ZdS )rZ   c                 z    t           j                            | |j                   || _        || _        || _        d S rX   )r}   NoderT   r9   rv   r*   rk   )r   rv   r*   rk   s       r"   rT   zElement.__init__  s6    &&tW\:::	"r$   c                    d x}}t          |t                    r|x}}nLt          |t                    r|}n4|j        j        t
          k    r|j        x}}| |_        n|j        }| |_        t          |t                    s |j        |j                                         |~| j        j        rr| j        j        d         j        t
          k    rR| j        j        d         }| j	        
                    ||z             }|                    |           || j	        _        d S t          |t                    r| j	        
                    |          }| j        j        r| j                            d          }n-| j        j        | j	                                        }n| j        }| j	                            || j        |           d S )NF)parentmost_recent_element)r1   r2   r   rv   rV   r   r   extractcontentsr*   
new_stringreplace_with_most_recent_element_last_descendantnext_elementra   )r   rx   string_childr   old_elementnew_elementr   s          r"   ry   zElement.appendChild  s   ##udC   	 $('L55c"" 
	 EE\#66#'</L5DKKLEDK%%% 	#%,*BL  """$)>$%b)3FF ,/3K)..{\/IJJK$$[111-8DI***$$$ 3	,,T22
 |$ 	3&*l&C&CE&J&J##*6
 '+i&@&@&B&B##&*l#I''dl$7 ( 9 9 9 9 9r$   c                 b    t          | j        t                    ri S t          | j                  S rX   )r1   rv   r   r   r[   s    r"   getAttributeszElement.getAttributesS  s+    dlG,, 	I%%%r$   c                    |t          |          dk    rg }t          |                                          D ]+\  }}t          |t                    rt          | }||= |||<   ,| j        j                            | j	        |           t          |                                          D ]\  }}|| j
        |<   | j        j                            | j
                   d S d S d S )Nr   )r   r   r   r1   tupler
   r*   builder$_replace_cdata_list_attribute_valuesr9   rv   set_up_substitutions)r   r   converted_attributesr9   r   new_names         r"   setAttributeszElement.setAttributesX  s    !c*oo&9&9#% #J$4$4$6$677 1 1edE** 12D9H"4(+0Jx(IBB	:' ' '#J$4$4$6$677 + +e%*T"" I224<@@@@@% "!&9&9r$   Nc                     t          | j                            |          | j                  }|r|                     ||           d S |                     |           d S rX   )ro   r*   r   insertBeforery   )r   rp   r   texts       r"   
insertTextzElement.insertTextn  sa    	,,T22DI>> 	#dL11111T"""""r$   c                    | j                             |j                   }|j         j        t          k    r}| j         j        rq| j         j        |dz
           j        t          k    rN| j         j        |dz
           }| j                            ||j         z             }|                    |           d S | j                             ||j                    | |_	        d S )Nrg   )
rv   indexrV   r   r   r*   r   r   insertr   )r   rx   refNoder   old_nodenew_strs         r"   r   zElement.insertBeforeu  s    ""7?33L"o55$,:O5%eAg.8OKK|,U1W5Hi**8dl+BCCG!!'*****Lt|444DKKKr$   c                 8    |j                                          d S rX   )rv   r   rw   s     r"   removeChildzElement.removeChild  s    r$   c                 >   | j         }|j         }|j        }|                    dd          }t          |j                  dk    r|j        d         }|j        }n	d}|j        }|j        }t          |          dk    rm|d         }	|||	_        n||	_        ||	_        ||	|_        n|	|_        ||	|_        |d                             dd          }
||
_        ||
|_        d|
_        |D ]#}||_        |j        	                    |           $g |_        ||_        dS )z1Move all of this tag's children into another tag.Fr   r   NT)
rv   next_siblingr   r   r   r   previous_elementprevious_siblingr   ru   )r   
new_parentrv   new_parent_elementfinal_next_elementnew_parents_last_descendantnew_parents_last_child(new_parents_last_descendant_next_element	to_appendfirst_childlast_childs_last_descendantr   s               r"   reparentChildrenzElement.reparentChildren  ss    ,'/ %1&8&I&I%QV&W&W#!*++a// &8%@%D"7R7_44 &*"7I7V4$	y>>A $A,K*6/J,,/A,+AK(*6;F+882="/%16A&3 +4B-*H*HPT*U*U'7_'47C Mh8I7;'4 	6 	6E-EL'..u5555 1r$   c                     | j                             | j        j        | j                  }t          || j         | j                  }| j        D ]\  }}||j        |<   |S rX   )r*   rj   rv   r9   rk   rZ   r   )r   rl   rx   keyr   s        r"   	cloneNodezElement.cloneNode  s^    i 14>BBsDIt~66 	) 	)IC#(DOC  r$   c                     | j         j        S rX   )rv   r   r[   s    r"   
hasContentzElement.hasContent  s    |$$r$   c                 \    | j         d k    rt          d         | j        fS | j         | j        fS )Nhtml)rk   r   r9   r[   s    r"   getNameTuplezElement.getNameTuple  s0    >T!!f%ty00>49,,r$   rX   )rE   rF   rG   rT   ry   r   r   propertyr   r   r   r   r   r   r   r   	nameTuplerB   r$   r"   rZ   rZ     s        # # #49 49 49l& & &
A A A( -77J# # # #
 
 
  <2 <2 <2D  % % %- - - &&IIIr$   rZ   c                       e Zd Zd Zd ZdS )ro   c                 b    t           j                            | d            || _        || _        d S rX   )r}   r   rT   rv   r*   )r   rv   r*   s      r"   rT   zTextNode.__init__  s-    &&tT222			r$   c                     t           rX   )NotImplementedErrorr[   s    r"   r   zTextNode.cloneNode  s    !!r$   N)rE   rF   rG   rT   r   rB   r$   r"   ro   ro     s2          
" " " " "r$   ro   )$__license____all__r   r   bs4.builderr   r   r   r   r	   bs4.elementr
   r   r   html5lib.constantsr   r   r   r   r   r   html5lib.treebuildersr   r}   r3   ImportErrorer   r   r~   r?   objectr   r   rZ   ro   rB   r$   r"   <module>r     sl     				                                       ??????LL   >>>>>>LLLLLL
SF SF SF SF SF SF SF SFlv v v v v-9 v v vp/ / / / /v / / /<@' @' @' @' @'# @' @' @'D" " " " "w " " " " "s   A AAA