U
    vRÚcç  ã                   @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	„ Zd#dd„ZG dd„ deƒZdd„ ZdZdZd$dd„Zd%dd„Zd&dd„Zd'dd„Zd(d d!„Zed"kreej  ¡ ƒ dS ))z=Diagnostic functions, mainly for use when doing tech support.ZMITé    N)ÚBytesIO)Ú
HTMLParser)ÚBeautifulSoupÚ__version__)Úbuilder_registryc                 C   sL  t dt ƒ t dtj ƒ dddg}|D ]4}tjD ]}||jkr2 q(q2| |¡ t d| ƒ q(d|krÆ| d¡ z*dd	l	m
} t d
d tt|jƒ¡ ƒ W n* tk
rÄ } zt dƒ W 5 d}~X Y nX d|krzddl}t d|j ƒ W n, tk
r } zt dƒ W 5 d}~X Y nX t| dƒr.|  ¡ } nŠ|  d¡sF|  d¡r^t d|  ƒ t dƒ dS z:tj | ¡r–t d|  ƒ t| ƒ}| ¡ } W 5 Q R X W n tk
r®   Y nX t dƒ |D ]Š}t d| ƒ d}	zt| |d}
d}	W n8 tk
r } zt d| ƒ t ¡  W 5 d}~X Y nX |	r<t d| ƒ t |
 ¡ ƒ t dƒ q¼dS )z¼Diagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %súhtml.parserÚhtml5libÚlxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr   ©ÚetreezFound lxml version %sÚ.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.Úreadzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file.Ú z#Trying to parse your markup with %sF)ÚfeaturesTú%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)Úprintr   ÚsysÚversionr   Zbuildersr   ÚremoveÚappendr	   r   ÚjoinÚmapÚstrZLXML_VERSIONÚImportErrorr   Úhasattrr   Ú
startswithÚosÚpathÚexistsÚopenÚ
ValueErrorr   Ú	ExceptionÚ	tracebackÚ	print_excZprettify)ÚdataZbasic_parsersÚnameZbuilderr   Úer   ÚfpÚparserÚsuccessÚsoup© r+   ú+lib/python3.8/site-packages/bs4/diagnose.pyÚdiagnose   sr    



ÿÿ
ÿ
ÿ

r-   Tc                 K   sp   ddl m} | dd¡}t| tƒr,|  d¡} t| ƒ}|j|f||dœ|—ŽD ]\}}td||j	|j
f ƒ qLdS )	a´  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r
   ÚrecoverTÚutf8)Úhtmlr.   z%s, %4s, %sN)r	   r   ÚpopÚ
isinstancer   Úencoder   Z	iterparser   ÚtagÚtext)r$   r0   Úkwargsr   r.   ÚreaderZeventÚelementr+   r+   r,   Ú
lxml_trace]   s    

ÿ ÿÿr9   c                   @   s`   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ ZdS )ÚAnnouncingParserzèSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c                 C   s   t |ƒ d S )N)r   )ÚselfÚsr+   r+   r,   Ú_p{   s    zAnnouncingParser._pc                 C   s   |   d| ¡ d S )Nz%s START©r=   )r;   r%   Zattrsr+   r+   r,   Úhandle_starttag~   s    z AnnouncingParser.handle_starttagc                 C   s   |   d| ¡ d S )Nz%s ENDr>   ©r;   r%   r+   r+   r,   Úhandle_endtag   s    zAnnouncingParser.handle_endtagc                 C   s   |   d| ¡ d S )Nz%s DATAr>   ©r;   r$   r+   r+   r,   Úhandle_data„   s    zAnnouncingParser.handle_datac                 C   s   |   d| ¡ d S )Nz
%s CHARREFr>   r@   r+   r+   r,   Úhandle_charref‡   s    zAnnouncingParser.handle_charrefc                 C   s   |   d| ¡ d S )Nz%s ENTITYREFr>   r@   r+   r+   r,   Úhandle_entityrefŠ   s    z!AnnouncingParser.handle_entityrefc                 C   s   |   d| ¡ d S )Nz
%s COMMENTr>   rB   r+   r+   r,   Úhandle_comment   s    zAnnouncingParser.handle_commentc                 C   s   |   d| ¡ d S )Nz%s DECLr>   rB   r+   r+   r,   Úhandle_decl   s    zAnnouncingParser.handle_declc                 C   s   |   d| ¡ d S )Nz%s UNKNOWN-DECLr>   rB   r+   r+   r,   Úunknown_decl“   s    zAnnouncingParser.unknown_declc                 C   s   |   d| ¡ d S )Nz%s PIr>   rB   r+   r+   r,   Ú	handle_pi–   s    zAnnouncingParser.handle_piN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r=   r?   rA   rC   rD   rE   rF   rG   rH   rI   r+   r+   r+   r,   r:   s   s   r:   c                 C   s   t ƒ }| | ¡ dS )zÂPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r:   Zfeed)r$   r(   r+   r+   r,   Úhtmlparser_trace™   s    rN   ZaeiouZbcdfghjklmnpqrstvwxyzé   c                 C   s:   d}t | ƒD ](}|d dkr"t}nt}|t |¡7 }q|S )z#Generate a random word-like string.r   é   r   )ÚrangeÚ_consonantsÚ_vowelsÚrandomÚchoice)Úlengthr<   ÚiÚtr+   r+   r,   Úrword§   s    rY   é   c                 C   s   d  dd„ t| ƒD ƒ¡S )z'Generate a random sentence-like string.ú c                 s   s   | ]}t t d d¡ƒV  qdS )rZ   é	   N)rY   rT   Úrandint)Ú.0rW   r+   r+   r,   Ú	<genexpr>´   s     zrsentence.<locals>.<genexpr>)r   rQ   )rV   r+   r+   r,   Ú	rsentence²   s    r`   éè  c                 C   s¤   dddddddg}g }t | ƒD ]r}t dd	¡}|dkrPt |¡}| d
| ¡ q|dkrp| tt dd¡ƒ¡ q|dkrt |¡}| d| ¡ qdd |¡ d S )z+Randomly generate an invalid HTML document.ÚpZdivÚspanrW   ÚbZscriptÚtabler   é   z<%s>é   rZ   rP   z</%s>z<html>Ú
z</html>)rQ   rT   r]   rU   r   r`   r   )Únum_elementsZ	tag_namesÚelementsrW   rU   Ztag_namer+   r+   r,   Úrdoc¶   s    

rk   é † c           
      C   s$  t dt ƒ t| ƒ}t dt|ƒ ƒ dddgddfD ]z}d}z"t ¡ }t||ƒ}t ¡ }d}W n6 tk
r” } zt d	| ƒ t ¡  W 5 d
}~X Y nX |r4t d||| f ƒ q4ddl	m
} t ¡ }| |¡ t ¡ }t d||  ƒ dd
l}	|	 ¡ }t ¡ }| |¡ t ¡ }t d||  ƒ d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r	   r0   r   r   FTr   Nz"BS4+%s parsed the markup in %.2fs.r   r
   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   rk   ÚlenÚtimer   r!   r"   r#   r	   r   ZHTMLr   r   Úparse)
ri   r$   r(   r)   Úar*   rd   r&   r   r   r+   r+   r,   Úbenchmark_parsersÈ   s4    


rq   r	   c                 C   sX   t  ¡ }|j}t| ƒ}tt||d}t d|||¡ t 	|¡}| 
d¡ | dd¡ dS )z7Use Python's profiler on a randomly generated document.)Úbs4r$   r(   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs4é2   N)ÚtempfileZNamedTemporaryFiler%   rk   Údictrr   ÚcProfileZrunctxÚpstatsZStatsZ
sort_statsZprint_stats)ri   r(   Z
filehandleÚfilenamer$   ÚvarsZstatsr+   r+   r,   Úprofileè   s    

rz   Ú__main__)T)rO   )rZ   )ra   )rl   )rl   r	   )!rM   Z__license__rv   Úior   Zhtml.parserr   rr   r   r   Zbs4.builderr   r   rw   rT   rt   rn   r"   r   r-   r9   r:   rN   rS   rR   rY   r`   rk   rq   rz   rJ   Ústdinr   r+   r+   r+   r,   Ú<module>   s8   G
&



 

