U
    qe}                  
   @   s   d dl Z d dlZd dlZd dlZd dlZd dlZddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZmZ G dd	 d	e
jejejejejejej	ZdS )
    N   )_cp1252)_baseccdcgeorssitunesmediarsspsc)_sanitize_html_HTMLSanitizer)FeedParserDict)_urljoinmake_safe_absolute_uriresolve_relative_urisc                C       s  e Zd Zddddddddddddddddddddddd	d
dddddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3BZi Zd4d5d6d7d8d9d:d;d<d=h
Zd
d>d?d@dAdBdCdDdEh	Zd
d>d?d@dAdBdCdDdEh	ZdFdGhZ fdHdIZ	dJdK Z
dLdM ZdNdO ZdPdQ ZdRdS ZddUdVZdWdX ZdYdZ Zd[d\ Zd]d^ Zed_d` Zdadb Zdcdd Zededf Zedgdh Zdidj ZddkdlZdmdn Zdodp Zedqdr Zdsdt Zdudv Z dwdx Z!edydz Z"dd|d}Z#d~d Z$dddZ%dd Z&dddZ'dd Z(dd Z)dd Z*  Z+S )_FeedParserMixin ZadminagannotateaudioZblogChannelr   ZcreativeCommonscocontentcpr   ZdctermsemailevZ
feedburnerfmZfoafgeor   ZgmlZicbmimager   lZmediaZpingbackprismrdfZrdfsrefZreqvsearchslashZsoapssstrsubsyZszfZtaxothrtiZ	trackbackZwfwZwikixhtmlZxlinkxmlr
   )Br   http://backend.userland.com/rssz%http://blogs.law.harvard.edu/tech/rsshttp://purl.org/rss/1.0/&http://my.netscape.com/rdf/simple/0.9/zhttp://example.com/newformat#zhttp://example.com/nechozhttp://purl.org/echo/zuri/of/echo/namespace#zhttp://purl.org/pie/zhttp://purl.org/atom/ns#zhttp://www.w3.org/2005/Atomz'http://purl.org/rss/1.0/modules/rss091#zhttp://webns.net/mvcb/z,http://purl.org/rss/1.0/modules/aggregation/z)http://purl.org/rss/1.0/modules/annotate/z!http://media.tangent.org/rss/1.0/z-http://backend.userland.com/blogChannelModulez%http://creativecommons.org/ns#licensezhttp://web.resource.org/cc/z>http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.htmlz4http://backend.userland.com/creativeCommonsRssModulez'http://purl.org/rss/1.0/modules/companyz(http://purl.org/rss/1.0/modules/content/z&http://my.theinfo.org/changed/1.0/rss/z http://purl.org/dc/elements/1.1/zhttp://purl.org/dc/terms/z&http://purl.org/rss/1.0/modules/email/z&http://purl.org/rss/1.0/modules/event/z*http://rssnamespace.org/feedburner/ext/1.0zhttp://freshmeat.net/rss/fm/zhttp://xmlns.com/foaf/0.1/z(http://www.w3.org/2003/01/geo/wgs84_pos#zhttp://www.georss.org/georsszhttp://www.opengis.net/gmlzhttp://postneo.com/icbm/z&http://purl.org/rss/1.0/modules/image/z*http://www.itunes.com/DTDs/PodCast-1.0.dtdz'http://example.com/DTDs/PodCast-1.0.dtdz%http://purl.org/rss/1.0/modules/link/zhttp://search.yahoo.com/mrsszhttp://search.yahoo.com/mrss/z4http://madskills.com/public/xml/rss/module/pingback/z.http://prismstandard.org/namespaces/1.2/basic/z+http://www.w3.org/1999/02/22-rdf-syntax-ns#z%http://www.w3.org/2000/01/rdf-schema#z*http://purl.org/rss/1.0/modules/reference/z*http://purl.org/rss/1.0/modules/richequiv/z'http://purl.org/rss/1.0/modules/search/z&http://purl.org/rss/1.0/modules/slash/z)http://schemas.xmlsoap.org/soap/envelope/z.http://purl.org/rss/1.0/modules/servicestatus/z-http://hacks.benhammersley.com/rss/streaming/z-http://purl.org/rss/1.0/modules/subscription/z,http://purl.org/rss/1.0/modules/syndication/z/http://schemas.pocketsoap.com/rss/myDescModule/z)http://purl.org/rss/1.0/modules/taxonomy/z*http://purl.org/rss/1.0/modules/threading/z*http://purl.org/rss/1.0/modules/textinput/z5http://madskills.com/public/xml/rss/module/trackback/z$http://wellformedweb.org/commentAPI/z%http://purl.org/rss/1.0/modules/wiki/zhttp://www.w3.org/1999/xhtmlzhttp://www.w3.org/1999/xlinkz$http://www.w3.org/XML/1998/namespacez"http://podlove.org/simple-chapterscommentsdocshreficonidlinkZlogourlZwfw_commentZwfw_commentrss	copyrightdescriptioninforightssubtitlesummaryZtaglinetitleapplication/xhtml+xml	text/htmlc                    s   | j s(| j D ]\}}|| j | < qt | _g | _d| _i | _d| _	d| _
d| _d| _d| _d| _d| _d| _d| _t | _t | _d | _i | _g | _g | _g | _d| _d| _d| _d| _| jr| jdd| jd< i | _t t!| "  d S )Nr   r   _-language)#_matchnamespaces
namespacesitemslowerr   feeddataentriesversionnamespaces_in_useinfeedinentry	incontentintextinputinimageZinauthorZincontributorZinpublisherinsource
sourcedatacontentparamsZ_summaryKeynamespacemapelementstack	basestack	langstacksvgOKtitle_depthdepthZ
hasContentlangreplaceproperty_depth_mapsuperr   __init__)selfkv	__class__ n/mounts/lovelace/software/anaconda3/envs/qiime2-shotgun-2024.2/lib/python3.8/site-packages/feedparser/mixin.pyr^      s>    	z_FeedParserMixin.__init__c                 C   s   t d S N)NotImplementedError)r_   kvrd   rd   re   _normalize_attributes   s    z&_FeedParserMixin._normalize_attributesc                    s    j d7  _  fdd|D }t|}|d|dp> j}t|trX| jd} jrtt j|pn j _nt	 j| _|d|d}|d	krd }n|d kr j
}|r|d
kr|dd jd< | _
 j j  j| |D ]>\}}|dr |dd  | q|dkr d | q jrh jdddsh|dkr^d S d jd<  jr( jddkr(|ddkr|dd\}} j|d	}|dkr|dkr|d|f |dkr|dkr|d|f |dkr  jd7  _ jd| |f ddS |ddkrJ|dd\}}	n
d	| }}	 j||}|rp|d }|s|dkrd _|s|dkrd _d | |	 }
zt |
}||W S  tk
r   ||	 }t |dkr !|d Y S  " }|||< Y nX d S )!Nr   c                    s   g | ]}  |qS rd   )ri   ).0attrr_   rd   re   
<listcomp>   s     z5_FeedParserMixin.unknown_starttag.<locals>.<listcomp>zxml:basebaseignorezxml:langrZ   r   )feedrsszrdf:RDFr@   rA   rB   zxmlns:   xmlnstyper+   z	xhtml:divdivr=   :r?   mathz"http://www.w3.org/1998/Math/MathMLsvgzhttp://www.w3.org/2000/svgz<%s%s>r   escape)r<   r4   r7   name)r<   r4   r7   r5   r1   widthheightZ_start_)#rY   dictgetbaseuri
isinstancebytesdecodeencodingr   r   rZ   r[   rG   rU   appendrV   
startswithtrack_namespacerM   rR   endswithfindsplitrJ   rW   handle_datastrattrsrS   rN   rO   getattrAttributeErrorlenpush_get_context)r_   tagattrsattrs_dr   rZ   prefixuri	namespacesuffix
methodnamemethodZunknown_tagcontextrd   rl   re   unknown_starttag   sx    






z!_FeedParserMixin.unknown_starttagc                 C   s  | ddkr |dd\}}n
d| }}| j||}|rD|d }|dkr`| jr`|  jd8  _d| | }z | jrzt t| |}|  W n" tk
r   | ||  Y nX | jr| j	dd	
d	s|d
krd S d| j	d< | jr| j	ddkr|dd }| jd| dd | jrL| j  | jrL| jd rL| jd | _| jrr| j  | jrr| jd | _|  jd8  _d S )Nrw   r?   r   r   r@   ry   Z_end_rt   r+   ru   r=   z</%s>r   rz   )r   r   rS   r   rW   r   r   poprM   rR   r   r   rU   r   rV   rZ   rY   )r_   r   r   r   r   r   rd   rd   re   unknown_endtag/  s@    





z_FeedParserMixin.unknown_endtagc                 C   sr   | j s
d S | }|dkr$d| }n6|d dkrDt|dd  d}nt|}t|d}| j d d	 | d S )
N)
34383960Z62x22Zx26Zx27Zx3cZx3ez&#%s;r   xr      utf-8r?      )rT   rF   intchrencoder   )r_   r!   textcrd   rd   re   handle_charref[  s    
z_FeedParserMixin.handle_charrefc                 C   s   | j s
d S |dkrd| }nx|| jkrP| j| }|dr|dr| |S nDztjj|  W n tk
r|   d| }Y nX ttjj| 	d}| j d d 
| d S )N)ltgtquotampaposz&%s;z&#;r   r?   r   )rT   entitiesr   r   handle_entityrefhtmlname2codepointKeyErrorr   r   r   )r_   r!   r   rd   rd   re   r   j  s    


z!_FeedParserMixin.handle_entityrefr   c                 C   sD   | j s
d S |r,| jddkr,tjj|}| j d d | d S )Nrt   r=   r?   r   )rT   rR   r   r+   saxsaxutilsr{   r   )r_   r   r{   rd   rd   re   r   }  s
    z_FeedParserMixin.handle_datac                 C   s   d S rf   rd   r_   r   rd   rd   re   handle_comment  s    z_FeedParserMixin.handle_commentc                 C   s   d S rf   rd   r   rd   rd   re   	handle_pi  s    z_FeedParserMixin.handle_pic                 C   s   d S rf   rd   r   rd   rd   re   handle_decl  s    z_FeedParserMixin.handle_declc                 C   s   | j ||d  dkrf| j d|}|dkr:t| j }|S | tjj| j |d | d |d S | j d|}|dkr|d S |S d S )	N	   z	<![CDATA[z]]>r?   r      >r   )rawdatar   r   r   r+   r   r   r{   )r_   ir`   rd   rd   re   parse_declaration  s    
$z"_FeedParserMixin.parse_declarationc                 C   s<   |   } | dks| dkrd} n| dkr,d} n| dkr8d} | S )Nr   plain
text/plainr   r>   r*   r=   )rF   )content_typerd   rd   re   map_content_type  s    z!_FeedParserMixin.map_content_typec                 C   s   |  }| js@||fdkr"d| _n|dkr2d| _n|dkr@d| _|ddkrVd	}|}|| jkr| j| | j|< || j| j| < n|| j|pd
< d S )N)Nr.   rss090r-   rss10zhttp://www.w3.org/2005/atomatom10zbackend.userland.com/rssr?   r,   r   )rF   rI   r   rC   rS   rJ   )r_   r   r   Zlowerurird   rd   re   r     s    
z _FeedParserMixin.track_namespacec                 C   s   t | jp
d|S )Nr   )r   r   )r_   r   rd   rd   re   resolve_uri  s    z_FeedParserMixin.resolve_uric                 C   s   |S rf   rd   )elementdatard   rd   re   decode_entities  s    z _FeedParserMixin.decode_entitiesc                 C   s   d dd | D S )Nr   c                 s   s2   | ]*}d |d t jj|d ddif V  qdS )z %s="%s"r   r   "z&quot;N)r+   r   r   r{   rj   trd   rd   re   	<genexpr>  s   z,_FeedParserMixin.strattrs.<locals>.<genexpr>)join)r   rd   rd   re   r     s    
z_FeedParserMixin.strattrsc                 C   s   | j ||g g d S rf   )rT   r   )r_   r   expecting_textrd   rd   re   r     s    z_FeedParserMixin.pushc              
   C   s  | j s
d S | j d d |kr d S | j  \}}}t|D ] \}}t|tr8|d||< q8| jdkr\| jdddkr\|rt	|dkr|d 
 s|d= qz|rt	|dkr|d 
 s|d= q|r\|d d	ks|d d
r\|d dkr\d}|d d D ]L}|dr,|d8 }|dkrL q\n |dr|ds|d7 }q|dd }d|}	|rt|	
 }	|s~|	S tr| jddrzt|	dd}	W n  tjtjtfk
r   Y nX || jkr|	r|dkr| jr| |	}	| jdds| ||	}	| jdsP| jddkrP| |	rPd| jd< z| jd= W n tk
rr   Y nX z| jd= W n tk
r   Y nX | | jdd| jk}
|
r| jr|| jkrt|	| j| j | jdd}	|
r| j!r|| j"krt#|	| j | jdd}	| j r<t|	tr<|	| j d}	| j dkrt|	tsz|	dd}	W n t$tfk
r   Y nX t|	ts|	%t&}	|dkr|	S |dkrd| j'  k r| j(krn n|	S | j)r| j*s|dkr,| j+d ,|g  t-.| j}|	|d< | j+d | /| n|dkr| j0s|	1dd }	t23d!d"|	}	|	| j+d |< |	r|	| j+d d# d d$< n|d%krd&}| j4,| j+d i |}|d ks| j(|kr| j(| j4| j+d  |< |	| j+d |< | j5rt-.| j}|	|d< || j+d |d' < n| j6s&| j*r| 7 }|d%kr<d(}|	||< |dkrvt23d!d"|	}	|	||< |	|d# d d$< n(| j5rt-.| j}|	|d< |||d' < |	S ))Nr?   r   r   r   rt   r   r=   r   z<div>z<div z</div>z</<z/>r   base64utf8r3   atomr   r>   modero   )r   zutf-8_INVALID_PYTHON_3z
iso-8859-1)categorytagsZitunes_keywordsr<   r   valuer4   z&amp;&z&([A-Za-z0-9_]+);z&\g<1>linksr1   r7   r;   _detailr:   )8rT   r   	enumerater   r   r   rI   rR   r   r   stripr   r   r   r   decodebytesr   binasciiError
IncompleteUnicodeDecodeErrorcan_be_relative_uriZ
guidislinkr   r   looks_like_htmlr   r   
html_typesr   can_contain_relative_urisr   r   sanitize_htmlcan_contain_dangerous_markupr   UnicodeEncodeError	translater   rX   rY   rL   rP   rH   
setdefaultcopydeepcopyr   rO   r[   rer&   r\   rM   rK   r   )r_   r   strip_whitespacer   piecesr   ra   rY   pieceoutputZ
is_htmlishrR   Zold_value_depthr   rd   rd   re   r     s    
 0


 


*




z_FeedParserMixin.popc                 C   sl   |  j d7  _ | jr$| jdd| _t| |d|| j| jd| _| || j| jd< | 	|| d S )Nr   r@   rA   rt   )rt   rB   rn   r   )
rM   rZ   r[   r   r   r   r   rR   
_is_base64r   )r_   r   r   Zdefault_content_typer   rd   rd   re   push_contenti  s    
z_FeedParserMixin.push_contentc                 C   s&   |  |}|  jd8  _| j  |S )Nr   )r   rM   rR   clear)r_   r   r   rd   rd   re   pop_contentt  s    

z_FeedParserMixin.pop_contentc                 C   s\   t d| st d| sdS tdd t d| D r:dS tdd t d| D rXdS d	S )
z3
        :type s: str
        :rtype: bool
        z</(\w+)>z&#?\w+;Fc                 s   s    | ]}|  tjkr|V  qd S rf   )rF   r   Zacceptable_elementsr   rd   rd   re   r     s      z3_FeedParserMixin.looks_like_html.<locals>.<genexpr>z</?(\w+)c                 s   s   | ]}|t jjkr|V  qd S rf   )r   r   
entitydefs)rj   erd   rd   re   r     s      z&(\w+);T)r   r"   anyfindall)srd   rd   re   r   ~  s    z _FeedParserMixin.looks_like_htmlc                 C   sL   | d}|dkrH|d | }||d d  }| j||}|d | }|S )Nrw   r?   r   )r   rS   r   )r_   r|   Zcolonposr   r   rd   rd   re   _map_to_standard_prefix  s    
z(_FeedParserMixin._map_to_standard_prefixc                 C   s   | | |S rf   )r   r  )r_   r   r|   rd   rd   re   _get_attribute  s    z_FeedParserMixin._get_attributec                 C   sT   | dddkrdS | jd dr(dS | jd dr<dS | jd d	rPdS dS )
Nr   r   r   r   rt   ztext/r   z+xmlz/xml)r   rR   r   r   )r_   r   rR   rd   rd   re   r     s    z_FeedParserMixin._is_base64c              
   C   sl   |  d|  d|  dd }|rhz
| d= W n tk
r>   Y nX z
| d= W n tk
r^   Y nX || d< | S )Nr5   r   r1   )r   r   )r   r1   rd   rd   re   _enforce_href  s    

z_FeedParserMixin._enforce_hrefFc                 C   s&   |   }|r|||< n||| d S rf   )r   r   )r_   keyr   	overwriter   rd   rd   re   _save  s    
z_FeedParserMixin._savec                 C   sX   | j r| j}nF| jr*d| jkr*| jd }n*| jr<| jd }n| jrN| jd }n| j}|S )Nr   Z	textinputr?   )rP   rQ   rO   rG   rN   rL   rH   )r_   r   rd   rd   re   r     s    z_FeedParserMixin._get_contextauthorc                 C   sV   |   }||d t  |||d  |< |   |dt g ||d d |< d S )Nr   authorsr?   )r   r   r   _sync_author_detail)r_   r  r   r   r   rd   rd   re   _save_author  s    z_FeedParserMixin._save_authorc                 C   s,   |   }|dt g ||d d |< d S )NZcontributorsr?   )r   r   r   )r_   r  r   r   rd   rd   re   _save_contributor  s    z"_FeedParserMixin._save_contributorc                 C   sZ  |   }|d| t gd }|rn|d}|d}|rR|rRd||f ||< n|r`|||< n|rl|||< n||d  }}|sd S td|}|r|d}||d}|d	d}|d
d}|dd}| }|r|d dkr|dd  }|r|d dkr|d d }| }|s*|r:|d| | |rH||d< |rV||d< d S )Nz%ssr?   r|   r   z%s (%s)z(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?r   r   z()z<>z&lt;&gt;(r   )z	%s_detail)	r   r   r   r   r"   groupr[   r   r   )r_   r  r   detailr|   r   r  Z
emailmatchrd   rd   re   r
    sB    




z$_FeedParserMixin._sync_author_detailc                 C   sH   |   }|dg }|s$|s$|s$d S t|||d}||krD|| d S )Nr   )termschemelabel)r   r   r   r   )r_   r  r  r  r   r   r   rd   rd   re   _add_tag   s    z_FeedParserMixin._add_tagc                 C   s   |  dd d S )Nr   r   )r   )r_   r   rd   rd   re   _start_tags	  s    z_FeedParserMixin._start_tagsc                 C   s,   |  ddD ]}| | d d  qd S )Nr   ,)r   r   r  r   )r_   r  rd   rd   re   	_end_tags  s    z_FeedParserMixin._end_tags)r   )r   )F)r  )r  ),__name__
__module____qualname__rD   rC   r   r   r   r   r^   ri   r   r   r   r   r   r   r   r   r   staticmethodr   r   r   r   r   r   r   r   r   r   r  r  r   r  r  r   r  r  r
  r  r  r  __classcell__rd   rd   rb   re   r   *   s  
F-Z,
	




 

	



&	r   )r   r   r   html.entitiesr   r   xml.sax.saxutilsr+   r   rD   r   r   r   r   r   r	   r
   	sanitizerr   r   utilr   urlsr   r   r   	Namespacer   rd   rd   rd   re   <module>   s&   $
