U
    qe1                     @   s   d dl Z d dlZd dlZz.zd dlZW n ek
rB   d dlZY nX W n ek
rb   dZdZY n
X dd Zddlm	Z	m
Z
mZ dZdZdZd	Zd
ZdZedZedZeejeef dddZdd ZdS )    Nc                 C   s   t | d pdS )Nencoding )chardetdetect)data r   r/mounts/lovelace/software/anaconda3/envs/qiime2-shotgun-2024.2/lib/python3.8/site-packages/feedparser/encodings.pylazy_chardet_encoding*   s    r	      )CharacterEncodingOverrideCharacterEncodingUnknownNonXMLContentTypes   Los    < ?s   < ? s      <s   <   z  z^<\?xml[^>]*?>s#   ^<\?.*encoding=[\'"](.*?)[\'"].*\?>)linereturnc                 C   sj   |  d}|sdS |d  }d}|dd D ]2}|d\}}}|  dkr.| d	}q.||fS )
a  Parse an HTTP Content-Type header.

    The return value will be a tuple of strings:
    the MIME type, and the value of the "charset" (if any).

    This is a custom replacement for Python's cgi.parse_header().
    The cgi module will be removed in Python 3.13.
    ;)r   r   r   r   r
   N=charsetz"')splitstrip	partitionlower)r   chunks	mime_typeZcharset_valuechunkkey_valuer   r   r   parse_content_typeG   s    

r   c              
   C   s  d}d}|dd t jkr.d}|dd }n&|dd t jkrTd}|dd }n |dd t jkr|dd tkrd}|dd }n|dd t jkr|dd tkrd}|dd }n|dd	 t jkrd
}|d	d }nt|dd tkrd}n^|dd tkrd}nF|dd t	kr&d}n.|dd t
kr>d}n|dd tkrTd}|}z|rp||d
}W n ttfk
r   d}Y nX t|}|r| d d
 }|r|dkr|}| dpd}t|\}}d}	d}
d}||
ks|dr,|dr,d}	|p(|p(d
}np||ksN|dr^|dr^d}	|pZd}n>|drv|prd}n&| rd| kr|pd}n
|pd
}| dkrd}| dkrd}d}| r|	sd| krd| d  }nd}t|}d}g }|||td
ddfD ]}t|r$||}|s.q||kr<q|| z||}W n ttfk
rn   Y n@X d}d}t|rt||}n|d | }|d
} qq|stdd ||f  }d}n||krt d!||f }|}||d"< |rd#|d$< ||d%< |S )&z|Detect and convert the character encoding to UTF-8.

    http_headers is a dictionary
    data is a raw string (not Unicode)r   N   zutf-32bezutf-32le   zutf-16bezutf-16le   zutf-8cp037r   )u16zutf-16utf16utf_16u32zutf-32utf32utf_32ziso-10646-ucs-2ziso-10646-ucs-4Zcsucs4Z	csunicodezucs-2zucs-4zcontent-type)zapplication/xmlzapplication/xml-dtdz&application/xml-external-parsed-entity)ztext/xmlztext/xml-external-parsed-entityzapplication/z+xmlr
   ztext/zus-asciiz
iso-8859-1gb2312gb18030z%s is not an XML media typezno Content-type specifiedzwindows-1252z
iso-8859-2z&<?xml version='1.0' encoding='utf-8'?>
z#document encoding unknown, I tried z>%s, %s, utf-8, windows-1252, and iso-8859-2 but nothing workedz)document declared as %s, but parsed as %sr   Tbozobozo_exception)!codecsBOM_UTF32_BEBOM_UTF32_LEBOM_UTF16_BE
ZERO_BYTESBOM_UTF16_LEBOM_UTF8EBCDIC_MARKERUTF16BE_MARKERUTF16LE_MARKERUTF32BE_MARKERUTF32LE_MARKERdecodeencodeUnicodeDecodeErrorLookupErrorRE_XML_PI_ENCODINGmatchgroupsr   getr   
startswithendswithr   r	   callableappendRE_XML_DECLARATIONsearchsubr   r   )http_headersr   resultZbom_encodingxml_encodingZtempdataZxml_encoding_matchZhttp_content_typeZhttp_encodingZacceptable_content_typeZapplication_content_typesZtext_content_typesZrfc3023_encodingerrormsgZknown_encodingtried_encodingsZproposed_encodingZnew_declarationr   r   r   convert_to_utf8_   s    /	""



   





rN   )r-   retypingtZcchardetr   ImportErrorr	   
exceptionsr   r   r   r4   r5   r6   r7   r8   r1   compilerE   r=   strTupler   rN   r   r   r   r   <module>   s,   
	

