
    3 d%                         d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	Z	ddl
mZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ  e            Z G d de          Z G d de          Zd Z dS )z
This module implements the TextResponse class which adds encoding handling and
discovering (through HTTP headers) to base Response class.

See documentation in docs/topics/request-response.rst
    N)suppress)	GeneratorTuple)urljoin)html_body_declared_encodinghtml_to_unicodehttp_content_type_encodingread_bomresolve_encoding)strip_html5_whitespace)Request)Response)memoizemethod_noargs
to_unicode)get_base_urlc                       e Zd ZU dZeZej        dz   Zee	df         e
d<    fdZ fdZ fdZed             Zd	 Zd
 Zed             Zd Zed             Zd Zd Zed             Zed             Zed             Zd Zd Z	 	 	 	 	 	 	 	 	 	 	 	 ddef fdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddeeddf         f fdZ  xZ!S )TextResponseascii)encoding.
attributesc                     |                     dd           | _        d | _        d | _        d | _         t                      j        |i | d S )Nr   )pop	_encoding_cached_benc_cached_ubody_cached_selectorsuper__init__)selfargskwargs	__class__s      9lib/python3.11/site-packages/scrapy/http/response/text.pyr   zTextResponse.__init__&   sR    J55 ! $$)&)))))    c                     t          |t                    rt          || j                  | _        d S t                                          |           d S N)
isinstancestrr   r   _urlr   _set_url)r   urlr"   s     r#   r*   zTextResponse._set_url-   sJ    c3 	""366DIIIGGS!!!!!r$   c                    d| _         t          |t                    rM| j        %t	          dt          |           j         d          |                    | j                  | _         d S t                      	                    |           d S )Nr$   zCannot convert unicode body - z has no encoding)
_bodyr'   r(   r   	TypeErrortype__name__encoder   	_set_body)r   bodyr"   s     r#   r2   zTextResponse._set_body3   s    
dC   	$~ =Dzz*= = =   T^44DJJJGGd#####r$   c                 R    |                                  p|                                 S r&   )_declared_encoding_body_inferred_encodingr   s    r#   r   zTextResponse.encoding?   s%    &&((JD,H,H,J,JJr$   c                     | j         p;|                                 p'|                                 p|                                 S r&   )r   _bom_encoding_headers_encoding_body_declared_encodingr7   s    r#   r5   zTextResponse._declared_encodingC   sJ    N .!!##.%%''. ++--		
r$   c                 h    | j         t          u rt          j        | j                  | _         | j         S )z`
        .. versionadded:: 2.2

        Deserialize a JSON document to a Python object.
        )_cached_decoded_json_NONEjsonloadstextr7   s    r#   r?   zTextResponse.jsonK   s1     $- 	>(,
49(=(=D%((r$   c                 v    | j         }| j        %d| }t          || j                  d         | _        | j        S )zBody as unicodeNzcharset=   )r   r   r   r3   )r   benccharsets      r#   rA   zTextResponse.textU   sF    
 } 	H'''G!0$)!D!DQ!GD!!r$   c                 <    t          t          |           |          S )zoJoin this Response's url with a possible relative url to form an
        absolute interpretation of the latter.)r   r   )r   r+   s     r#   r   zTextResponse.urljoin`   s     |D))3///r$   c                 p    | j                             dd          }t          t          |                    S )N   Content-Typer$   )headersgetr	   r   )r   content_types     r#   r:   zTextResponse._headers_encodinge   s/    |''==)*\*B*BCCCr$   c                     | j         [t          | j                            dd                    }t	          || j        | j        | j                  \  }}|| _         || _        | j         S )NrH   r$   )auto_detect_fundefault_encoding)	r   r   rI   rJ   r   r3   _auto_detect_fun_DEFAULT_ENCODINGr   )r   rK   rD   ubodys       r#   r6   z$TextResponse._body_inferred_encodingj   ss     		'%dl&6&6&L&LMML)	 $ 5!%!7	  KD% !%D!&D  r$   c                     | j         ddfD ]9}	 |                    |           n# t          $ r Y %w xY wt          |          c S d S )Nzutf-8cp1252)rP   decodeUnicodeErrorr   )r   rA   encs      r#   rO   zTextResponse._auto_detect_funw   ss    *GX> 	) 	)CC       #C(((((	) 	)s   $
11c                 *    t          | j                  S r&   )r   r3   r7   s    r#   r;   z$TextResponse._body_declared_encoding   s    *49555r$   c                 6    t          | j                  d         S )Nr   )r
   r3   r7   s    r#   r9   zTextResponse._bom_encoding   s    	""1%%r$   c                 J    ddl m} | j         ||           | _        | j        S )Nr   )Selector)scrapy.selectorrZ   r   )r   rZ   s     r#   selectorzTextResponse.selector   s8    ,,,,,,  	3$,HTNND!$$r$   c                 (     | j         j        |fi |S r&   )r\   xpath)r   queryr!   s      r#   r^   zTextResponse.xpath   s    "t}"533F333r$   c                 6    | j                             |          S r&   )r\   css)r   r_   s     r#   ra   zTextResponse.css   s    }  '''r$   NGETr   Freturnc                    t          |t          j                  rt          |          }n)t          |t          j                  rt          d          || j        n|}t                                          |||||||||	|
|||          S )a  
        Return a :class:`~.Request` instance to follow a link ``url``.
        It accepts the same arguments as ``Request.__init__`` method,
        but ``url`` can be not only an absolute URL, but also

        * a relative URL
        * a :class:`~scrapy.link.Link` object, e.g. the result of
          :ref:`topics-link-extractors`
        * a :class:`~scrapy.selector.Selector` object for a ``<link>`` or ``<a>`` element, e.g.
          ``response.css('a.my_link')[0]``
        * an attribute :class:`~scrapy.selector.Selector` (not SelectorList), e.g.
          ``response.css('a::attr(href)')[0]`` or
          ``response.xpath('//img/@src')[0]``

        See :ref:`response-follow-example` for usage examples.
        zSelectorList is not supportedN)r+   callbackmethodrI   r3   cookiesmetar   prioritydont_filtererrback	cb_kwargsflags)	r'   parselrZ   _url_from_selectorSelectorList
ValueErrorr   r   follow)r   r+   re   rf   rI   r3   rg   rh   r   ri   rj   rk   rl   rm   r"   s                 r#   rr   zTextResponse.follow   s    @ c6?++ 	>$S))CCV011 	><===$,B4==(ww~~#  
 
 	
r$   c                    d |||fD             }t          |          dk    rt          d          |s.|r|                     |          }|r|                     |          }t	          |t
          j                  rW|}g }|D ]P}t          t                    5  |	                    t          |                     ddd           n# 1 swxY w Y   Qt                                          |||||||||	|
|||          S )a  
        A generator that produces :class:`~.Request` instances to follow all
        links in ``urls``. It accepts the same arguments as the :class:`~.Request`'s
        ``__init__`` method, except that each ``urls`` element does not need to be
        an absolute URL, it can be any of the following:

        * a relative URL
        * a :class:`~scrapy.link.Link` object, e.g. the result of
          :ref:`topics-link-extractors`
        * a :class:`~scrapy.selector.Selector` object for a ``<link>`` or ``<a>`` element, e.g.
          ``response.css('a.my_link')[0]``
        * an attribute :class:`~scrapy.selector.Selector` (not SelectorList), e.g.
          ``response.css('a::attr(href)')[0]`` or
          ``response.xpath('//img/@src')[0]``

        In addition, ``css`` and ``xpath`` arguments are accepted to perform the link extraction
        within the ``follow_all`` method (only one of ``urls``, ``css`` and ``xpath`` is accepted).

        Note that when passing a ``SelectorList`` as argument for the ``urls`` parameter or
        using the ``css`` or ``xpath`` parameters, this method will not produce requests for
        selectors from which links cannot be obtained (for instance, anchor tags without an
        ``href`` attribute)
        c                     g | ]}||S r&    ).0xs     r#   
<listcomp>z+TextResponse.follow_all.<locals>.<listcomp>   s    DDD1aDQDDDr$   rC   zFPlease supply exactly one of the following arguments: urls, css, xpathN)urlsre   rf   rI   r3   rg   rh   r   ri   rj   rk   rl   rm   )lenrq   ra   r^   r'   rn   rp   r   _InvalidSelectorappendro   r   
follow_all)r   ry   re   rf   rI   r3   rg   rh   r   ri   rj   rk   rl   rm   ra   r^   	arguments	selectorsselr"   s                      r#   r}   zTextResponse.follow_all   sy   R EDsE 2DDD	y>>Q 	X    	) %xx}} )zz%((dF/00 	9ID  9 9.// 9 9KK 23 7 78889 9 9 9 9 9 9 9 9 9 9 9 9 9 9ww!!# " 
 
 	
s   #CC	C	)Nrb   NNNNNr   FNNN)NNrb   NNNNNr   FNNNNN)"r0   
__module____qualname__rP   r>   r=   r   r   r   r(   __annotations__r   r*   r2   propertyr   r5   r?   rA   r   r   r:   r6   rO   r;   r9   r\   r^   ra   r   rr   r   r}   __classcell__)r"   s   @r#   r   r      st         "*"5"EJc3hEEE* * * * *" " " " "
$ 
$ 
$ 
$ 
$ K K XK
 
 
) ) ) " " X"0 0 0
 D D D! ! !) ) ) 6 6 6 & & & % % X%4 4 4( ( ( 3
 3
 
3
 3
 3
 3
 3
 3
n !G
 G
" 
7D$&	'#G
 G
 G
 G
 G
 G
 G
 G
 G
 G
r$   r   c                       e Zd ZdZdS )r{   z>
    Raised when a URL cannot be obtained from a Selector
    N)r0   r   r   __doc__ru   r$   r#   r{   r{     s           r$   r{   c                    t          | j        t                    rt          | j                  S t	          | j        d          st          d|            | j        j        dvrt          d| j        j         d          | j                            d          }|t          d| j        j         d|            t          |          S )	NtagzUnsupported selector: )alinkz1Only <a> and <link> elements are supported; got <>href<z!> element has no href attribute: )r'   rootr(   r   hasattrr{   r   rJ   )r   r   s     r#   ro   ro     s    #(C   0%ch///38U## ?===>>>
x|=( 
R38<RRR
 
 	
 8<<D YW38<WWRUWWXXX!$'''r$   )!r   r?   
contextlibr   typingr   r   urllib.parser   rn   w3lib.encodingr   r   r	   r
   r   
w3lib.htmlr   scrapy.httpr   scrapy.http.responser   scrapy.utils.pythonr   r   scrapy.utils.responser   objectr>   r   rq   r{   ro   ru   r$   r#   <module>r      s           # # # # # # # #                           . - - - - -       ) ) ) ) ) ) @ @ @ @ @ @ @ @ . . . . . .r
 r
 r
 r
 r
8 r
 r
 r
j    z   ( ( ( ( (r$   