
    3 d                         d Z ddlZddlmZmZmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	 Zd
 Zd Z e            Z G d d          Z G d de          ZdS )z
This modules implements the CrawlSpider which is the recommended spider to use
for scraping typical web sites that requires crawling pages.

See documentation in docs/topics/spiders.rst
    N)AsyncIterable	AwaitableSequence)HtmlResponseRequestResponse)LinkExtractor)Spider)collect_asyncgeniterate_spider_outputc                     | S N )xs    4lib/python3.11/site-packages/scrapy/spiders/crawl.py	_identityr      s    H    c                     | S r   r   )requestresponses     r   _identity_process_requestr      s    Nr   c                 t    t          |           r| S t          | t                    rt          || d           S d S r   )callable
isinstancestrgetattr)methodspiders     r   _get_methodr       sE     &# -vvt,,,- -r   c                   *    e Zd Z	 	 	 	 	 	 	 ddZd ZdS )RuleNc                     |pt           | _        || _        || _        |pi | _        |pt
          | _        |pt          | _        ||n| | _	        d S r   )
_default_link_extractorlink_extractorcallbackerrback	cb_kwargsr   process_linksr   process_requestfollow)selfr%   r&   r(   r+   r)   r*   r'   s           r   __init__zRule.__init__%   s\     -G0G "b*7i.K2K &DffHr   c                     t          | j        |          | _        t          | j        |          | _        t          | j        |          | _        t          | j        |          | _        d S r   )r    r&   r'   r)   r*   )r,   r   s     r   _compilezRule._compile7   sX    #DM6::"4<88();VDD*4+?HHr   )NNNNNNN)__name__
__module____qualname__r-   r/   r   r   r   r"   r"   $   sW         E E E E$I I I I Ir   r"   c                        e Zd ZU dZee         ed<    fdZd Zd Z	de
defdZd	 Zd
 Zd Zd ZddZd Zd Ze fd            Z xZS )CrawlSpiderr   rulesc                 b     t                      j        |i | |                                  d S r   )superr-   _compile_rules)r,   akw	__class__s      r   r-   zCrawlSpider.__init__B   s7    !"r"""r   c                 >    |                      || j        |d          S )NT)r   r&   r(   r+   )_parse_responseparse_start_urlr,   r   kwargss      r   _parsezCrawlSpider._parseF   s.    ##)	 $ 
 
 	
r   c                     g S r   r   r?   s      r   r>   zCrawlSpider.parse_start_urlN   s    	r   r   resultsc                     |S r   r   )r,   r   rC   s      r   process_resultszCrawlSpider.process_resultsQ   s    r   c           	      n    t          |j        | j        | j        t	          ||j                            S )N)rule	link_text)urlr&   r'   meta)r   rI   	_callback_errbackdicttext)r,   
rule_indexlinks      r   _build_requestzCrawlSpider._build_requestT   s:    ^M:;;;	
 
 
 	
r   c              #     K   t          |t                    sd S t                      t          | j                  D ]\  }}fd|j                            |          D             }|                    |          D ]E}                    |           | 	                    ||          }|
                    ||          V  Fd S )Nc                     g | ]}|v|	S r   r   ).0lnkseens     r   
<listcomp>z3CrawlSpider._requests_to_follow.<locals>.<listcomp>a   s3       d?  r   )r   r   set	enumerate_rulesr%   extract_linksr)   addrQ   r*   )r,   r   rO   rG   linksrP   r   rV   s          @r   _requests_to_followzCrawlSpider._requests_to_follow\   s      (L11 	Fuu )$+ 6 6 		> 		>J   .<<XFF  E
 **511 > >--j$??**7H======>		> 		>r   c                     | j         |j        d                  }|                     ||j        i |j        ||j                  S NrG   )rZ   rJ   r=   r&   r(   r+   )r,   r   r(   rG   s       r   rK   zCrawlSpider._callbackk   sG    {8=01##dm%D%D)%Ddk
 
 	
r   c                 r    | j         |j        j        d                  }|                     ||j                  S r`   )rZ   r   rJ   _handle_failurer'   )r,   failurerG   s      r   rL   zCrawlSpider._errbackq   s0    {7?/78##GT\:::r   Tc                b  K   |r ||fi |pd}t          |t                    rt          |           d {V }nt          |t                    r| d {V }|                     ||          }t          |          D ]}|W V  |r$| j        r|                     |          D ]}|W V  d S d S d S Nr   )r   r   r   r   rE   r   _follow_linksr^   )r,   r   r&   r(   r+   cb_resrequest_or_items          r   r=   zCrawlSpider._parse_responseu   s      	&Xh44)44:F&-00 &/77777777FI.. &%))(F;;F#8#@#@ & &%%%%%% 	&d( 	&#'#;#;H#E#E & &%%%%%%	& 	& 	& 	&& &r   c              #   X   K   |r# ||          pd}t          |          D ]}|V  d S d S re   r   )r,   rc   r'   rC   rh   s        r   rb   zCrawlSpider._handle_failure   s\       	&gg&&,"G#8#A#A & &%%%%%	& 	&& &r   c                     g | _         | j        D ]N}| j                             t          j        |                     | j         d                             |            Od S )N)rZ   r5   appendcopyr/   )r,   rG   s     r   r8   zCrawlSpider._compile_rules   s_    J 	+ 	+DKty///KO$$T****	+ 	+r   c                      t                      j        |g|R i |}|j                            dd          |_        |S )NCRAWLSPIDER_FOLLOW_LINKST)r7   from_crawlersettingsgetboolrf   )clscrawlerargsr@   r   r;   s        r   rp   zCrawlSpider.from_crawler   sP    %%g??????&/77& 
  
 r   )T)r0   r1   r2   r5   r   r"   __annotations__r-   rA   r>   r   listrE   rQ   r^   rK   rL   r=   rb   r8   classmethodrp   __classcell__)r;   s   @r   r4   r4   >   s!        E8D>    
 
 
   4    
 
 
> > >
 
 
; ; ;& & & && & &+ + +     [    r   r4   )__doc__rm   typingr   r   r   scrapy.httpr   r   r   scrapy.linkextractorsr	   scrapy.spidersr
   scrapy.utils.asyncgenr   scrapy.utils.spiderr   r   r   r    r$   r"   r4   r   r   r   <module>r      sG     5 5 5 5 5 5 5 5 5 5 7 7 7 7 7 7 7 7 7 7 / / / / / / ! ! ! ! ! ! 2 2 2 2 2 2 5 5 5 5 5 5    - - - (-// I I I I I I I I4X X X X X& X X X X Xr   