
    Ng$                        d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	 d dl
mZ d dlmZ erd dlmZ d dlmZ d d	lmZ  ej(                  e      Zdd
Z G d de      Z G d d      Z G d d      Z G d d      Zy)    )annotationsN)TYPE_CHECKINGProtocolcast)Request)
Downloader)build_from_crawler)Iterable)Self)Crawlerc                    dj                  | D cg c]  }|j                         s|dv r|nd c}      }t        j                  | j	                  d            j                         }dj                  ||g      S c c}w )a!  
    Return a filesystem-safe version of a string ``text``

    >>> _path_safe('simple.org').startswith('simple.org')
    True
    >>> _path_safe('dash-underscore_.org').startswith('dash-underscore_.org')
    True
    >>> _path_safe('some@symbol?').startswith('some_symbol_')
    True
     z-.__utf8-)joinisalnumhashlibmd5encode	hexdigest)textcpathable_slotunique_slots       .lib/python3.12/site-packages/scrapy/pqueues.py
_path_safer      sp     GGdSd!))+eQDdSTM ++dkk&12<<>K88]K011	 Ts   A;c                  0    e Zd ZdZddZddZd	dZd
dZy)QueueProtocolz:Protocol for downstream queues of ``ScrapyPriorityQueue``.c                     y N selfrequests     r   pushzQueueProtocol.push+   s    c    c                     y r!   r"   r$   s    r   popzQueueProtocol.pop-   s    Sr'   c                     y r!   r"   r)   s    r   closezQueueProtocol.close/   s    Sr'   c                     y r!   r"   r)   s    r   __len__zQueueProtocol.__len__1   s    cr'   Nr%   r   returnNoner0   zRequest | None)r0   r1   r0   int)__name__
__module____qualname____doc__r&   r*   r,   r.   r"   r'   r   r   r   (   s    D1( !r'   r   c                      e Zd ZdZe	 d	 	 	 	 	 	 	 	 	 dd       Z	 d	 	 	 	 	 	 	 ddZddZddZddZ	ddZ
ddZdd	Zdd
ZddZy)ScrapyPriorityQueuea  A priority queue implemented using multiple internal queues (typically,
    FIFO queues). It uses one internal queue for each priority value. The internal
    queue must implement the following methods:

        * push(obj)
        * pop()
        * close()
        * __len__()

    Optionally, the queue could provide a ``peek`` method, that should return the
    next object to be returned by ``pop``, but without removing it from the queue.

    ``__init__`` method of ScrapyPriorityQueue receives a downstream_queue_cls
    argument, which is a class used to instantiate a new (internal) queue when
    a new priority is allocated.

    Only integer priorities should be used. Lower numbers are higher
    priorities.

    startprios is a sequence of priorities to start with. If the queue was
    previously closed leaving some priority buckets non-empty, those priorities
    should be passed in startprios.

    c                     | ||||      S r!   r"   clscrawlerdownstream_queue_clskey
startprioss        r   from_crawlerz ScrapyPriorityQueue.from_crawlerN        70#zBBr'   c                l    || _         || _        || _        i | _        d | _        | j                  |       y r!   )r>   r?   r@   queuescurprio
init_prios)r$   r>   r?   r@   rA   s        r   __init__zScrapyPriorityQueue.__init__X   s4     !(9M!02#'
#r'   c                t    |sy |D ]   }| j                  |      | j                  |<   " t        |      | _        y r!   )qfactoryrE   minrF   )r$   rA   prioritys      r   rG   zScrapyPriorityQueue.init_priosf   s6    "H$(MM($;DKK! # :r'   c                v    t        | j                  | j                  | j                  dz   t	        |      z         S N/)r	   r?   r>   r@   str)r$   r@   s     r   rJ   zScrapyPriorityQueue.qfactoryo   s3    !%%LLHHsNSX%
 	
r'   c                    |j                    S r!   )rL   r#   s     r   rL   zScrapyPriorityQueue.priorityv   s        r'   c                   | j                  |      }|| j                  vr| j                  |      | j                  |<   | j                  |   }|j                  |       | j                  || j                  k  r|| _        y y r!   )rL   rE   rJ   r&   rF   )r$   r%   rL   qs       r   r&   zScrapyPriorityQueue.pushy   sm    ==)4;;&$(MM($;DKK!KK!	w<<8dll#:#DL $;r'   c                V   | j                   y | j                  | j                      }|j                         }|sj| j                  | j                   = |j                          | j                  j	                         D cg c]
  \  }}|s	| }}}|rt        |      nd | _         |S c c}}w r!   )rF   rE   r*   r,   itemsrK   )r$   rS   mpprioss        r   r*   zScrapyPriorityQueue.pop   s    <<KK%EEGDLL)GGI#';;#4#4#6<#641a!Q#6E<).3u:DDL =s   =
B%B%c                    | j                   y| j                  | j                      }t        t        |j	                               S )  Returns the next object to be returned by :meth:`pop`,
        but without removing it from the queue.

        Raises :exc:`NotImplementedError` if the underlying queue class does
        not implement a ``peek`` method, which is optional for queues.
        N)rF   rE   r   r   peek)r$   queues     r   r[   zScrapyPriorityQueue.peek   s6     <<DLL)GUZZ\**r'   c                    g }| j                   j                         D ]&  \  }}|j                  |       |j                          ( |S r!   )rE   rU   appendr,   )r$   activerW   rS   s       r   r,   zScrapyPriorityQueue.close   s>    KK%%'DAqMM!GGI ( r'   c                r    | j                   r*t        d | j                   j                         D              S dS )Nc              3  2   K   | ]  }t        |        y wr!   len.0xs     r   	<genexpr>z.ScrapyPriorityQueue.__len__.<locals>.<genexpr>   s     8#7a3q6#7   r   )rE   sumvaluesr)   s    r   r.   zScrapyPriorityQueue.__len__   s+    <@KKs84;;#5#5#788NQNr'   Nr"   )
r>   r   r?   type[QueueProtocol]r@   rP   rA   Iterable[int]r0   r   )r>   r   r?   rl   r@   rP   rA   rm   )rA   rm   r0   r1   )r@   r4   r0   r   )r%   r   r0   r4   r/   r2   )r0   z	list[int]r3   )r5   r6   r7   r8   classmethodrB   rH   rG   rJ   rL   r&   r*   r[   r,   r.   r"   r'   r   r:   r:   4   s    2  %'CC 2C 	C
 "C 
C C %'$$ 2$ 	$
 "$'
!$
+Or'   r:   c                  ,    e Zd ZddZddZddZd	dZy)
DownloaderInterfacec                V    |j                   sJ |j                   j                  | _        y r!   )engine
downloader)r$   r>   s     r   rH   zDownloaderInterface.__init__   s    ~~~&-nn&?&?r'   c                N    |D cg c]  }| j                  |      |f c}S c c}w r!   )_active_downloads)r$   possible_slotsslots      r   statszDownloaderInterface.stats   s*    AOP''-t4PPPs   "c                8    | j                   j                  |      S r!   )rs   get_slot_keyr#   s     r   rz   z DownloaderInterface.get_slot_key   s    ++G44r'   c                    || j                   j                  vryt        | j                   j                  |   j                        S )z<Return a number of requests in a Downloader for a given slotr   )rs   slotsrc   r_   r$   rw   s     r   ru   z%DownloaderInterface._active_downloads   s7    t,,,4??((.5566r'   N)r>   r   )rv   zIterable[str]r0   zlist[tuple[int, str]])r%   r   r0   rP   )rw   rP   r0   r4   )r5   r6   r7   rH   rx   rz   ru   r"   r'   r   rp   rp      s    @Q57r'   rp   c                      e Zd ZdZe	 d	 	 	 	 	 	 	 	 	 dd       Z	 d	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZddZddZ	ddZ
dd	Zdd
ZddZy)DownloaderAwarePriorityQueuezPriorityQueue which takes Downloader activity into account:
    domains (slots) with the least amount of active downloads are dequeued
    first.
    Nc                     | ||||      S r!   r"   r<   s        r   rB   z)DownloaderAwarePriorityQueue.from_crawler   rC   r'   c                   |j                   j                  d      dk7  rt        d| j                   d      |r)t	        |t
              st        d|j                  d      t        |      | _        || _        || _	        || _
        i | _        |xs i j                         D ]$  \  }}| j                  ||      | j                  |<   & y )NCONCURRENT_REQUESTS_PER_IPr   "z-" does not support CONCURRENT_REQUESTS_PER_IPzDDownloaderAwarePriorityQueue accepts ``slot_startprios`` as a dict; z instance is passed. Most likely, it means the state iscreated by an incompatible priority queue. Only a crawl started with the same priority queue class can be resumed.)settingsgetint
ValueError	__class__
isinstancedictrp   _downloader_interfacer?   r@   r>   pqueuesrU   	pqfactory)r$   r>   r?   r@   slot_startpriosrw   rA   s          r   rH   z%DownloaderAwarePriorityQueue.__init__   s     ""#?@AEDNN##PQ  :ot#D2",,/ 0..  ;Ng:V"9M! '79!0!6B = = ?D*!%j!ADLL !@r'   c                x    t        | j                  | j                  | j                  dz   t	        |      z   |      S rN   )r:   r>   r?   r@   r   )r$   rw   rA   s      r   r   z&DownloaderAwarePriorityQueue.pqfactory   s9     #LL%%HHsNZ--	
 	
r'   c                    | j                   j                  | j                        }|sy t        |      d   }| j                  |   }|j	                         }t        |      dk(  r| j                  |= |S )N   r   )r   rx   r   rK   r*   rc   )r$   rx   rw   r\   r%   s        r   r*   z DownloaderAwarePriorityQueue.pop   sd    **00>5z!}T"))+u:?T"r'   c                    | j                   j                  |      }|| j                  vr| j                  |      | j                  |<   | j                  |   }|j	                  |       y r!   )r   rz   r   r   r&   )r$   r%   rw   r\   s       r   r&   z!DownloaderAwarePriorityQueue.push   sU    ))66w?t||#!%!5DLLT"

7r'   c                    | j                   j                  | j                        }|syt        |      d   }| j                  |   }|j	                         S )rZ   Nr   )r   rx   r   rK   r[   )r$   rx   rw   r\   s       r   r[   z!DownloaderAwarePriorityQueue.peek  sK     **00>5z!}T"zz|r'   c                    | j                   j                         D ci c]  \  }}||j                          }}}| j                   j                          |S c c}}w r!   )r   rU   r,   clear)r$   rw   r\   r_   s       r   r,   z"DownloaderAwarePriorityQueue.close  sO    9=9K9K9MN9M+$$%9MN Os   Ac                r    | j                   r*t        d | j                   j                         D              S dS )Nc              3  2   K   | ]  }t        |        y wr!   rb   rd   s     r   rg   z7DownloaderAwarePriorityQueue.__len__.<locals>.<genexpr>  s     9#8a3q6#8rh   r   )r   ri   rj   r)   s    r   r.   z$DownloaderAwarePriorityQueue.__len__  s+    =A\\s94<<#6#6#899PqPr'   c                    || j                   v S r!   )r   r}   s     r   __contains__z)DownloaderAwarePriorityQueue.__contains__  s    t||##r'   r!   )
r>   r   r?   rl   r@   rP   rA   dict[str, Iterable[int]] | Noner0   r   )r>   r   r?   rl   r@   rP   r   r   rk   )rw   rP   rA   rm   r0   r:   r2   r/   )r0   zdict[str, list[int]]r3   )rw   rP   r0   bool)r5   r6   r7   r8   rn   rB   rH   r   r*   r&   r[   r,   r.   r   r"   r'   r   r   r      s    
  7;CC 2C 	C
 4C 
C C <@BB 2B 	B
 9BB 68

%2
	

Q$r'   r   )r   rP   r0   rP   )
__future__r   r   loggingtypingr   r   r   scrapyr   scrapy.core.downloaderr   scrapy.utils.miscr	   collections.abcr
   typing_extensionsr   scrapy.crawlerr   	getLoggerr5   loggerr   r   r:   rp   r   r"   r'   r   <module>r      ss    "   0 0  - 0( '&			8	$2$	"H 	"oO oOd7 7$e$ e$r'   