
    d$                         d Z ddlZddlZddlZdgZ ej        dd          Z G d d          Z G d d          Z	 G d	 d
          Z
dS )a%   robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
    NRobotFileParserRequestRatezrequests secondsc                   \    e Zd ZdZddZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd ZdS )r   zs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

     c                     g | _         g | _        d | _        d| _        d| _        |                     |           d| _        d S )NFr   )entriessitemapsdefault_entrydisallow_all	allow_allset_urllast_checkedselfurls     $  /croot/python-split_1694437901252/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac/lib/python3.11/urllib/robotparser.py__init__zRobotFileParser.__init__   sG    !!S    c                     | j         S )zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r   r   s    r   mtimezRobotFileParser.mtime%   s       r   c                 @    ddl }|                                 | _        dS )zYSets the time the robots.txt file was last fetched to the
        current time.

        r   N)timer   )r   r   s     r   modifiedzRobotFileParser.modified.   s#    
 	 IIKKr   c                 |    || _         t          j                            |          dd         \  | _        | _        dS )z,Sets the URL referring to a robots.txt file.      N)r   urllibparseurlparsehostpathr   s     r   r   zRobotFileParser.set_url6   s4    %|44S99!A#>	4999r   c                    	 t           j                            | j                  }|                                }|                     |                    d                                                     dS # t           j        j	        $ rK}|j
        dv rd| _        n)|j
        dk    r|j
        dk     rd| _        Y d}~dS Y d}~dS Y d}~dS Y d}~dS d}~ww xY w)z4Reads the robots.txt URL and feeds it to the parser.zutf-8)i  i  Ti  i  N)r   requesturlopenr   readr   decode
splitlineserror	HTTPErrorcoder   r   )r   frawerrs       r   r&   zRobotFileParser.read;   s    		9&&tx00A &&((CJJszz'**557788888 |% 	& 	& 	&x:%%$(!!SSX^^!% ! "!!!!!%3^^^^^	&s   $A6 6C
.CCc                 p    d|j         v r| j        	|| _        d S d S | j                            |           d S N*)
useragentsr
   r   append)r   entrys     r   
_add_entryzRobotFileParser._add_entryH   sM    %"""!)%*""" *) L&&&&&r   c                 <   d}t                      }|                                  |D ]V}|sB|dk    rt                      }d}n+|dk    r%|                     |           t                      }d}|                    d          }|dk    r
|d|         }|                                }|s|                    dd          }t          |          dk    r|d                                                                         |d<   t          j	        
                    |d                                                   |d<   |d         dk    rM|dk    r#|                     |           t                      }|j                            |d                    d}o|d         dk    r8|dk    r0|j                            t          |d         d	                     d}|d         d
k    r8|dk    r0|j                            t          |d         d                     d}|d         dk    rP|dk    rH|d                                                                         rt!          |d                   |_        d}S|d         dk    r|dk    r|d                             d          }t          |          dk    r|d                                                                         rg|d                                                                         r;t%          t!          |d                   t!          |d                             |_        d}*|d         dk    r | j                            |d                    X|dk    r|                     |           dS dS )zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        r   r      #N:z
user-agentdisallowFallowTzcrawl-delayzrequest-rate/sitemap)Entryr   r5   findstripsplitlenlowerr   r   unquoter2   r3   	rulelinesRuleLineisdigitintdelayr   req_rater	   )r   linesstater4   lineinumberss          r   r   zRobotFileParser.parseQ   sJ     7	2 7	2D A::!GGEEEaZZOOE***!GGEE		#AAvvBQBx::<<D ::c1%%D4yyA~~q'--////11Q ,..tAw}}??Q7l**zz... %$++DG444EE!W
**zz..xQ/G/GHHH !!W''zz..xQ/F/FGGG !!W--zz  7==??2244 7*-d1g,,EK !!W..zz"&q'--"4"4LLA--'!*2B2B2D2D2L2L2N2N- '
 0 0 2 2 : : < < .-8WQZ#gVWj//-Z-ZEN !!W	))
 M((a111A::OOE""""" :r   c                    | j         rdS | j        rdS | j        sdS t          j                            t          j                            |                    }t          j                            dd|j        |j	        |j
        |j        f          }t          j                            |          }|sd}| j        D ].}|                    |          r|                    |          c S /| j        r| j                            |          S dS )z=using the parsed robots.txt decide if useragent can fetch urlFTr   r<   )r   r   r   r   r   r    rD   
urlunparser"   paramsqueryfragmentquoter   
applies_to	allowancer
   )r   	useragentr   
parsed_urlr4   s        r   	can_fetchzRobotFileParser.can_fetch   s     	5> 	4
   	5 \**6<+?+?+D+DEE
l%%r"Z_j.
0C'E F Fl  %% 	C\ 	, 	,E	** ,s+++++,  	5%//444tr   c                     |                                  sd S | j        D ] }|                    |          r	|j        c S !| j        r| j        j        S d S N)r   r   rV   rI   r
   r   rX   r4   s      r   crawl_delayzRobotFileParser.crawl_delay   sm    zz|| 	4\ 	# 	#E	** #{"""# 	,%++tr   c                     |                                  sd S | j        D ] }|                    |          r	|j        c S !| j        r| j        j        S d S r\   )r   r   rV   rJ   r
   r]   s      r   request_ratezRobotFileParser.request_rate   sm    zz|| 	4\ 	& 	&E	** &~%%%& 	/%..tr   c                 "    | j         sd S | j         S r\   )r	   r   s    r   	site_mapszRobotFileParser.site_maps   s    } 	4}r   c                     | j         }| j        || j        gz   }d                    t          t          |                    S )Nz

)r   r
   joinmapstr)r   r   s     r   __str__zRobotFileParser.__str__   s>    ,)!3 44G{{3sG,,---r   N)r   )__name__
__module____qualname____doc__r   r   r   r   r&   r5   r   rZ   r^   r`   rb   rg    r   r   r   r      s         
   ! ! !( ( (? ? ?
9 9 9' ' 'G# G# G#R  :      
. . . . .r   c                   $    e Zd ZdZd Zd Zd ZdS )rF   zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c                     |dk    r|sd}t           j                            t           j                            |                    }t           j                            |          | _        || _        d S )Nr   T)r   r   rQ   r    rU   r"   rW   )r   r"   rW   s      r   r   zRuleLine.__init__   s[    2::i:I|&&v|'<'<T'B'BCCL&&t,,	"r   c                 L    | j         dk    p|                    | j                   S r0   )r"   
startswith)r   filenames     r   rV   zRuleLine.applies_to   s$    yCA8#6#6ty#A#AAr   c                 .    | j         rdnddz   | j        z   S )NAllowDisallowz: )rW   r"   r   s    r   rg   zRuleLine.__str__   s    >9zTADIMMr   N)rh   ri   rj   rk   r   rV   rg   rl   r   r   rF   rF      sS        1 1# # #B B BN N N N Nr   rF   c                   *    e Zd ZdZd Zd Zd Zd ZdS )r>   z?An entry has one or more user-agents and zero or more rulelinesc                 >    g | _         g | _        d | _        d | _        d S r\   )r2   rE   rI   rJ   r   s    r   r   zEntry.__init__   s"    
r   c                 |   g }| j         D ]}|                    d|            | j        |                    d| j                    | j        ,| j        }|                    d|j         d|j                    |                    t          t          | j	                             d
                    |          S )NzUser-agent: zCrawl-delay: zRequest-rate: r<   
)r2   r3   rI   rJ   requestssecondsextendre   rf   rE   rd   )r   retagentrates       r   rg   zEntry.__str__   s    _ 	/ 	/EJJ-e--....:!JJ3tz33444=$=DJJFFFFFGGG

3sDN++,,,yy~~r   c                     |                     d          d                                         }| j        D ]&}|dk    r dS |                                }||v r dS 'dS )z2check if this entry applies to the specified agentr<   r   r1   TF)rA   rC   r2   )r   rX   r}   s      r   rV   zEntry.applies_to   sp     OOC((+1133	_ 	 	E||ttKKMME	!!tt "ur   c                 V    | j         D ] }|                    |          r	|j        c S !dS )zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)rE   rV   rW   )r   rq   rM   s      r   rW   zEntry.allowance
  sA     N 	& 	&Dx(( &~%%%&tr   N)rh   ri   rj   rk   r   rg   rV   rW   rl   r   r   r>   r>      sV        II  
 
 
      r   r>   )rk   collectionsurllib.parser   urllib.request__all__
namedtupler   r   rF   r>   rl   r   r   <module>r      s   
 
            
$k$]4FGG~. ~. ~. ~. ~. ~. ~. ~.BN N N N N N N N$( ( ( ( ( ( ( ( ( (r   