from __future__ import annotations

import logging
import sys
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING

from scrapy.utils.python import to_unicode

if TYPE_CHECKING:
    from typing_extensions import Self

    from scrapy import Spider
    from scrapy.crawler import Crawler

logger = logging.getLogger(__name__)


def decode_robotstxt(
    robotstxt_body: bytes, spider: Spider | None, to_native_str_type: bool = False
) -> str:
    try:
        if to_native_str_type:
            body_decoded = to_unicode(robotstxt_body)
        else:
            body_decoded = robotstxt_body.decode("utf-8", errors="ignore")
    except UnicodeDecodeError:
        # The file contains garbage or is in an encoding other than UTF-8:
        # log the failure and fall back to an empty (allow-all) robots.txt.
        logger.warning(
            "Failure while parsing robots.txt. File either contains garbage or "
            "is in an encoding other than UTF-8, treating it as an empty file.",
            exc_info=sys.exc_info(),
            extra={"spider": spider},
        )
        body_decoded = ""
    return body_decoded


class RobotParser(metaclass=ABCMeta):
    @classmethod
    @abstractmethod
    def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
        """Parse the content of a robots.txt_ file as bytes. This must be a class method.
        It must return a new instance of the parser backend.

        :param crawler: crawler which made the request
        :type crawler: :class:`~scrapy.crawler.Crawler` instance

        :param robotstxt_body: content of a robots.txt_ file.
        :type robotstxt_body: bytes
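
        Usage sketch, with ``crawler`` standing in for any
        :class:`~scrapy.crawler.Crawler` instance and ``ProtegoRobotParser``
        being one of the concrete backends defined below::

            parser = ProtegoRobotParser.from_crawler(
                crawler, b"User-agent: *\nDisallow: /private/\n"
            )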
        """

    @abstractmethod
    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        """Return ``True`` if ``user_agent`` is allowed to crawl ``url``, otherwise return ``False``.

        :param url: Absolute URL
        :type url: str or bytes

        :param user_agent: User agent
        :type user_agent: str or bytes
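
        For example, with a parser built as in the :meth:`from_crawler`
        sketch above::

            parser.allowed("https://example.com/private/page", "mybot")  # False
            parser.allowed("https://example.com/index.html", "mybot")  # True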
        """


class PythonRobotParser(RobotParser):
    def __init__(self, robotstxt_body: bytes, spider: Spider | None):
        from urllib.robotparser import RobotFileParser

        self.spider = spider
        # RobotFileParser works on text, so decode to a native str first.
        body_decoded = decode_robotstxt(
            robotstxt_body, spider, to_native_str_type=True
        )
        self.rp = RobotFileParser()
        self.rp.parse(body_decoded.splitlines())

    @classmethod
    def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
        spider = None if not crawler else crawler.spider
        o = cls(robotstxt_body, spider)
        return o

    def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
        user_agent = to_unicode(user_agent)
        url = to_unicode(url)
        return self.rp.can_fetch(user_agent, url)
2r#   r6   c                  .    e Zd ZddZedd       ZddZy)RerpRobotParserc                    ddl m} || _         |       | _        t	        ||      }| j                  j                  |       y )Nr   )RobotExclusionRulesParser)robotexclusionrulesparserrM   r   r:   r   r;   )r&   r   r   rM   r   s        r   r=   zRerpRobotParser.__init__\   s2    G%+-F-H'?l#r#   c                8    |sd n|j                   } | ||      }|S r?   r@   rA   s        r   r"   zRerpRobotParser.from_crawlerd   rC   r#   c                f    t        |      }t        |      }| j                  j                  ||      S r?   )r   r:   
is_allowedr%   s      r   r)   zRerpRobotParser.allowedj   s,    
+
oww!!*c22r#   NrG   r*   r-   rI   r   r#   r   rK   rK   [   s     $  
3r#   rK   c                  .    e Zd ZddZedd       ZddZy)ProtegoRobotParserc                b    ddl m} || _        t        ||      }|j	                  |      | _        y )Nr   )Protego)protegorU   r   r   r;   r:   )r&   r   r   rU   r   s        r   r=   zProtegoRobotParser.__init__q   s(    #%+'?---r#   c                8    |sd n|j                   } | ||      }|S r?   r@   rA   s        r   r"   zProtegoRobotParser.from_crawlerx   rC   r#   c                f    t        |      }t        |      }| j                  j                  ||      S r?   rE   r%   s      r   r)   zProtegoRobotParser.allowed~   s,    
+
oww  j11r#   NrG   r*   r-   rI   r   r#   r   rS   rS   p   s     .  
2r#   rS   )F)r   r+   r   rH   r   r/   r,   str)
__future__r   loggingr   abcr   r   typingr   scrapy.utils.pythonr   typing_extensionsr   scrapyr	   scrapy.crawlerr
   	getLoggerr0   r   r   r   r6   rK   rS   r   r#   r   <module>rc      s    "  
 '   *&& 
		8	$ NS#0FJ*G 82 2*3k 3*2 2r#   