
    3 d                     @    d Z ddlmZ ddlZ G d d          ZddZdS )z
Module for processing Sitemaps.

Note: The main purpose of this module is to provide support for the
SitemapSpider, its API is subject to change without notice.
    )urljoinNc                       e Zd ZdZd Zd ZdS )SitemapzTClass to parse Sitemap (type=urlset) and Sitemap Index
    (type=sitemapindex) filesc                    t           j                            ddd          }t           j                            ||          | _        | j        j        }d|v r&| j        j                            dd          d         n|| _        d S )NTF)recoverremove_commentsresolve_entities)parser}   )lxmletree	XMLParser
fromstring_roottagsplittype)selfxmltextxmlprts       4lib/python3.11/site-packages/scrapy/utils/sitemap.py__init__zSitemap.__init__   s~    z##$ $ 
 
 Z**74*@@
Z^7:byHDJN((a0033b			    c              #     K   | j                                         D ]}i }|                                D ]}|j        }d|v r|                    dd          d         n|}|dk    rFd|j        v r<|                    dg                               |                    d                     w|j        r|j        	                                nd||<   d|v r|V  d S )Nr   r   linkhref	alternate loc)
r   getchildrenr   r   attrib
setdefaultappendgettextstrip)r   elemdelr   names         r   __iter__zSitemap.__iter__   s      J**,, 	 	DA&&(( A Af/2czBsyya((++s6> A* M["55<<RVVF^^LLL13@bgmmooobAdGGz 	 	r   N)__name__
__module____qualname____doc__r   r-    r   r   r   r      s?        ! !I I I    r   r   c              #   *  K   |                                  D ]{}|                                                                                    d          r@|                    dd          d                                         }t          ||          V  |dS )zXReturn an iterator over all sitemap urls contained in the given
    robots.txt file
    zsitemap::r   N)
splitlineslstriplower
startswithr   r(   r   )robots_textbase_urllineurls       r   sitemap_urls_from_robotsr=   *   s       &&(( ) );;==  ++J77 	)**S!$$Q'--//C(C((((() )r   )N)r1   urllib.parser   
lxml.etreer   r   r=   r2   r   r   <module>r@      su     !                     :) ) ) ) ) )r   