
    +gds1                        d dl Z d dlmZ d dlmZmZmZ d dlZ	 d dlm	Z
 n# e$ r d dlZ
Y nw xY wddlm	Z	 dZe de Z e j        e          Zded	eeef         fd
Z ed          \  ZZg dZeeee         ee         f         Z G d d          Z G d de          Zedk    rbd dlmZ  ed          Ze                     d           e!                                Z" ee"j#                  Z#e$                    e#          Z%dS dS )    N)Path)AnyListTuple   )	resourceszDhttps://github.com/huggingface/datasets/tree/main/src/datasets/utils/resourcereturnc                 v    t          j        t          |           }t          j        |          t
           d|  fS )Nz/resources/)pkg_resources	read_textr   yaml	safe_loadBASE_REF_URL)r
   contents     5lib/python3.11/site-packages/datasets/utils/readme.pyload_yaml_resourcer      s7    %i::G>'""|$J$J$J$JJJ    zreadme_structure.yaml)z[Needs More Information]z[More Information Needed]zj(https://github.com/huggingface/datasets/blob/main/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards)c            	       ^    e Zd Zddededee         defdZddefdZd	ed
e	fdZ
d
efdZdS )SectionNFnamelevellinessuppress_parsing_errorsc                     || _         || _        || _        d| _        d| _        i | _        g | _        g | _        | j        |                     |           d S d S )N Tr   )	r   r   r   textis_empty_textr   parsing_error_listparsing_warning_listparse)selfr   r   r   r   s        r   __init__zSection.__init__)   sh    	

	!"$$&!:!JJ/FJGGGGG "!r   c                    d}g }d}| j         D ]J}|                    d          dk    r|                    d          d d         dk    r| }B|                                d         | j        dz   k    r|s|dk    r$t	          || j        dz   |          | j        |<   g }n]|g k    rW| xj        d                    |                                          z  c_        | j        dk    r| j        t          vrd| _	        g }d                    |                                d	d                                        d          }5|
                    |           L|dk    rI|| j        v r| j        
                    d
| d           t	          || j        dz   |          | j        |<   n[|g k    rU| xj        d                    |                                          z  c_        | j        dk    r| j        t          vrd| _	        | j        dk    rc|sc| j        g k    s| j        g k    rOd                    d | j        | j        z   D                       x}}d| j         d|z   }t          |          d S d S d S )Nr   F 
   z```r   # r   z)Multiple sections with the same heading `z:` have been found. Please keep only one of these sections.
c              3       K   | ]	}d |z   V  
dS )-	N .0xs     r   	<genexpr>z Section.parse.<locals>.<genexpr>[   s&      +s+s!EAI+s+s+s+s+s+sr   z=The following issues were found while parsing the README at ``:
)r   stripsplitr   r   r   r   joinFILLER_TEXTr    appendr!   r"   r   
ValueError)r$   r   current_sub_levelcurrent_lines
code_startlineerrorserror_strings           r   r#   zSection.parse5   s   
J 	3 	3Dzz%  B&&E""2A2&%//!+^

aDJ$444Z4$**6=>OQUQ[^aQacp6q6qDL!23$&MM$**		RWW]%;%;%A%A%C%CC		9??ty/K/K16D.(*$'HHTZZ\\!""-=$>$>$D$DU$K$K!!$$T**** B&&$44+22 RDU  R  R  R   3::KTZZ]M]_l2m2m.// B&&II!7!7!=!=!?!??IIyB49K+G+G-2*:$;&",,0IR0O0O"&))+s+st?VY]Yr?r+s+s+s"s"ssn_c_hnnnqww ...	 0O0Or   	structurer   c           	      |   g }g }|d         du r0| j         r)| j        i k    r|                    d| j         d           |d         du r%| j         r|                    d| j         d           |d         )| j        i k    rQd
 |d         D             }|                    d| j         dd                    d |D                        d           nd |d         D             }d}t          |          D ]x\  }}|| j        vr$|                    d| j         d| d           d}2| j        dk    r>| j        |                             |d         |                   \  }	}
}||
z  }||z  }y|r/| j        D ]'}||vr!|                    d| j         d| d           (|ri ||fS |                                 ||fS )aC  Validates a Section class object recursively using the structure provided as a dictionary.

        Args:
            structute (:obj: `dict`): The dictionary representing expected structure.

        Returns:
            :obj: `ReadmeValidatorOutput`: The dictionary representation of the section, and the errors.
        allow_emptyFz"Expected some content in section `z` but it is empty.allow_empty_textzExpected some text in section `z4` but it is empty (text in subsections are ignored).subsectionsNc                     g | ]
}|d          S r   r.   r0   
subsections     r   
<listcomp>z$Section.validate.<locals>.<listcomp>}   s    XXX*V,XXXr   z	Section `z&` expected the following subsections: , c                     g | ]
}d |z   d z   S `r.   r/   s     r   rI   z$Section.validate.<locals>.<listcomp>   s&    [v[v[vjk\_`a\abe\e[v[v[vr   z. Found 'None'.c                     g | ]
}|d          S rF   r.   rG   s     r   rI   z$Section.validate.<locals>.<listcomp>   s    "a"a"a*:f#5"a"a"ar   z` is missing subsection: `z`.Tz###rM   z` has an extra subsection: `z[`. Skipping further validation checks for this subsection as expected structure is unknown.)	r    r   r8   r   r6   	enumerater   validateto_dict)r$   r@   
error_listwarning_listvaluesstructure_nameshas_missing_subsectionsidxr   _subsec_error_listsubsec_warning_lists               r   rP   zSection.validate_   s    
]#u,,! fdlb&8&8!!"dty"d"d"deee'(E11! !!udiuuu   ]#/|r!!XXy?WXXX!! I	  I  IQUQZQZ[v[vou[v[v[vQwQw  I  I  I   
 #b"a	R_H`"a"a"a*/'!*?!;!; < <IC4<//"))*cdi*c*c[_*c*c*cddd26//  :..$HLUYHZHcHc )- 8 =I IEA02E #&77
$(;;*  $  66(// !mDI  !m  !m4  !m  !m  !m    	<z<//<<>>:|;;r   c                 p    | j         | j        | j        d | j                                        D             dS )z3Returns the dictionary representation of a section.c                 6    g | ]}|                                 S r.   )rQ   )r0   values     r   rI   z#Section.to_dict.<locals>.<listcomp>   s     OOOEMMOOOOOr   )r   r   r    rD   )r   r   r    r   rT   r$   s    r   rQ   zSection.to_dict   sB     II!/OO9L9L9N9NOOO	
 
 	
r   NFF)__name__
__module____qualname__strr   boolr%   r#   dictReadmeValidatorOutputrP   rQ   r.   r   r   r   r   (   s        
H 
HS 
H 
HT#Y 
H`d 
H 
H 
H 
H(/ (/T (/ (/ (/ (/TE<$ E<+@ E< E< E< E<N
 
 
 
 
 
 
r   r   c            
            e Zd Zddedee         dedef fdZd Ze	dd	e
dedefd
            Ze		 ddedededefd            Zddef fdZd Zd Z xZS )ReadMeNFr   r   r@   r   c                     t                                          |d           || _        d| _        d| _        || _        | j        |                     |           d S d S )Nr   )r   r   r   r   )superr%   r@   yaml_tags_line_count	tag_countr   r#   )r$   r   r   r@   r   	__class__s        r   r%   zReadMe.__init__   sh    d"---"$&!
:!JJ/FJGGGGG "!r   c                 $   | j         |                     t                    \  }}}n|                     | j                   \  }}}|g k    s|g k    r?d                    d ||z   D                       }d| j         d|z   }t          |          d S )Nr+   c                     g | ]}d |z   S )r-   r.   r/   s     r   rI   z#ReadMe.validate.<locals>.<listcomp>   s    MMMa	MMMr   z3The following issues were found for the README at `r3   )r@   	_validatereadme_structurer6   r   r9   )r$   r   rR   rS   r>   r?   s         r   rP   zReadMe.validate   s    >!04?O0P0P-GZ04t~0N0N-GZ|r11YYMM:3LMMMNNF`QUQZ```ciiL\***  21r   pathc                     t          |d          5 }|                                }d d d            n# 1 swxY w Y    | ||||          S )Nzutf-8)encodingr   )open	readlines)clsrt   r@   r   fr   s         r   from_readmezReadMe.from_readme   s    $))) 	"QKKMME	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	" 	"s4	CZ[[[[s   377rootstring	root_namec                 J    |                     d          } | ||||          S )Nr+   r   )r5   )ry   r}   r@   r~   r   r   s         r   from_stringzReadMe.from_string   s/     T""s9eYH_````r   c                 r   d}| j         D ]M}| xj        dz  c_        |                    d          dk    r| xj        dz  c_        | j        dk    r n|dz  }N| j        dk    r| j         |dz   d          | _         n| j         | j        d          | _         t	                                          |           d S )Nr   r   r'   z---   r   )r   rm   r4   rn   rl   r#   )r$   r   
line_countr=   ro   s       r   r#   zReadMe.parse   s    
J 	 	D%%*%%zz%  E))!#>Q&&E!OJJ>QJN$4$45DJJDN$4$45DJ.EFFFFFr   c                 D    t          |                                           S )z>Returns the string of dictionary representation of the ReadMe.)rd   rQ   r^   s    r   __str__zReadMe.__str__   s    4<<>>"""r   c           
      r   g }g }| j         dk    r|                    d           nA| j        dk    r|                    d           n | j        dk    r|                    d           t          | j                                                  }|dk    r[|                    dd                    d t          | j                                                  D                        d	           n|dk     r|                    d
           nt          | j                                                  d         }|                    d          r;| j        |         	                    |d         d                   \  }}}||z  }||z  }n|                    d           |ri ||fS | 
                                ||fS )Nr   z-Empty YAML markers are present in the README.z*No YAML markers are present in the README.r   z2Only the start of YAML tags present in the README.z-The README has several first-level headings: rJ   c                     g | ]
}d |z   d z   S rL   r.   r/   s     r   rI   z$ReadMe._validate.<locals>.<listcomp>   s"    JxJxJxYZ3q5QT9JxJxJxr   zL. Only one heading is expected. Skipping further validation for this README.zmThe README has no first-level headings. One heading is expected. Skipping further validation for this README.zDataset Card forrD   zuNo first-level heading starting with `Dataset Card for` found in README. Skipping further validation for this README.)rm   r8   rn   lenr   keysr6   list
startswithrP   rQ   )	r$   rs   rR   rS   num_first_level_keys	start_keyrX   sec_error_listsec_warning_lists	            r   rr   zReadMe._validate   s   
$)) OPPPP^q   LMMMM^q   TUUU"4<#4#4#6#677!## H		JxJx^bcgcoctctcvcv^w^wJxJxJx@y@y  H  H  H    "A%%    T\..0011!4I##$677 6:l96M6V6V$]3A67 73>#3 n,
 00 !! L    	<z<//<<>>:|;;r   r_   )Nr|   Fr`   )ra   rb   rc   rd   r   rf   re   r%   rP   classmethodr   r{   r   r#   r   rr   __classcell__)ro   s   @r   ri   ri      sa       H HS Hc Ht Hei H H H H H H+ + + \ \t \ \VZ \ \ \ [\
 kpa aa%)a=@adha a a [aG GT G G G G G G"# # #)< )< )< )< )< )< )<r   ri   __main__)ArgumentParserz?Validate the content (excluding YAML tags) of a README.md file.)usagereadme_filepath)&loggingpathlibr   typingr   r   r   r   importlib.resourcesr   r   ImportErrorimportlib_resourcesr   r   __file__this_url	getLoggerra   loggerrd   r   rs   known_readme_structure_urlr7   rf   rg   r   ri   argparser   apadd_argument
parse_argsargsr   r{   readmer.   r   r   <module>r      s.          # # # # # # # # # # 0/////// 0 0 0//////0       V''X''		8	$	$K KsCx K K K K
 0B/ABY/Z/Z , ,   dDItCy89 E
 E
 E
 E
 E
 E
 E
 E
P_< _< _< _< _<W _< _< _<D z''''''	_	`	`	`BOO%&&&==??Dd4/00O00FFF s   ! 	--