
    +gd                        d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	  e	e
          Z G d d	e j                  Z G d
 de          Z G d de          Z G d de          Z G d de          Zd&dee         defdZ G d de          Z G d de          Z G d de          Z G d de          Zdee         defdZd'd!ed"ed#efd$Zd% ZdS )(    N)sha256)Optional   )config   )
get_loggerc                       e Zd ZdZdZdZdZdS )VerificationModea  `Enum` that specifies which verification checks to run.

    The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
    when generating/downloading a dataset for the first time.

    The verification modes:

    |                           | Verification checks                                                           |
    |---------------------------|------------------------------------------------------------------------------ |
    | `ALL_CHECKS`              | Split checks, uniqueness of the keys yielded in case of the GeneratorBuilder  |
    |                           | and the validity (number of files, checksums, etc.) of downloaded files       |
    | `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
    | `NO_CHECKS`               | None                                                                          |

    
all_checksbasic_checks	no_checksN)__name__
__module____qualname____doc__
ALL_CHECKSBASIC_CHECKS	NO_CHECKS     9lib/python3.11/site-packages/datasets/utils/info_utils.pyr
   r
      s)           J!LIIIr   r
   c                       e Zd ZdZdS )ChecksumVerificationExceptionz>Exceptions during checksums verifications of downloaded files.Nr   r   r   r   r   r   r   r   r   #   s        HHHHr   r   c                       e Zd ZdZdS )UnexpectedDownloadedFilez(Some downloaded files were not expected.Nr   r   r   r   r   r   '   s        2222r   r   c                       e Zd ZdZdS )ExpectedMoreDownloadedFilesz7Some files were supposed to be downloaded but were not.Nr   r   r   r   r   r   +           AAAAr   r   c                       e Zd ZdZdS )NonMatchingChecksumErrorz?The downloaded file checksum don't match the expected checksum.Nr   r   r   r   r!   r!   /   s        IIIIr   r!   expected_checksumsrecorded_checksumsc                      t                               d           d S t          t                     t                    z
            dk    r9t	          t          t                     t                    z
                      t          t                    t                     z
            dk    r9t          t          t                    t                     z
                       fd D             }|d|z   nd}t          |          dk    rt          d| d| d          t                               d	|z              d S )
NzUnable to verify checksums.r   c                 8    g | ]}|         |         k    |S r   r   ).0urlr"   r#   s     r   
<listcomp>z$verify_checksums.<locals>.<listcomp>;   s0    hhh5G5LPbcfPg5g5g5g5g5gr   z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r!   )r"   r#   verification_namebad_urlsfor_verification_names   ``   r   verify_checksumsr2   3   sr   !1222
3!""S);%<%<<==AA)#c2D.E.EL^H_H_._*`*`aaa
3!""S);%<%<<==AA&s3/A+B+BSI[E\E\+\']']^^^hhhhh1hhhH;L;XG&777^`
8}}q&g%: g gg g g
 
 	

 KK8;PPQQQQQr   c                       e Zd ZdZdS )SplitsVerificationExceptionz%Exceptions during splis verificationsNr   r   r   r   r4   r4   F   s        ////r   r4   c                       e Zd ZdZdS )UnexpectedSplitsz6The expected splits of the downloaded file is missing.Nr   r   r   r   r6   r6   J   s        @@@@r   r6   c                       e Zd ZdZdS )ExpectedMoreSplitsz!Some recorded splits are missing.Nr   r   r   r   r8   r8   N   s        ++++r   r8   c                       e Zd ZdZdS )NonMatchingSplitsSizesErrorz7The splits sizes don't match the expected splits sizes.Nr   r   r   r   r:   r:   R   r   r   r:   expected_splitsrecorded_splitsc                      t                               d           d S t          t                     t                    z
            dk    r9t	          t          t                     t                    z
                      t          t                    t                     z
            dk    r9t          t          t                    t                     z
                       fd D             }t          |          dk    rt          t          |                    t                               d           d S )NzUnable to verify splits sizes.r   c                 j    g | ]/}|         j         |         j         k    |         |         d 0S ))expectedrecorded)num_examples)r&   namer;   r<   s     r   r(   z!verify_splits.<locals>.<listcomp>^   sP       4 -1F1SSS %T*8MNNSSSr   z$All the splits matched successfully.)r*   r+   r,   r-   r8   r.   r6   r:   )r;   r<   
bad_splitss   `` r   verify_splitsrD   V   s5   4555
3#o"6"6677!;; S%9%9C<P<P%P!Q!QRRR
3#o"6"6677!;;s3#7#7#o:N:N#NOOPPP    #  J
 :)#j//:::
KK677777r   Tpathrecord_checksumreturnc                 <   |rvt                      }t          | d          5 t          fdd          D ]}|                    |           |                                }ddd           n# 1 swxY w Y   nd}t
          j                            |           |dS )z7Compute the file size and the sha256 checksum of a filerbc                  .                          d          S )Ni   )read)fs   r   <lambda>z(get_size_checksum_dict.<locals>.<lambda>m   s    affWoo r   r   N)	num_byteschecksum)r   openiterupdate	hexdigestosrE   getsize)rE   rF   mchunkrO   rL   s        @r   get_size_checksum_dictrX   h   s     HH$ 	%5555s;;    {{}}H	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%
 ..HEEEs   A A..A25A2c                 B    | rt           j        r| t           j        k     S dS )zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

    Args:
        dataset_size (int): Dataset size in bytes.

    Returns:
        bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
    F)r   IN_MEMORY_MAX_SIZE)dataset_sizes    r   is_small_datasetr\   u   s)      1 f777ur   )N)T)enumrT   hashlibr   typingr   r)   r   loggingr   r   r*   Enumr
   	Exceptionr   r   r   r!   dictr2   r4   r6   r8   r:   rD   r.   boolrX   r\   r   r   r   <module>re      s    				                         
H		    ty   ,I I I I II I I I3 3 3 3 3< 3 3 3B B B B B"? B B BJ J J J J< J J JR R$ RT R R R R&0 0 0 0 0) 0 0 0A A A A A2 A A A, , , , ,4 , , ,B B B B B"= B B B88D> 8D 8 8 8 8$
F 
F 
Ft 
Ft 
F 
F 
F 
F    r   