
    +gdTR                        d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"  e e#          Z$ G d de%          Z&d'dZ' ed          d'd            Z(d(de)de)de
e         fdZ* ed          d(de)de)de
e         fd            Z+	 	 	 	 	 d)de)de
eeee)f                  de
e         de
eee)f                  de
ee)e"f                  de
ee,e)f                  fd Z-	 	 	 	 	 d)de)de
ee)e"f                  de
e         de
eee)f                  d!e
e)         de
eeee)f                  fd"Z.	 	 	 	 	 	 d*de)d#e
e)         de
ee)ee)         e	e)ee)ee)         f         f         f                  de
e         de
eee)f                  de
ee)e"f                  de
ee,e)f                  d$efd%Z/	 	 	 	 	 	 d*de)d#e
e)         de
ee)ee)         e	e)ee)ee)         f         f         f                  de
e         de
eee)f                  de
ee)e"f                  de
ee,e)f                  fd&Z0dS )+z List and inspect datasets.    N)PurePath)DictListMappingOptionalSequenceUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryimport_main_classload_dataset_buildermetric_module_factory)
deprecated)relative_to_absolute_path)
get_logger)Versionc                       e Zd ZdS )SplitsNotFoundErrorN)__name__
__module____qualname__     0lib/python3.11/site-packages/datasets/inspect.pyr   r   (   s        Dr   r   TFc                 h    t          j        |          }| sd |D             }|sd |D             }|S )a]  List all the datasets scripts available on the Hugging Face Hub.

    Args:
        with_community_datasets (`bool`, *optional*, defaults to `True`):
            Include the community provided datasets.
        with_details (`bool`, *optional*, defaults to `False`):
            Return the full details on the datasets instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_datasets
    >>> list_datasets()
    ['acronym_identification',
     'ade_corpus_v2',
     'adversarial_qa',
     'aeslc',
     'afrikaans_ner_corpus',
     'ag_news',
     ...
    ]
    ```
    )fullc                 $    g | ]}d |j         v|S /id.0datasets     r   
<listcomp>z!list_datasets.<locals>.<listcomp>F   s$    MMMs'*7L7LG7L7L7Lr   c                     g | ]	}|j         
S r   r$   r&   s     r   r)   z!list_datasets.<locals>.<listcomp>H   s    7777GJ777r   )huggingface_hublist_datasets)with_community_datasetswith_detailsdatasetss      r   r,   r,   ,   sS    0 ,,???H" NMM8MMM 877h777Or   ux   Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                 d    t          j                    }| sd |D             }|sd |D             }|S )u  List all the metrics script available on the Hugging Face Hub.

    <Deprecated version="2.5.0">

    Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
        with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_metrics
    >>> list_metrics()
    ['accuracy',
     'bertscore',
     'bleu',
     'bleurt',
     'cer',
     'chrf',
     ...
    ]
    ```
    c                 $    g | ]}d |j         v|S r"   r$   r'   metrics     r   r)   z list_metrics.<locals>.<listcomp>m   s$    HHHf3fi3G3G63G3G3Gr   c                     g | ]	}|j         
S r   r$   r2   s     r   r)   z list_metrics.<locals>.<listcomp>o   s    33369333r   )r+   list_metrics)with_community_metricsr.   metricss      r   r5   r5   L   sN    > *,,G! IHHHHH 4337333Nr   path
local_pathdownload_configc                     t          | fd|i|}t          |j        d          }t          j        |          }t
          j                            |          }t          j        |          D ]\  }}	}
t
          j        	                    |t
          j        
                    ||                    }t          j        |d           d |	D             |	dd<   |
D ]S}t          j        t
          j        	                    ||          t
          j        	                    ||                     Tt          j        ||           t          |          }t!          d|  d| d	| d
t#          |                                           d	           dS )a  
    Allow inspection/modification of a dataset script by copying on local drive at local_path.

    Args:
        path (`str`): Path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name
                as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`.
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`.
        local_path (`str`):
            Path to the local folder to copy the dataset script to.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        **download_kwargs (additional keyword arguments):
            Optional arguments for [`DownloadConfig`] which will override
            the attributes of `download_config` if supplied.
    r:   Tr(   exist_okc                 <    g | ]}|                     d           |S ).__
startswithr'   dirnames     r   r)   z#inspect_dataset.<locals>.<listcomp>   ,    ^^^7g>P>PQ\>]>]^w^^^r   Nz"The processing script for dataset  can be inspected at . The main class is in zP. You can modify this processing script and use it with `datasets.load_dataset("")`.)r   r   module_pathinspectgetsourcefileosr8   rF   walkjoinrelpathmakedirsshutilcopy2copystatr   printr   as_posix)r8   r9   r:   download_kwargsdataset_modulebuilder_clsmodule_source_pathmodule_source_dirpathdirpathdirnames	filenamesdst_dirpathfilenames                r   inspect_datasetrb   s   s   ( ,Dee/eUdeeN#N$>MMMK .{;;GOO,>??(*0E(F(F . .$9gll:rwwH]/^/^__
K$//// _^h^^^! 	_ 	_HLgx88"',,{T\:]:]^^^^----*:66J		T 	 	
 	 	 5	 	YablYmYmYvYvYxYx	 	 	    r   uz   Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                     t          | fd|i|}t          |j        d          }t          j        |          }t
          j                            |          }t          j        |          D ]\  }}	}
t
          j        	                    |t
          j        
                    ||                    }t          j        |d           d |	D             |	dd<   |
D ]S}t          j        t
          j        	                    ||          t
          j        	                    ||                     Tt          j        ||           t          |          }t!          d|  d	| d
| dt#          |                                           d	           dS )u  
    Allow inspection/modification of a metric script by copying it on local drive at local_path.

    <Deprecated version="2.5.0">

    Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        local_path (``str``): path to the local folder to copy the datset script to.
        download_config (Optional ``datasets.DownloadConfig``): specific download configuration parameters.
        **download_kwargs (additional keyword arguments): optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.
    r:   Fr<   Tr=   c                 <    g | ]}|                     d           |S r@   rC   rE   s     r   r)   z"inspect_metric.<locals>.<listcomp>   rG   r   Nz"The processing scripts for metric rH   rI   zP. You can modify this processing scripts and use it with `datasets.load_metric("rJ   )r   r   rK   rL   rM   rN   r8   rF   rO   rP   rQ   rR   rS   rT   rU   r   rV   r   rW   )r8   r9   r:   rX   metric_modulerZ   r[   r\   r]   r^   r_   r`   ra   s                r   inspect_metricrf      s   0 *$cccSbccM#M$=uMMMK .{;;GOO,>??(*0E(F(F . .$9gll:rwwH]/^/^__
K$////^^h^^^! 	_ 	_HLgx88"',,{T\:]:]^^^^----*:66J		T 	 	
 	 	 5	 	YablYmYmYvYvYxYx	 	 	    r   
data_filesdownload_moderevisionuse_auth_tokenc                 b     t                     } fd|D             S )a  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    )r8   ri   r:   rh   rg   rj   c                 :    i | ]}|t          d|d S ))r8   config_namerg   r:   rh   ri   rj   r   )get_dataset_config_info)	r'   rm   config_kwargsrg   r:   rh   r8   ri   rj   s	     r   
<dictcomp>z%get_dataset_infos.<locals>.<dictcomp>   sa         	, 	
#!+')	
 	
 	
 	
  r   )get_dataset_config_names)r8   rg   r:   rh   ri   rj   ro   config_namess   ``````` r   get_dataset_infosrs      s}    X ,'#%  L          (   r   dynamic_modules_pathc           	          t          | f|||||d|}t          |j                  }t          |j                                                  p|j                            dd          gS )a  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `use_auth_token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    )ri   r:   rh   rt   rg   rm   default)r   r   rK   listbuilder_configskeysbuilder_kwargsget)	r8   ri   r:   rh   rt   rg   rX   rY   rZ   s	            r   rq   rq     s    p ,'#1   N $N$>??K+002233t8U8Y8YZgir8s8s7ttr   rm   returnc           
          t           f||||||d|}|j        }	|	j        |r|                                nt	                      }|||_        |                    t          |j        |                     	  fd|	                    t          |j        |                    D             |	_        n"# t          $ r}
t          d          |
d}
~
ww xY w|	S )a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    )namerg   r:   rh   ri   rj   N)	base_pathr:   c                 0    i | ]}|j         |j         d S ))r~   dataset_name)r~   )r'   split_generatorr8   s     r   rp   z+get_dataset_config_info.<locals>.<dictcomp>  s:       #  $/CUY&Z&Z  r   z<The split names could not be parsed from the dataset config.)r   infosplitscopyr   rj   _check_manual_downloadr   r   _split_generators	Exceptionr   )r8   rm   rg   r:   rh   ri   rj   ro   builderr   errs   `          r   rn   rn   P  s;   > #	'#%	 	 	 	G <D{4CY/..000IYIY%-;O*&&$w/@Rabbb	
 	
 	
	o   '.'@'@,w7HZijjj( (  DKK  	o 	o 	o%&deeknn	oKs   ;:B6 6
C CCc           
      v    t          | f||||||d|}t          |j                                                  S )a  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    )rm   rg   r:   rh   ri   rj   )rn   rw   r   ry   )	r8   rm   rg   r:   rh   ri   rj   ro   r   s	            r   get_dataset_split_namesr     s[    Z #	'#%	 	 	 	D   ""###r   )TF)N)NNNNN)NNNNNN)1__doc__rL   rN   rS   pathlibr   typingr   r   r   r   r   r	   r+   download.download_configr   download.download_managerr   #download.streaming_download_managerr   r   r   loadr   r   r   r   utils.deprecation_utilsr   utils.file_utilsr   utils.loggingr   utils.versionr   r   logger
ValueErrorr   r,   r5   strrb   rf   boolrs   rq   rn   r   r   r   r   <module>r      sJ    " !  				        A A A A A A A A A A A A A A A A     4 4 4 4 4 4 3 3 3 3 3 3 I I I I I I       h h h h h h h h h h h h / / / / / / 7 7 7 7 7 7 % % % % % % " " " " " " 
H			 	 	 	 	* 	 	 	   @ ~ ! ! ! !H& &# &3 &.AY & & & &R  A & & &# &@X & & & &V 48048<.215@ @
@tT3/0@ n-@ E,"345	@
 uS'\*+@ U49-.@ @ @ @J /3048<*.37Bu Bu
BuuS'\*+Bu n-Bu E,"345	Bu
 #3-Bu tT3/0Bu Bu Bu BuN "&_c048<.215: :
:#: sHSM73c8TW=FX@Y;Y3ZZ[\: n-	:
 E,"345: uS'\*+: U49-.: : : : :~ "&_c048<.2157$ 7$
7$#7$ sHSM73c8TW=FX@Y;Y3ZZ[\7$ n-	7$
 E,"3457$ uS'\*+7$ U49-.7$ 7$ 7$ 7$ 7$ 7$r   