
    +dN                     T    d dl mZ d dlmZmZ  G d dej        ej                  ZdS )   )base)reverse_formatsunique_stringc                   j     e Zd ZdZdZdZdZdZd fd	Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Z xZS )	CSVSourcezURead CSV files into dataframes

    Prototype of sources reading dataframe data

    csvz0.0.1	dataframeTNc                     || _         || _        || _        |pi | _        d| _        t          t          |                               |           dS )a  
        Parameters
        ----------
        urlpath : str or iterable, location of data
            May be a local path, or remote path if including a protocol specifier
            such as ``'s3://'``. May include glob wildcards or format pattern strings.
            Some examples:

            - ``{{ CATALOG_DIR }}data/precipitation.csv``
            - ``s3://data/*.csv``
            - ``s3://data/precipitation_{state}_{zip}.csv``
            - ``s3://data/{year}/{month}/{day}/precipitation.csv``
            - ``{{ CATALOG_DIR }}data/precipitation_{date:%Y-%m-%d}.csv``
        csv_kwargs : dict
            Any further arguments to pass to Dask's read_csv (such as block size)
            or to the `CSV parser <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_
            in pandas (such as which columns to use, encoding, data-types)
        storage_options : dict
            Any parameters that need to be passed to the remote data backend,
            such as credentials.
        path_as_pattern : bool or str, optional
            Whether to treat the path as a pattern (ie. ``data_{field}.csv``)
            and create new columns in the output corresponding to pattern
            fields. If str, is treated as pattern to match on. Default is True.
        N)metadata)path_as_patternurlpath_storage_options_csv_kwargs
_dataframesuperr   __init__)selfr   
csv_kwargsr   storage_optionsr   	__class__s         1lib/python3.11/site-packages/intake/source/csv.pyr   zCSVSource.__init__   sW    4  / /%+i''':::::    c                     ddl m | j        |         t          j        j                  }fdt          | j        |                                          D             } | j        j	        di || _        dS )z0Get a column of values for each field in pattern    )CategoricalDtypec           
          i | ]h\  }}|j         j                            t          t	          |                                                   t          |                              iS  )catcodesmapdict	enumerateastypeset).0fieldvaluesr   cols      r   
<dictcomp>z2CSVSource._set_pattern_columns.<locals>.<dictcomp>A   st     
 
 
huhmouE37=$$T)F*;*;%<%<==DDEUEUVYZ`VaVaEbEbcc
 
 
r   Nr   )
pandas.api.typesr   r   sortedr   
categoriesr   patternitemsassign)r   path_columnpathscolumn_by_fieldr   r(   s       @@r   _set_pattern_columnszCSVSource._set_pattern_columns:   s    555555ok*sw)**
 
 
 
 
 zI  JN  JV  X]  z^  z^  zd  zd  zf  zf
 
 
 1$/0CC?CCr   c                     | j                             d          }|t                      }|| j         d<   n!t          |t                    rd}|| j         d<   |S )zFSet ``include_path_column`` in csv_kwargs and returns path column nameinclude_path_columnNpath)r   getr   
isinstancebool)r   r0   s     r   _path_columnzCSVSource._path_columnF   sj    &**+@AA 	B (//K6AD233T** 	B K6AD23r   c                 `   ddl }| j        & |j        j        |fd| j        i| j        | _        dS d| j        v}|                                 } |j        j        |fd| j        i| j        | _        |                     |           |r$| j        	                    |gd          | _        dS dS )zAOpen dataset using dask and use pattern fields to set new columnsr   Nr   r5   r   )axis)
dask.dataframer-   r	   read_csvr   r   r   r:   r3   drop)r   r   daskdrop_path_columnr0   s        r   _open_datasetzCSVSource._open_datasetT   s    < 	5dn5gyytOdyhlhxyyDOF08HH''))1$.1'uu4K`udhdtuu 	!!+... 	J"o22K=q2IIDOOO	J 	Jr   c                 ^   |                      | j                  d         }| j        |                     |           | j        j        j                                        }d |                                D             }t          j	        |d t          |          f| j        j        i           S )Nr   c                 4    i | ]\  }}|t          |          S r   )str)r%   nts      r   r)   z)CSVSource._get_schema.<locals>.<dictcomp>n   s$    999A!SVV999r   )dtypeshapenpartitionsextra_metadata)
_get_cache_urlpathr   rB   _metadtypesto_dictr.   r   SchemalenrJ   )r   r   rO   s      r   _get_schemazCSVSource._get_schemag   s    //$-003? 	(w'''&-557799&,,..999{c&kk/BPTP_Pk|~r   c                     |                                   | j                            |                                          S N)rS   r   get_partitioncompute)r   is     r   _get_partitionzCSVSource._get_partitionq   s6    ,,Q//77999r   c                 \    |                                   | j                                        S rU   )rS   r   rW   r   s    r   readzCSVSource.readu   s(    &&(((r   c                 8    |                                   | j        S rU   )rS   r   r[   s    r   to_daskzCSVSource.to_dasky   s    r   c                 h    ddl m}  |ddddd| j        ffgi           }|                                S )Nr   )SparkHolderT)r\   )format)r   )option)headertrueload)intake_spark.baser`   r   setup)r   r`   hs      r   to_sparkzCSVSource.to_spark}   sN    111111Ky*>@^agjnjvix`yz|~wwyyr   c                     d | _         d S rU   )r   r[   s    r   _closezCSVSource._close   s    r   )NNNT)__name__
__module____qualname____doc__nameversion	containerpartition_accessr   r3   r:   rB   rS   rY   r\   r^   ri   rk   __classcell__)r   s   @r   r   r      s          DGI ;  ;  ;  ;  ;  ;D
D 
D 
D  J J J&@ @ @: : :) ) )          r   r   N) r   utilsr   r   
DataSourcePatternMixinr   r   r   r   <module>ry      sv          1 1 1 1 1 1 1 1x x x x x!2 x x x x xr   