
    Cd1.                        d dl mZ d dlZd dlZd dlmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ erenej        Zdd	d
dd
d
efdZddZ	 	 	 	 	 ddZdS )    )annotationsN)is_list_like	is_scalar)methods)PANDAS_GT_200)	DataFrameSeriesapply_concat_applymap_partitionshas_known_categories)Mget_meta_library_Fc                T    t           t          j        t          j        f          rt          j         f|||||||d|S d}	d}
t           t                    rBt          j                   st          |	          t                     st          |
          nt           t                    r|z j	        dk    
                                rt          |	           j	        dk    
                                rt          |	           j                            dg          j        }n*t           fd	|D                       st          |	          t           fd
|D                       st          |
          t          t!                     j         f|||||||d|S )a
  
    Convert categorical variable into dummy/indicator variables.

    Data must have category dtype to infer result's ``columns``.

    Parameters
    ----------
    data : Series, or DataFrame
        For Series, the dtype must be categorical.
        For DataFrame, at least one column must be categorical.
    prefix : string, list of strings, or dict of strings, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : string, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix.`
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `category` dtype will be converted.
    sparse : bool, default False
        Whether the dummy columns should be sparse or not.  Returns
        SparseDataFrame if `data` is a Series or if all columns are included.
        Otherwise returns a DataFrame with some SparseBlocks.

        .. versionadded:: 0.18.2

    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.

    dtype : dtype, default bool
        Data type for new columns. Only a single dtype is allowed.

        .. versionadded:: 0.18.2

    Returns
    -------
    dummies : DataFrame

    Examples
    --------
    Dask's version only works with Categorical data, as this is the only way to
    know the output shape without computing all the data.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> s = dd.from_pandas(pd.Series(list('abca')), npartitions=2)
    >>> dd.get_dummies(s)
    Traceback (most recent call last):
        ...
    NotImplementedError: `get_dummies` with non-categorical dtypes is not supported...

    With categorical data:

    >>> s = dd.from_pandas(pd.Series(list('abca'), dtype='category'), npartitions=2)
    >>> dd.get_dummies(s)  # doctest: +NORMALIZE_WHITESPACE
    Dask DataFrame Structure:
                       a      b      c
    npartitions=2
    0              uint8  uint8  uint8
    2                ...    ...    ...
    3                ...    ...    ...
    Dask Name: get_dummies, 2 graph layers
    >>> dd.get_dummies(s).compute()  # doctest: +ELLIPSIS
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0

    See Also
    --------
    pandas.get_dummies
    )prefix
prefix_sepdummy_nacolumnssparse
drop_firstdtypez`get_dummies` with non-categorical dtypes is not supported. Please use `df.categorize()` beforehand to convert to categorical dtype.z`get_dummies` with unknown categories is not supported. Please use `column.cat.as_known()` or `df.categorize()` beforehand to ensure known categoriesNobjectstringcategory)includec              3  L   K   | ]}t          j        |                   V  d S N)r   is_categorical_dtype.0cdatas     6lib/python3.11/site-packages/dask/dataframe/reshape.py	<genexpr>zget_dummies.<locals>.<genexpr>   s2      NNw3DG<<NNNNNN    c              3  B   K   | ]}t          |                   V  d S r   r   r    s     r$   r%   zget_dummies.<locals>.<genexpr>   s0      BBQ'Q00BBBBBBr&   )
isinstancepdr	   r   get_dummiesr   r   NotImplementedErrorr   dtypesany_metaselect_dtypesr   allr   r   )r#   r   r   r   r   r   r   r   kwargsnot_cat_msgunknown_cat_msgs   `          r$   r*   r*      s	   t $BL122 
~

!!

 

 

 

 
	
	( 	  $ 7+D11 	3%k222#D)) 	7%o666	7	D)	$	$ 7?x',,.. 7)+666x',,.. 7)+666j..
|.DDLGGNNNNgNNNNN 7)+666BBBB'BBBBB 	7%o666*     r&   meanc                   t          |          r|t          d          t          |          r|t          d          t          j        | |                   st          d          t	          | |                   st          d          t          |          rt          d |D                       st          |          st          d          g d}t          |          r||vr/t          d	d
                    d |D                       z             t          j	        | |         j
        j        |          }t          |          r|}n2t          j                            t          |          |fd|g          }|dv rt          |          r@t          j        || |         j        t          j        | j        |                             }nt          j        |t          j        | j        |                             }|D ]1}	||	                             | |         j        |	                   ||	<   2n>t          j        |t*          j        t          j        | j        |                             }|||d}
|dv r*t/          | gt          j        t          j        |d|
          }|dv r*t/          | gt          j        t          j        |d|
          }|dk    r|S |dk    r|S |dk    r||z  S |dk    r*t/          | gt          j        t          j        |d|
          S |dk    r*t/          | gt          j        t          j        |d|
          S t          )a  
    Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
    must have category dtype to infer result's ``columns``.
    ``index``, ``columns``, and ``aggfunc`` must be all scalar.
    ``values`` can be scalar or list-like.

    Parameters
    ----------
    df : DataFrame
    index : scalar
        column to be index
    columns : scalar
        column to be columns
    values : scalar or list(scalar)
        column(s) to aggregate
    aggfunc : {'mean', 'sum', 'count', 'first', 'last'}, default 'mean'

    Returns
    -------
    table : DataFrame

    See Also
    --------
    pandas.DataFrame.pivot_table
    Nz.'index' must be the name of an existing columnz0'columns' must be the name of an existing columnz 'columns' must be category dtypezs'columns' must have known categories. Please use `df[columns].cat.as_known()` beforehand to ensure known categoriesc                ,    g | ]}t          |          S  )r   )r!   vs     r$   
<listcomp>zpivot_table.<locals>.<listcomp>   s    ...!1...r&   z4'values' must refer to an existing column or columns)r4   sumcountfirstlastzaggfunc must be either z, c              3  "   K   | ]
}d | d V  dS )'Nr7   )r!   xs     r$   r%   zpivot_table.<locals>.<genexpr>   s*      1W1Wq(a(((1W1W1W1W1W1Wr&   )name)names)r<   r=   )r   r   index)r   rC   )rC   r   values)r:   r4   pivot_table_sum)chunk	aggregatemetatokenchunk_kwargs)r;   r4   pivot_table_countr:   r;   r4   r<   pivot_table_firstr=   pivot_table_last)r   
ValueErrorr   r   r   r   r0   joinr)   CategoricalIndexcat
categories
MultiIndexfrom_productsortedr   r   Indexr.   astyper,   npfloat64r
   	pivot_sum	pivot_aggpivot_countpivot_firstpivot_agg_first
pivot_lastpivot_agg_last)dfrC   r   rD   aggfuncavailable_aggfuncscolumns_contentsnew_columnsrH   	value_colr1   pv_sumpv_counts                r$   pivot_tableri      s   6 U Ku}IJJJW MKLLL'744 =;<<<7,, 

 
 	
 	VQ..v...//Q VQ
 OPPPBBBW 
0B!B!B%		1W1WDV1W1W1W(W(WW
 
 	
 *2g;?+EGTTT 
&m00F^^-.tWo 1 
 
 ###V 	W<#j&hrx//  DD <#hrx//  D $ W W	"&y/"8"8F9J99U"V"VYW |rz"(5/9R9R
 
 
 FCCF/!!#D#'#
 
 
 ###%D%'%
 
 
 %	G			F		  	G		!D%-%
 
 
 	
 
F		!D$,$
 
 
 	
 r&   valuec                    ddl m} t          j                            ddi          5  |                     t          j        ||||||d          cddd           S # 1 swxY w Y   dS )a  
    Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set.

    This function is useful to massage a DataFrame into a format where one or more columns are identifier variables
    (``id_vars``), while all other columns, considered measured variables (``value_vars``), are "unpivoted" to the row
    axis, leaving just two non-identifier columns, 'variable' and 'value'.

    Parameters
    ----------
    frame : DataFrame
    id_vars : tuple, list, or ndarray, optional
        Column(s) to use as identifier variables.
    value_vars : tuple, list, or ndarray, optional
        Column(s) to unpivot. If not specified, uses all columns that
        are not set as `id_vars`.
    var_name : scalar
        Name to use for the 'variable' column. If None it uses
        ``frame.columns.name`` or 'variable'.
    value_name : scalar, default 'value'
        Name to use for the 'value' column.
    col_level : int or string, optional
        If columns are a MultiIndex then use this level to melt.

    Returns
    -------
    DataFrame
        Unpivoted DataFrame.

    See Also
    --------
    pandas.DataFrame.melt
    r   )
no_defaultzdataframe.convert-stringFmelt)rH   id_vars
value_varsvar_name
value_name	col_levelrI   N)dask.dataframe.corerl   daskconfigsetr   r   rm   )framern   ro   rp   rq   rr   rl   s          r$   rm   rm   <  s    R /..... 
4e<	=	= 

 

##F!! $ 	
 	


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

s   'AA #A )NNNr4   )NNNrj   N)
__future__r   numpyrX   pandasr)   pandas.api.typesr   r   rt   dask.dataframer   dask.dataframe._compatr   rs   r   r	   r
   r   dask.dataframe.utilsr   
dask.utilsr   r   booluint8_get_dummies_dtype_defaultr*   ri   rm   r7   r&   r$   <module>r      sM   " " " " " "         4 4 4 4 4 4 4 4  " " " " " " 0 0 0 0 0 0 U U U U U U U U U U U U 5 5 5 5 5 5 * * * * * * * * &3@TT 
 
$R R R RtD D D D\ 6
 6
 6
 6
 6
 6
r&   