
    {bgR                       d Z ddlmZ ddlZddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZmZmZmZ ddlZddlZddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6 g dZ7dZ8 ed       G d d             Z9 G d de      Z: G d de:      Z;e G d d e:             Z<e G d! d"e:             Z= ed#       G d$ d%e;             Z>d(d&Z?	 d)	 	 	 d*d'Z@y)+z
This module provides Grouper objects that encapsulate the
"factorization" process - conversion of value we are grouping by
to integer codes (one per group).
    )annotationsN)ABCabstractmethod)	dataclassfield)pairwise)TYPE_CHECKINGAnyLiteralcast)	ArrayLike)BaseCFTimeOffset_new_to_legacy_freq)duck_array_opsapply_ufunc)Coordinates_coordinates_from_variable)	DataArray)isnull)T_Group_DummyGroup)safe_cast_to_indexCFTimeGrouper)BinsDatetimeLikeGroupIndicesResampleCompatibleSelfSideOptions)Variable)is_chunked_array)
BinGrouperEncodedGroupsGrouper	ResamplerTimeResamplerUniqueGrouper__resample_dim__F)initc                  f    e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   	 	 	 d	 	 	 	 	 	 	 	 	 ddZy)r%   a>  
    Dataclass for storing intermediate values for GroupBy operation.
    Returned by the ``factorize`` method on Grouper objects.

    Attributes
    ----------
    codes : DataArray
        Same shape as the DataArray to group by. Values consist of a unique integer code for each group.
    full_index : pd.Index
        Pandas Index for the group coordinate containing unique group labels.
        This can differ from ``unique_coord`` in the case of resampling and binning,
        where certain groups in the output need not be present in the input.
    group_indices : tuple of int or slice or list of int, optional
        List of indices of array elements belonging to each group. Inferred if not provided.
    unique_coord : Variable, optional
        Unique group values present in dataset. Inferred if not provided
    r   codespd.Index
full_indexr   group_indiceszVariable | _DummyGroupunique_coordr   coordsNc           	        ddl m} t        |t              sJ |j                  t        d      || _        t        |t        j                        sJ || _	        |et        |j                        s@t        d  ||j                  j                         t        |            D              | _        nt               | _        n|| _        |@|t!        j"                  |         }t%        |j                  ||j&                        | _        n|| _        |7t        | j(                  t*              rJ t-        | j(                        | _        y || _        y )Nr   )_codes_to_group_indicesz3Please set a name on the array you are grouping by.c              3  $   K   | ]  }|r| 
 y wN ).0gs     /lib/python3.12/site-packages/xarray/groupers.py	<genexpr>z)EncodedGroups.__init__.<locals>.<genexpr>`   s!      + 	 s   dimsdataattrs)xarray.core.groupbyr4   
isinstancer   name
ValueErrorr-   pdIndexr/   r#   r>   tupleravellenr0   npuniquer"   r?   r1   r   r   r2   )selfr-   r/   r0   r1   r2   r4   unique_valuess           r:   __init__zEncodedGroups.__init__M   s    	@%+++::RSS
*bhh///$ #EJJ/%* +4

((*C
O+ &" &+W"!.D&ryy'78M (ZZm5;;!D !-D>!$"3"3[AAA4T5F5FGDK DK    )NNN)
r-   r   r/   r.   r0   zGroupIndices | Noner1   zVariable | _DummyGroup | Noner2   zCoordinates | None)__name__
__module____qualname____doc____annotations__rM   r7   rN   r:   r%   r%   3   sg    $ (( .26:%),!,! ,! +	,!
 4,! #,!rN   r%   c                  4    e Zd ZdZedd       Zedd       Zy)r&   zUAbstract base class for Grouper objects that allow specializing GroupBy instructions.c                     y)z
        Creates intermediates necessary for GroupBy.

        Parameters
        ----------
        group : DataArray
            DataArray we are grouping by.

        Returns
        -------
        EncodedGroups
        Nr7   )rK   groups     r:   	factorizezGrouper.factorize   s     	rN   c                     y)zL
        Creates a new version of this Grouper clearing any caches.
        Nr7   rK   s    r:   resetzGrouper.reset   s    
 	rN   NrV   r   returnr%   r\   r    )rO   rP   rQ   rR   r   rW   rZ   r7   rN   r:   r&   r&   |   s+    _   rN   r&   c                      e Zd ZdZy)r'   z
    Abstract base class for Grouper objects that allow specializing resampling-type GroupBy instructions.

    Currently only used for TimeResampler, but could be used for SpaceResampler in the future.
    N)rO   rP   rQ   rR   r7   rN   r:   r'   r'      s     	rN   r'   c                      e Zd ZU dZ edd      Zded<    ed      Zded	<   edd
       Z	ddZ
ddZddZddZddZy)r)   a  
    Grouper object for grouping by a categorical variable.

    Parameters
    ----------
    labels: array-like, optional
        Group labels to aggregate on. This is required when grouping by a chunked array type
        (e.g. dask or cubed) since it is used to construct the coordinate on the output.
        Grouped operations will only be run on the specified group labels. Any group that is not
        present in ``labels`` will be ignored.
    NF)defaultreprzpd.Index | None_group_as_indexr`   zArrayLike | Nonelabelsc                D   | j                   | j                  j                  dk(  r+| j                  j                         | _         | j                   S t	        j
                  t        j                  | j                        j                               | _         | j                   S )z-Caches the group DataArray as a pandas Index.   )	rb   rV   ndimto_indexrD   rE   rI   arrayrG   rY   s    r:   group_as_indexzUniqueGrouper.group_as_index   sx     'zz!#'+zz':':'<$ ### (*xx0D0J0J0L'M$###rN   c                "     t        |              S r6   )typerY   s    r:   rZ   zUniqueGrouper.reset   s    tDz|rN   c                   || _         t        |j                        r| j                  t	        d      | j                  | j                  |      S | j                  }t        | j                   t              xs( |j                  xr |j                  xs |j                  }| j                   j                  | j                   j                  fk(  }|xr |}|r| j                         S | j                         S )NzTWhen grouping by a dask array, `labels` must be passed using a UniqueGrouper object.)rV   r#   r>   rd   rC   _factorize_given_labelsrj   rA   r   	is_uniqueis_monotonic_increasingis_monotonic_decreasingr=   rB   _factorize_dummy_factorize_unique)rK   rV   indexis_unique_and_monotonicis_dimensioncan_squeezes         r:   rW   zUniqueGrouper.factorize   s    
EJJ'DKK,?*  ;;"//66##",TZZ"E #
OO Q..O%2O2O 	  zz4::??*<<">'>((**))++rN   c           	         t        t        |d| j                  idt        j                  gd      }t        |t        j                  | j                        t        |j                  | j                  | j                  j                              S )Nrd   parallelizedT)kwargsdaskoutput_dtypes
keep_attrsr<   )r-   r/   r1   )r   rn   rd   rI   int64r%   rD   rE   r"   rB   rV   r?   )rK   rV   r-   s      r:   rn   z%UniqueGrouper._factorize_given_labels   sr    #dkk*88*
 xx,!ZZ[[jj&&
 	
rN   c                   t        | j                  t        j                         }t	        | j                  |      \  }}|dk(  j                         rt        d      | j                  j                  |j                  | j                  j                        d      }t        |j                  || j                  j                        }t        j                  |      }t        |||t!        |            S )NsortzEFailed to group data. Are you grouping by a variable that is all NaN?Fr>   deepr<   r-   r/   r1   r2   )rA   rj   rD   
MultiIndexunique_value_groupsallrC   rV   copyreshapeshaper"   rB   r?   rE   r%   r   )rK   r   rL   codes_r-   r1   r/   s          r:   rs   zUniqueGrouper._factorize_unique   s    d112==AA 3D4G4Gd SvbLW  

V^^DJJ4D4D%EER-tzz7G7G
 XXm,
!%-l;	
 	
rN   c                "   | j                   j                  }t        d t        |      D              }t	        j
                  |      }t        | j                   t              rj| j                   j                         j                  |      }| j                   }t        j                  | j                   j                        }t               }n| j                   j                  |d      }| j                   j                  j                         }| j                  }t        |t        j                         r,t        j"                  || j                   j$                        }n#t&        rt        |t(              sJ t+        |      }t-        |||||      S )Nc              3  :   K   | ]  }t        ||d z           yw)rf   N)slice)r8   is     r:   r;   z1UniqueGrouper._factorize_dummy.<locals>.<genexpr>  s     +Q[E!QUO[s   )r>   Fr   )dimr-   r0   r/   r1   r2   )rV   sizerF   rangerI   arangerA   r   to_dataarrayr   rD   
RangeIndexr   variableto_base_variablerj   r   from_pandas_multiindexrB   r	   r"   r   r%   )rK   r   r0   
size_ranger-   r1   r/   r2   s           r:   rr   zUniqueGrouper._factorize_dummy   s'   zz ',+QU4[+Q&QYYt_
 djj+.JJ++-22
2CE::Ltzz7J ]FJJOO%O@E::..??AL,,J*bmm4$;;DJJOO !%lH===3LA'!%
 	
rN   )r\   r.   r]   r[   )r\   r%   )rO   rP   rQ   rR   r   rb   rS   rd   propertyrj   rZ   rW   rn   rs   rr   r7   rN   r:   r)   r)      sU    
 (-T'FO_F$T2F2$ $,0
&
*!
rN   r)   c                      e Zd ZU dZded<   dZded<   dZded	<   d
Zded<   dZded<   dZ	ded<   ddZ
ddZd ZddZddZy)r$   a  
    Grouper object for binning numeric data.

    Attributes
    ----------
    bins : int, sequence of scalars, or IntervalIndex
        The criteria to bin by.

        * int : Defines the number of equal-width bins in the range of `x`. The
          range of `x` is extended by .1% on each side to include the minimum
          and maximum values of `x`.
        * sequence of scalars : Defines the bin edges allowing for non-uniform
          width. No extension of the range of `x` is done.
        * IntervalIndex : Defines the exact bins to be used. Note that
          IntervalIndex for `bins` must be non-overlapping.

    right : bool, default True
        Indicates whether `bins` includes the rightmost edge or not. If
        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
        indicate (1,2], (2,3], (3,4]. This argument is ignored when
        `bins` is an IntervalIndex.
    labels : array or False, default None
        Specifies the labels for the returned bins. Must be the same length as
        the resulting bins. If False, returns only integer indicators of the
        bins. This affects the type of the output container (see below).
        This argument is ignored when `bins` is an IntervalIndex. If True,
        raises an error. When `ordered=False`, labels must be provided.
    retbins : bool, default False
        Whether to return the bins or not. Useful when bins is provided
        as a scalar.
    precision : int, default 3
        The precision at which to store and display the bins labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive or not.
    duplicates : {"raise", "drop"}, default: "raise"
        If bin edges are not unique, raise ValueError or drop non-uniques.
    r   binsTboolrightNr
   rd      int	precisionFinclude_lowestraisezLiteral['raise', 'drop']
duplicatesc                     t        |       | j                  | j                  | j                  | j                  | j
                  | j                        S )N)r   r   rd   r   r   r   )rl   r   r   rd   r   r   r   rY   s    r:   rZ   zBinGrouper.resetS  sA    tDz**;;nn..
 	
rN   c                t    t        j                  | j                        j                         rt	        d      y )NzAll bin edges are NaN.)r   r   r   r   rC   rY   s    r:   __post_init__zBinGrouper.__post_init__]  s.      +//1566 2rN   c           
         t        j                  t        j                  |      j	                         | j
                  | j                  | j                  | j                  | j                  | j                  d      S )NT)r   r   rd   r   r   r   retbins)rD   cutrI   asarrayrG   r   r   rd   r   r   r   )rK   r>   s     r:   _cutzBinGrouper._cuta  sW    vvJJt""$**;;nn..	
 		
rN   c                ,      fd}t        ||dd      S )Nc                    j                  |       \  }}t        j                  t              r|_        |j                  j                  | j                        S r6   )r   rA   r   r   r-   r   r   )r>   rz   binnedr   rK   s       r:   _wrapperz,BinGrouper._factorize_lazy.<locals>._wrappern  sB    99T?LFD$))S) 	<<''

33rN   ry   T)r{   r}   r   )rK   rV   r   s   `  r:   _factorize_lazyzBinGrouper._factorize_lazym  s    	4 8UDQQrN   c                P   t        |t              r,t        |j                  |j                  |j
                        }t        |j                        }t        | j                  t              r|rt        d| j                  d      | j                  |      }|s+|dk(  j                         rt        d| j                        |j
                   d}||_        | j                  t        j                  dg      j                  |j                               \  }}|j"                  }|sLt        j$                  t'        j(                  |j                  j+                                     }|||dk7        }	n|}	t-        ||	|j.                        }
t1        |||
t3        |
      	      S )
N)r=   rB   zOBin edges must be provided when grouping by chunked arrays. Received self.bins=z insteadr   z.None of the data falls within bins with edges _binsr   r<   r   )rA   r   r   r>   r=   rB   r#   r   r   rC   r   r   r   rI   ri   astypedtype
categoriesr   rD   rJ   rG   r"   r?   r%   r   )rK   rV   by_is_chunkedr-   new_dim_namedummy_r/   uniquesrL   r1   s              r:   rW   zBinGrouper.factorizew  sj   e[)ejjuzz

KE(4dii%-bX\XaXaWeemn  $$U+%2+!2!2!4@N   **U+!
 99RXXqc]11%++>?q%%
ggbii

(8(8(:;<G&ww"}'=>M&MM
 !%-l;	
 	
rN   r]   )r\   None)rV   r   r\   r   r[   )rO   rP   rQ   rR   rS   r   rd   r   r   r   rZ   r   r   r   rW   r7   rN   r:   r$   r$   #  s\    $L JE4FCIs ND +2J(2
7

R#
rN   r$   )ra   c                      e Zd ZU dZded<    ed      Zded<    ed      Zded<    ed	      Zd
ed<    ed      Z	ded<    edd      Z
ded<    edd      Zded<   ddZddZddZddZddZy)r(   aF  
    Grouper object specialized to resampling the time coordinate.

    Attributes
    ----------
    freq : str, datetime.timedelta, pandas.Timestamp, or pandas.DateOffset
        Frequency to resample to. See `Pandas frequency
        aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
        for a list of possible values.
    closed : {"left", "right"}, optional
        Side of each interval to treat as closed.
    label : {"left", "right"}, optional
        Side of each interval to use for labeling.
    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pandas.Timestamp, datetime.datetime, numpy.datetime64, or cftime.datetime, default 'start_day'
        The datetime on which to adjust the grouping. The timezone of origin
        must match the timezone of the index.

        If a datetime is not used, these values are also supported:
        - 'epoch': `origin` is 1970-01-01
        - 'start': `origin` is the first value of the timeseries
        - 'start_day': `origin` is the first day at midnight of the timeseries
        - 'end': `origin` is the last value of the timeseries
        - 'end_day': `origin` is the ceiling midnight of the last day
    offset : pd.Timedelta, datetime.timedelta, or str, default is None
        An offset timedelta added to the origin.
    r   freqNrc   zSideOptions | Noneclosedlabel	start_dayzstr | DatetimeLikeoriginz.pd.Timedelta | datetime.timedelta | str | NoneoffsetF)r+   ra   zCFTimeGrouper | pd.Grouperindex_grouperr.   rj   c                     t        |       | j                  | j                  | j                  | j                  | j
                        S )Nr   r   r   r   r   )rl   r   r   r   r   r   rY   s    r:   rZ   zTimeResampler.reset  s8    tDz;;**;;;;
 	
rN   c                   ddl m} t        |      }| j                  }|j                  st        d      t        ||      rHddlm}  || j                  | j                  | j                  | j                  |      | _        || _        y t        | j                  t              rt        d      t        j                   t#        | j                        | j                  | j                  | j                  |      | _        || _        y )Nr   CFTimeIndexz&Index must be monotonic for resamplingr   r   zZ'BaseCFTimeOffset' resample frequencies are only supported when resampling a 'CFTimeIndex')xarrayr   r   r   rp   rC   rA   xarray.core.resample_cftimer   r   r   r   r   r   r   rD   r&   r   rj   )rK   rV   r   rj   r   r   s         r:   _init_propertieszTimeResampler._init_properties  s    &+E255EFFnk2A!.YY{{jj{{"D, - $))%56 6 
 "$(3{{jj{{"D -rN   c                    | j                         \  }}|j                  }|j                         j                         r|j	                         }|j                  d      }|||fS )Nr*   )first_itemsrt   r   anydropnarename)rK   r   r-   r/   s       r:   _get_index_and_itemsz"TimeResampler._get_index_and_items  sa    !--/U &&
##%%,,.K&&'9:
;--rN   c                   ddl m} ddlm} t	        | j
                  |      r/| j
                  j                  t        || j                              S t        j                  t        j                  | j                  j                        | j                        }|j                  | j
                        }|j                         }|j!                         }t        j"                  t        j                  t%        |            |      }||fS )Nr   r   r   )xarray.coding.cftimeindexr   r   r   rA   r   r   r   rj   rD   SeriesrI   r   r   groupbyfirstcountrepeatrH   )rK   r   r   sgroupedr   countsr-   s           r:   r   zTimeResampler.first_items  s    9=d((-8%%11[$"5"56  		"))D$7$7$<$<=t?R?RSAii 2 23G!--/K]]_F IIbiiK(896BE%%rN   c           
        | j                  |       | j                         \  }}}|j                  j                  t        j
                        }t        t        |      D cg c]  \  }}t        ||       c}}t        |d   d       gz         }t        |j                  |j                  |j                        }	|j                  |j                  |j                        d      }
t!        |
|||	t#        |	            S c c}}w )Nr   r<   Fr   r   )r   r   valuesr   rI   r~   rF   r   r   r"   rB   rt   r?   r   r   r   r%   r   )rK   rV   r/   r   r   sbinsr   jr0   r1   r-   s              r:   rW   zTimeResampler.factorize  s    e$*.*C*C*E'
K""))"((3&+%-e_5_TQU1a[_5uRy$9O8PP'
  +"3"35;;
 

u{{ ;%
H'!%-l;
 	
 6s   "D
r]   )rV   r   r\   r   )r\   z&tuple[pd.Index, pd.Series, np.ndarray])r\   ztuple[pd.Series, np.ndarray]r[   )rO   rP   rQ   rR   rS   r   r   r   r   r   r   rj   rZ   r   r   r   rW   r7   rN   r:   r(   r(     s    6 !&t!4F4 %d 3E3!&{!;F;=B4=PF:P055u0MM-M$%e<NH<
#-J.&&
rN   r(   c                `   t        j                  |      }|t        j                  |j                        k(  j	                         }t        j
                  || |      }t        j                  | |       t        |       z  |t        |      k(  z  }|sd||t        |      k(  <   ||f   }d||<   |S )N)sorterr   )	rI   argsortr   r   r   searchsortedisinr   rH   )r>   rd   r   	is_sortedr-   masks         r:   rn   rn   #  s    ZZF299V[[11668IOOFD8EGGD&!!F4L0ES[4HID &(es6{"#x E$KLrN   c                    t        j                  | |      \  }}t        |t         j                        r| j                  |_        ||fS )a   Group an array by its unique values.

    Parameters
    ----------
    ar : array-like
        Input array. This will be flattened if it is not already 1-D.
    sort : bool, default: True
        Whether or not to sort unique values.

    Returns
    -------
    values : np.ndarray
        Sorted, unique values as returned by `np.unique`.
    indices : list of lists of int
        Each element provides the integer indices in `ar` with values given by
        the corresponding value in `unique_values`.
    r   )rD   rW   rA   r   names)arr   inverser   s       r:   r   r   2  s;    ( ll2D1OGV&"--(xx7?rN   )r>   
np.ndarrayrd   r   r\   r   )T)r   r   r\   z(tuple[np.ndarray | pd.Index, np.ndarray])ArR   
__future__r   datetimeabcr   r   dataclassesr   r   	itertoolsr   typingr	   r
   r   r   numpyrI   pandasrD   numpy.typingr   xarray.coding.cftime_offsetsr   r   xarray.corer   xarray.core.computationr   xarray.core.coordinatesr   r   xarray.core.dataarrayr   xarray.core.duck_array_opsr   r@   r   r   xarray.core.indexesr   r   r   xarray.core.typesr   r   r   r   r    r!   xarray.core.variabler"   xarray.namedarray.pycompatr#   __all__RESAMPLE_DIMr%   r&   r'   r)   r$   r(   rn   r   r7   rN   r:   <module>r     s    #  # (  4 4   " N & / K + - 4 2 5  * 7 " E! E! E!Pc 6	 	 ~
G ~
 ~
B v
 v
 v
r B
I B
 B
J  -rN   