U
    !`gv                     @   s  d Z ddlZddlZddlmZ ddlZddlZddlZddlZ	ddl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ eddddZdefddZG dd dZG dd dZG dd dZG dd dZG dd dZ G dd dZ!G dd  d Z"G d!d" d"Z#G d#d$ d$Z$G d%d& d&Z%G d'd( d(Z&d)d* Z'd+d, Z(d-d. Z)d/d0 Z*dS )1z Base classes for all estimators.    N)defaultdict   )__version__)
get_config)	_IS_32BIT)_DEFAULT_TAGS
_safe_tags)	check_X_ycheck_arrayestimator_html_repr)_deprecate_positional_argsTsafec                   s   t | }|ttttfkr.| fdd| D S t| drBt| t r| sPt| S t| t rdt	dnt	dt
| t | f | j}| jdd}| D ]\}}t|dd||< q|f |}|jdd}|D ],}|| }	|| }
|	|
k	rtd	| |f q|S )
a  Constructs a new unfitted estimator with the same parameters.

    Clone does a deep copy of the model in an estimator
    without actually copying attached data. It yields a new estimator
    with the same parameters that has not been fitted on any data.

    If the estimator's `random_state` parameter is an integer (or if the
    estimator doesn't have a `random_state` parameter), an *exact clone* is
    returned: the clone and the original estimator will give the exact same
    results. Otherwise, *statistical clone* is returned: the clone might
    yield different results from the original estimator. More details can be
    found in :ref:`randomness`.

    Parameters
    ----------
    estimator : {list, tuple, set} of estimator instance or a single             estimator instance
        The estimator or group of estimators to be cloned.

    safe : bool, default=True
        If safe is False, clone will fall back to a deep copy on objects
        that are not estimators.

    c                    s   g | ]}t | d qS )r   )clone).0er    +lib/python3.8/site-packages/sklearn/base.py
<listcomp>9   s     zclone.<locals>.<listcomp>
get_paramszaCannot clone object. You should provide an instance of scikit-learn estimator instead of a class.zCannot clone object '%s' (type %s): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.Fdeepr   zWCannot clone object %s, as the constructor either does not set or modifies parameter %s)typelisttupleset	frozensethasattr
isinstancecopydeepcopy	TypeErrorrepr	__class__r   itemsr   RuntimeError)	estimatorr   Zestimator_typeklassZnew_object_paramsnameZparamZ
new_objectZ
params_setZparam1Zparam2r   r   r   r      s2    



r   c                 C   s:  t  }t jdddd t }|}dd|d  d  }tt|  D ]\}\}}	t|	tkrpd|t	|	f }
nd|||	f }
t
|
d	kr|
d
d d |
dd
  }
|dkr|t
|
 dksd|
kr|| t
|}n|d |d7 }||
 |t
|
7 }qFt jf | d|}ddd |dD }|S )af  Pretty print the dictionary 'params'

    Parameters
    ----------
    params : dict
        The dictionary to pretty print

    offset : int, default=0
        The offset in characters to add at the begin of each line.

    printer : callable, default=repr
        The function to convert entries to strings, typically
        the builtin str or repr

       @      )Z	precisionZ	thresholdZ	edgeitemsz,
r    z%s=%si  Ni,  ...ir   K   
z,  c                 s   s   | ]}| d V  qdS )r.   N)rstrip)r   lr   r   r   	<genexpr>   s     z_pprint.<locals>.<genexpr>)npZget_printoptionsZset_printoptionsr   	enumeratesortedr&   r   floatstrlenappendjoinsplit)paramsoffsetZprinterZoptionsZparams_listZthis_line_lengthZline_sepikvZ	this_reprlinesr   r   r   _pprint[   s.    




rE   c                       s   e Zd ZdZedd Zd ddZdd Zd!d
dZ fddZ	 fddZ
dd Zdd Zdd Zd"ddZedd Zdd Zdd Z  ZS )#BaseEstimatorzBase class for all estimators in scikit-learn.

    Notes
    -----
    All estimators should specify all the parameters that can be set
    at the class level in their ``__init__`` as explicit keyword
    arguments (no ``*args`` or ``**kwargs``).
    c                 C   st   t | jd| j}|tjkrg S t|}dd |j D }|D ] }|j|jkr@t	d| |f q@t
dd |D S )z%Get parameter names for the estimatorZdeprecated_originalc                 S   s&   g | ]}|j d kr|j|jkr|qS self)r*   kindZVAR_KEYWORDr   pr   r   r   r      s    
 z2BaseEstimator._get_param_names.<locals>.<listcomp>zscikit-learn estimators should always specify their parameters in the signature of their __init__ (no varargs). %s with constructor %s doesn't  follow this convention.c                 S   s   g | ]
}|j qS r   )r*   rJ   r   r   r   r      s     )getattr__init__objectinspectZ	signature
parametersvaluesrI   ZVAR_POSITIONALr'   r8   )clsZinitZinit_signaturerP   rK   r   r   r   _get_param_names   s    

zBaseEstimator._get_param_namesTc                    s\   t  }|  D ]H t|  }|rNt|drN|  }| fdd|D  || < q|S )ae  
        Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        r   c                 3   s"   | ]\}} d  | |fV  qdS )__Nr   )r   rB   valkeyr   r   r5      s     z+BaseEstimator.get_params.<locals>.<genexpr>)dictrS   rL   r   r   r&   update)rH   r   outvalueZ
deep_itemsr   rV   r   r      s    

zBaseEstimator.get_paramsc           	      K   s   |s| S | j dd}tt}| D ]V\}}|d\}}}||krTtd|| f |rf||| |< q$t| || |||< q$| D ]\}}|| jf | q| S )a  
        Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
        parameters of the form ``<component>__<parameter>`` so that it's
        possible to update each component of a nested object.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : estimator instance
            Estimator instance.
        Tr   rT   zsInvalid parameter %s for estimator %s. Check the list of available parameters with `estimator.get_params().keys()`.)r   r   rX   r&   	partition
ValueErrorsetattr
set_params)	rH   r?   Zvalid_paramsZnested_paramsrW   r[   ZdelimZsub_keyZ
sub_paramsr   r   r   r_      s"    
zBaseEstimator.set_params  c                 C   s   ddl m} d}|ddd|d}|| }td| }||kr|d }d| }t|| }	t||d d d	  }
d
||	|
  kr|d7 }t||d d d	  }
d}|	t| t||
 k r|d |	 d ||
 d   }|S )Nr   )_EstimatorPrettyPrinter   T)ZcompactindentZindent_at_nameZn_max_elements_to_showr2   r-   z^(\s*\S){%d}r1   z[^\n]*\nr/   )	Zutils._pprintra   Zpformatr;   r=   r>   rematchend)rH   Z
N_CHAR_MAXra   ZN_MAX_ELEMENTS_TO_SHOWZpprepr_Z
n_nonblankZlimZregexZleft_limZ	right_limZellipsisr   r   r   __repr__   s,      
	zBaseEstimator.__repr__c                    sV   zt   }W n tk
r,   | j }Y nX t| jdrNt|	 t
dS |S d S )Nsklearn.)_sklearn_version)super__getstate__AttributeError__dict__r!   r   
__module__
startswithrX   r&   r   )rH   stater%   r   r   rm   '  s    zBaseEstimator.__getstate__c                    st   t | jdr>|dd}|tkr>td| jj	|tt
 zt | W n  tk
rn   | j| Y nX d S )Nrj   rk   zpre-0.18zTrying to unpickle estimator {0} from version {1} when using version {2}. This might lead to breaking code or invalid results. Use at your own risk.)r   rp   rq   popr   warningswarnformatr%   __name__UserWarningrl   __setstate__rn   ro   rY   )rH   rr   Zpickle_versionrs   r   r   rz   2  s      zBaseEstimator.__setstate__c                 C   s   t S N)r   rG   r   r   r   
_more_tagsA  s    zBaseEstimator._more_tagsc                 C   s<   i }t t| jD ]"}t|dr|| }|| q|S )Nr|   )reversedrO   Zgetmror%   r   r|   rY   )rH   Zcollected_tagsZ
base_classZ	more_tagsr   r   r   	_get_tagsD  s    

zBaseEstimator._get_tagsc                 C   sV   |j d }|r|| _dS t| ds&dS || jkrRtd| d| jj d| j ddS )a  Set the `n_features_in_` attribute, or check against it.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            The input samples.
        reset : bool
            If True, the `n_features_in_` attribute is set to `X.shape[1]`.
            If False and the attribute exists, then check that it is equal to
            `X.shape[1]`. If False and the attribute does *not* exist, then
            the check is skipped.
            .. note::
               It is recommended to call reset=True in `fit` and in the first
               call to `partial_fit`. All other methods that validate `X`
               should set `reset=False`.
        r   Nn_features_in_zX has z features, but z is expecting z features as input.)shaper   r   r]   r%   rx   )rH   XresetZ
n_featuresr   r   r   _check_n_featuresO  s    


zBaseEstimator._check_n_featuresno_validationFc           	      K   s   |dkr:|   d r(td| jj dt|f|}|}ndt|tr^|dkr^t|f|}|}n@|r|\}}t|f|}t|f|}nt||f|\}}||f}|ddr| j	||d |S )	ar  Validate input data and set or check the `n_features_in_` attribute.

        Parameters
        ----------
        X : {array-like, sparse matrix, dataframe} of shape                 (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,), default='no_validation'
            The targets.

            - If `None`, `check_array` is called on `X`. If the estimator's
              requires_y tag is True, then an error will be raised.
            - If `'no_validation'`, `check_array` is called on `X` and the
              estimator's requires_y tag is ignored. This is a default
              placeholder and is never meant to be explicitly set.
            - Otherwise, both `X` and `y` are checked with either `check_array`
              or `check_X_y` depending on `validate_separately`.

        reset : bool, default=True
            Whether to reset the `n_features_in_` attribute.
            If False, the input will be checked for consistency with data
            provided when reset was last True.
            .. note::
               It is recommended to call reset=True in `fit` and in the first
               call to `partial_fit`. All other methods that validate `X`
               should set `reset=False`.
        validate_separately : False or tuple of dicts, default=False
            Only used if y is not None.
            If False, call validate_X_y(). Else, it must be a tuple of kwargs
            to be used for calling check_array() on X and y respectively.
        **check_params : kwargs
            Parameters passed to :func:`sklearn.utils.check_array` or
            :func:`sklearn.utils.check_X_y`. Ignored if validate_separately
            is not False.

        Returns
        -------
        out : {ndarray, sparse matrix} or tuple of these
            The validated input. A tuple is returned if `y` is not None.
        N
requires_yzThis z= estimator requires y to be passed, but the target y is None.r   Z	ensure_2dT)r   )
r~   r]   r%   rx   r   r    r:   r	   getr   )	rH   r   yr   Zvalidate_separatelyZcheck_paramsrZ   Zcheck_X_paramsZcheck_y_paramsr   r   r   _validate_dataq  s&    +zBaseEstimator._validate_datac                 C   s   t  d dkrtd| jS )a  HTML representation of estimator.

        This is redundant with the logic of `_repr_mimebundle_`. The latter
        should be favorted in the long term, `_repr_html_` is only
        implemented for consumers who do not interpret `_repr_mimbundle_`.
        displaydiagramzW_repr_html_ is only defined when the 'display' configuration option is set to 'diagram')r   rn   _repr_html_innerrG   r   r   r   _repr_html_  s    zBaseEstimator._repr_html_c                 C   s   t | S )zThis function is returned by the @property `_repr_html_` to make
        `hasattr(estimator, "_repr_html_") return `True` or `False` depending
        on `get_config()["display"]`.
        r   rG   r   r   r   r     s    zBaseEstimator._repr_html_innerc                 K   s*   dt | i}t d dkr&t| |d< |S )z8Mime bundle used by jupyter kernels to display estimatorz
text/plainr   r   z	text/html)r$   r   r   )rH   kwargsoutputr   r   r   _repr_mimebundle_  s    zBaseEstimator._repr_mimebundle_)T)r`   )r   TF)rx   rp   __qualname____doc__classmethodrS   r   r_   ri   rm   rz   r|   r~   r   r   propertyr   r   r   __classcell__r   r   rs   r   rF      s$   	

,
1"  
H
rF   c                   @   s&   e Zd ZdZdZdddZdd ZdS )	ClassifierMixinz0Mixin class for all classifiers in scikit-learn.
classifierNc                 C   s    ddl m} ||| ||dS )a  
        Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of ``self.predict(X)`` wrt. `y`.
        r   )accuracy_scoresample_weight)metricsr   predict)rH   r   r   r   r   r   r   r   score  s    zClassifierMixin.scorec                 C   s   ddiS Nr   Tr   rG   r   r   r   r|     s    zClassifierMixin._more_tags)Nrx   rp   r   r   _estimator_typer   r|   r   r   r   r   r     s   
r   c                   @   s&   e Zd ZdZdZdddZdd ZdS )	RegressorMixinz:Mixin class for all regression estimators in scikit-learn.	regressorNc                 C   s$   ddl m} | |}||||dS )a  Return the coefficient of determination :math:`R^2` of the
        prediction.

        The coefficient :math:`R^2` is defined as :math:`(1 - \frac{u}{v})`,
        where :math:`u` is the residual sum of squares ``((y_true - y_pred)
        ** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -
        y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it
        can be negative (because the model can be arbitrarily worse). A
        constant model that always predicts the expected value of `y`,
        disregarding the input features, would get a :math:`R^2` score of
        0.0.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. For some estimators this may be a precomputed
            kernel matrix or a list of generic objects instead with shape
            ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
            is the number of samples used in the fitting for the estimator.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            :math:`R^2` of ``self.predict(X)`` wrt. `y`.

        Notes
        -----
        The :math:`R^2` score used when calling ``score`` on a regressor uses
        ``multioutput='uniform_average'`` from version 0.23 to keep consistent
        with default value of :func:`~sklearn.metrics.r2_score`.
        This influences the ``score`` method of all the multioutput
        regressors (except for
        :class:`~sklearn.multioutput.MultiOutputRegressor`).
        r   )r2_scorer   )r   r   r   )rH   r   r   r   r   Zy_predr   r   r   r     s    *
zRegressorMixin.scorec                 C   s   ddiS r   r   rG   r   r   r   r|   ,  s    zRegressorMixin._more_tags)Nr   r   r   r   r   r     s   
.r   c                   @   s&   e Zd ZdZdZdddZdd ZdS )	ClusterMixinz7Mixin class for all cluster estimators in scikit-learn.Z	clustererNc                 C   s   |  | | jS )a  
        Perform clustering on `X` and returns cluster labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        labels : ndarray of shape (n_samples,), dtype=np.int64
            Cluster labels.
        )fitZlabels_rH   r   r   r   r   r   fit_predict4  s    
zClusterMixin.fit_predictc                 C   s   dg iS )NZpreserves_dtyper   rG   r   r   r   r|   J  s    zClusterMixin._more_tags)N)rx   rp   r   r   r   r   r|   r   r   r   r   r   0  s   
r   c                   @   s4   e Zd ZdZedd Zdd Zdd Zdd	 Zd
S )BiclusterMixinz9Mixin class for all bicluster estimators in scikit-learn.c                 C   s   | j | jfS )z{Convenient way to get row and column indicators together.

        Returns the ``rows_`` and ``columns_`` members.
        )rows_columns_rG   r   r   r   biclusters_Q  s    zBiclusterMixin.biclusters_c                 C   s0   | j | }| j| }t|d t|d fS )a  Row and column indices of the `i`'th bicluster.

        Only works if ``rows_`` and ``columns_`` attributes exist.

        Parameters
        ----------
        i : int
            The index of the cluster.

        Returns
        -------
        row_ind : ndarray, dtype=np.intp
            Indices of rows in the dataset that belong to the bicluster.
        col_ind : ndarray, dtype=np.intp
            Indices of columns in the dataset that belong to the bicluster.

        r   )r   r   r6   Znonzero)rH   rA   Zrowscolumnsr   r   r   get_indicesY  s    

zBiclusterMixin.get_indicesc                 C   s   |  |}tdd |D S )a-  Shape of the `i`'th bicluster.

        Parameters
        ----------
        i : int
            The index of the cluster.

        Returns
        -------
        n_rows : int
            Number of rows in the bicluster.

        n_cols : int
            Number of columns in the bicluster.
        c                 s   s   | ]}t |V  qd S r{   )r;   )r   rA   r   r   r   r5     s     z+BiclusterMixin.get_shape.<locals>.<genexpr>)r   r   )rH   rA   indicesr   r   r   	get_shapeo  s    
zBiclusterMixin.get_shapec                 C   s@   ddl m} ||dd}| |\}}||ddtjf |f S )a   Return the submatrix corresponding to bicluster `i`.

        Parameters
        ----------
        i : int
            The index of the cluster.
        data : array-like of shape (n_samples, n_features)
            The data.

        Returns
        -------
        submatrix : ndarray of shape (n_rows, n_cols)
            The submatrix corresponding to bicluster `i`.

        Notes
        -----
        Works with sparse matrices. Only works if ``rows_`` and
        ``columns_`` attributes exist.
        r   r
   Zcsr)Zaccept_sparseN)utils.validationr   r   r6   Znewaxis)rH   rA   datar   Zrow_indZcol_indr   r   r   get_submatrix  s    zBiclusterMixin.get_submatrixN)	rx   rp   r   r   r   r   r   r   r   r   r   r   r   r   N  s   
r   c                   @   s   e Zd ZdZdddZdS )TransformerMixinz1Mixin class for all transformers in scikit-learn.Nc                 K   s6   |dkr| j |f||S | j ||f||S dS )a  
        Fit to data, then transform it.

        Fits transformer to `X` and `y` with optional parameters `fit_params`
        and returns a transformed version of `X`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input samples.

        y :  array-like of shape (n_samples,) or (n_samples, n_outputs),                 default=None
            Target values (None for unsupervised transformations).

        **fit_params : dict
            Additional fit parameters.

        Returns
        -------
        X_new : ndarray array of shape (n_samples, n_features_new)
            Transformed array.
        N)r   Z	transform)rH   r   r   Z
fit_paramsr   r   r   fit_transform  s    zTransformerMixin.fit_transform)N)rx   rp   r   r   r   r   r   r   r   r     s   r   c                   @   s   e Zd ZdZdZdddZdS )DensityMixinz7Mixin class for all density estimators in scikit-learn.ZDensityEstimatorNc                 C   s   dS )a=  Return the score of the model on the data `X`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        score : float
        Nr   r   r   r   r   r     s    zDensityMixin.score)N)rx   rp   r   r   r   r   r   r   r   r   r     s   r   c                   @   s   e Zd ZdZdZdddZdS )OutlierMixinzAMixin class for all outlier detection estimators in scikit-learn.outlier_detectorNc                 C   s   |  ||S )a  Perform fit on X and returns labels for X.

        Returns -1 for outliers and 1 for inliers.

        Parameters
        ----------
        X : {array-like, sparse matrix, dataframe} of shape             (n_samples, n_features)

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            1 for inliers, -1 for outliers.
        )r   r   r   r   r   r   r     s    zOutlierMixin.fit_predict)N)rx   rp   r   r   r   r   r   r   r   r   r     s   r   c                   @   s   e Zd ZdgZdS )MetaEstimatorMixinr(   N)rx   rp   r   Z_required_parametersr   r   r   r   r     s   r   c                   @   s   e Zd ZdZdd ZdS )MultiOutputMixinz2Mixin to mark estimators that support multioutput.c                 C   s   ddiS )NZmultioutputTr   rG   r   r   r   r|     s    zMultiOutputMixin._more_tagsNrx   rp   r   r   r|   r   r   r   r   r     s   r   c                   @   s   e Zd ZdZdd ZdS )_UnstableArchMixinz=Mark estimators that are non-determinstic on 32bit or PowerPCc                 C   s   dt pt diS )NZnon_deterministic)ZppcZpowerpc)r   platformmachinerq   rG   r   r   r   r|     s    z_UnstableArchMixin._more_tagsNr   r   r   r   r   r     s   r   c                 C   s   t | dddkS )a  Return True if the given estimator is (probably) a classifier.

    Parameters
    ----------
    estimator : object
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a classifier and False otherwise.
    r   Nr   rL   r(   r   r   r   is_classifier  s    r   c                 C   s   t | dddkS )a  Return True if the given estimator is (probably) a regressor.

    Parameters
    ----------
    estimator : estimator instance
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a regressor and False otherwise.
    r   Nr   r   r   r   r   r   is_regressor  s    r   c                 C   s   t | dddkS )a  Return True if the given estimator is (probably) an outlier detector.

    Parameters
    ----------
    estimator : estimator instance
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is an outlier detector and False otherwise.
    r   Nr   r   r   r   r   r   is_outlier_detector#  s    r   c              	   C   sd   t  * t jdtd t| d}t| dd}W 5 Q R X t| dd}|r`||kr\t dt |S |S )a  Returns True if estimator is pairwise.

    - If the `_pairwise` attribute and the tag are present and consistent,
      then use the value and not issue a warning.
    - If the `_pairwise` attribute and the tag are present and not
      consistent, use the `_pairwise` value and issue a deprecation
      warning.
    - If only the `_pairwise` attribute is present and it is not False,
      issue a deprecation warning and use the `_pairwise` value.

    Parameters
    ----------
    estimator : object
        Estimator object to test.

    Returns
    -------
    out : bool
        True if the estimator is pairwise and False otherwise.
    ignore)categoryZ	_pairwiseFZpairwiserV   z_pairwise was deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Set the estimator tags of your estimator instead)ru   catch_warningsfilterwarningsFutureWarningr   rL   r   rv   )r(   Zhas_pairwise_attributeZpairwise_attributeZpairwise_tagr   r   r   _is_pairwise3  s    

r   )+r   r!   ru   collectionsr   r   rO   re   Znumpyr6   r2   r   Z_configr   Zutilsr   Zutils._tagsr   r   r   r	   r   Zutils._estimator_html_reprr   r   r   r$   rE   rF   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sF   >2  K$6N%