"""
Standard cost functions to minimize for statistical fits.

We provide these for convenience, so that you do not have to write your own for standard
fits. The cost functions optionally use Numba to accelerate some calculations, if Numba
is installed.

**There is no need** to set :attr:`iminuit.Minuit.errordef` manually for any of these
cost functions. :class:`iminuit.Minuit` automatically uses the correct value, which is
provided by each cost function with the attribute ``Cost.errordef``.

What to use when
----------------
- Fit a normalised probability density to data

    - Data are not binned: :class:`UnbinnedNLL`
    - Data are binned: :class:`BinnedNLL`, also supports histogram of weighted samples

- Fit a density to data, density is not normalised

    - Data are not binned: :class:`ExtendedUnbinnedNLL`
    - Data are binned: :class:`ExtendedBinnedNLL`, also supports
      histogram of weighted samples

- Fit a template to binned data with bin-wise uncertainties on the template

    - :class:`Template`, also supports weighted data and weighted template histograms

- Fit of a function f(x) to (x, y, yerror) pairs with normal-distributed fluctuations. x
  is one- or multi-dimensional, y is one-dimensional.

    - y values contain no outliers: :class:`LeastSquares`
    - y values contain outliers: :class:`LeastSquares` with loss function set to
      "soft_l1"

- Include constraints from external fits or apply regularisation

    - :class:`NormalConstraint`

Combining cost functions
------------------------
All cost functions can be added, which generates a new combined cost function.
Parameters with the same name are shared between component cost functions. Use this to
constrain one or several parameters with different data sets and using different
statistical models for each data set. Gaussian penalty terms can also be added to the
cost function to introduce external knowledge about a parameter.
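
For example, a least-squares fit can be combined with a Gaussian penalty term on one
of its parameters like this (a minimal sketch; the line model and the numbers are
made-up placeholders)::

    import numpy as np
    from iminuit import Minuit, cost

    def line(x, a, b):  # user-defined model
        return a + b * x

    x = np.array([1.0, 2.0, 3.0, 4.0])
    y = np.array([1.1, 1.9, 3.2, 4.1])
    lsq = cost.LeastSquares(x, y, 0.1, line)
    penalty = cost.NormalConstraint("a", 0.0, 0.5)  # external knowledge about a
    total = lsq + penalty  # parameters with the same name are shared

    m = Minuit(total, a=0.0, b=1.0)
    m.migrad()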

Model parameter limits
----------------------
The Minuit algorithms support box constraints in parameter space. A user-defined model
can declare that a parameter is only valid over an interval on the real line with the
``Annotated`` type annotation, see :class:`iminuit.Minuit` for details. A typical
example is the sigma parameter of a normal distribution, which must be positive. The
cost functions defined here propagate this information to :class:`iminuit.Minuit`.

Note: The :class:`Template` declares that the template amplitudes must be non-negative,
which is usually the right choice; however, it may be desirable to fit templates that
can have negative amplitudes. To achieve this, simply reset the limits with
:attr:`iminuit.Minuit.limits` after creating the Minuit instance.
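
For example, a positivity constraint on the width of a normal distribution can be
declared like this (a sketch that assumes the third-party ``annotated-types`` package
is installed; see :class:`iminuit.Minuit` for the full set of supported annotations)::

    from typing import Annotated
    from annotated_types import Gt
    from scipy.stats import norm

    def model(x, mu: float, sigma: Annotated[float, Gt(0)]):
        return norm.pdf(x, mu, sigma)

The cost functions then pass the limit ``sigma > 0`` on to :class:`iminuit.Minuit`.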

User-defined gradients
----------------------
If the user provides a model gradient, the cost functions defined here (except
:class:`Template`) also make their own gradient available, which is then used
automatically by :class:`iminuit.Minuit` (see its constructor for details) to
potentially improve the fit (better convergence or robustness).

Note that it is perfectly normal to use Minuit without a user-defined gradient, and
Minuit does not always benefit from a user-defined gradient. If the gradient is
expensive to compute, the time to converge may increase. If you have trouble with the
fitting process, it is unlikely that the issues are resolved by a user-defined gradient.
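
For example, an unbinned fit with a hand-coded model gradient could look like this
(a minimal sketch; the model, its gradient, and the sample are made up for
illustration)::

    import numpy as np
    from iminuit import Minuit, cost
    from scipy.stats import norm

    def pdf(x, mu, sigma):
        return norm.pdf(x, mu, sigma)

    def grad(x, mu, sigma):
        # returns shape (2, N): d(pdf)/d(mu) and d(pdf)/d(sigma)
        p = norm.pdf(x, mu, sigma)
        z = (x - mu) / sigma
        return np.array([p * z / sigma, p * (z ** 2 - 1) / sigma])

    x = np.random.default_rng(1).normal(size=1000)
    nll = cost.UnbinnedNLL(x, pdf, grad=grad)
    m = Minuit(nll, mu=0.1, sigma=0.9)
    m.migrad()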

Notes
-----
The cost functions defined here have been optimized with knowledge about implementation
details of Minuit to give the highest accuracy and the most robust results, so they
should perform well. If you have trouble with your own implementations, try these.

The binned versions of the log-likelihood fits support weighted samples. For each bin of
the histogram, the sum of weights and the sum of squared weights are then needed; see
class documentation for details.
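
For example, a histogram of a weighted sample can be passed to :class:`BinnedNLL` as
per-bin pairs of (sum of weights, sum of squared weights) like this (a sketch with
made-up weights)::

    import numpy as np
    from iminuit import cost
    from scipy.stats import norm

    rng = np.random.default_rng(2)
    x = rng.normal(size=1000)
    w = rng.uniform(0.5, 1.5, size=x.size)  # per-event weights, for illustration only

    xe = np.linspace(-3, 3, 21)
    w1, _ = np.histogram(x, bins=xe, weights=w)
    w2, _ = np.histogram(x, bins=xe, weights=w ** 2)
    n = np.stack([w1, w2], axis=-1)  # shape (nbins, 2)

    nll = cost.BinnedNLL(n, xe, lambda x, mu, sigma: norm.cdf(x, mu, sigma))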
"""

# The import block below is reconstructed from the names visible in the damaged
# original; the exact source modules and type aliases may differ between versions.
from __future__ import annotations

import abc
import warnings
from typing import (
    Any,
    Callable,
    Collection,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Tuple,
    TypeVar,
    Union,
    cast,
)

import numpy as np
from numpy.typing import ArrayLike, NDArray

from .typing import LossFunction, Model, ModelGradient
from .util import (
    PerformanceWarning,
    _detect_log_spacing,
    _smart_sampling,
    deprecated_parameter,
    describe,
    is_positive_definite,
    merge_signatures,
)

__all__ = [
    "CHISQUARE",
    "NEGATIVE_LOG_LIKELIHOOD",
    "chi2",
    "multinomial_chi2",
    "poisson_chi2",
    "template_chi2_jsc",
    "template_chi2_da",
    "template_nll_asy",
    "Cost",
    "CostSum",
    "Constant",
    "BinnedNLL",
    "UnbinnedNLL",
    "ExtendedBinnedNLL",
    "ExtendedUnbinnedNLL",
    "Template",
    "LeastSquares",
    "NormalConstraint",
]

T = TypeVar("T", float, ArrayLike)

CHISQUARE = 1.0
NEGATIVE_LOG_LIKELIHOOD = 0.5

_TINY_FLOAT = np.finfo(float).tiny


def log_or_zero(x):
    """
    Evaluate to log(x) for x > 0 and to 0 otherwise.

    Parameters
    ----------
    x : array
        Argument.

    Returns
    -------
    array
        Elementwise contains log(x) for x > 0 and zero otherwise.
    """
    r = np.zeros_like(x)
    ma = x > 0
    r[ma] = np.log(x[ma])
    return r


def _unbinned_nll(x):
    # a tiny positive offset guards against log(0) for vanishing densities
    return np.sum(np.log(x + _TINY_FLOAT))


def _z_squared(y, ye, ym):
    z = (y - ym) / ye
    return z * z


def _replace_none(x, replacement):
    if x is None:
        return replacement
    return x


def chi2(y: ArrayLike, ye: ArrayLike, ym: ArrayLike) -> float:
    """
    Compute (potentially) chi2-distributed cost.

    The value returned by this function is chi2-distributed, if the observed values are
    normally distributed around the expected values with the provided standard
    deviations.
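
    In terms of the arguments below, the returned value is

    .. math::
        Q(y; ym, ye) = \sum_i \left( \frac{y_i - ym_i}{ye_i} \right)^2 .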

    Parameters
    ----------
    y : array-like with shape (N,)
        Observed values.
    ye : array-like with shape (N,)
        Uncertainties of values.
    ym : array-like with shape (N,)
        Expected values.

    Returns
    -------
    float
        Value of cost function.
    """
    y, ye, ym = np.atleast_1d(y, ye, ym)
    assert ym.ndim == 1
    return np.sum(_z_squared(y, ye, ym))


def _chi2_grad(
    y: ArrayLike, ye: ArrayLike, ym: ArrayLike, gym: ArrayLike
) -> NDArray:
    """
    Compute gradient of :func:`chi2`.

    Parameters
    ----------
    y : array-like with shape (N,)
        Observed values.
    ye : array-like with shape (N,)
        Uncertainties of values.
    ym : array-like with shape (N,)
        Expected values.
    gym : array-like with shape (K, N)
        Gradient of ym with respect to K model parameters.

    Returns
    -------
    array with shape (K,)
        Gradient of cost function with respect to model parameters.
    """
    y, ye, ym, gym = np.atleast_1d(y, ye, ym, gym)
    assert ym.ndim == 1
    assert gym.ndim == 2
    return -2 * np.sum((y - ym) * gym * ye ** -2, axis=1)


def _soft_l1_cost(y, ye, ym):
    z_sqr = _z_squared(y, ye, ym)
    return 2 * np.sum(np.sqrt(1.0 + z_sqr) - 1.0)


def _soft_l1_cost_grad(y, ye, ym, gym):
    inv_ye = 1 / ye
    z = (y - ym) * inv_ye
    f = (1 + z ** 2) ** -0.5
    return -2 * np.sum(f * z * inv_ye * gym, axis=tuple(range(1, gym.ndim)))


def poisson_chi2(n: ArrayLike, mu: ArrayLike) -> float:
    """
    Compute asymptotically chi2-distributed cost for Poisson-distributed data.

    See Baker & Cousins, NIM 221 (1984) 437-442.

    Parameters
    ----------
    n : array-like
        Observed counts.
    mu : array-like
        Expected counts per bin.

    Returns
    -------
    float
        Cost function value.

    Notes
    -----
    The implementation makes the result asymptotically chi2-distributed,
    which helps to maximise the numerical accuracy for Minuit.

    If sum(mu) == sum(n), the result is equal to :func:`multinomial_chi2`.
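
    Explicitly, the computed quantity is the Baker-Cousins transform of the Poisson
    likelihood,

    .. math::
        Q(n; \mu) = 2 \sum_i \left[ \mu_i - n_i + n_i \ln(n_i / \mu_i) \right],

    where terms with :math:`n_i = 0` reduce to :math:`2 \mu_i`.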
    """
    n, mu = np.atleast_1d(n, mu)
    return 2 * np.sum(n * (log_or_zero(n) - log_or_zero(mu)) + mu - n)


def _poisson_chi2_grad(n, mu, gmu):
    assert gmu.ndim == 2
    return 2 * np.sum((1.0 - n / mu) * gmu, axis=1)


def multinomial_chi2(n: ArrayLike, mu: ArrayLike) -> float:
    """
    Compute asymptotically chi2-distributed cost for multinomially-distributed data.

    See Baker & Cousins, NIM 221 (1984) 437-442.

    Parameters
    ----------
    n : array-like
        Observed counts.
    mu : array-like
        Expected counts. Must satisfy sum(mu) == sum(n).

    Returns
    -------
    float
        Cost function value.

    Notes
    -----
    The implementation makes the result asymptotically chi2-distributed,
    which helps to maximise the numerical accuracy for Minuit.
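
    Explicitly, the computed quantity is

    .. math::
        Q(n; \mu) = 2 \sum_i n_i \ln(n_i / \mu_i),

    which differs from :func:`poisson_chi2` only by the terms :math:`2(\mu_i - n_i)`
    that sum to zero when sum(mu) == sum(n).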
    """
    n, mu = np.atleast_1d(n, mu)
    return 2 * np.sum(n * (log_or_zero(n) - log_or_zero(mu)))


def _multinomial_chi2_grad(n, mu, gmu):
    assert gmu.ndim == 2
    return -2 * np.sum(n / mu * gmu, axis=1)


def template_chi2_jsc(n: ArrayLike, mu: ArrayLike, mu_var: ArrayLike) -> float:
    """
    Compute asymptotically chi2-distributed cost for a template fit.

    J.S. Conway, PHYSTAT 2011, https://doi.org/10.48550/arXiv.1103.0354

    Parameters
    ----------
    n : array-like
        Observed counts.
    mu : array-like
        Expected counts. This is the sum of the normalised templates scaled with
        the component yields. Must be positive everywhere.
    mu_var : array-like
        Expected variance of mu. Must be positive everywhere.

    Returns
    -------
    float
        Asymptotically chi-square-distributed test statistic.

    Notes
    -----
    The implementation deviates slightly from the paper by making the result
    asymptotically chi2-distributed, which helps to maximise the numerical
    accuracy for Minuit.
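
    The per-bin nuisance parameter :math:`\beta_i` of the Conway method is profiled
    analytically; with :math:`\sigma_i^2 = \mathrm{mu\_var}_i / \mu_i^2` and
    :math:`p_i = (1 - \mu_i \sigma_i^2) / 2`,

    .. math::
        \beta_i = p_i + \sqrt{p_i^2 + n_i \sigma_i^2},

    and the cost is :func:`poisson_chi2` evaluated at :math:`\mu_i \beta_i` plus the
    penalty :math:`\sum_i (\beta_i - 1)^2 / \sigma_i^2`.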
    """
    n, mu, mu_var = np.atleast_1d(n, mu, mu_var)
    beta_var = mu_var / mu ** 2
    p = 0.5 - 0.5 * mu * beta_var
    beta = p + np.sqrt(p ** 2 + n * beta_var)
    return poisson_chi2(n, mu * beta) + np.sum((beta - 1) ** 2 / beta_var)


def template_chi2_da(n: ArrayLike, mu: ArrayLike, mu_var: ArrayLike) -> float:
    """
    Compute asymptotically chi2-distributed cost for a template fit.

    H.P. Dembinski, A. Abdelmotteleb, https://doi.org/10.48550/arXiv.2206.12346

    Parameters
    ----------
    n : array-like
        Observed counts.
    mu : array-like
        Expected counts. This is the sum of the normalised templates scaled
        with the component yields.
    mu_var : array-like
        Expected variance of mu. Must be positive everywhere.

    Returns
    -------
    float
        Asymptotically chi-square-distributed test statistic.
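
    Notes
    -----
    With :math:`k_i = \mu_i^2 / \sigma_i^2`, where :math:`\sigma_i^2` is the template
    variance ``mu_var``, the per-bin scale factor is profiled analytically as
    :math:`\beta_i = (n_i + k_i) / (\mu_i + k_i)`, and the returned value is the sum
    of :func:`poisson_chi2` for the data and for the effective template counts, both
    evaluated at the scaled expectations :math:`\beta_i \mu_i` and :math:`\beta_i k_i`.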
    """
    n, mu, mu_var = np.atleast_1d(n, mu, mu_var)
    k = mu ** 2 / mu_var
    beta = (n + k) / (mu + k + _TINY_FLOAT)
    return poisson_chi2(n, mu * beta) + poisson_chi2(k, k * beta)


def template_nll_asy(n: ArrayLike, mu: ArrayLike, mu_var: ArrayLike) -> float:
    """
    Compute marginalized negative log-likelihood for a template fit.

    This is the negative logarithm of equation 3.15 of the paper by
    C.A. Argüelles, A. Schneider, T. Yuan,
    https://doi.org/10.1007/JHEP06(2019)030.

    The authors use a Bayesian approach and integrate over the nuisance
    parameters. Like the other Barlow-Beeston-lite methods, this is an
    approximation. The resulting likelihood cannot be turned into an
    asymptotically chi-square distributed test statistic as detailed
    in Baker & Cousins, NIM 221 (1984) 437-442.

    Parameters
    ----------
    n : array-like
        Observed counts.
    mu : array-like
        Expected counts. This is the sum of the normalised templates scaled
        with the component yields.
    mu_var : array-like
        Expected variance of mu. Must be positive everywhere.

    Returns
    -------
    float
        Negative log-likelihood function value.
    r   )loggammarN   r   )Zscipy.specialre   r2   rJ   r<   r4   )rX   rY   ra   Zlgalpharc   r8   r8   r9   r$   {  s    
(r$   )njit)overloadTnumpy)ZnogilcacheZerror_modelalways)inlinec                 C  s   t S r;   )r:   r?   r8   r8   r9   _ol_log_or_zero  s    rm   c                 C  s   t S r;   )rE   rL   r8   r8   r9   _ol_z_squared  s    rn   c                 C  s"   | j tjtjfv rt| S t| S r;   )dtyper2   float32float64_unbinned_nll_nb_unbinned_nll_npr?   r8   r8   r9   r@     s    c                 C  s6   t | |\} }|jt jt jfv r,t| |S t| |S r;   )r2   rJ   ro   rp   rq   _multinomial_chi2_nb_multinomial_chi2_npr[   r8   r8   r9   r      s    
c                 C  s6   t | |\} }|jt jt jfv r,t| |S t| |S r;   )r2   rJ   ro   rp   rq   _poisson_chi2_nb_poisson_chi2_npr[   r8   r8   r9   r!     s    
c                 C  s>   t | ||\} }}|jt jt jfv r2t| ||S t| ||S r;   )r2   rJ   ro   rp   rq   _chi2_nb_chi2_nprL   r8   r8   r9   r     s    c                 C  s*   |j tjtjfv rt| ||S t| ||S r;   )ro   r2   rp   rq   _soft_l1_cost_nb_soft_l1_cost_nprL   r8   r8   r9   rS     s    c                   @  s  e Zd ZU dZdZded< ded< edd Zd	d
 Zedd Z	edd Z
ejdd Zedd ZejddddZdddddZdd Zdd ZdddddZdd dd!d"Zed#d$d%d&Zejd'ddd(d)Zejd'd dd*d+Zejd#d$d,d-Zd.S )/r%   z@
    Base class for all cost functions.

    :meta private:
    _parameters_verbose(Dict[str, Optional[Tuple[float, float]]]r}   intr~   c                 C  s   |   S )z;
        For internal use.

        :meta private:
        )	_errordefselfr8   r8   r9   errordef  s    zCost.errordefc                 C  s   t S r;   )r   r   r8   r8   r9   r     s    zCost._errordefc                 C  s   |   S )z
        Return number of points in least-squares fits or bins in a binned fit.

        Infinity is returned if the cost function is unbinned. This is used by Minuit to
        compute the reduced chi2, a goodness-of-fit estimate.
        )_ndatar   r8   r8   r9   ndata  s    z
Cost.ndatac                 C  s
   t | jS )z(Return total number of model parameters.)lenr}   r   r8   r8   r9   npar  s    z	Cost.nparc                 C  s   t  d S r;   )NotImplementedr   r8   r8   r9   r     s    zCost._ndatac                 C  s   | j S )zs
        Access verbosity level.

        Set this to 1 to print all function calls with input and output.
        r~   r   r8   r8   r9   verbose  s    zCost.verbosevaluec                 C  s
   || _ d S r;   r   r   r   r8   r8   r9   r   $  s    )
parametersr   c                 C  s   || _ || _dS For internal use.Nr|   )r   r   r   r8   r8   r9   __init__(  s    zCost.__init__c                 C  s
   t | |S z{
        Add two cost functions to form a combined cost function.

        Returns
        -------
        CostSum
        r&   )r   rhsr8   r8   r9   __add__/  s    zCost.__add__c                 C  s
   t || S r   r   )r   lhsr8   r8   r9   __radd__9  s    zCost.__radd__rH   argsrI   c                 G  s$   |  |}| jdkr t|d| |S )z
        Evaluate the cost function.

        If verbose >= 1, print arguments and result.

        Parameters
        ----------
        *args : float
            Parameter values.

        Returns
        -------
        float
        r   z->)_valuer   print)r   r   r6   r8   r8   r9   __call__C  s    

zCost.__call__r   c                 G  s
   |  |S )aK  
        Compute gradient of the cost function.

        This requires that a model gradient is provided.

        Parameters
        ----------
        *args : float
            Parameter values.

        Returns
        -------
        ndarray of float
            The length of the array is equal to the length of args.
        )_gradr   r   r8   r8   r9   gradW  s    z	Cost.gradboolrI   c                 C  s   |   S )z4Return True if cost function can compute a gradient.)	_has_gradr   r8   r8   r9   has_gradi  s    zCost.has_gradSequence[float]c                 C  s   d S r;   r8   r   r8   r8   r9   r   n  s    zCost._valuec                 C  s   d S r;   r8   r   r8   r8   r9   r   q  s    z
Cost._gradc                 C  s   d S r;   r8   r   r8   r8   r9   r   t  s    zCost._has_gradN)__name__
__module____qualname____doc__	__slots____annotations__propertyr   r   r   r   abcabstractmethodr   r   setterr   r   r   r   r   r   r   r   r   r8   r8   r8   r9   r%     s<   


	




r%   c                      s^   e Zd ZdZdZdd fddZdd Zd	dd
ddZd	dd
ddZe	dd Z
  ZS )r'   z
    Cost function that represents a constant.

    If your cost function produces results that are far away from O(1), adding a
    constant that brings the value closer to zero may improve the numerical stability.
    r   rH   r   c                   s   || _ t i d dS )z!Initialize constant with a value.FN)r   superr   r   	__class__r8   r9   r     s    zConstant.__init__c                 C  s   dS Nr   r8   r   r8   r8   r9   r     s    zConstant._ndatar   r   c                 C  s   | j S r;   r   r   r8   r8   r9   r     s    zConstant._valuer   c                 C  s
   t dS r   )r2   zerosr   r8   r8   r9   r     s    zConstant._gradc                   C  s   dS NTr8   r8   r8   r8   r9   r     s    zConstant._has_grad)r   r   r   r   r   r   r   r   r   staticmethodr   __classcell__r8   r8   r   r9   r'   x  s   r'   c                      s   e Zd ZdZdZdd fddZddd	d
ZdddddZdddddZddddZ	dd Z
dd Zdd Zd!ddddd Z  ZS )"r&   aa  
    Sum of cost functions.

    Users do not need to create objects of this class themselves. They should just add
    cost functions, for example::

        nll = UnbinnedNLL(...)
        lsq = LeastSquares(...)
        ncs = NormalConstraint(...)
        csum = nll + lsq + ncs

    CostSum is used to combine data from different experiments or to combine normal cost
    functions with penalty terms (see NormalConstraint).

    The parameters of CostSum are the union of all parameters of its constituents.

    Supports the sequence protocol to access the constituents.

    Warnings
    --------
    CostSum does not support cost functions that accept a parameter array, because the
    function signature does not allow one to determine how many parameters are accepted
    by the function and which parameters overlap between different cost functions.
    )_items_mapszUnion[Cost, float])itemsc                   s   g | _ |D ]T}t|tr*|  j |j 7  _ q
t|ttfrR|dkr^| j t| q
| j | q
t| j dd\}| _t	 
|tdd | j D  dS )z
        Initialize with cost functions.

        Parameters
        ----------
        *items : Cost
            Cost functions. May also be other CostSum functions.
        r   Tr   c                 s  s   | ]}|j V  qd S r;   )r   .0cr8   r8   r9   	<genexpr>      z#CostSum.__init__.<locals>.<genexpr>N)r   
isinstancer&   r   rH   appendr'   r   r   r   r   max)r   r   itemZ
signaturesr   r8   r9   r     s    	
zCostSum.__init__r   r   c                 #  s<   t | j| jD ](\}}t fdd|D }||fV  qd S )Nc                 3  s   | ]} | V  qd S r;   r8   r   ir   r8   r9   r     r   z!CostSum._split.<locals>.<genexpr>)zipr   r   rT   )r   r   	componentZcmapcomponent_argsr8   r   r9   _split  s    zCostSum._splitrH   r   c                 C  s0   d}|  |D ]\}}||||j 7 }q|S )N        )r   r   r   )r   r   r6   r   r   r8   r8   r9   r     s    zCostSum._valuer   c                   sZ   t | j}t| j| jD ]:\}}t fdd|D }||  |||j 7  < q|S )Nc                 3  s   | ]} | V  qd S r;   r8   r   r   r8   r9   r     r   z CostSum._grad.<locals>.<genexpr>)	r2   r   r   r   r   r   rT   r   r   )r   r   r6   r   indicesr   r8   r   r9   r     s
    zCostSum._gradr   r   c                 C  s   t dd | jD S )Nc                 s  s   | ]}|j V  qd S r;   )r   )r   r   r8   r8   r9   r     r   z$CostSum._has_grad.<locals>.<genexpr>)allr   r   r8   r8   r9   r     s    zCostSum._has_gradc                 C  s   t dd | jD S )Nc                 s  s   | ]}|j V  qd S r;   )r   r   r8   r8   r9   r     r   z!CostSum._ndata.<locals>.<genexpr>)r<   r   r   r8   r8   r9   r     s    zCostSum._ndatac                 C  s
   | j  S )z,Return number of constituent cost functions.)r   __len__r   r8   r8   r9   r     s    zCostSum.__len__c                 C  s   | j |S )z'Get constituent cost function by index.)r   __getitem__)r   keyr8   r8   r9   r     s    zCostSum.__getitem__NzDict[int, Dict[str, Any]])r   component_kwargsc                 C  s   ddl m} tdd | D }| }|||  d  |jd||jd\}}|du r^i }d}t| 	|D ]L\}	\}
}t
|
d	sqp||	i }|||  |
j|fi | |d7 }qpdS )
a  
        Visualize data and model agreement (requires matplotlib).

        The visualization is drawn with matplotlib.pyplot into the current figure.
        Subplots are created to visualize each part of the cost function, the figure
        height is increased accordingly. Parts without a visualize method are silently
        ignored.

        Parameters
        ----------
        args : array-like
            Parameter values.
        component_kwargs : dict of dicts, optional
            Dict that maps an index to dict of keyword arguments. This can be
            used to pass keyword arguments to a visualize method of a component with
            that index.
        **kwargs :
            Other keyword arguments are forwarded to all components.
        r   pyplotc                 s  s   | ]}t |d V  qdS )	visualizeN)hasattr)r   compr8   r8   r9   r     r   z$CostSum.visualize.<locals>.<genexpr>g      ?r   )numNr   )
matplotlibr   r<   ZgcfZset_figwidthZget_figwidthZsubplotsnumber	enumerater   r   getZscar   )r   r   r   pltrX   Zfig_axr   rd   r   Zcargskwargsr8   r8   r9   r     s    
zCostSum.visualize)N)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r8   r8   r   r9   r&     s    r&   c                   @  sx   e Zd ZU dZdZded< ddddd	d
Zedd Zej	ddddZedd Z
e
j	ddddZ
dd ZdS )
MaskedCostzV
    Base class for cost functions that support data masking.

    :meta private:
    )_data_mask_maskedOptional[NDArray]r   r   r   r   )r   datar   c                 C  s&   || _ d| _|   t| || dS r   )r   r   _update_cacher%   r   )r   r   r   r   r8   r8   r9   r     s    zMaskedCost.__init__c                 C  s   | j S )z
        Boolean array, array of indices, or None.

        If not None, only values selected by the mask are considered. The mask acts on
        the first dimension of a value array, i.e. values[mask]. Default is None.
        )r   r   r8   r8   r9   mask(  s    zMaskedCost.maskzOptional[ArrayLike])r   c                 C  s$   |d u rd nt || _|   d S r;   )r2   asarrayr   r   )r   r   r8   r8   r9   r   2  s    c                 C  s   | j S )zReturn data samples.)r   r   r8   r8   r9   r   7  s    zMaskedCost.datar   r   c                 C  s   || j d< |   d S N.)r   r   r   r8   r8   r9   r   <  s    
c                 C  s   | j t| jd | _d S r   )r   rG   r   r   r   r8   r8   r9   r   A  s    zMaskedCost._update_cacheN)r   r   r   r   r   r   r   r   r   r   r   r   r8   r8   r8   r9   r     s   

	
r   c                   @  s>   e Zd ZdZdddddZdd Zejdddd	d
ZdS )MaskedCostWithPullszG
    Base class for cost functions with pulls.

    :meta private:
    r   r   r   c                 C  s
   |  |S )a  
        Return studentized residuals (aka pulls).

        Parameters
        ----------
        args : sequence of float
            Parameter values.

        Returns
        -------
        array
            Array of pull values. If the cost function is masked, the array contains NaN
            values where the mask value is False.

        Notes
        -----
        Pulls allow one to estimate how well a model fits the data. A pull is a value
        computed for each data point. It is given by (observed - predicted) /
        standard-deviation. If the model is correct, the expectation value of each pull
        is zero and its variance is one in the asymptotic limit of infinite samples.
        Under these conditions, the chi-square statistic is computed from the sum of
        pulls squared has a known probability distribution if the model is correct. It
        therefore serves as a goodness-of-fit statistic.

        Beware: the sum of pulls squared in general is not identical to the value
        returned by the cost function, even if the cost function returns a chi-square
        distributed test-statistic. The cost function is computed in a slightly
        differently way that makes the return value approach the asymptotic chi-square
        distribution faster than a test statistic based on sum of pulls squared. In
        summary, only use pulls for plots. Compute the chi-square test statistic
        directly from the cost function.
        )_pullsr   r8   r8   r9   pullsL  s    !zMaskedCostWithPulls.pullsc                 C  s   t | jjd | j S r;   )r2   prodr   shape_ndimr   r8   r8   r9   r   o  s    zMaskedCostWithPulls._ndatac                 C  s   d S r;   r8   r   r8   r8   r9   r   r  s    zMaskedCostWithPulls._pullsN)	r   r   r   r   r   r   r   r   r   r8   r8   r8   r9   r   E  s
   #r   c                      s   e Zd ZdZdZdddddd fd	d
Zejdd Zejdd Z	dd Z
dd Zeddd(ddddddZddddd Zdddd!d"Zejdddd#d$Zdd%d&d'Z  ZS ))UnbinnedCostzE
    Base class for unbinned cost functions.

    :meta private:
    )_model_model_grad_logr   r   r   Optional[ModelGradient]Optional[Sequence[str]])modelr   r4   r   namec                   s0   || _ || _|| _t t||t|| dS r   )r   r   r   r   r   _model_parameters_norm)r   r   r   r   r4   r   r   r   r8   r9   r     s    
zUnbinnedCost.__init__c                 C  s   dS )Get probability density model.Nr8   r   r8   r8   r9   pdf  s    zUnbinnedCost.pdfc                 C  s   dS )Get number density model.Nr8   r   r8   r8   r9   
scaled_pdf  s    zUnbinnedCost.scaled_pdfc                 C  s   t jS r;   )r2   infr   r8   r8   r9   r     s    zUnbinnedCost._ndatac                 C  s   | j jd S N)r   r   r   r8   r8   r9   _npoints  s    zUnbinnedCost._npointsZnbins)binsr   2   r   Union[int, Sequence[float]])r   model_pointsr  c                   sB  ddl m} tj}|jdkr*tdt|trRt	|}j
|g R  }nt|dkrt|rzt|d |d |}nt|d |d |}j
|g R  }n"t fdd|d |d \}}tj|||d |d fd\}}	d	|	dd
 |	d
d   }
|	d |	d  }|j|
||d	 dd |j|d|| dd d
S )aX  
        Visualize data and model agreement (requires matplotlib).

        The visualization is drawn with matplotlib.pyplot into the current axes.

        Parameters
        ----------
        args : array-like
            Parameter values.
        model_points : int or array-like, optional
            How many points to use to draw the model. Default is 0, in this case
            an smart sampling algorithm selects the number of points. If array-like,
            it is interpreted as the point locations.
        bins : int, optional
            number of bins. Default is 50 bins.
        r   r   r   7visualize is not implemented for multi-dimensional datar   c                   s   j | g R  S r;   )r   r?   r   r   r8   r9   <lambda>  r   z(UnbinnedCost.visualize.<locals>.<lambda>)r  rU   r1   NokfmtC0)fc)r   r   r2   r=   r   rK   
ValueErrorr   r   arrayr   r	   	geomspacelinspacer   Z	histogramerrorbarZfill_between)r   r   r  r  r   r5   xmrC   rX   xecxdxr8   r  r9   r     s$    


" zUnbinnedCost.visualizerH   r   r   c                 G  s   |  |}td||S )a+  
        Estimate Fisher information for model and sample.

        The estimated Fisher information is only meaningful if the arguments provided
        are estimates of the true values.

        Parameters
        ----------
        *args: float
            Estimates of model parameters.
        z	ji,ki->jk)_pointwise_scorer2   einsumr   r   gr8   r8   r9   fisher_information  s    
zUnbinnedCost.fisher_informationc                 G  s   t j| j| S )a  
        Estimate covariance of the parameters with the sandwich estimator.

        This requires that the model gradient is provided, and that the arguments are
        the maximum-likelihood estimates. The sandwich estimator is only asymptotically
        correct.

        Parameters
        ----------
        *args : float
            Maximum-likelihood estimates of the parameter values.

        Returns
        -------
        ndarray of float
            The array has shape (K, K) for K arguments.
        )r2   linalginvr  r   r8   r8   r9   
covariance  s    zUnbinnedCost.covariancec                 C  s   d S r;   r8   r   r8   r8   r9   r    s    zUnbinnedCost._pointwise_scorer   c                 C  s
   | j d uS r;   r   r   r8   r8   r9   r     s    zUnbinnedCost._has_grad)r   r  )r   r   r   r   r   r   r   abstractpropertyr   r   r   r  r   r   r  r  r   r  r   r   r8   r8   r   r9   r   v  s$   

  3r   c                      s   e Zd ZdZdZedd Zedd Zddd	d	d
ddddddd fddZdddddZ	dddddZ
dddddZdddddZdddd d!Z  ZS )"r)   z
    Unbinned negative log-likelihood.

    Use this if only the shape of the fitted PDF is of interest and the original
    unbinned data is available. The data can be one- or multi-dimensional.
    r8   c                   s    j r fddS  jS )r   c                    s   t  j|  S r;   r2   expr   r   r   r8   r9   r    r   z!UnbinnedNLL.pdf.<locals>.<lambda>)r   r   r   r8   r   r9   r     s    zUnbinnedNLL.pdfc                   s0   t jj jr" fddS  fddS )r   c                    s    t j|   S r;   r!  r   scaler   r8   r9   r    r   z(UnbinnedNLL.scaled_pdf.<locals>.<lambda>c                    s    j |   S r;   r   r   r#  r8   r9   r    r   )r2   r   r   r   r   r   r8   r#  r9   r     s    zUnbinnedNLL.scaled_pdfr   FNr   r4   r   r   r   r   r   r   r   r   )r   r   r   r4   r   r   c                  s   t  |||||| dS )a  
        Initialize UnbinnedNLL with data and model.

        Parameters
        ----------
        data : array-like
            Sample of observations. If the observations are multidimensional, data must
            have the shape (D, N), where D is the number of dimensions and N the number
            of data points.
        pdf : callable
            Probability density function of the form f(data, par0, [par1, ...]), where
            data is the data sample and par0, ... are model parameters. If the data are
            multivariate, data passed to f has shape (D, N), where D is the number of
            dimensions and N the number of data points. Must return an array with the
            shape (N,).
        verbose : int, optional
            Verbosity level. 0: is no output (default). 1: print current args and
            negative log-likelihood value.
        log : bool, optional
            Distributions of the exponential family (normal, exponential, poisson, ...)
            allow one to compute the logarithm of the pdf directly, which is more
            accurate and efficient than numerically computing ``log(pdf)``. Set this
            to True, if the model returns the logpdf instead of the pdf.
            Default is False.
        grad : callable or None, optional
            Optionally pass the gradient of the pdf. Has the same calling signature like
            the pdf, but must return an array with the shape (K, N), where N is the
            number of data points and K is the number of parameters. If `log` is True,
            the function must return the gradient of the logpdf instead of the pdf. The
            gradient can be used by Minuit to improve or speed up convergence and to
            compute the sandwich estimator for the variance of the parameter estimates.
            Default is None.
        name : sequence of str or None, optional
            Optional names for each parameter of the model (in order). Must have the
            same length as there are model parameters. Default is None.
        Nr   r   )r   r   r   r   r4   r   r   r   r8   r9   r     s    .zUnbinnedNLL.__init__r   rH   r   c                 C  s*   |  |}| jrdt| S dt| S )N       g       @_eval_modelr   r2   r<   r@   )r   r   rV   r8   r8   r9   r   J  s    
zUnbinnedNLL._valuer   c                 C  s   |  |}dtj|dd S )Nr(  r   rP   r  r2   r<   r  r8   r8   r9   r   P  s    
zUnbinnedNLL._gradc                 C  s&   |  |}| jr|S | |}|| S r;   )_eval_model_gradr   r*  )r   r   r  rV   r8   r8   r9   r  T  s
    

zUnbinnedNLL._pointwise_scorec                 C  s$   | j }t| j|g|R  d|  S )Nr   )r   _normalize_outputr   r  r   r   r   r8   r8   r9   r*  [  s    zUnbinnedNLL._eval_modelc                 C  s:   | j d u rtd| j}t| j |g|R  d| j|  S )Nno gradient availablemodel gradientr   r  r   r-  r   r  r.  r8   r8   r9   r,  _  s    
zUnbinnedNLL._eval_model_gradr   r   r   r   r   r   r   r   r   r   r   r  r*  r,  r   r8   r8   r   r9   r)     s    

 0r)   c                      s   e Zd ZdZdZedd Zedd Zddd	d	d
ddddddd fddZdddddZ	dddddZ
dddddZddddd Zdd!dd"d#Z  ZS )$r+   z
    Unbinned extended negative log-likelihood.

    Use this if shape and normalization of the fitted PDF are of interest and the
    original unbinned data is available. The data can be one- or multi-dimensional.
    r8   c                   s$    j r fdd}n fdd}|S )r   c                    s    j |  \}}t|| S r;   )r   r2   r"  r   rX   r5   r   r8   r9   fnw  s    z#ExtendedUnbinnedNLL.pdf.<locals>.fnc                    s    j |  \}}|| S r;   r%  r3  r   r8   r9   r4  }  s    r   )r   r4  r8   r   r9   r   r  s    zExtendedUnbinnedNLL.pdfc                   s    j r fddS  fddS )zGet density model.c                    s   t  j|  d S Nr   r!  r   r   r8   r9   r    r   z0ExtendedUnbinnedNLL.scaled_pdf.<locals>.<lambda>c                    s    j |  d S r6  r%  r   r   r8   r9   r    r   r5  r   r8   r   r9   r     s    zExtendedUnbinnedNLL.scaled_pdfr   FNr&  r   r   r   r   r   r   )r   r   r   r4   r   r   c                  s   t  |||||| dS )a`
  
        Initialize cost function with data and model.

        Parameters
        ----------
        data : array-like
            Sample of observations. If the observations are multidimensional, data must
            have the shape (D, N), where D is the number of dimensions and N the number
            of data points.
        scaled_pdf : callable
            Density function of the form f(data, par0, [par1, ...]), where data is the
            sample and par0, ... are model parameters. Must return a tuple (<integral
            over f in data window>, <f evaluated at data points>). The first value is
            the density integrated over the data window, the interval that we consider
            for the fit. For example, if the data are exponentially distributed, but we
            fit only the interval (0, 5), then the first value is the density integrated
            from 0 to 5. If the data are multivariate, data passed to f has shape (D,
            N), where D is the number of dimensions and N the number of data points.
        verbose : int, optional
            Verbosity level. 0: is no output (default). 1: print current args and
            negative log-likelihood value.
        log : bool, optional
            Distributions of the exponential family (normal, exponential, poisson, ...)
            allow one to compute the logarithm of the pdf directly, which is more
            accurate and efficient than effectively doing ``log(exp(logpdf))``. Set this
            to True, if the model returns the logarithm of the density as the second
            argument instead of the density. Default is False.
        grad : callable or None, optional
            Optionally pass the gradient of the density function. Has the same calling
            signature like the density function, but must return two arrays. The first
            array has shape (K,) where K are the number of parameters, while the second
            has shape (K, N), where N is the number of data points. The first array is
            the gradient of the integrated density. The second array is the gradient of
            the density itself. If `log` is True, the second array must be the gradient
            of the log-density instead. The gradient can be used by Minuit to improve or
            speed up convergence and to compute the sandwich estimator for the variance
            of the parameter estimates. Default is None.
        name : sequence of str or None, optional
            Optional names for each parameter of the model (in order). Must have the
            same length as there are model parameters. Default is None.
        Nr'  )r   r   r   r   r4   r   r   r   r8   r9   r     s    3zExtendedUnbinnedNLL.__init__r   rH   r   c                 C  s6   |  |\}}| jr&d|t|  S d|t|  S NrN   r)  )r   r   fintrV   r8   r8   r9   r     s    zExtendedUnbinnedNLL._valuer   c                 C  s   |  |}dtj|dd S )NrO   r   rP   r+  r  r8   r8   r9   r     s    
zExtendedUnbinnedNLL._gradc                 C  sb   |  |\}}|  }| jr6||| d d tjf  S | |\}}|| || d d tjf  S r;   )r,  r  r   r2   Znewaxisr*  )r   r   gintr  mr   rV   r8   r8   r9   r    s    z$ExtendedUnbinnedNLL._pointwise_scorezTuple[float, float]c                 C  s8   | j }| j|g|R  \}}t|d|  dd}||fS )Nr   in second positionmsg)r   r   r-  r  )r   r   r   r8  rV   r8   r8   r9   r*    s    zExtendedUnbinnedNLL._eval_modelTuple[NDArray, NDArray]c                 C  s`   | j d u rtd| j}| j |g|R  \}}t|d| jdd}t|d| j|  dd}||fS )Nr/  r0  zin first positionr<  r;  r1  )r   r   r   r9  r  r8   r8   r9   r,    s    

z$ExtendedUnbinnedNLL._eval_model_gradr2  r8   r8   r   r9   r+   h  s    

 5r+   c                      s   e Zd ZU dZdZded< ded< ded< d	ed
< ejZe	dd Z
ddddd fddZdddddZdddddZdddddZejdddddZdd d!d"Zdddd#d$Z fd%d&Zddd'd(d)Zddd*d+d,d-Zd.d/ Z  ZS )0
BinnedCostaF  
    Base class for binned cost functions to support histograms filled with weights.

    Histograms filled with weights are supported by applying the Bohm-Zech transform.

    The Bohm-Zech approach was further generalized to handle sums of weights which are
    negative. See Baker & Cousins, NIM 221 (1984) 437-442; Bohm and Zech, NIMA 748
    (2014) 1-6; H. Dembinski, M. Schmelling, R. Waldi, Nucl.Instrum.Meth.A 940 (2019)
    135-141.

    Bohm and Zech use the scaled Poisson distribution (SPD) as an approximate way to
    handle sums of weights instead of Poisson counts. This approach also works for
    multinomial distributions. The idea of the Bohm and Zech is to use the likelihood
    for Poisson distributed data also for weighted data. They show that one can match
    the first and second moment of the compound Poisson distribution for weighted data
    with a single Poisson distribution with a scaling factor s, that is multiplied with
    the predicted expectation and the observation.

    This scaling factor is computed as s = sum(wi) / sum(wi**2), wi are the weights in
    the current bin. Instead of the Baker & Cousins transformed log-likelihood
    l(n; mu) for Poisson-distributed data, where n is the observed count and mu is the
    expectation, we now compute l(sum(w) * s; mu * s), this can be further simplified:

    l(w * s, mu * s) = 2 * [(w * s) * (log(w * s) - log(mu * s)) - s * mu + s * w]
                     = 2 * s * [w * (log(w) - log(mu)) - mu + w]
                     = s * l(w, mu)

    For multinomially-distributed data and s = 1, sum(w-mu) = 0, which is why these
    terms can be omitted in the standard calculation without weights, but in case of
    weighted counts, sum(s * (w - m)) != 0 and the terms must be kept.

    The original formulas from Bohm and Zech are only applicable if w >= 0 (with the
    extra condition that w * log(w) evaluates to 0 for w = 0). One can generalize the
    formula to w < 0, which is relevant in practice for example in fits of sweighted
    samples, by computing s = abs(sum(wi)) / sum(wi ** 2) and replacing w * log(w) with
    0 for w <= 0.

    This works, because this extension has the right gradient. The gradient should be
    equal to hat of the quadratic function s * (w - mu)**2/mu', where mu'=mu but fixed
    during the gradient computation, see D. Dembinski, M. Schmelling, R. Waldi. The
    minimum of this quadratic function yields an unbiased estimate of mu, even if some w
    are negative. Since the quadratic function and the original function have the same
    gradient, the minima of both functions are the same, and the original function also
    yields an unbiased estimate.

    The gradient is not affected by the particular choice of how to handle w * log(w)
    with w < 0, since this term drops out in the computation of the gradient. Other
    choices are possible. Our goal was to select an option which keeps the function
    minimum approximately chi-square distributed, although that property tends to
    dissolve when negative weights are involved. The minimum can even become negative.

    :meta private:
    )_xer   _bohm_zech_n_bohm_zech_sz#Union[NDArray, Tuple[NDArray, ...]]r@  r   r   r   rA  r   rB  c                 C  s   | j S )zAccess bin edges.)r@  r   r8   r8   r9   r  %  s    zBinnedCost.xer   r   %Union[ArrayLike, Sequence[ArrayLike]])r   rX   r  r   c           	        s  t |tstdt|}t|| _| jdkr@ttt|| _	nt
dd |D | _	t|}|j| jkot|jd dk}|j| jt| krtdt| jdkr| j	gn| j	D ].\}}t||j| d krtd| d	q|rtd
nd| _t ||| dS )r   zxe must be iterabler   c                 s  s   | ]}t |V  qd S r;   )r   r   xeir8   r8   r9   r   :  r   z&BinnedCost.__init__.<locals>.<genexpr>r   rN   z4n must either have same dimension as xe or one extraz2n and xe have incompatible shapes along dimension z7, xe must be longer by one element along each dimensionr   N)r   r   r  _shape_from_xer   r   r   r   r   r@  rT   rK   r   r   r   r2   r   rB  r   r   )	r   r   rX   r  r   r   Zis_weightedr   rE  r   r8   r9   r   *  s$    


$
zBinnedCost.__init__r   z'Union[NDArray, Tuple[NDArray, NDArray]]r   c                 C  s
   |  |S )ai  
        Return the bin-wise expectation for the fitted model.

        Parameters
        ----------
        args : array-like
            Parameter values.

        Returns
        -------
        NDArray
            Model prediction for each bin. The expectation is always returned for all
            bins, even if some bins are temporarily masked.
        )_predr   r8   r8   r9   
predictionP  s    zBinnedCost.predictionNonec                 C  s
   |  |S )a  
        Visualize data and model agreement (requires matplotlib).

        The visualization is drawn with matplotlib.pyplot into the current axes.

        Parameters
        ----------
        args : sequence of float
            Parameter values.

        Notes
        -----
        The automatically provided visualization for multi-dimensional data set is often
        not very pretty, but still helps to judge whether the fit is reasonable. Since
        there is no obvious way to draw higher dimensional data with error bars in
        comparison to a model, the visualization shows all data bins as a single
        sequence.
        )
_visualizer   r8   r8   r9   r   c  s    zBinnedCost.visualizec                 C  s   ddl m} |  \}}| |}t|tr0J | jdkr|d}|d}|d}t	t
|d d }t	t
|t}n"| j}d|dd  |d d   }|j|||dd |j||dd	d
 d S )Nr   r   r   r   r1   r	  r
  Tr  )fillcolor)r   r   _n_errrH  r   rT   r   reshaper2   aranger   astyperH   r  r  stairs)r   r   r   rX   nerY   r  r  r8   r8   r9   rJ  x  s    




zBinnedCost._visualizec                 C  s   d S r;   r8   r   r8   r8   r9   rG    s    zBinnedCost._predr>  r   c                 C  sp   | j }| jd u r"| }|d }n|d  }|d d }|dk}| jd urT| j }tj||< tj||< ||fS )Nr1   .r   .r   r   )r   rB  copyr   r2   nan)r   drX   errr7   r8   r8   r9   rM    s    




zBinnedCost._n_errc                 C  s"   |  |}|  \}}|| | S r;   rH  rM  )r   r   rY   rX   rR  r8   r8   r9   r     s    
zBinnedCost._pullsc                   s   t    | j}| jd ur||d }|d }t|}|dk}t|| ||  ||< t|| || < || _|| | _n|| _d S )NrS  rT  r   )	r   r   r   rB  r2   r3   absZmedianrA  )r   rX   valvarsr7   r   r8   r9   r     s    


zBinnedCost._update_cache)r[  rI   c                 C  s>   | j }| j}|d ur|| }| j}|d u r2||fS ||| fS r;   rB  r   rA  )r   r[  r]  r7   rX   r8   r8   r9   _transformed  s    zBinnedCost._transformedz Tuple[NDArray, NDArray, NDArray])r[  r\  rI   c                 C  sR   | j }| j}|d ur$|| }|| }| j}|d u r<|||fS ||| ||d  fS r7  r^  )r   r[  r\  r]  r7   rX   r8   r8   r9   _transformed2  s    
zBinnedCost._transformed2c                 C  s   | j d u r| jS | jd S )NrS  )rB  r   r   r8   r8   r9   _counts  s    
zBinnedCost._counts)r   r   r   r   r   r   r   r   rX   r   r  r   rH  r   rJ  r   r   rG  rM  r   r   r_  r`  ra  r   r8   r8   r   r9   r?    s*   
6
&
r?  c                      s   e Zd ZU dZdZded< ded<  fddZd	d
dddZd	d
dddZd	d
dddZ	d	d
dddZ
d	d
dddZddddZ  ZS )BinnedCostWithModelzY
    Base class for binned cost functions with parametric model.

    :meta private:
    )	_xe_shaper   	_model_xe	_model_xm	_model_dx
_model_lenr   
_pred_impl
np.ndarrayrd  "Union[Tuple[int], Tuple[int, ...]]rc  c                   s  || _ || _|r|rtd|dkr.| j| _n8|dkr@| j| _n&|dkrR| j| _nd| d}t|t t	||||| | j
dkrt| jf| _t| j| _|rt| j}	|	| _| jdd	 d
|	  | _ntdd | jD | _tdd tj| jddiD | _|dkrzdd | jD }	dd t| j|	D }
tj|
ddi}
tj|	ddi}	t|
| _tj|	dd| _n|dkrtdt| j| _dS )r   z1keywords use_pdf and grad cannot be used togetherZapproximateZ	numerical zuse_pdf=zH is not understood, allowed values are '', 'approximate', or 'numerical'r   Nr   r1   c                 s  s   | ]}t |V  qd S r;   r   rD  r8   r8   r9   r   	  r   z/BinnedCostWithModel.__init__.<locals>.<genexpr>c                 S  s   g | ]}|  qS r8   flattenr   r5   r8   r8   r9   
<listcomp>  r   z0BinnedCostWithModel.__init__.<locals>.<listcomp>indexingijc                 S  s   g | ]}t |qS r8   )r2   diff)r   r  r8   r8   r9   rp    r   c                 S  s$   g | ]\}}|d d d|  qS )Nr   r1   r8   )r   rE  Zdxir8   r8   r9   rp    r   r   rP   zDuse_pdf="numerical" is not supported for multidimensional histograms)r   r   r  _pred_approximaterh  _pred_numerical	_pred_cdfr   r   r   r   r   r  rc  r   rd  r2   rs  rf  re  rT   vstackmeshgridr   r  r   rg  )r   rX   r  r   r   r   use_pdfr   r=  r  r  r   r8   r9   r     sJ    






zBinnedCostWithModel.__init__r   r   r   c                 C  s
   |  |S r;   )rh  r   r8   r8   r9   rG    s    zBinnedCostWithModel._predc                 C  sf   | j | jg|R  }t|d| j}| jdkr8|| j}t| jD ]}tj	||d}qBd||dk < |S )Nr   r   rP   r   )
r   rd  r-  rg  r   rN  rc  rU   r2   rs  r   r   rW  r   r8   r8   r9   rv    s    
zBinnedCostWithModel._pred_cdfc                 C  s   | j | jg|R  }|| j S r;   )r   re  rf  )r   r   rA   r8   r8   r9   rt  +  s    z%BinnedCostWithModel._pred_approximatec                   sx   ddl m} jdksJ tjd }tjd D ]:}j| }j|d  }| fdd||d ||< q8|S )Nr   )quadr   c                   s   j | g R  S r;   r%  r?   r  r8   r9   r  8  r   z5BinnedCostWithModel._pred_numerical.<locals>.<lambda>)Zscipy.integrater{  r   r2   emptyrg  rU   rd  )r   r   r{  rW  r   abr8   r  r9   ru  /  s    
 z#BinnedCostWithModel._pred_numericalc                 C  sn   | j | jg|R  }t|d| j| j}| jdkrF|| jg| jR }td| jd D ]}t	j
||d}qV|S )Nr0  r   rP   )r   rd  r-  r   rg  r   rN  rc  rU   r2   rs  rz  r8   r8   r9   
_pred_grad;  s    
zBinnedCostWithModel._pred_gradr   r   c                 C  s
   | j d uS r;   r  r   r8   r8   r9   r   D  s    zBinnedCostWithModel._has_grad)r   r   r   r   r   r   r   rG  rv  rt  ru  r  r   r   r8   r8   r   r9   rb    s   
2	rb  c                      s   e Zd ZU dZdZded< ded< ded< d	d
ddddddddd fddZdddddZdddddZddddd Z	d!d"d#d$Z
dd"d%d&Zdddd'd(Zdd)dd*d+Zdddd,d-Z  ZS ).r,   u  
    Binned cost function for a template fit with uncertainties on the template.

    This cost function is for a mixture of components. Use this if the sample originate
    from two or more components and you are interested in estimating the yield that
    originates from one or more components. In high-energy physics, one component is
    often a peaking signal over a smooth background component. A component can be
    described by a parametric model or a template.

    A parametric model is accepted in form of a scaled cumulative density function,
    while a template is a non-parametric shape estimate obtained by histogramming a
    Monte-Carlo simulation. Even if the Monte-Carlo simulation is asymptotically
    correct, estimating the shape from a finite simulation sample introduces some
    uncertainty. This cost function takes that additional uncertainty into account.

    There are several ways to fit templates and take the sampling uncertainty into
    account. Barlow and Beeston [1]_ found an exact likelihood for this problem, with
    one nuisance parameter per component per bin. Solving this likelihood is somewhat
    challenging though. The Barlow-Beeston likelihood also does not handle the
    additional uncertainty in weighted templates unless the weights per bin are all
    equal.

    Other works [2]_ [3]_ [4]_ describe likelihoods that use only one nuisance parameter
    per bin, which is an approximation. Some marginalize over the nuisance parameters
    with some prior, while others profile over the nuisance parameter. This class
    implements several of these methods. The default method is the one which performs
    best under most conditions, according to current knowledge. The default may change
    if this assessment changes.

    The cost function returns an asymptotically chi-square distributed test statistic,
    except for the method "asy", where it is the negative logarithm of the marginalised
    likelihood instead. The standard transform [5]_ which we use convert likelihoods
    into test statistics only works for (profiled) likelihoods, not for likelihoods
    marginalized over a prior.

    All methods implemented here have been generalized to work with both weighted data
    and weighted templates, under the assumption that the weights are independent of the
    data. This is not the case for sWeights, and the uncertaintes for results obtained
    with sWeights will only be approximately correct [6]_. The methods have been further
    generalized to allow fitting a mixture of parametric models and templates.

    .. [1] Barlow and Beeston, Comput.Phys.Commun. 77 (1993) 219-228
    .. [2] Conway, PHYSTAT 2011 proceeding, https://doi.org/10.48550/arXiv.1103.0354
    .. [3] Argüelles, Schneider, Yuan, JHEP 06 (2019) 030
    .. [4] Dembinski and Abdelmotteleb, https://doi.org/10.48550/arXiv.2206.12346
    .. [5] Baker and Cousins, NIM 221 (1984) 437-442
    .. [6] Langenbruch, Eur.Phys.J.C 82 (2022) 5, 393
    )_model_datard  rc  _implrg  z9List[Union[Tuple[NDArray, NDArray], Tuple[Model, float]]]r  ri  rd  rj  rc  Nr   da)r   r   methodr   rC  z#Collection[Union[Model, ArrayLike]]r   r   str)rX   r  model_or_templater   r   r  c                  s  t |}|dk rtdt|}t |}	d}
i  g | _t|D ]8\}}t|trt|}|j|	kr|j|	d ks|j	dd |krtd|d 
 }|d 
 }n,|j|	ks|j	|krtd|
 }|
 }dt| }||9 }||d	 9 }| j||f d
tjf d| < q>t|trpt|d}t |}
| j||
f |D ]}||  d| d| < qNq>tdq>|durt  t |krtd fddt |D  ttttd}z|| | _W n( ty   td| d| Y n0 |dkrtjdtd	d t  ||| | jdkrPt | jf| _t| j| _n:t dd | jD | _t!dd tj"| jddiD | _t#| j| _$dS )a\  
        Initialize cost function with data and model.

        Parameters
        ----------
        n : array-like
            Histogram counts. If this is an array with dimension D+1, where D is the
            number of histogram axes, then the last dimension must have two elements and
            is interpreted as pairs of sum of weights and sum of weights squared.
        xe : array-like or collection of array-like
            Bin edge locations, must be len(n) + 1, where n is the number of bins. If
            the histogram has more than one axis, xe must be a collection of the bin
            edge locations along each axis.
        model_or_template : collection of array-like or callable
            Collection of models or arrays. An array represents the histogram counts of a
            template. The template histograms must use the same axes as the data
            histogram. If the counts are represented by an array with dimension D+1,
            where D is the number of histogram axes, then the last dimension must have
            two elements and is interpreted as pairs of sum of weights and sum of
            weights squared. Callables must return the model cdf evaluated at xe.
        name : sequence of str or None, optional
            Optional name for the yield of each template and the parameter of each model
            (in order). Must have the same length as there are templates and model
            parameters in model_or_template. Default is None.
        verbose : int, optional
            Verbosity level. 0: no output (default). 1: print current args and
            negative log-likelihood value.
        method : {"jsc", "asy", "da"}, optional
            Which method to use. "jsc": Conway's method [2]_. "asy": ASY method [3]_.
            "da": DA method [4]_. Default is "da", which to current knowledge offers the
            best overall performance. The default may change in the future, so please
            set this parameter explicitly in code that has to be stable. For all methods
            except the "asy" method, the minimum value is chi-square distributed.
    [compiled bytecode: remainder of Template.__init__ (input validation, method selection) and the internal methods _pred, _value, _grad, _has_grad, _errordef; not human-readable in this dump]
        Return the fitted template and its standard deviation.

        This returns the prediction from the templates, the sum over the products of the
        template yields with the normalized templates. The standard deviation, returned
        as the second value, is the estimated uncertainty of the fitted
        template alone. It is obtained via error propagation, taking the statistical
        uncertainty in the template into account, but regarding the yields as parameters
        without uncertainty.

        Parameters
        ----------
        args : array-like
            Parameter values.

        Returns
        -------
        y, yerr : NDArray, NDArray
            Template prediction and its standard deviation, based on the statistical
            uncertainty of the template only.
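
        A small usage sketch, assuming ``cost`` is a Template instance and ``m`` the
        corresponding Minuit object from an earlier fit::

            m.migrad()                              # minimize first
            mu, mu_err = cost.prediction(m.values)  # fitted template and its uncertainty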
    [compiled bytecode: Template.prediction body, Template._visualize, Template._pulls and the BinnedNLL class header; not human-readable in this dump]
    Binned negative log-likelihood.

    Use this if only the shape of the fitted PDF is of interest and the data is binned.
    This cost function works with normal and weighted histograms. The histogram can be
    one- or multi-dimensional.

    The cost function has a minimum value that is asymptotically chi2-distributed. It is
    constructed from the log-likelihood assuming a multivariate-normal distribution and
    using the saturated model as a reference, see :func:`multinomial_chi2` for details.

    When this class is used with weighted data, we use the Bohm-Zech transform for
    Poisson-distributed data and the :func:`poisson_chi2` cost function, because
    :func:`multinomial_chi2` yields biased results for weighted data. The
    reasoning for this choice is that :func:`multinomial_chi2` and :func:`poisson_chi2`
    yield the same result if the model predicts probabilities and the expected counts
    are computed by multiplying those probabilities with the total number of counts. Thus we
    can derive :func:`multinomial_chi2` as a special case of :func:`poisson_chi2` in
    case of unweighted data, but this mathematical equivalence is gone when data are
    weighted. The correct cost function is then :func:`poisson_chi2`.
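
    The following is a hedged sketch of the per-bin Bohm-Zech scaling referred to above,
    written out with plain numpy. Here ``w`` and ``w2`` are arrays with the sum of
    weights and the sum of squared weights per bin, and ``mu`` is the expected count;
    this is an illustration, not the internal implementation::

        import numpy as np

        def bohm_zech_scaled(w, w2, mu):
            # scale factor that maps the weighted count onto an equivalent Poisson count
            s = np.divide(w, w2, out=np.ones_like(w, dtype=float), where=w2 > 0)
            return s * w, s * mu  # both are then compared with a Poisson-based chi2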
    [compiled bytecode: BinnedNLL.cdf property and the BinnedNLL.__init__ signature; not human-readable in this dump]
        Initialize cost function with data and model.

        Parameters
        ----------
        n : array-like
            Histogram counts. If this is an array with dimension D+1, where D is the
            number of histogram axes, then the last dimension must have two elements
            and is interpreted as pairs of sum of weights and sum of weights squared.
        xe : array-like or collection of array-like
            Bin edge locations, must be len(n) + 1, where n is the number of bins.
            If the histogram has more than one axis, xe must be a collection of the
            bin edge locations along each axis.
        cdf : callable
            Cumulative density function of the form f(xe, par0, par1, ..., parN),
            where xe is a bin edge and par0, ... are model parameters. The corresponding
            density must be normalized to unity over the space covered by the histogram.
            If the model is multivariate, xe must be an array-like with shape (D, N),
            where D is the dimension and N is the number of points where the model is
            evaluated.
        verbose : int, optional
            Verbosity level. 0: no output (default).
            1: print current args and negative log-likelihood value.
        grad : callable or None, optional
            Optionally pass the gradient of the cdf (Default is None). It has the same
            calling signature as the cdf, but must return an array with the shape (K,
            N), where N is the number of data points and K is the number of parameters.
            The gradient can be used by Minuit to improve or speed up convergence.
        use_pdf : str, optional
            Either "", "numerical", or "approximate" (Default is ""). If the model cdf
            is not available, but the model pdf is, this option can be set to
            "numerical" or "approximate" to compute the integral of the pdf over the bin
            patch. The option "numerical" uses numerical integration, which is accurate
            but computationally expensive and only supported for 1D histograms. The
            option "approximate" uses the zero-order approximation of evaluating the pdf
            at the bin center, multiplied with the bin area. This is fast and works in
            higher dimensions, but can lead to biased results if the curvature of the
            pdf inside the bin is significant.
        name : sequence of str or None, optional
            Optional names for each parameter of the model (in order). Must have the
            same length as there are model parameters. Default is None.
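
        A minimal usage sketch (assuming scipy is available; the parameter names ``mu``
        and ``sigma`` are simply taken from the cdf signature)::

            import numpy as np
            from scipy.stats import norm
            from iminuit import Minuit
            from iminuit.cost import BinnedNLL

            rng = np.random.default_rng(3)
            n, xe = np.histogram(rng.normal(1.0, 2.0, 1000), bins=30)

            def cdf(xe, mu, sigma):
                # cdf of the fitted density, evaluated at the bin edges
                return norm.cdf(xe, mu, sigma)

            cost = BinnedNLL(n, xe, cdf)
            m = Minuit(cost, mu=0.0, sigma=1.0)
            m.migrad()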
    [compiled bytecode: remainder of BinnedNLL.__init__, BinnedNLL._pred, _value, _grad and the ExtendedBinnedNLL class header; not human-readable in this dump]
    Binned extended negative log-likelihood.

    Use this if shape and normalization of the fitted PDF are of interest and the data
    is binned. This cost function works with normal and weighted histograms. The
    histogram can be one- or multi-dimensional.

    The cost function also works for weighted data. It assumes that
    the weights are independent of the data. This is not the case for sWeights, and
    the uncertainties for results obtained with sWeights will only be approximately
    correct, see C. Langenbruch, Eur.Phys.J.C 82 (2022) 5, 393.

    The cost function has a minimum value that is asymptotically chi2-distributed. It is
    constructed from the log-likelihood assuming a Poisson distribution and using the
    saturated model as a reference.
    [compiled bytecode: ExtendedBinnedNLL.scaled_cdf property and the ExtendedBinnedNLL.__init__ signature; not human-readable in this dump]
        Initialize cost function with data and model.

        Parameters
        ----------
        n : array-like
            Histogram counts. If this is an array with dimension D+1, where D is the
            number of histogram axes, then the last dimension must have two elements
            and is interpreted as pairs of sum of weights and sum of weights squared.
        xe : array-like or collection of array-like
            Bin edge locations, must be len(n) + 1, where n is the number of bins.
            If the histogram has more than one axis, xe must be a collection of the
            bin edge locations along each axis.
        scaled_cdf : callable
            Scaled cumulative density function of the form f(xe, par0, [par1, ...]),
            where xe is a bin edge and par0, ... are model parameters. If the model is
            multivariate, xe must be an array-like with shape (D, N), where D is the
            dimension and N is the number of points where the model is evaluated.
        verbose : int, optional
            Verbosity level. 0: no output (default). 1: print current args and
            negative log-likelihood value.
        grad : callable or None, optional
            Optionally pass the gradient of the cdf (Default is None). It has the same
            calling signature as the cdf, but must return an array with the shape (K,
            N), where N is the number of data points and K is the number of parameters.
            The gradient can be used by Minuit to improve or speed up convergence.
        use_pdf : str, optional
            Either "", "numerical", or "approximate". If the model cdf is not available,
            but the model pdf is, this option can be set to "numerical" or "approximate"
            to compute the integral of the pdf over the bin patch. The option
            "numerical" uses numerical integration, which is accurate but
            computationally expensive and only supported for 1D histograms. The option
            "approximate" uses the zero-order approximation of evaluating the pdf at the
            bin center, multiplied with the bin area. This is fast and works in higher
            dimensions, but can lead to biased results if the curvature of the pdf
            inside the bin is significant.
        name : sequence of str or None, optional
            Optional names for each parameter of the model (in order). Must have the
            same length as there are model parameters. Default is None.
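
        A minimal usage sketch (assuming scipy is available; ``ntot`` is the yield
        parameter introduced by the scaled cdf)::

            import numpy as np
            from scipy.stats import norm
            from iminuit import Minuit
            from iminuit.cost import ExtendedBinnedNLL

            rng = np.random.default_rng(4)
            n, xe = np.histogram(rng.normal(0.0, 1.0, 800), bins=30)

            def scaled_cdf(xe, ntot, mu, sigma):
                # integral of the unnormalized density from -inf to xe
                return ntot * norm.cdf(xe, mu, sigma)

            cost = ExtendedBinnedNLL(n, xe, scaled_cdf)
            m = Minuit(cost, ntot=500, mu=0.1, sigma=1.5)
            m.migrad()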
    [compiled bytecode: ExtendedBinnedNLL._value, ExtendedBinnedNLL._grad and the LeastSquares class header; not human-readable in this dump]
    Least-squares cost function (aka chisquare function).

    Use this if you have data of the form (x, y +/- yerror), where x can be
    one-dimensional or multi-dimensional, but y is always one-dimensional. See
    :meth:`__init__` for details on how to use a multivariate model.
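
    A hedged sketch of a fit with a two-dimensional ``x``, following the ``(D, N)``
    convention described in :meth:`__init__` (all names are illustrative)::

        import numpy as np
        from iminuit import Minuit
        from iminuit.cost import LeastSquares

        def plane(xy, a, bx, by):
            x, y = xy                     # xy has shape (2, N)
            return a + bx * x + by * y

        rng = np.random.default_rng(5)
        xy = rng.uniform(size=(2, 50))
        z = plane(xy, 1.0, 2.0, 3.0) + rng.normal(0, 0.1, 50)

        cost = LeastSquares(xy, z, 0.1, plane)   # scalar yerror is broadcast
        m = Minuit(cost, a=0, bx=0, by=0)
        m.migrad()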
    [compiled bytecode: LeastSquares properties (x, y, yerror, model, loss) and the LeastSquares.__init__ signature; not human-readable in this dump]
        Initialize cost function with data and model.

        Parameters
        ----------
        x : array-like
            Locations where the model is evaluated. If the model is multivariate, x must
            have shape (D, N), where D is the number of dimensions and N the number of
            data points.
        y : array-like
            Observed values. Must have the same length as x.
        yerror : array-like or float
            Estimated uncertainty of observed values. Must have the same shape as y or be a
            scalar, which is then broadcast to the same shape as y.
        model : callable
            Function of the form f(x, par0, [par1, ...]) whose output is compared to
            observed values, where x is the location and par0, ... are model parameters.
            If the model is multivariate, x has shape (D, N), where D is the number
            of dimensions and N the number of data points.
        loss : str or callable, optional
            The loss function can be modified to make the fit robust against outliers,
            see scipy.optimize.least_squares for details. Only "linear" (default) and
            "soft_l1" are currently implemented, but users can pass any loss function as
            this argument. It should be a monotonic, twice differentiable function,
            which accepts the squared residual and returns a modified squared residual.
        verbose : int, optional
            Verbosity level. 0: no output (default). 1: print current args and
            negative log-likelihood value.

        Notes
        -----
        Alternative loss functions make the fit more robust against outliers by
        weakening the pull of outliers. The mechanical analog of a least-squares fit is
        a system with attractive forces. The points pull the model towards them with a
        force whose potential is given by :math:`\rho(z)` for a squared offset :math:`z`.
        The plot shows the standard potential in comparison with the weaker soft-l1
        potential, in which outliers act with a constant force independent of their
        distance.

        .. plot:: plots/loss.py
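
        A hedged sketch of a robust fit with the soft_l1 loss (the data and the injected
        outlier are made up for this illustration)::

            import numpy as np
            from iminuit import Minuit
            from iminuit.cost import LeastSquares

            def line(x, a, b):
                return a + b * x

            rng = np.random.default_rng(6)
            x = np.linspace(0, 1, 30)
            y = line(x, 1.0, 2.0) + rng.normal(0, 0.1, 30)
            y[3] += 2.0                               # one outlier

            cost = LeastSquares(x, y, 0.1, line, loss="soft_l1")
            m = Minuit(cost, a=0, b=0)
            m.migrad()

        The ``loss`` property can also be reassigned after construction, for example to
        compare the robust result with the plain "linear" loss.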
    [compiled bytecode: LeastSquares.__init__ body, LeastSquares._ndata and the start of LeastSquares.visualize; not human-readable in this dump]
        Visualize data and model agreement (requires matplotlib).

        The visualization is drawn with matplotlib.pyplot into the current axes.

        Parameters
        ----------
        args : array-like
            Parameter values.

        model_points : int or array-like, optional
            How many points to use to draw the model. Default is 0; in this case
            a smart sampling algorithm selects the number of points. If array-like,
            it is interpreted as the point locations.
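
        A small sketch, assuming ``cost`` and ``m`` are a LeastSquares instance and its
        fitted Minuit object from one of the examples above::

            from matplotlib import pyplot as plt

            cost.visualize(m.values, model_points=200)  # draws into the current axes
            plt.show()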
    [compiled bytecode: remainder of LeastSquares.visualize; not human-readable in this dump]
        Return the prediction from the fitted model.

        Parameters
        ----------
        args : array-like
            Parameter values.

        Returns
        -------
        NDArray
            Model prediction for each data point.
    [compiled bytecode: LeastSquares.prediction body and the internal pull and gradient methods of LeastSquares; not human-readable in this dump]
    Gaussian penalty for one or several parameters.

    The Gaussian penalty acts like a pseudo-measurement of the parameter itself, based
    on a (multi-variate) normal distribution. Penalties can be set for one or several
    parameters at once (which is more efficient). When several parameter are
    constrained, one can specify the full covariance matrix of the parameters.

    Notes
    -----
    It is sometimes necessary to add a weak penalty on a parameter to avoid
    instabilities in the fit. A typical example in high-energy physics is the fit of a
    signal peak above some background. If the amplitude of the peak vanishes, the shape
    parameters of the peak become unconstrained and the fit becomes unstable. This can
    be avoided by adding weak (large uncertainty) penalty on the shape parameters whose
    pull is negligible if the peak amplitude is non-zero.

    This class can also be used to approximately include external measurements of some
    parameters, if the original cost function is not available or too costly to compute.
    If the external measurement was performed in the asymptotic limit with a large
    sample, a Gaussian penalty is an accurate statistical representation of the external
    result.
    )	_expected_cov_covinvzUnion[str, Iterable[str]]r   )r   r   errorc                   s   t |tr|fnt|}t|}t|| _| jjdkr>td|dkr\t| j|kr\tdt|| _t| jt| jkrtd| jjdk r|  jdC  _n(| jjdkrt	| jstdntdt
| j| _
t dd	 |D d
 dS )a  
        Initialize the normal constraint with expected value(s) and error(s).

        Parameters
        ----------
        args : str or sequence of str
            Parameter name(s).
        value : float or array-like
            Expected value(s). Must have same length as `args`.
        error : float or array-like
            Expected error(s). If 1D, must have same length as `args`. If 2D, must be
            the covariance matrix of the parameters.
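
        A short sketch of the two call forms (the numbers are illustrative)::

            import numpy as np
            from iminuit.cost import NormalConstraint

            c1 = NormalConstraint("a", 1.0, 0.1)     # one parameter with a 1-sigma error
            cov = np.array([[0.04, 0.01],
                            [0.01, 0.09]])
            c2 = NormalConstraint(("a", "b"), (1.0, 2.0), cov)  # correlated pair, full covariance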
        r   z)value must be a scalar or one-dimensionalz)size of value does not match size of argsz*size of error does not match size of valuerN   *covariance matrix is not positive definitez6covariance matrix cannot have more than two dimensionsc                 S  s   i | ]
}|d qS r;   r8   r   rd   r8   r8   r9   r  	  r   z-NormalConstraint.__init__.<locals>.<dictcomp>FN)r   r  rT   r   r   r  rK   r  r  r
   r  r   r   )r   r   r   r  Ztp_argsnargsr   r8   r9   r   j	  s$    



zNormalConstraint.__init__c                 C  s   | j S )z
        Get expected covariance of parameters.

        Can be 1D (diagonal of covariance matrix) or 2D (full covariance matrix).
    [compiled bytecode: remaining NormalConstraint properties and internal methods; not human-readable in this dump]
 |	d
  | j}|t|  || || d d dS )z
        Visualize data and model agreement (requires matplotlib).

        The visualization is drawn with matplotlib.pyplot into the current axes.

        Parameters
        ----------
        args : array-like
            Parameter values.
    [compiled bytecode: NormalConstraint.visualize body, module-level helper functions, the deprecation shim (__getattr__) and the module constant table; not human-readable in this dump]