"""Maximum Entropy code.

Uses Improved Iterative Scaling.
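
A maximum entropy classifier is a log-linear model: each class is scored by a
weighted sum of user-supplied feature functions, and Improved Iterative
Scaling fits the weights so that the expected value of each feature under the
model matches its average value over the training data.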
"""

from functools import reduce

import warnings

try:
    import numpy as np
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Please install NumPy if you want to use "
        "Bio.MaxEntropy. See http://www.numpy.org/"
    ) from None

from Bio import BiopythonDeprecationWarning

warnings.warn(
    "The 'Bio.MaxEntropy' module is deprecated and will be removed in a "
    "future release of Biopython. Consider using scikit-learn instead.",
    BiopythonDeprecationWarning,
)


class MaxEntropy:
    """Hold information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    Car data from the example Naive Bayes Classifier by Eric Meisner, November 22, 2003
    http://www.inf.u-szeged.hu/~ormandi/teaching

    >>> from Bio.MaxEntropy import train, classify
    >>> xcar = [
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Domestic'],
    ...     ['Red', 'SUV', 'Imported'],
    ...     ['Red', 'Sports', 'Imported']]
    >>> ycar = ['Yes','No','Yes','No','Yes','No','Yes','No','No','Yes']

    Requires some rules or features

    >>> def udf1(ts, cl):
    ...     return ts[0] != 'Red'
    ...
    >>> def udf2(ts, cl):
    ...     return ts[1] != 'Sports'
    ...
    >>> def udf3(ts, cl):
    ...     return ts[2] != 'Domestic'
    ...
    >>> user_functions = [udf1, udf2, udf3]  # must be an iterable type
    >>> xe = train(xcar, ycar, user_functions)
    >>> for xv, yv in zip(xcar, ycar):
    ...     xc = classify(xe, xv)
    ...     print('Pred: %s gives %s y is %s' % (xv, xc, yv))
    ...
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Yellow', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Yellow', 'Sports', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is No
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Domestic'] gives No y is No
    Pred: ['Red', 'SUV', 'Imported'] gives No y is No
    Pred: ['Red', 'Sports', 'Imported'] gives No y is Yes
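
    alphas is parallel to feature_fns: alphas[i] is the fitted weight for
    feature_fns[i].  See calculate() for how the two are combined into
    per-class scores.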
    """

    def __init__(self):
        """Initialize the class."""
        self.classes = []
        self.alphas = []
        self.feature_fns = []


def calculate(me, observation):
    """Calculate the log of the probability for each class.

    me is a MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class.
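
    The score for a class is SUM_i alphas[i] * feature_fns[i](observation, class);
    no normalizing constant is subtracted, so the scores are only meaningful
    relative to one another.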
    """
    scores = []
    assert len(me.feature_fns) == len(me.alphas)
    for klass in me.classes:
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores


def classify(me, observation):
    """Classify an observation into a class."""
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass


def _eval_feature_fn(fn, xs, classes):
    """Evaluate a feature function on every instance of the training set and class (PRIVATE).

    fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.
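
    For example, with the car data in the MaxEntropy docstring, udf1
    (ts[0] != 'Red') yields an entry for every (instance, class) pair whose
    instance is a 'Yellow' car, each with value True; the 'Red' cars give 0
    (False) and are omitted.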
    """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values


def _calc_empirical_expects(xs, ys, classes, features):
    """Calculate the expectation of each function from the data (PRIVATE).

    This is the constraint for the maximum entropy distribution. Return a
    list of expectations, parallel to the list of features.
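
    Each expectation is (1 / N) * SUM_n feature(x_n, y_n), i.e. the average
    value of the feature over the N training examples paired with their
    observed classes.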
    """
    # E_empirical[f_i] = SUM_x,y P(x, y) f_i(x, y)
    #                  = 1/N SUM_n f_i(x_n, y_n)
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(s / N)
    return expect


def _calc_model_expects(xs, classes, features, alphas):
    """Calculate the expectation of each feature from the model (PRIVATE).

    This is not used in maximum entropy training, but provides a good function
    for debugging.
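
    Each expectation is (1 / N) * SUM_n SUM_y P(y | x_n) * feature(x_n, y).
    After training these should approximately match the empirical
    expectations returned by _calc_empirical_expects.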
    """
    # E_model[f_i] = SUM_x P(x) SUM_y P(y|x) f_i(x, y)
    #              = 1/N SUM_x SUM_y P(y|x) f_i(x, y)
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        sum = 0.0
        for (i, j), f in feature.items():
            sum += p_yx[i][j] * f
        expects.append(sum / len(xs))
    return expects


def _calc_p_class_given_x(xs, classes, features, alphas):
    """Calculate conditional probability P(y|x) (PRIVATE).

    y is the class and x is an instance from the training set.
    Return a XSxCLASSES matrix of probabilities.
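
    P(y | x) is proportional to exp(SUM_i alphas[i] * feature_i(x, y)); each
    row of the returned matrix is normalized so that the probabilities over
    the classes sum to one.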
    """
    prob_yx = np.zeros((len(xs), len(classes)))

    # Calculate log P(y, x).
    assert len(features) == len(alphas)
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Take an exponent to get P(y, x).
    prob_yx = np.exp(prob_yx)
    # Divide out the probability over each class, so we get P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z
    return prob_yx


def _calc_f_sharp(N, nclasses, features):
    """Calculate a matrix of f sharp values (PRIVATE)."""
    # f#(x, y) = SUM_i f_i(x, y)
    f_sharp = np.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp


def _iis_solve_delta(
    N, feature, f_sharp, empirical, prob_yx, max_newton_iterations, newton_converge
):
    """Solve delta using Newton's method (PRIVATE)."""
    # Solve for delta_i in:
    # 1/N SUM_x SUM_y P(y|x) f_i(x, y) e^[delta_i * f#(x, y)] = empirical
    delta = 0.0
    iters = 0
    while iters < max_newton_iterations:  # iterate for Newton's method
        f_newton = df_newton = 0.0  # evaluate the function and derivative
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * np.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if np.fabs(ratio) < newton_converge:  # converged
            break
        iters = iters + 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta


def _train_iis(
    xs,
    classes,
    features,
    f_sharp,
    alphas,
    e_empirical,
    max_newton_iterations,
    newton_converge,
):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # Pre-calculate P(y|x) under the current alphas.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(
            N,
            features[i],
            f_sharp,
            e_empirical[i],
            p_yx,
            max_newton_iterations,
            newton_converge,
        )
        newalphas[i] += delta
    return newalphas


def train(
    training_set,
    results,
    feature_fns,
    update_fn=None,
    max_iis_iterations=10000,
    iis_converge=1e-5,
    max_newton_iterations=100,
    newton_converge=1e-10,
):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
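
    For example, passing update_fn=lambda me: print(me.alphas) prints the
    current feature weights after every IIS iteration.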
    zNo data in the training set.z2training_set and results should be parallel lists.c                    s   g | ]}t | qS r
   )r%   )r&   r   r   training_setr
   r   r)   6  s     ztrain.<locals>.<listcomp>r   r   c                 S   s   g | ]\}}t || qS r
   )r9   rA   )r&   r=   r'   r
   r
   r   r)   K  s     NzIIS did not converge)
ValueErrorr   sortedsetr@   r3   rI   r   r   r9   addr   r   r   r   rB   )rO   Zresultsr   Z	update_fnZmax_iis_iterationsZiis_convergerC   rD   r!   r,   r-   r?   rH   r   rF   ZnalphasZdiffr   r
   rN   r   train  sB    


rT   __main__)run_doctest)verbose)NrJ   rK   rL   rM   )r   	functoolsr   warningsZnumpyr9   ImportErrorZBior   r   warnr   r   r    r%   r3   r8   r4   r@   rG   rI   rT   r   Z
Bio._utilsrV   r
   r
   r
   r   <module>   sF   =

$     
L