
    &h+                        d Z ddlZddlmZ 	 ddlZddlm	Z	  ej                  de	        G d d	      Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZedk(  rddlmZ  ed       yy# e$ r ddlmZ  ed      dw xY w)z8Maximum Entropy code.

Uses Improved Iterative Scaling.
    N)reduce)MissingPythonDependencyErrorzQPlease install NumPy if you want to use Bio.MaxEntropy. See http://www.numpy.org/)BiopythonDeprecationWarningzThe 'Bio.MaxEntropy' module is deprecated and will be removed in a future release of Biopython. Consider using scikit-learn instead.c                       e Zd ZdZd Zy)
MaxEntropya  Hold information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    Car data from example Naive Bayes Classifier example by Eric Meisner November 22, 2003
    http://www.inf.u-szeged.hu/~ormandi/teaching

    >>> from Bio.MaxEntropy import train, classify
    >>> xcar = [
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Domestic'],
    ...     ['Red', 'SUV', 'Imported'],
    ...     ['Red', 'Sports', 'Imported']]
    >>> ycar = ['Yes','No','Yes','No','Yes','No','Yes','No','No','Yes']

    Requires some rules or features

    >>> def udf1(ts, cl):
    ...     return ts[0] != 'Red'
    ...
    >>> def udf2(ts, cl):
    ...     return ts[1] != 'Sports'
    ...
    >>> def udf3(ts, cl):
    ...     return ts[2] != 'Domestic'
    ...
    >>> user_functions = [udf1, udf2, udf3]  # must be an iterable type
    >>> xe = train(xcar, ycar, user_functions)
    >>> for xv, yv in zip(xcar, ycar):
    ...     xc = classify(xe, xv)
    ...     print('Pred: %s gives %s y is %s' % (xv, xc, yv))
    ...
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Yellow', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Yellow', 'Sports', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is No
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Domestic'] gives No y is No
    Pred: ['Red', 'SUV', 'Imported'] gives No y is No
    Pred: ['Red', 'Sports', 'Imported'] gives No y is Yes
    c                 .    g | _         g | _        g | _        y)zInitialize the class.N)classesalphasfeature_fns)selfs    ]/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/Bio/MaxEntropy.py__init__zMaxEntropy.__init__Z   s        N)__name__
__module____qualname____doc__r    r   r   r   r   $   s    3jr   r   c                    g }t        | j                        t        | j                        k(  sJ | j                  D ]L  }d}t	        | j                  | j                        D ]  \  }}| |||      |z  z  } |j                  |       N |S )zCalculate the log of the probability for each class.

    me is a MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class.
            )lenr   r
   r	   zipappend)meobservationscoresklasslprobfnalphas          r   	calculater!   a   s     Fr~~#bii.00 R^^RYY7 	4IBRU+e33E	4e	
 Mr   c                     t        | |      }|d   | j                  d   }}t        dt        |            D ]  }||   |kD  s||   | j                  |   }}! |S )z%Classify an observation into a class.r      )r!   r	   ranger   )r   r   r   	max_scorer   is         r   classifyr'   r   se    r;'Fay"**Q-uI1c&k" 8!9y %ay"**Q-uI8 Lr   c                     i }t        t        |            D ]7  }t        t        |            D ]  } | ||   ||         }|dk7  s||||f<     9 |S )a>  Evaluate a feature function on every instance of the training set and class (PRIVATE).

    fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.
    r   )r$   r   )r   xsr	   valuesr&   jfs          r   _eval_feature_fnr-   |   sg     F3r7^ #s7|$ 	#A2a5'!*%AAv!"1v	##
 Mr   c                    i }t        |      D ]
  \  }}|||<    |D cg c]  }||   	 }}g }	t        |       }
|D ]B  }d}t        |
      D ]  }||j                  |||   fd      z  } |	j	                  ||
z         D |	S c c}w )zCalculate the expectation of each function from the data (PRIVATE).

    This is the constraint for the maximum entropy distribution. Return a
    list of expectations, parallel to the list of features.
    r   )	enumerater   r$   getr   )r)   ysr	   featuresclass2indexindexkeyyys_iexpectNfeaturesr&   s                 r   _calc_empirical_expectsr<      s     K( !
s C!$&'qKN'D'FBA q 	.Aaa\1--A	.a!e	
 M (s   Bc                     t        | |||      }g }|D ]J  }d}|j                         D ]  \  \  }}	}
|||   |	   |
z  z  } |j                  |t        |       z         L |S )zCalculate the expectation of each feature from the model (PRIVATE).

    This is not used in maximum entropy training, but provides a good function
    for debugging.
    r   )_calc_p_class_given_xitemsr   r   )r)   r	   r2   r
   p_yxexpectsr:   sumr&   r+   r,   s              r   _calc_model_expectsrC      s{     !Wh?DG &  	"IFQA471:>!C	"sSW}%	&
 Nr   c                    t        j                  t        |       t        |      f      }t        |      t        |      k(  sJ t        ||      D ]3  \  }}|j	                         D ]  \  \  }}}	||   |xx   ||	z  z  cc<    5 t        j
                  |      }t        t        |             D ]  }
t        ||
         }||
   |z  ||
<    |S )zCalculate conditional probability P(y|x) (PRIVATE).

    y is the class and x is an instance from the training set.
    Return a XSxCLASSES matrix of probabilities.
    )npzerosr   r   r?   expr$   rB   )r)   r	   r2   r
   prob_yxr:   r    xr6   r,   r&   zs               r   r>   r>      s     hhBW./G x=CK''h/ '  	'IFQAAJqMUQY&M	'' ffWoG3r7^ $
OQZ!^
$ Nr   c                     t        j                  | |f      }|D ]-  }|j                         D ]  \  \  }}}||   |xx   |z  cc<    / |S )z/Calculate a matrix of f sharp values (PRIVATE).)rE   rF   r?   )r9   nclassesr2   f_sharpr:   r&   r+   r,   s           r   _calc_f_sharprN      s[     hh8}%G   	IFQAAJqMQM	 Nr   c                 ^   d}d}||k  rdx}	}
|j                         D ]E  \  \  }}}||   |   |z  t        j                  |||   |   z        z  }|	|z  }	|
|||   |   z  z  }
G ||	| z  z
  |
 | z  }
}	|	|
z  }||z  }t        j                  |      |k  r	 |S |dz   }||k  rt	        d      )z,Solve delta using Newton's method (PRIVATE).r   r   r#   z Newton's method did not converge)r?   rE   rG   fabsRuntimeError)r9   r:   rM   	empiricalrH   max_newton_iterationsnewton_convergedeltaitersf_newton	df_newtonr&   r+   r,   prodratios                   r   _iis_solve_deltar[      s    
 EE
'
'""9  	.IFQA1:a=1$rvvegajm.C'DDDH
1--I	. ((Q,6
Q)9$775>O+ L 	 '
' =>>r   c           
          t        | |||      }t        |       }	|dd }
t        t        |            D ]&  }t        |	||   |||   |||      }|
|xx   |z  cc<   ( |
S )zBDo one iteration of hill climbing to find better alphas (PRIVATE).N)r>   r   r$   r[   )r)   r	   r2   rM   r
   e_empiricalrS   rT   r@   r9   	newalphasr&   rU   s                r   
_train_iisr_      s}     !Wh?DBAq	I3v; 
 QKN!
 	!
 r   c           
         | st        d      t        |       t        |      k7  rt        d      | |}	}t        t        |            }
|D cg c]  }t	        || |
       }}t        t        |       t        |
      |      }t        ||	|
|      }dgt        |      z  }d}||k  rt        ||
||||||      }t        ||      D cg c]  \  }}t        j                  ||z
         }}}t        t        j                  |d      }|}t               }||
|c|_        |_        |_        | ||       ||k  r	 |S ||k  rt#        d      c c}w c c}}w )aF  Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
    zNo data in the training set.z2training_set and results should be parallel lists.r   r   zIIS did not converge)
ValueErrorr   sortedsetr-   rN   r<   r_   r   rE   rP   r   addr   r
   r	   r   rQ   )training_setresultsr   	update_fnmax_iis_iterationsiis_convergerS   rT   r)   r1   r	   r   r2   rM   r]   r
   rV   nalphasrI   r6   diffr   s                         r   trainrl     s|   4 788
<CL(MNN 7B S\"G GRR \7;RHRC-s7|XFG *"b'8DK US]"FE
$
$!	
 ,/vw+?@41aA@@bffdA&\06-	2:r~ bM, I3 $
$. 122C S* As   E"E__main__)run_doctest)verbose)Ni'  gh㈵>d   g|=)r   warnings	functoolsr   numpyrE   ImportErrorBior   r   warnr   r!   r'   r-   r<   rC   r>   rN   r[   r_   rl   r   
Bio._utilsrn   r   r   r   <module>rx      s      , A: :z"".&,2H IX z& U
  0
&	$ s   A/ /B