U
    oeZ                     @   s@  d Z ddlZddlmZ ede zddlZW n* ek
rZ   ddlmZ eddY nX ej	Z	dd Z
ej  d	ZeeZG d
d dZdd Zdd Zdd Zd5ddZdZd6ddZdd Zdd Zdd Zd7ddZdd  Zd8d!d"Zd#d$ Zd%d& Zd'd( Zd)d* Z d+d, Z!d-d. Z"d/d0 Z#d1d2 Z$d3d4 Z%dS )9a  A state-emitting MarkovModel.

Note terminology similar to Manning and Schutze is used.


Functions:
train_bw        Train a markov model using the Baum-Welch algorithm.
train_visible   Train a visible markov model using MLE.
find_states     Find the a state sequence that explains some observations.

load            Load a MarkovModel.
save            Save a MarkovModel.

Classes:
MarkovModel     Holds the description of a markov model
    N)BiopythonDeprecationWarningzThe 'Bio.MarkovModel' module is deprecated and will be removed in a future release of Biopython. Consider using the hmmlearn package instead.)MissingPythonDependencyErrorzRPlease install NumPy if you want to use Bio.MarkovModel. See http://www.numpy.org/c                 C   s@   i }t | ddd }t| d }|D ]\}}|| ||< q&|S )zAReturn a dictionary of values with their sequence offset as keys.N   )	enumeratelen)valuesdentriesnindexkey r   .lib/python3.8/site-packages/Bio/MarkovModel.py	itemindex0   s    r   gYnc                   @   s"   e Zd ZdZdddZdd ZdS )MarkovModelz+Create a state-emitting MarkovModel object.Nc                 C   s"   || _ || _|| _|| _|| _dS )zInitialize the class.N)statesalphabet	p_initialp_transition
p_emission)selfr   r   r   r   r   r   r   r   __init__C   s
    zMarkovModel.__init__c                 C   s.   ddl m} | }t| | |d | S )z9Create a string representation of the MarkovModel object.r   )StringIO)ior   saveseekread)r   r   handler   r   r   __str__M   s
    

zMarkovModel.__str__)NNN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   @   s        

r   c                 C   s*   |   }||s&td|d||S )zNRead the first line and evaluate that begisn with the correct start (PRIVATE).zI expected z	 but got )readline
startswith
ValueError)r   startliner   r   r   _readline_and_check_startW   s    
r)   c                 C   s~  t | d}| dd }t | d}| dd }t||}t|t| }}t||_t | d}tt|D ]0}t | d||  d}t| d |j|< qrt||f|_	t | d	}tt|D ]B}t | d||  d}d
d | dd D |j	|ddf< qt||f|_
t | d}tt|D ]D}t | d||  d}dd | dd D |j
|ddf< q4|S )z.Parse a file handle into a MarkovModel object.zSTATES:r   Nz	ALPHABET:zINITIAL:  :r   zTRANSITION:c                 S   s   g | ]}t |qS r   float.0vr   r   r   
<listcomp>x   s     zload.<locals>.<listcomp>z	EMISSION:c                 S   s   g | ]}t |qS r   r,   r.   r   r   r   r1      s     )r)   splitr   r   npzerosr   ranger-   r   r   )r   r(   r   r   mmNMir   r   r   load_   s,    




*
,r:   c              
   C   s  |j }|dd| j d |dd| j d |d tt| jD ](}|d| j|  d| j| dd qL|d	 tt| jD ]6}|d| j|  ddd
d | j| D  d q|d tt| jD ]6}|d| j|  dddd | j| D  d qdS )z$Save MarkovModel object into handle.zSTATES:  
z
ALPHABET: z	INITIAL:
r*   z: gzTRANSITION:
c                 s   s   | ]}t |V  qd S Nstrr/   xr   r   r   	<genexpr>   s     zsave.<locals>.<genexpr>z
EMISSION:
c                 s   s   | ]}t |V  qd S r>   r?   rA   r   r   r   rC      s     N)	writejoinr   r   r5   r   r   r   r   )r6   r   wr9   r   r   r   r      s    &4r   c              	      s  t | t | }}|std|dk	rDt|}|j|fkrDtd|dk	rlt|}|j||fkrltd|dk	rt|}|j||fkrtdg }	t| |D ]}
|	 fdd|
D  qdd |	D }t|d	krtd
t|||	||||d}|\}}}t	| ||||S )a  Train a MarkovModel using the Baum-Welch algorithm.

    Train a MarkovModel using the Baum-Welch algorithm.  states is a list
    of strings that describe the names of each state.  alphabet is a
    list of objects that indicate the allowed outputs.  training_data
    is a list of observations.  Each observation is a list of objects
    from the alphabet.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix, before
    normalization.

    update_fn is an optional callback that takes parameters
    (iteration, log_likelihood).  It is called once per iteration.
    zNo training data given.N$pseudo_initial not shape len(states)5pseudo_transition not shape len(states) X len(states)5pseudo_emission not shape len(states) X len(alphabet)c                    s   g | ]} | qS r   r   rA   indexesr   r   r1      s     ztrain_bw.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r   rA   r   r   r   r1      s     r   z,I got training data with outputs of length 0)pseudo_initialpseudo_transitionpseudo_emission	update_fn)
r   r&   r3   asarrayshaper   appendmin_baum_welchr   )r   r   training_datarL   rM   rN   rO   r7   r8   training_outputsoutputsZlengthsrB   r   r   r   r   rJ   r   train_bw   sB    


	
rX   i  c
                 C   sd  |dkrt | }nt|| f}|dkr4t | | f}nt|| | f}|dkrXt | |f}nt|| |f}t|}
t|}t|}|dk	rt|}nd}|dk	rt|}nd}|dk	rt|}nd}d}ttD ]j}t}|D ] }|t| |||
|||||	7 }q|	dk	r|	|| |dk	r>t|| dk r> qP|}qt	dt dd |
||fD S )zfImplement the Baum-Welch algorithm to evaluate unknown parameters in the MarkovModel object (PRIVATE).Ng?z%HMM did not converge in %d iterationsc                 S   s   g | ]}t |qS r   )r3   exp)r/   _r   r   r   r1   %  s     z_baum_welch.<locals>.<listcomp>)
_random_norm_copy_and_checkr3   logr5   MAX_ITERATIONSLOG0_baum_welch_onefabsRuntimeError)r7   r8   rV   r   r   r   rL   rM   rN   rO   
lp_initiallp_transitionlp_emissionlpseudo_initiallpseudo_transitionlpseudo_emissionZ	prev_llikr9   ZllikrW   r   r   r   rT      sV    





rT   c	              	   C   s  t |}	t| |	||||}
t| |	|||}t| | |	f}t|	D ]}|| }t| | f}t| D ]R}t| D ]D}|
| | || |  || |  || |d   }||| |< qpqd|t| |dddd|f< qBt| |	f}t|	D ]0}t| D ]"}t||dd|f || |< qqt| }t| D ]}t||ddf ||< q2|dddf }|dk	rt||}|t| }t| D ]p}t| D ],}t|||ddf ||  || |< q|dk	rt|| |||< || t||  ||< qt| D ]}t|t }t|	D ]:}|| }t| D ]"}t	|| ||||f ||< q2q|t| }|dk	rt||| }|t| }|||ddf< qt|
dd|	f S )zExecute one step for Baum-Welch algorithm (PRIVATE).

    Do one iteration of Baum-Welch based on a sequence of output.
    Changes the value for lp_initial, lp_transition and lp_emission in place.
    r   Nr   )
r   _forward	_backwardr3   r4   r5   _logsum
_logvecaddr_   	logaddexp)r7   r8   rW   rc   rd   re   rf   rg   rh   TZfmatZbmatZlp_arctkZlp_traverser9   jlpZlp_arcout_tZ	lp_arcoutZksumr   r   r   r`   (  s`    


 "


*
$
	r`   c                 C   s   t | |d f}||dddf< td|d D ]n}||d  }t| D ]T}	t}
t| D ]6}|| |d  || |	  || |  }t|
|}
qX|
||	 |< qHq0|S )zImplement forward algorithm (PRIVATE).

    Calculate a Nx(T+1) matrix, where the last column is the total
    probability of the output.
    r   Nr   r3   r4   r5   r_   rm   )r7   rn   rc   rd   re   rW   matrixro   rp   rq   lprobr9   rr   r   r   r   ri     s    (ri   c                 C   s   t | |d f}t|d ddD ]j}|| }t| D ]T}t}	t| D ]6}
||
 |d  || |
  || |  }t|	|}	qF|	|| |< q6q"|S )z'Implement backward algorithm (PRIVATE).r   r   rs   )r7   rn   rd   re   rW   rt   ro   rp   r9   ru   rq   rr   r   r   r   rj     s    (rj   c                    s&  t | t | }}|dk	r8t|}|j|fkr8td|dk	r`t|}|j||fkr`td|dk	rt|}|j||fkrtdg g  }}	t| t| |D ]P\}
}t |t |
krtd|fdd|D  |	 fdd|
D  qt|||	||||}|\}}}t| ||||S )	a  Train a visible MarkovModel using maximum likelihoood estimates for each of the parameters.

    Train a visible MarkovModel using maximum likelihoood estimates
    for each of the parameters.  states is a list of strings that
    describe the names of each state.  alphabet is a list of objects
    that indicate the allowed outputs.  training_data is a list of
    (outputs, observed states) where outputs is a list of the emission
    from the alphabet, and observed states is a list of states from
    states.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix.
    NrG   rH   rI   zstates and outputs not alignedc                    s   g | ]} | qS r   r   rA   )states_indexesr   r   r1     s     z!train_visible.<locals>.<listcomp>c                    s   g | ]} | qS r   r   rA   )outputs_indexesr   r   r1     s     )	r   r3   rP   rQ   r&   r   rR   _mler   )r   r   rU   rL   rM   rN   r7   r8   training_statesrV   ZtoutputsZtstatesrB   r   r   r   r   )rw   rv   r   train_visible  s@    



	
rz   c                 C   s  t | }|r|| }|D ]}||d   d7  < qt|}t | | f}	|rV|	| }	|D ]D}tt|d D ].}
||
 ||
d   }}|	||f  d7  < qnqZtt|	D ]4}|	|ddf t|	|ddf  |	|ddf< qt | |f}|r|| }t | |f}t||D ]4\}}t||D ]\}}|||f  d7  < q&qtt|D ]6}||ddf t||ddf  ||ddf< qV||	|fS )z<Implement Maximum likelihood estimation algorithm (PRIVATE).r   r   N)r3   r4   
_normalizer5   r   sumoneszip)r7   r8   rV   ry   rL   rM   rN   r   r   r   r   r9   rq   r   rW   osr   r   r   rx     s2    
24rx   c                 C   s   t | gS )z?Return indices of the maximum values aong the vector (PRIVATE).)r3   Zargmax)ZvectorZ	allowancer   r   r   	_argmaxes  s    r   c           
         s   | t j}tjt }tjt }tjt }tj	  fdd|D }t
|||||}tt |D ]0}|| \}}	fdd|D t|	f||< qv|S )zaFind states in the given Markov model output.

    Returns a list of (states, score) tuples.
    c                    s   g | ]} | qS r   r   rA   rJ   r   r   r1   .  s     zfind_states.<locals>.<listcomp>c                    s   g | ]} j | qS r   )r   rA   )r6   r   r   r1   5  s     )r   r   r3   r]   r   VERY_SMALL_NUMBERr   r   r   r   _viterbir5   rY   )
Zmarkov_modeloutputr7   rc   rd   re   resultsr9   r   scorer   )rK   r6   r   find_states  s    

"r   c                 C   s  t |}g }t| D ]}|dg|  qt| |f}||dd|d f  |dddf< td|D ]r}	||	 }
t| D ]\}|dd|	d f |dd|f  |||
f  }t|}||d  |||	f< ||| |	< qzqfg }g }t|dd|d f }|D ]&}||d |g|| |d  f q|r| \}	}}|	dkrT|||f n6||d  |	 }|D ] }||	d |g| |f qhq&|S )zSImplement Viterbi algorithm to find most likely states for a given input (PRIVATE).Nr   r   )r   r5   rR   r3   r4   r   pop)r7   rc   rd   re   r   rn   Z	backtracer9   Zscoresro   rp   rq   Zi_scoresrK   Z
in_processr   r   r   r   r   r   r   9  s4    $0$
"r   c                 C   sz   t | jdkr| t|  } nZt | jdkrntt | D ]4}| |ddf t| |ddf  | |ddf< q6ntd| S )z"Normalize matrix object (PRIVATE).r      Nz&I cannot handle matrixes of that shape)r   rQ   r|   r5   r&   )rt   r9   r   r   r   r{   b  s    4r{   c                 C   s   t | }t|S )z%Normalize a uniform matrix (PRIVATE).)r3   r}   r{   rQ   rt   r   r   r   _uniform_normo  s    
r   c                 C   s   t j| }t|S )z$Normalize a random matrix (PRIVATE).)r3   randomr{   r   r   r   r   r[   u  s    r[   c                 C   s   t j| dd} | j|kr tdt| jdkrNt t| d dkrtdnPt| jdkrtt| D ]*}t t| | d dkrhtd| qhntd	| S )
zFCopy a matrix and check its dimension. Normalize at the end (PRIVATE).r   )copyzIncorrect dimensiong      ?g{Gz?zmatrix not normalized to 1.0r   zmatrix %d not normalized to 1.0z&I don't handle matrices > 2 dimensions)r3   ZarrayrQ   r&   r   ra   r|   r5   )rt   Zdesired_shaper9   r   r   r   r\   {  s    

r\   c                 C   sF   t | jdkr&t| t| jf}n| }t}|D ]}t||}q2|S )z/Implement logsum for a matrix object (PRIVATE).r   )r   rQ   r3   ZreshapeZprodr_   rm   )rt   Zvecr|   Znumr   r   r   rk     s    rk   c                 C   sR   t | t |kstdtt | }tt | D ]}t| | || ||< q2|S )z5Implement a log sum for two vector objects (PRIVATE).zvectors aren't the same length)r   AssertionErrorr3   r4   r5   rm   )Zlogvec1Zlogvec2Zsumvecr9   r   r   r   rl     s
    rl   c                 C   s   t | }t|S )z-Return the exponential of a logsum (PRIVATE).)rk   r3   rY   )Znumbersr|   r   r   r   _exp_logsum  s    r   )NNNN)NNNNNNN)NNN)N)&r#   warningsZBior   warnZnumpyr3   ImportErrorr   rm   r   r   Zseedr   r]   r_   r   r)   r:   r   rX   r^   rT   r`   ri   rj   rz   rx   r   r   r   r{   r   r[   r\   rk   rl   r   r   r   r   r   <module>   sp   


%    
E       
J^   
?.
)	