U
    ]<`m\                     @   s*  d Z ddlZz
ejZW n4 ek
rJ   ddlZedej  dd ZY nX dd Zej	  dZ
ee
ZG d	d
 d
Zdd Zdd Zdd Zd4ddZdZd5ddZdd Zdd Zdd Zd6ddZdd Zd7d d!Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Z d0d1 Z!d2d3 Z"dS )8a  A state-emitting MarkovModel.

Note terminology similar to Manning and Schutze is used.


Functions:
train_bw        Train a markov model using the Baum-Welch algorithm.
train_visible   Train a visible markov model using MLE.
find_states     Find the a state sequence that explains some observations.

load            Load a MarkovModel.
save            Save a MarkovModel.

Classes:
MarkovModel     Holds the description of a markov model
    NzVFor optimal speed, please update to Numpy version 1.3 or later (current version is %s)c                 C   sP   ||  dkr|S | | dkr | S t | |}|tt| | t||   S )z>Implement logaddexp method if Numpy version is older than 1.3.d   )minnumpylogexp)ZlogxZlogyZminxy r   .lib/python3.8/site-packages/Bio/MarkovModel.py	logaddexp*   s    
r	   c                 C   s@   i }t | ddd }t| d }|D ]\}}|| ||< q&|S )zAReturn a dictionary of values with their sequence offset as keys.N   )	enumeratelen)valuesdentriesnindexkeyr   r   r   	itemindex4   s    r   gYnc                   @   s"   e Zd ZdZdddZdd ZdS )MarkovModelz+Create a state-emitting MarkovModel object.Nc                 C   s"   || _ || _|| _|| _|| _dS )zInitialize the class.N)statesalphabet	p_initialp_transition
p_emission)selfr   r   r   r   r   r   r   r   __init__G   s
    zMarkovModel.__init__c                 C   s.   ddl m} | }t| | |d | S )z9Create a string representation of the MarkovModel object.r   )StringIO)ior   saveseekread)r   r   handler   r   r   __str__Q   s
    

zMarkovModel.__str__)NNN)__name__
__module____qualname____doc__r   r#   r   r   r   r   r   D   s        

r   c                 C   s&   |   }||s"td||f |S )zNRead the first line and evaluate that begisn with the correct start (PRIVATE).zI expected %r but got %r)readline
startswith
ValueError)r"   startliner   r   r   _readline_and_check_start[   s    
r-   c                 C   sr  t | d}| dd }t | d}| dd }t||}t|t| }}t||_t | d}tt|D ],}t | d||  }t| d |j|< qrt||f|_	t | d}tt|D ]>}t | d||  }d	d
 | dd D |j	|ddf< qt||f|_
t | d}tt|D ]@}t | d||  }dd
 | dd D |j
|ddf< q,|S )z.Parse a file handle into a MarkovModel object.zSTATES:r   Nz	ALPHABET:zINITIAL:z  %s:r
   zTRANSITION:c                 S   s   g | ]}t |qS r   float.0vr   r   r   
<listcomp>|   s     zload.<locals>.<listcomp>z	EMISSION:c                 S   s   g | ]}t |qS r   r.   r0   r   r   r   r3      s     )r-   splitr   r   r   zerosr   ranger/   r   r   )r"   r,   r   r   mmNMir   r   r   loadc   s,    




*
,r;   c              	   C   s   |j }|dd| j  |dd| j  |d tt| jD ] }|d| j| | j| f  qD|d tt| jD ]0}|d| j| ddd	 | j| D f  q||d
 tt| jD ]0}|d| j| ddd	 | j| D f  qdS )z$Save MarkovModel object into handle.zSTATES: %s
 zALPHABET: %s
z	INITIAL:
z	  %s: %g
zTRANSITION:
z	  %s: %s
c                 s   s   | ]}t |V  qd S Nstrr1   xr   r   r   	<genexpr>   s     zsave.<locals>.<genexpr>z
EMISSION:
c                 s   s   | ]}t |V  qd S r=   r>   r@   r   r   r   rB      s     N)	writejoinr   r   r6   r   r   r   r   )r7   r"   wr:   r   r   r   r      s    .r   c              	      s  t | t | }}|std|dk	rDt|}|j|fkrDtd|dk	rlt|}|j||fkrltd|dk	rt|}|j||fkrtdg }	t| |D ]}
|	 fdd|
D  qdd |	D }t|d	krtd
t|||	||||d}|\}}}t	| ||||S )a  Train a MarkovModel using the Baum-Welch algorithm.

    Train a MarkovModel using the Baum-Welch algorithm.  states is a list
    of strings that describe the names of each state.  alphabet is a
    list of objects that indicate the allowed outputs.  training_data
    is a list of observations.  Each observation is a list of objects
    from the alphabet.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix, before
    normalization.

    update_fn is an optional callback that takes parameters
    (iteration, log_likelihood).  It is called once per iteration.
    zNo training data given.N$pseudo_initial not shape len(states)5pseudo_transition not shape len(states) X len(states)5pseudo_emission not shape len(states) X len(alphabet)c                    s   g | ]} | qS r   r   r@   indexesr   r   r3      s     ztrain_bw.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r   r@   r   r   r   r3      s     r   z,I got training data with outputs of length 0)pseudo_initialpseudo_transitionpseudo_emission	update_fn)
r   r*   r   asarrayshaper   appendr   _baum_welchr   )r   r   training_datarK   rL   rM   rN   r8   r9   training_outputsoutputsZlengthsrA   r   r   r   r   rI   r   train_bw   sB    


	
rV   i  c
                 C   sd  |dkrt | }nt|| f}|dkr4t | | f}nt|| | f}|dkrXt | |f}nt|| |f}t|}
t|}t|}|dk	rt|}nd}|dk	rt|}nd}|dk	rt|}nd}d}ttD ]j}t}|D ] }|t| |||
|||||	7 }q|	dk	r|	|| |dk	r>t|| dk r> qP|}qt	dt dd |
||fD S )zfImplement the Baum-Welch algorithm to evaluate unknown parameters in the MarkovModel object (PRIVATE).Ng?z%HMM did not converge in %d iterationsc                 S   s   g | ]}t |qS r   )r   r   )r1   _r   r   r   r3   )  s     z_baum_welch.<locals>.<listcomp>)
_random_norm_copy_and_checkr   r   r6   MAX_ITERATIONSLOG0_baum_welch_onefabsRuntimeError)r8   r9   rT   r   r   r   rK   rL   rM   rN   
lp_initiallp_transitionlp_emissionlpseudo_initiallpseudo_transitionlpseudo_emissionZ	prev_llikr:   ZllikrU   r   r   r   rR      sV    





rR   c	              	   C   s  t |}	t| |	||||}
t| |	|||}t| | |	f}t|	D ]}|| }t| | f}t| D ]R}t| D ]D}|
| | || |  || |  || |d   }||| |< qpqd|t| |dddd|f< qBt| |	f}t|	D ]0}t| D ]"}t||dd|f || |< qqt| }t| D ]}t||ddf ||< q2|dddf }|dk	rt||}|t| }t| D ]p}t| D ],}t|||ddf ||  || |< q|dk	rt|| |||< || t||  ||< qt| D ]}t|t }t|	D ]:}|| }t| D ]"}t	|| ||||f ||< q2q|t| }|dk	rt||| }|t| }|||ddf< qt|
dd|	f S )zExecute one step for Baum-Welch algorithm (PRIVATE).

    Do one iteration of Baum-Welch based on a sequence of output.
    Changes the value for lp_initial, lp_transition and lp_emission in place.
    r   Nr   )
r   _forward	_backwardr   r5   r6   _logsum
_logvecaddr[   r	   )r8   r9   rU   r_   r`   ra   rb   rc   rd   TZfmatZbmatZlp_arctkZlp_traverser:   jlpZlp_arcout_tZ	lp_arcoutZksumr   r   r   r\   ,  s`    


 "


*
$
	r\   c                 C   s   t | |d f}||dddf< td|d D ]n}||d  }t| D ]T}	t}
t| D ]6}|| |d  || |	  || |  }t|
|}
qX|
||	 |< qHq0|S )zImplement forward algorithm (PRIVATE).

    Calculate a Nx(T+1) matrix, where the last column is the total
    probability of the output.
    r   Nr   r   r5   r6   r[   r	   )r8   ri   r_   r`   ra   rU   matrixrj   rk   rl   lprobr:   rm   r   r   r   re     s    (re   c                 C   s   t | |d f}t|d ddD ]j}|| }t| D ]T}t}	t| D ]6}
||
 |d  || |
  || |  }t|	|}	qF|	|| |< q6q"|S )z'Implement backward algorithm (PRIVATE).r   r
   rn   )r8   ri   r`   ra   rU   ro   rj   rk   r:   rp   rl   rm   r   r   r   rf     s    (rf   c                    s&  t | t | }}|dk	r8t|}|j|fkr8td|dk	r`t|}|j||fkr`td|dk	rt|}|j||fkrtdg g  }}	t| t| |D ]P\}
}t |t |
krtd|fdd|D  |	 fdd|
D  qt|||	||||}|\}}}t| ||||S )	a  Train a visible MarkovModel using maximum likelihoood estimates for each of the parameters.

    Train a visible MarkovModel using maximum likelihoood estimates
    for each of the parameters.  states is a list of strings that
    describe the names of each state.  alphabet is a list of objects
    that indicate the allowed outputs.  training_data is a list of
    (outputs, observed states) where outputs is a list of the emission
    from the alphabet, and observed states is a list of states from
    states.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix.
    NrF   rG   rH   zstates and outputs not alignedc                    s   g | ]} | qS r   r   r@   )states_indexesr   r   r3     s     z!train_visible.<locals>.<listcomp>c                    s   g | ]} | qS r   r   r@   )outputs_indexesr   r   r3     s     )	r   r   rO   rP   r*   r   rQ   _mler   )r   r   rS   rK   rL   rM   r8   r9   training_statesrT   ZtoutputsZtstatesrA   r   r   r   r   )rr   rq   r   train_visible  s@    



	
ru   c                 C   s  t | }|r|| }|D ]}||d   d7  < qt|}t | | f}	|rV|	| }	|D ]D}tt|d D ].}
||
 ||
d   }}|	||f  d7  < qnqZtt|	D ]4}|	|ddf t|	|ddf  |	|ddf< qt | |f}|r|| }t | |f}t||D ]4\}}t||D ]\}}|||f  d7  < q&qtt|D ]6}||ddf t||ddf  ||ddf< qV||	|fS )z<Implement Maximum likelihood estimation algorithm (PRIVATE).r   r   N)r   r5   
_normalizer6   r   sumoneszip)r8   r9   rT   rt   rK   rL   rM   r   r   r   r   r:   rl   r   rU   osr   r   r   rs     s2    
24rs   c                 C   s   t | gS )z?Return indeces of the maximum values aong the vector (PRIVATE).)r   Zargmax)ZvectorZ	allowancer   r   r   	_argmaxes  s    r|   c           
         s   | t j}tjt }tjt }tjt }tj	  fdd|D }t
|||||}tt |D ]0}|| \}}	fdd|D t|	f||< qv|S )zaFind states in the given Markov model output.

    Returns a list of (states, score) tuples.
    c                    s   g | ]} | qS r   r   r@   rI   r   r   r3   2  s     zfind_states.<locals>.<listcomp>c                    s   g | ]} j | qS r   )r   r@   )r7   r   r   r3   9  s     )r   r   r   r   r   VERY_SMALL_NUMBERr   r   r   r   _viterbir6   r   )
Zmarkov_modeloutputr8   r_   r`   ra   resultsr:   r   scorer   )rJ   r7   r   find_states#  s    

"r   c                 C   s  t |}g }t| D ]}|dg|  qt| |f}||dd|d f  |dddf< td|D ]r}	||	 }
t| D ]\}|dd|	d f |dd|f  |||
f  }t|}||d  |||	f< ||| |	< qzqfg }g }t|dd|d f }|D ]&}||d |g|| |d  f q|r| \}	}}|	dkrT|||f n6||d  |	 }|D ] }||	d |g| |f qhq&|S )zSImplement Viterbi algorithm to find most likely states for a given input (PRIVATE).Nr   r   )r   r6   rQ   r   r5   r|   pop)r8   r_   r`   ra   r   ri   Z	backtracer:   Zscoresrj   rk   rl   Zi_scoresrJ   Z
in_processr   r   r   r   r   r   r~   =  s4    $0$
"r~   c                 C   s~   t | jdkr | tt|  } nZt | jdkrrtt | D ]4}| |ddf t| |ddf  | |ddf< q:ntd| S )z"Normalize matrix object (PRIVATE).r      Nz&I cannot handle matrixes of that shape)r   rP   r/   rw   r6   r*   )ro   r:   r   r   r   rv   f  s    4rv   c                 C   s   t | }t|S )z%Normalize a uniform matrix (PRIVATE).)r   rx   rv   rP   ro   r   r   r   _uniform_norms  s    
r   c                 C   s   t j| }t|S )z$Normalize a random matrix (PRIVATE).)r   randomrv   r   r   r   r   rX   y  s    rX   c                 C   s   t j| dd} | j|kr tdt| jdkrNt t| d dkrtdnPt| jdkrtt| D ]*}t t| | d dkrhtd| qhntd	| S )
zFCopy a matrix and check its dimension. Normalize at the end (PRIVATE).r   )copyzIncorrect dimensiong      ?g{Gz?zmatrix not normalized to 1.0r   zmatrix %d not normalized to 1.0z&I don't handle matrices > 2 dimensions)r   ZarrayrP   r*   r   r]   rw   r6   )ro   Zdesired_shaper:   r   r   r   rY     s    

rY   c                 C   sF   t | jdkr&t| t| jf}n| }t}|D ]}t||}q2|S )z/Implement logsum for a matrix object (PRIVATE).r   )r   rP   r   Zreshapeproductr[   r	   )ro   Zvecrw   Znumr   r   r   rg     s    rg   c                 C   sR   t | t |kstdtt | }tt | D ]}t| | || ||< q2|S )z5Implement a log sum for two vector objects (PRIVATE).zvectors aren't the same length)r   AssertionErrorr   r5   r6   r	   )Zlogvec1Zlogvec2Zsumvecr:   r   r   r   rh     s
    rh   c                 C   s   t | }t|S )z-Return the exponential of a logsum (PRIVATE).)rg   r   r   )Znumbersrw   r   r   r   _exp_logsum  s    r   )NNNN)NNNNNNN)NNN)N)#r'   r   r	   AttributeErrorwarningswarn__version__r   r   Zseedr}   r   r[   r   r-   r;   r   rV   rZ   rR   r\   re   rf   ru   rs   r|   r   r~   rv   r   rX   rY   rg   rh   r   r   r   r   r   <module>   sf   




%    
E       
J^   
?.
)	