
    &hZ                        d Z ddlZddlmZ  ej                  de       	 ddlZej                  Z	d Z
ej                  j                          dZ ej                  e      Z G d	 d
      Zd Zd Zd Z	 	 	 	 d dZdZ	 	 	 	 	 	 	 d!dZd Zd Zd Z	 	 	 d"dZd Zd#dZd Zd Zd Zd Z d Z!d Z"d Z#d Z$d Z%y# e$ r ddlmZ  ed      dw xY w)$a  A state-emitting MarkovModel.

Note terminology similar to Manning and Schutze is used.


Functions:
train_bw        Train a markov model using the Baum-Welch algorithm.
train_visible   Train a visible markov model using MLE.
find_states     Find the a state sequence that explains some observations.

load            Load a MarkovModel.
save            Save a MarkovModel.

Classes:
MarkovModel     Holds the description of a markov model
    N)BiopythonDeprecationWarningzThe 'Bio.MarkovModel' module is deprecated and will be removed in a future release of Biopython. Consider using the hmmlearn package instead.)MissingPythonDependencyErrorzRPlease install NumPy if you want to use Bio.MarkovModel. See http://www.numpy.org/c                 l    i }t        | ddd         }t        |       dz
  }|D ]  \  }}||z
  ||<    |S )zAReturn a dictionary of values with their sequence offset as keys.N   )	enumeratelen)valuesdentriesnindexkeys         ^/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/Bio/MarkovModel.py	itemindexr   1   sK    
Att%GFaA 
sU#H    gYnc                        e Zd ZdZ	 ddZd Zy)MarkovModelz+Create a state-emitting MarkovModel object.Nc                 J    || _         || _        || _        || _        || _        y)zInitialize the class.N)statesalphabet	p_initialp_transition
p_emission)selfr   r   r   r   r   s         r   __init__zMarkovModel.__init__D   s(      "($r   c                 v    ddl m}  |       }t        | |       |j                  d       |j	                         S )z9Create a string representation of the MarkovModel object.r   )StringIO)ior   saveseekread)r   r   handles      r   __str__zMarkovModel.__str__N   s-    T6A{{}r   NNN)__name__
__module____qualname____doc__r   r$    r   r   r   r   A   s    5 OS%r   r   c                 j    | j                         }|j                  |      st        d|d|      |S )zNRead the first line and evaluate that begisn with the correct start (PRIVATE).zI expected z	 but got )readline
startswith
ValueError)r#   startlines      r   _readline_and_check_startr1   X   s7    ??D??5!;uiyABBKr   c                    t        | d      }|j                         dd }t        | d      }|j                         dd }t        ||      }t        |      t        |      }}t	        j
                  |      |_        t        | d      }t        t        |            D ]>  }t        | d||    d      }t        |j                         d         |j                  |<   @ t	        j
                  ||f      |_	        t        | d	      }t        t        |            D ]P  }t        | d||    d      }|j                         dd D cg c]  }t        |       c}|j                  |ddf<   R t	        j
                  ||f      |_
        t        | d
      }t        t        |            D ]P  }t        | d||    d      }|j                         dd D cg c]  }t        |       c}|j                  |ddf<   R |S c c}w c c}w )z.Parse a file handle into a MarkovModel object.zSTATES:r   Nz	ALPHABET:zINITIAL:  :r   zTRANSITION:z	EMISSION:)r1   splitr   r	   npzerosr   rangefloatr   r   )	r#   r0   r   r   mmNMivs	            r   loadr?   `   s    %VY7DZZ\!"F %V[9Dzz|ABH	VX	&Bv;HqA 88A;BL$VZ8D3v; 2(2fQi[1BC

R 01Q2
 hh1v&BO$V];D3v; E(2fQi[1BC37::<3C Daq D1E
 HHaV$BM$V[9D3v; C(2fQi[1BC15ab1ABAuQxBadC I !E Cs   G<Hc                    |j                   } |ddj                  | j                         d        |ddj                  | j                         d        |d       t	        t        | j                              D ],  } |d| j                  |    d| j                  |   dd       .  |d	       t	        t        | j                              D ]A  } |d| j                  |    ddj                  d
 | j                  |   D               d       C  |d       t	        t        | j                              D ]A  } |d| j                  |    ddj                  d | j                  |   D               d       C y)z$Save MarkovModel object into handle.zSTATES:  
z
ALPHABET: z	INITIAL:
r3   z: gzTRANSITION:
c              3   2   K   | ]  }t        |        y wNstr.0xs     r   	<genexpr>zsave.<locals>.<genexpr>   s     'K1A'K   z
EMISSION:
c              3   2   K   | ]  }t        |        y wrE   rF   rH   s     r   rK   zsave.<locals>.<genexpr>   s     'I1A'IrL   N)	writejoinr   r   r8   r	   r   r   r   )r:   r#   wr=   s       r   r    r       sW    	A"))$%R()
388BKK(),-lO3r||$% 6	Bryy|nBr||Aq1
456o3r'( Q	Bryy|nBsxx'K8J'KKLB
OPQm3r}}%& O	Bryy|nBsxx'Ia8H'IIJ"
MNOr   c           	         t        |       t        |      }}|st        d      |0t        j                  |      }|j                  |fk7  rt        d      |1t        j                  |      }|j                  ||fk7  rt        d      |1t        j                  |      }|j                  ||fk7  rt        d      g }	t        |      }
|D ]$  }|	j                  |D cg c]  }|
|   	 c}       & |	D cg c]  }t        |       }}t        |      dk(  rt        d      t        |||	||||      }|\  }}}t        | ||||      S c c}w c c}w )a  Train a MarkovModel using the Baum-Welch algorithm.

    Train a MarkovModel using the Baum-Welch algorithm.  states is a list
    of strings that describe the names of each state.  alphabet is a
    list of objects that indicate the allowed outputs.  training_data
    is a list of observations.  Each observation is a list of objects
    from the alphabet.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix, before
    normalization.

    update_fn is an optional callback that takes parameters
    (iteration, log_likelihood).  It is called once per iteration.
    zNo training data given.$pseudo_initial not shape len(states)5pseudo_transition not shape len(states) X len(states)5pseudo_emission not shape len(states) X len(alphabet)r   z,I got training data with outputs of length 0)pseudo_initialpseudo_transitionpseudo_emission	update_fn)
r	   r.   r6   asarrayshaper   appendmin_baum_welchr   )r   r   training_datarU   rV   rW   rX   r;   r<   training_outputsindexesoutputsrJ   lengthsr   r   r   s                    r   train_bwrc      su   4 v;HqA233!N3A4'CDD$JJ'89""q!f,TUU"**_5  QF*TUU
 !G  ?W = =>?  00!s1v0G0
7|qGHH 			%+'	A +,'I|ZvxL*MM% !> 1s   E

6Ei  c
                    |t        |       }nt        || f      }|t        | | f      }nt        || | f      }|t        | |f      }nt        || |f      }t        j                  |      }
t        j                  |      }t        j                  |      }|t        j                  |      }nd}|t        j                  |      }nd}|t        j                  |      }nd}d}t	        t
              D ]Q  }t        }|D ]  }|t        | |||
|||||	      z  } |		 |	||       |t        j                  ||z
        dk  r n|}S t        dt
        z        |
||fD cg c]  }t        j                  |       c}S c c}w )zfImplement the Baum-Welch algorithm to evaluate unknown parameters in the MarkovModel object (PRIVATE).Ng?z%HMM did not converge in %d iterations)_random_norm_copy_and_checkr6   logr8   MAX_ITERATIONSLOG0_baum_welch_onefabsRuntimeErrorexp)r;   r<   r_   r   r   r   rU   rV   rW   rX   
lp_initiallp_transitionlp_emissionlpseudo_initiallpseudo_transitionlpseudo_emission	prev_llikr=   llikra   _s                        r   r]   r]      s     O	#It4	#QF+&|aV<!1a&)
$Z!Q8
 	"JFF<(M&&$K!&&0$VV$56!"66/2
 I>" U' 	GO" 
 
D	  a RWWY-=%>%D	%U( B^STT !+M;GH!BFF1IHHHs   #Fc	           
      @   t        |      }	t        | |	||||      }
t        | |	|||      }t        j                  | | |	f      }t        |	      D ]  }||   }t        j                  | | f      }t        |       D ]@  }t        |       D ]0  }|
|   |   ||   |   z   ||   |   z   ||   |dz      z   }|||   |<   2 B |t        |      z
  |dddd|f<    t        j                  | |	f      }t        |	      D ]+  }t        |       D ]  }t        ||dd|f         ||   |<    - t        j                  |       }t        |       D ]  }t        ||ddf         ||<    |dddf   }|t        ||      }|t        |      z
  }t        |       D ]]  }t        |       D ]!  }t        |||ddf         ||   z
  ||   |<   # |5t        ||   |      ||<   ||   t        ||         z
  ||<   _ t        |       D ]  }t        j                  |      t        z   }t        |	      D ]/  }||   }t        |       D ]  }t        ||   ||||f         ||<    1 |t        |      z
  }|t        |||         }|t        |      z
  }|||ddf<    t        |
dd|	f         S )zExecute one step for Baum-Welch algorithm (PRIVATE).

    Do one iteration of Baum-Welch based on a sequence of output.
    Changes the value for lp_initial, lp_transition and lp_emission in place.
    r   Nr   )
r	   _forward	_backwardr6   r7   r8   _logsum
_logvecaddri   	logaddexp)r;   r<   ra   rn   ro   rp   rq   rr   rs   Tfmatbmatlp_arctklp_traverser=   jlplp_arcout_t	lp_arcoutksums                         r   rj   rj   )  s/     	GAAq*m['JDQ=+w?D XXq!Qi F1X =AJhh1v&q 	'A1X ' GAJ#A&q)*!!nQ'( 1ga!en%  %'Aq!'	' &(<<q!Qw#=( ((Aq6"K1X 9q 	9A 'q!Qw 8KN1	99
 I1X 2{1a401	!2 QT"J"
O<
'*"55

 1X Lq 	JA")&Aq/":Yq\"IM!Q	J))-*:<NOM!,Q/'-:J2KKM!L 1X 
!xx{T!q 	>A
A1X >#DGVAq!G_=Q>	> gdm#'d$4Q$78D'$-'D AqD
!& 41:r   c                 2   t        j                  | |dz   f      }||dddf<   t        d|dz         D ]a  }||dz
     }t        |       D ]I  }	t        }
t        |       D ]+  }||   |dz
     ||   |	   z   ||   |   z   }t	        |
|      }
- |
||	   |<   K c |S )zImplement forward algorithm (PRIVATE).

    Calculate a Nx(T+1) matrix, where the last column is the total
    probability of the output.
    r   Nr   r6   r7   r8   ri   r|   )r;   r}   rn   ro   rp   ra   matrixr   r   r   lprobr=   r   s                r   rx   rx     s     XXq!a%j!F F1a4L1a!e_ 	!AENq 	!A E1X -AYq1u%a(8(;;k!nQ>OO!%,- !F1IaL	!	! Mr   c                    t        j                  | |dz   f      }t        |dz
  dd      D ]^  }||   }t        |       D ]I  }t        }	t        |       D ]+  }
||
   |dz      ||   |
   z   ||   |   z   }t	        |	|      }	- |	||   |<   K ` |S )z'Implement backward algorithm (PRIVATE).r   r   r   )r;   r}   ro   rp   ra   r   r   r   r=   r   r   r   s               r   ry   ry     s    XXq!a%j!F1q5"b! 	!AJq 	!A E1X -AYq1u%a(8(;;k!nQ>OO!%,- !F1IaL	!	! Mr   c           	         t        |       t        |      }}|0t        j                  |      }|j                  |fk7  rt	        d      |1t        j                  |      }|j                  ||fk7  rt	        d      |1t        j                  |      }|j                  ||fk7  rt	        d      g g }	}t        |       }
t        |      }|D ]k  \  }}t        |      t        |      k7  rt	        d      |j                  |D cg c]  }|
|   	 c}       |	j                  |D cg c]  }||   	 c}       m t        |||	||||      }|\  }}}t        | ||||      S c c}w c c}w )a  Train a visible MarkovModel using maximum likelihoood estimates for each of the parameters.

    Train a visible MarkovModel using maximum likelihoood estimates
    for each of the parameters.  states is a list of strings that
    describe the names of each state.  alphabet is a list of objects
    that indicate the allowed outputs.  training_data is a list of
    (outputs, observed states) where outputs is a list of the emission
    from the alphabet, and observed states is a list of states from
    states.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix.
    rR   rS   rT   zstates and outputs not aligned)	r	   r6   rY   rZ   r.   r   r[   _mler   )r   r   r^   rU   rV   rW   r;   r<   training_statesr_   states_indexesoutputs_indexestoutputststatesrJ   r   r   r   s                     r   train_visibler     s   . v;HqA!N3A4'CDD$JJ'89""q!f,TUU"**_5  QF*TUU
 )+B%Ov&N)O* H'w<3x=(=>>7Caq 1CDX F!3 FG	H 				A +,'I|ZvxL*MM  D Fs   E
$E$
c                 
   t        j                  |       }|r||z   }|D ]  }||d   xx   dz  cc<    t        |      }t        j                  | | f      }	|r|	|z   }	|D ]:  }t        t	        |      dz
        D ]  }
||
   ||
dz      }}|	||fxx   dz  cc<     < t        t	        |	            D ]%  }|	|ddf   t        |	|ddf         z  |	|ddf<   ' t        j                  | |f      }|r||z   }t        j                  | |f      }t        ||      D ](  \  }}t        ||      D ]  \  }}|||fxx   dz  cc<    * t        t	        |            D ]%  }||ddf   t        ||ddf         z  ||ddf<   ' ||	|fS )z<Implement Maximum likelihood estimation algorithm (PRIVATE).r   r   N)r6   r7   
_normalizer8   r	   sumoneszip)r;   r<   r_   r   rU   rV   rW   r   r   r   r   r=   r   r   ra   oss                    r   r   r     s    I.	! "&)!"9%I 88QF#L#&77! $s6{Q' 	$A!9fQUmqAA!#	$$ 3|$% J)!Q$/#l1a46H2IIQTJ
 1a&!J/1
!QJ/A "( 	"DAqq!t!	"" 3z?# D%ad+c*QT2B.CC
1a4D lJ..r   c                 .    t        j                  |       gS )z?Return indices of the maximum values aong the vector (PRIVATE).)r6   argmax)vector	allowances     r   	_argmaxesr     s    IIfr   c                 D   | }t        |j                        }t        j                  |j                  t
        z         }t        j                  |j                  t
        z         }t        j                  |j                  t
        z         }t        |j                        }|D cg c]  }||   	 }}t        |||||      }	t        t        |	            D ]?  }
|	|
   \  }}|D cg c]  }|j                  |    c}t        j                  |      f|	|
<   A |	S c c}w c c}w )zaFind states in the given Markov model output.

    Returns a list of (states, score) tuples.
    )r	   r   r6   rg   r   VERY_SMALL_NUMBERr   r   r   r   _viterbir8   rm   )markov_modeloutputr:   r;   rn   ro   rp   r`   rJ   resultsr=   r   scores                r   find_statesr      s    
 
BBIIA '889JFF2??->>?M&&)::;K$G"()Qgaj)F) q*m[&IG3w<  C
,23qbiil3RVVE]B
C N * 4s   #D#Dc                    t        |      }g }t        |       D ]  }|j                  dg|z          t        j                  | |f      }||dd|d   f   z   |dddf<   t        d|      D ]U  }	||	   }
t        |       D ]@  }|dd|	dz
  f   |dd|f   z   |||
f   z   }t        |      }||d      |||	f<   |||   |	<   B W g }g }t        |dd|dz
  f         }|D ]#  }|j                  |dz
  |g||   |dz
     f       % |r]|j                         \  }	}}|	dk(  r|j                  ||f       n-||d      |	   }|D ]  }|j                  |	dz
  |g|z   |f        |r]|S )zSImplement Viterbi algorithm to find most likely states for a given input (PRIVATE).Nr   r   )r	   r8   r[   r6   r7   r   pop)r;   rn   ro   rp   r   r}   	backtracer=   scoresr   r   r   i_scoresr`   
in_processr   r   r   s                     r   r   r   :  s   FAI1X %$!$% XXq!fFAvayL 99F1a4L1a[ &1Iq 	&AaQh'-1*==AqD@QQH)G#GAJ/F1a4L%IaLO	&& JGq!a%x()G :1q51#vayQ'789:
%>>+656NNFE?+q	*1-G @!!1q51#,">?@  Nr   c                    t        | j                        dk(  r| t        |       z  } | S t        | j                        dk(  r>t        t        |             D ]%  }| |ddf   t        | |ddf         z  | |ddf<   ' | S t	        d      )z"Normalize matrix object (PRIVATE).r      Nz&I cannot handle matrixes of that shape)r	   rZ   r   r8   r.   )r   r=   s     r   r   r   c  s    
6<<A#f+% M 
V\\	a	s6{# 	<A!!Q$<#fQTl*;;F1a4L	< M ABBr   c                 B    t        j                  |       }t        |      S )z%Normalize a uniform matrix (PRIVATE).)r6   r   r   rZ   r   s     r   _uniform_normr   p  s    WWU^Ffr   c                 V    t         j                  j                  |       }t        |      S )z$Normalize a random matrix (PRIVATE).)r6   randomr   r   s     r   re   re   v  s!    YYe$Ffr   c                    t        j                  | d      } | j                  |k7  rt        d      t	        | j                        dk(  r1t        j
                  t        |       dz
        dkD  rt        d      | S t	        | j                        dk(  rPt        t	        |             D ]7  }t        j
                  t        | |         dz
        dkD  s+t        d|z         | S t        d	      )
zFCopy a matrix and check its dimension. Normalize at the end (PRIVATE).r   )copyzIncorrect dimensiong      ?g{Gz?zmatrix not normalized to 1.0r   zmatrix %d not normalized to 1.0z&I don't handle matrices > 2 dimensions)r6   arrayrZ   r.   r	   rk   r   r8   )r   desired_shaper=   s      r   rf   rf   |  s     XXf1%F||}$.//
6<<A773v;$%,;<< M 
V\\	a	s6{# 	HAwws6!9~+,t3 !BQ!FGG	H
 M ABBr   c                     t        | j                        dkD  r5t        j                  | t        j                  | j                        f      }n| }t
        }|D ]  }t        ||      } |S )z/Implement logsum for a matrix object (PRIVATE).r   )r	   rZ   r6   reshapeprodri   r|   )r   vecr   nums       r   rz   rz     s]    
6<<1jj"''&,,"7!9:
C "S!"Jr   c                     t        |       t        |      k(  sJ d       t        j                  t        |             }t        t        |             D ]  }t	        | |   ||         ||<    |S )z5Implement a log sum for two vector objects (PRIVATE).zvectors aren't the same length)r	   r6   r7   r8   r|   )logvec1logvec2sumvecr=   s       r   r{   r{     sb    w<3w<'I)IIXXc'l#F3w<  6gaj'!*5q	6Mr   c                 B    t        |       }t        j                  |      S )z-Return the exponential of a logsum (PRIVATE).)rz   r6   rm   )numbersr   s     r   _exp_logsumr     s    
'
C66#;r   )NNNN)NNNNNNNr%   rE   )&r)   warningsBior   warnnumpyr6   ImportErrorr   r|   r   r   seedr   rg   ri   r   r1   r?   r    rc   rh   r]   rj   rx   ry   r   r   r   r   r   r   r   re   rf   rz   r{   r   r*   r   r   <module>r      sG  "  + P LL	 		  rvv  ."JO, BNJ  GIT[|.( <N~+/\
4&R
(	  0
&	$ s   B= =C