
    Xfh                         d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
  ej        de           d Zd Zd	 Zd
 Z G d d          Z G d d          ZdS )z+Deal with representations of Markov Models.    N)defaultdict)BiopythonDeprecationWarning)SeqzThe 'Bio.HMM.MarkovModule' module is deprecated and will be removed in a future release of Biopython. Consider using the hmmlearn package instead.c                 p    d t          |           D             }t          |          fd|D             S )z=Return an array of n random numbers summing to 1.0 (PRIVATE).c                 4    g | ]}t          j                     S  )random).0_s     3lib/python3.11/site-packages/Bio/HMM/MarkovModel.py
<listcomp>z%_gen_random_array.<locals>.<listcomp>   s    333Q333    c                     g | ]}|z  S r   r   )r
   xtotals     r   r   z%_gen_random_array.<locals>.<listcomp>   s    )))!AI)))r   )rangesum)n	randArrayr   s     @r   _gen_random_arrayr      sA    33%((333I	NNE))))y))))r   c                 t    t          t                    }| D ] \  }}||                             |           !|S )z?Calculate which symbols can be emitted in each state (PRIVATE).r   listappend)emission_probs	emissionsstatesymbols       r   _calculate_emissionsr   "   sF     D!!I' ( (v%''''r   c                 t    t          t                    }| D ] \  }}||                             |           !|S )ax  Calculate which 'from transitions' are allowed for each state (PRIVATE).

    This looks through all of the trans_probs, and uses this dictionary
    to determine allowed transitions. It converts this information into
    a dictionary, whose keys are source states and whose values are
    lists of destination states reachable from the source state via a
    transition.
    r   trans_probstransitions
from_stateto_states       r   _calculate_from_transitionsr&   -   sG     d##K + 1 1
HJ&&x0000r   c                 t    t          t                    }| D ] \  }}||                             |           !|S )a~  Calculate which 'to transitions' are allowed for each state (PRIVATE).

    This looks through all of the trans_probs, and uses this dictionary
    to determine allowed transitions. It converts this information into
    a dictionary, whose keys are destination states and whose values are
    lists of source states from which the destination is reachable via a
    transition.
    r   r!   s       r   _calculate_to_transitionsr(   =   sG     d##K + 1 1
HH$$Z0000r   c                       e Zd ZdZdZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Z	 ddZd Zd Zd Zd Zd ZdS )MarkovModelBuilderai  Interface to build up a Markov Model.

    This class is designed to try to separate the task of specifying the
    Markov Model from the actual model itself. This is in hopes of making
    the actual Markov Model classes smaller.

    So, this builder class should be used to create Markov models instead
    of trying to initiate a Markov Model directly.
       c                     t          |          | _        t          |          | _        i | _        i | _        |                     ||          | _        i | _        |                     ||          | _	        dS )ag  Initialize a builder to create Markov Models.

        Arguments:
         - state_alphabet -- An iterable (e.g., tuple or list) containing
           all of the letters that can appear in the states
         - emission_alphabet -- An iterable (e.g., tuple or list) containing
           all of the letters for states that can be emitted by the HMM.

        N)
tuple_state_alphabet_emission_alphabetinitial_probtransition_prob
_all_blankemission_probtransition_pseudo_all_pseudoemission_pseudo)selfstate_alphabetemission_alphabets      r   __init__zMarkovModelBuilder.__init__[   sy      %^44"'(9":":   "!__^=NOO "$#//@QRRr   c                 ,    i }|D ]}|D ]	}d|||f<   
|S )a+  Return a dictionary with all counts set to zero (PRIVATE).

        This uses the letters in the first and second alphabet to create
        a dictionary with keys of two tuples organized as
        (letter of first alphabet, letter of second alphabet). The values
        are all set to 0.
        r   r   )r7   first_alphabetsecond_alphabet	all_blankfirst_statesecond_states         r   r2   zMarkovModelBuilder._all_blanku   sF     	) 	; 	;K / ; ;9:	;566; r   c                 6    i }|D ]}|D ]}| j         |||f<   |S )am  Return a dictionary with all counts set to a default value (PRIVATE).

        This takes the letters in first alphabet and second alphabet and
        creates a dictionary with keys of two tuples organized as:
        (letter of first alphabet, letter of second alphabet). The values
        are all set to the value of the class attribute DEFAULT_PSEUDO.
        )DEFAULT_PSEUDO)r7   r<   r=   
all_countsr?   r@   s         r   r5   zMarkovModelBuilder._all_pseudo   sN     
) 	N 	NK / N N:>:M
K677N r   c           	      f   | j         st          d          t          j        | j                   }t          j        | j                  }t          j        | j                  }t          j        | j                  }t          j        | j                  }t          | j	        | j
        |||||          S )zReturn the markov model corresponding with the current parameters.

        Each markov model returned by a call to this function is unique
        (ie. they don't influence each other).
        zMset_initial_probabilities must be called to fully initialize the Markov model)r0   	Exceptioncopydeepcopyr1   r3   r4   r6   HiddenMarkovModelr.   r/   )r7   r0   r1   r3   r4   r6   s         r   get_markov_modelz#MarkovModelBuilder.get_markov_model   s       	4  
 }T%677-(<==d&899 M$*@AA-(<==  #
 
 	
r   c                    t          j         |          | _        |D ]}|| j        vrt          d| d          t	          | j                  t	          | j                  z
  }|dk     rt          d          t          | j                                                  }|dk    rt          d          |dk    r%d|z
  |z  }| j        D ]}|| j        vr
|| j        |<   dS dS )a  Set initial state probabilities.

        initial_prob is a dictionary mapping states to probabilities.
        Suppose, for example, that the state alphabet is ('A', 'B'). Call
        set_initial_prob({'A': 1}) to guarantee that the initial
        state will be 'A'. Call set_initial_prob({'A': 0.5, 'B': 0.5})
        to make each initial state equally probable.

        This method must now be called in order to use the Markov model
        because the calculation of initial probabilities has changed
        incompatibly; the previous calculation was incorrect.

        If initial probabilities are set for all states, then they should add up
        to 1. Otherwise the sum should be <= 1. The residual probability is
        divided up evenly between all the states for which the initial
        probability has not been set. For example, calling
        set_initial_prob({}) results in P('A') = 0.5 and P('B') = 0.5,
        for the above example.
        State ' was not found in the sequence alphabetr   z.Initial probabilities can't exceed # of states      ?z+Total initial probability cannot exceed 1.0N)rF   r0   r.   
ValueErrorlenrE   r   values)r7   r0   r   num_states_not_setprob_sumprobs         r   set_initial_probabilitiesz,MarkovModelBuilder.set_initial_probabilities   s%   ( !Il33 " 	 	ED000 KUKKK   1 !!566T=N9O9OO!!LMMMt(//1122c>>IJJJ!!(N&88D- 4 4 111/3D%e,	 "!4 4r   c                    dt          | j                  z  }| j        D ]}|| j        |<   dt          | j                  z  }| j        D ]}|| j        |<   dt          | j                  z  }| j        D ]}|| j        |<   dS )a  Reset all probabilities to be an average value.

        Resets the values of all initial probabilities and all allowed
        transitions and all allowed emissions to be equal to 1 divided by the
        number of possible elements.

        This is useful if you just want to initialize a Markov Model to
        starting values (ie. if you have no prior notions of what the
        probabilities should be -- or if you are just feeling too lazy
        to calculate them :-).

        Warning 1 -- this will reset all currently set probabilities.

        Warning 2 -- This just sets all probabilities for transitions and
        emissions to total up to 1, so it doesn't ensure that the sum of
        each set of transitions adds up to 1.
        rM   N)rO   r1   r.   r0   r3   )r7   new_initial_probr   new_trans_probkeynew_emission_probs         r   set_equal_probabilitiesz*MarkovModelBuilder.set_equal_probabilities   s    & T%9!:!::) 	8 	8E'7De$$ s4#7888' 	7 	7C(6D %%  #d&8"9"99% 	8 	8C&7Ds##	8 	8r   c                     t          t          | j                            }| j        D ]}|                                | j        |<   | j        S )zSet all initial state probabilities to a randomly generated distribution.

        Returns the dictionary containing the initial probabilities.
        )r   rO   r.   popr0   )r7   initial_freqsr   s      r    set_random_initial_probabilitiesz3MarkovModelBuilder.set_random_initial_probabilities   sS    
 *#d.B*C*CDD) 	; 	;E'4'8'8':':De$$  r   c                    | j         st          d          t          | j                   }|D ]M}t          t	          ||                             }||         D ] }|                                | j         ||f<   !N| j         S )zSet all allowed transition probabilities to a randomly generated distribution.

        Returns the dictionary containing the transition probabilities.
        zNo transitions have been allowed yet. Allow some or all transitions by calling allow_transition or allow_all_transitions first.)r1   rE   r&   r   rO   r\   )r7   transitions_fromr$   freqsr%   s        r   #set_random_transition_probabilitiesz6MarkovModelBuilder.set_random_transition_probabilities  s    
 # 	C   7t7KLL* 	K 	KJ%c*::*F&G&GHHE,Z8 K K?Dyy{{$j(%;<<K ##r   c                    | j         st          d          t          | j                   }|D ]M}t          t	          ||                             }||         D ] }|                                | j         ||f<   !N| j         S )zSet all allowed emission probabilities to a randomly generated distribution.

        Returns the dictionary containing the emission probabilities.
        z@No emissions have been allowed yet. Allow some or all emissions.)r3   rE   r   r   rO   r\   )r7   r   r   ra   r   s        r   !set_random_emission_probabilitiesz4MarkovModelBuilder.set_random_emission_probabilities  s    
 ! 	R   ));<<	 	B 	BE%c)E*:&;&;<<E#E* B B6;iikk"E6?33B !!r   c                 ~    |                                   |                                  |                                  dS )zSet all probabilities to randomly generated numbers.

        Resets probabilities of all initial states, transitions, and
        emissions to random values.
        N)r^   rb   rd   r7   s    r   set_random_probabilitiesz+MarkovModelBuilder.set_random_probabilities,  s@     	--///00222..00000r   c                 
   |                      | j        | j                  }|                     | j        | j                  }| j        D ]}| j        |         ||<   | j        D ]}| j        |         ||<   || _        || _        dS )zCreate transitions between all states.

        By default all transitions within the alphabet are disallowed;
        this is a convenience function to change this to allow all
        possible transitions.
        N)r2   r.   r5   r1   r4   )r7   	all_probs
all_pseudoset_keys       r   allow_all_transitionsz(MarkovModelBuilder.allow_all_transitions8  s     OOD$8$:NOO	%%d&:D<PQQ
 + 	? 	?G!%!5g!>Ig- 	B 	BG"&"8"AJw  )!+r   Nc                     ||fD ]}|| j         vrt          d| d          ||f| j        vr2||f| j        vr'|d}|| j        ||f<   || j        }|| j        ||f<   dS t          d| d| d          )a  Set a transition as being possible between the two states.

        probability and pseudocount are optional arguments
        specifying the probabilities and pseudo counts for the transition.
        If these are not supplied, then the values are set to the
        default values.

        Raises:
        KeyError -- if the two states already have an allowed transition.

        rK   rL   Nr   Transition from  to z is already allowed.)r.   rN   r1   r4   rB   KeyError)r7   r$   r%   probabilitypseudocountr   s         r   allow_transitionz#MarkovModelBuilder.allow_transitionQ  s     !(+ 	 	ED000 KUKKK   1 !)===C
 'C( C(
 ";FD *h!78 ""1=HD"J#9:::Q:QQ8QQQ  r   c                 x    	 | j         ||f= | j        ||f= dS # t          $ r t          d| d| d          w xY w)zRestrict transitions between the two states.

        Raises:
        KeyError if the transition is not currently allowed.

        rn   ro   z is already disallowed.N)r1   r4   rp   )r7   r$   r%   s      r   destroy_transitionz%MarkovModelBuilder.destroy_transitiony  sp    	$j(%;<&
H'=>>> 	 	 	T:TT8TTT  	s    !9c                 `    ||f| j         v r|| j         ||f<   dS t          d| d| d          )zSet the probability of a transition between two states.

        Raises:
        KeyError if the transition is not allowed.

        rn   ro    is not allowed.N)r1   rp   )r7   r$   r%   rq   s       r   set_transition_scorez'MarkovModelBuilder.set_transition_score  sW     !T%999;FD *h!7888M:MM8MMM  r   c                 `    ||f| j         v r|| j         ||f<   dS t          d| d| d          )a  Set the default pseudocount for a transition.

        To avoid computational problems, it is helpful to be able to
        set a 'default' pseudocount to start with for estimating
        transition and emission probabilities (see p62 in Durbin et al
        for more discussion on this. By default, all transitions have
        a pseudocount of 1.

        Raises:
        KeyError if the transition is not allowed.

        rn   ro   rw   N)r4   rp   )r7   r$   r%   counts       r   set_transition_pseudocountz-MarkovModelBuilder.set_transition_pseudocount  sW     !T%;;;=BD"J#9:::M:MM8MMM  r   c                 `    ||f| j         v r|| j         ||f<   dS t          d| d| d          )zSet the probability of a emission from a particular state.

        Raises:
        KeyError if the emission from the given state is not allowed.

        Emission of  from rw   N)r3   rp   )r7   	seq_stateemission_staterq   s       r   set_emission_scorez%MarkovModelBuilder.set_emission_score  sW     ~&$*<<<>ID	>:;;;P~PPYPPP  r   c                 `    ||f| j         v r|| j         ||f<   dS t          d| d| d          )a  Set the default pseudocount for an emission.

        To avoid computational problems, it is helpful to be able to
        set a 'default' pseudocount to start with for estimating
        transition and emission probabilities (see p62 in Durbin et al
        for more discussion on this. By default, all emissions have
        a pseudocount of 1.

        Raises:
        KeyError if the emission from the given state is not allowed.

        r}   r~   rw   N)r6   rp   )r7   r   r   rz   s       r   set_emission_pseudocountz+MarkovModelBuilder.set_emission_pseudocount  sW     ~&$*>>>@ED )^!<===P~PPYPPP  r   )NN)__name__
__module____qualname____doc__rB   r:   r2   r5   rI   rT   rZ   r^   rb   rd   rg   rl   rs   ru   rx   r{   r   r   r   r   r   r*   r*   M   s6         NS S S4    
 
 
:(4 (4 (4T8 8 8B	! 	! 	!$ $ $(" " "$1 1 1, , ,4 CG& & & &P      ,      r   r*   c                   <    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	S )
rH   zFRepresent a hidden markov model that can be used for state estimation.c                     || _         || _        || _        || _        || _        || _        || _        t          | j                  | _        t          | j                  | _
        dS )a  Initialize a Markov Model.

        Note: You should use the MarkovModelBuilder class instead of
        initiating this class directly.

        Arguments:
         - state_alphabet -- A tuple containing all of the letters that can
           appear in the states.
         - emission_alphabet -- A tuple containing all of the letters for
           states that can be emitted by the HMM.
         - initial_prob - A dictionary of initial probabilities for all states.
         - transition_prob -- A dictionary of transition probabilities for all
           possible transitions in the sequence.
         - emission_prob -- A dictionary of emission probabilities for all
           possible emissions from the sequence states.
         - transition_pseudo -- Pseudo-counts to be used for the transitions,
           when counting for purposes of estimating transition probabilities.
         - emission_pseudo -- Pseudo-counts to be used for the emissions,
           when counting for purposes of estimating emission probabilities.

        N)r8   r9   r0   _transition_pseudo_emission_pseudor1   r3   r&   _transitions_fromr(   _transitions_to)r7   r8   r9   r0   r1   r3   r4   r6   s           r   r:   zHiddenMarkovModel.__init__  sl    > -!2("3 /.*
 "=T=Q!R!R
  99MNNr   c                     | j         S )a,  Get the default transitions for the model.

        Returns a dictionary of all of the default transitions between any
        two letters in the sequence alphabet. The dictionary is structured
        with keys as (letter1, letter2) and values as the starting number
        of transitions.
        )r   rf   s    r   get_blank_transitionsz'HiddenMarkovModel.get_blank_transitions  s     &&r   c                     | j         S )a  Get the starting default emissions for each sequence.

        This returns a dictionary of the default emissions for each
        letter. The dictionary is structured with keys as
        (seq_letter, emission_letter) and values as the starting number
        of emissions.
        )r   rf   s    r   get_blank_emissionsz%HiddenMarkovModel.get_blank_emissions  s     $$r   c                 2    || j         v r| j         |         S g S )a9  Get all destination states which can transition from source state_letter.

        This returns all letters which the given state_letter can transition
        to, i.e. all the destination states reachable from state_letter.

        An empty list is returned if state_letter has no outgoing transitions.
        )r   r7   state_letters     r   r`   z"HiddenMarkovModel.transitions_from  s%     4111),77Ir   c                 2    || j         v r| j         |         S g S )a$  Get all source states which can transition to destination state_letter.

        This returns all letters which the given state_letter is reachable
        from, i.e. all the source states which can reach state_later

        An empty list is returned if state_letter is unreachable.
        )r   r   s     r   transitions_toz HiddenMarkovModel.transitions_to'  s%     4///'55Ir   c                    |                      | j                  }|                      | j                  }|                      | j                  }i }i }t	          t          |                    D ]}|D ]}	||	||         f         }
d}|dk    r	||	         }n\i }|                     |	          D ]#}|||	f         }|||dz
  f         }||z   }|||<   $t          |                                          }|
|z   ||	|f<   |dk    r|D ]}||         |k    r|||dz
  |	f<    ni }|D ]}||t          |          dz
  f         ||<    t          |                                          }d}|D ]}||         |k    r|}|dk    s
J d            g }t          t	          dt          |                              }|
                                 |}|                    |           |D ]$}||dz
  |f         }|                    |           %|
                                 d                    |          }t          |          |fS )aA  Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:
         - sequence -- A Seq object with the emission sequence that we
           want to decode.
         - state_alphabet -- An iterable (e.g., tuple or list) containing
           all of the letters that can appear in the states

        r   r+    z)Didn't find the last state to trace from!)_log_transformr0   r1   r3   r   rO   r   maxrP   r   reverser   joinr   )r7   sequencer8   log_initial	log_translog_emissionviterbi_probspred_state_seqi	cur_stateemission_partmax_probpossible_state_probs
prev_state
trans_partviterbi_partcur_probr   ri   state_path_prob
last_statetraceback_seqloop_seqs                          r   viterbizHiddenMarkovModel.viterbi4  s     ))$*;<<''(<==	**4+=>> s8}}%% #	" #	"A+ !" !"	 ,i!-E F66  +95HH ,.(&*&9&9)&D&D D D
%.
I/F%G
 (5j!a%5H'I#/*#<;C,Z88  ##7#>#>#@#@AAH 1>0Hy!n-q55!5 " "/6(BBAFNAE9+=>!E C?!"L 	# 	I 	IE,eS]]Q5F-GHIei..0011 
 	# 	#E?22"
R!L aX//00 U### 	( 	(A"AE5>2E  '''' 	..=!!?22r   c                     t          j         |          }|D ]>}||         }|dk    rt          j        ||                   ||<   .t          j         ||<   ?|S )a\  Return log transform of the given probability dictionary (PRIVATE).

        When calculating the Viterbi equation, add logs of probabilities rather
        than multiplying probabilities, to avoid underflow errors. This method
        returns a new dictionary with the same keys as the given dictionary
        and log-transformed values.
        r   )rF   mathloginf)r7   rq   log_probrX   rS   s        r   r   z HiddenMarkovModel._log_transform  sa     9[)) 	* 	*CC=Daxx $# 7 7!%	r   N)r   r   r   r   r:   r   r   r`   r   r   r   r   r   r   rH   rH     s        PP2O 2O 2Oh' ' '% % %    g3 g3 g3R    r   rH   )r   rF   r   r	   warningscollectionsr   Bior   Bio.Seqr   warnr   r   r&   r(   r*   rH   r   r   r   <module>r      s0   2 1     # # # # # # + + + + + +          	  * * *               D^ ^ ^ ^ ^ ^ ^ ^ ^ ^r   