
"""Lda Sequence model, inspired by
`David M. Blei, John D. Lafferty: "Dynamic Topic Models"
<https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
The original C/C++ implementation can be found on `blei-lab/dtm <https://github.com/blei-lab/dtm>`_.


TODO: The next steps to take this forward would be:

#. Include DIM mode. Most of the infrastructure for this is in place.
#. See if LdaPost can be replaced by LdaModel completely without breaking anything.
#. Heavy lifting going on in the Sslm class - efforts can be made to cythonise mathematical methods, in particular
   `update_obs` and the optimization, which take a lot of time.
#. Try and make it distributed, especially around the E and M step.
#. Remove all C/C++ coding style/syntax.

Examples
--------

Set up a model using 9 documents, with 2 in the first time-slice, 4 in the second, and 3 in the third

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_corpus
    >>> from gensim.models import LdaSeqModel
    >>>
    >>> ldaseq = LdaSeqModel(corpus=common_corpus, time_slice=[2, 4, 3], num_topics=2, chunksize=1)

Persist a model to disk and reload it later

.. sourcecode:: pycon

    >>> from gensim.test.utils import datapath
    >>>
    >>> temp_file = datapath("model")
    >>> ldaseq.save(temp_file)
    >>>
    >>> # Load a potentially pre-trained model from disk.
    >>> ldaseq = LdaSeqModel.load(temp_file)

Access the document embeddings generated from the DTM

.. sourcecode:: pycon

    >>> doc = common_corpus[1]
    >>>
    >>> embedding = ldaseq[doc]
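
Inspect how a topic's most relevant words evolve across the time slices (a short sketch, reusing the `ldaseq`
trained above)

.. sourcecode:: pycon

    >>> evolution = ldaseq.print_topic_times(topic=0, top_terms=5)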

    N)digammagammaln)optimize)utilsmatutils)ldamodelc                   t    e Zd ZdZ	 	 	 ddZd Zd Zd Zd Zd Z	d Z
ddZddZddZd Zd Zd Zd ZdS )LdaSeqModelzCEstimate Dynamic Topic Model parameters based on a training corpus.N{Gz?
   gensim      ?{Gzt?         d   c           	      v   || _         || j         t          d          | j         Mt                              d           t	          j        |          | _         t          | j                   | _        n(| j         rt          | j                   | _        nd| _        |^	 t          |          | _        nH# t          $ r; t                              d           t          d |D                       | _        Y nw xY w|| _        | j        t          |          | _        || _        t          |          | _        t          j        ||          | _        g | _        t%          |          D ]?}t'          | j        | j        | j        |
|	          }| j                            |           @d| _        d| _        d| _        d| _        ||t3          d |D                       | _        |d	k    rWt7          j        || j         | j        || j        |t          j        
          }t          j        |j        j                   | _         |dk    r#t          j        |j        j                   | _         |dk    r|| _         | !                    |
|	| j        | j                    | "                    |||||           dS dS dS )a  

        Parameters
        ----------
        corpus : {iterable of list of (int, float), scipy.sparse.csc}, optional
            Stream of document vectors or sparse matrix of shape (`num_documents`, `num_terms`).
            If not given, the model is left untrained (presumably because you want to call
            :meth:`~gensim.models.ldamodel.LdaSeqModel.update` manually).
        time_slice : list of int, optional
            Number of documents in each time-slice. Each time slice could for example represent a year's published
            papers, in case the corpus comes from a journal publishing over multiple years.
            It is assumed that `sum(time_slice) == num_documents`.
        id2word : dict of (int, str), optional
            Mapping from word IDs to words. It is used to determine the vocabulary size, as well as for
            debugging and topic printing.
        alphas : float, optional
            The prior probability for the model.
        num_topics : int, optional
            The number of requested latent topics to be extracted from the training corpus.
        initialize : {'gensim', 'own', 'ldamodel'}, optional
            Controls the initialization of the DTM model. Supports three different modes:
                * 'gensim': Uses gensim's LDA initialization.
                * 'own': Uses your own initialization matrix of an LDA model that has been previously trained.
                * 'ldamodel': Use a previously trained LDA model, passing it through the `lda_model` argument.
        sstats : numpy.ndarray , optional
            Sufficient statistics used for initializing the model if `initialize == 'own'`. Corresponds to matrix
            beta in the linked paper for time slice 0, expected shape (`self.vocab_len`, `num_topics`).
        lda_model : :class:`~gensim.models.ldamodel.LdaModel`
            Model whose sufficient statistics will be used to initialize the current object if `initialize == 'gensim'`.
        obs_variance : float, optional
            Observed variance used to approximate the true and forward variance as shown in
            `David M. Blei, John D. Lafferty: "Dynamic Topic Models"
            <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
        chain_variance : float, optional
            Gaussian parameter defined in the beta distribution to dictate how the beta values evolve over time.
        passes : int, optional
            Number of passes over the corpus for the initial :class:`~gensim.models.ldamodel.LdaModel`
        random_state : {numpy.random.RandomState, int}, optional
            Can be a np.random.RandomState object, or the seed to generate one. Used for reproducibility of results.
        lda_inference_max_iter : int, optional
            Maximum number of iterations in the inference step of the LDA training.
        em_min_iter : int, optional
            Minimum number of iterations until convergence of the Expectation-Maximization algorithm.
        em_max_iter : int, optional
            Maximum number of iterations until convergence of the Expectation-Maximization algorithm.
        chunksize : int, optional
            Number of documents in the corpus to be processed in a chunk.

        """
        self.id2word = id2word
        if corpus is None and self.id2word is None:
            raise ValueError(
                'at least one of corpus/id2word must be specified, to establish input space dimensionality'
            )

        if self.id2word is None:
            logger.warning("no word id mapping provided; initializing from corpus, assuming identity")
            self.id2word = utils.dict_from_corpus(corpus)
            self.vocab_len = len(self.id2word)
        elif self.id2word:
            self.vocab_len = len(self.id2word)
        else:
            self.vocab_len = 0

        if corpus is not None:
            try:
                self.corpus_len = len(corpus)
            except TypeError:
                logger.warning("input corpus stream has no len(); counting documents")
                self.corpus_len = sum(1 for _ in corpus)

        self.time_slice = time_slice
        if self.time_slice is not None:
            self.num_time_slices = len(time_slice)

        self.num_topics = num_topics
        self.alphas = np.full(num_topics, alphas)

        # topic_chains contains for each topic a 'state space language model' object
        # which in turn has information on topic-word probabilities and doc-topic probabilities.
        self.topic_chains = []
        for topic in range(num_topics):
            sslm_ = sslm(
                num_time_slices=self.num_time_slices, vocab_len=self.vocab_len, num_topics=self.num_topics,
                chain_variance=chain_variance, obs_variance=obs_variance
            )
            self.topic_chains.append(sslm_)

        # the following are class variables which are to be integrated during Document Influence Model
        self.top_doc_phis = None
        self.influence = None
        self.renormalized_influence = None
        self.influence_sum_lgl = None

        # if a corpus and time_slice are provided, we start DTM, initializing per the user's choice.
        if corpus is not None and time_slice is not None:
            self.max_doc_len = max(len(line) for line in corpus)

            if initialize == 'gensim':
                lda_model = ldamodel.LdaModel(
                    corpus, id2word=self.id2word, num_topics=self.num_topics,
                    passes=passes, alpha=self.alphas, random_state=random_state,
                    dtype=np.float64
                )
                self.sstats = np.transpose(lda_model.state.sstats)
            if initialize == 'ldamodel':
                self.sstats = np.transpose(lda_model.state.sstats)
            if initialize == 'own':
                self.sstats = sstats

            # initialize model from sstats
            self.init_ldaseq_ss(chain_variance, obs_variance, self.alphas, self.sstats)

            # fit DTM
            self.fit_lda_seq(corpus, lda_inference_max_iter, em_min_iter, em_max_iter, chunksize)

    def init_ldaseq_ss(self, topic_chain_variance, topic_obs_variance, alpha, init_suffstats):
        """Initialize State Space Language Model, topic-wise.

        Parameters
        ----------
        topic_chain_variance : float
            Gaussian parameter defined in the beta distribution to dictate how the beta values evolve.
        topic_obs_variance : float
            Observed variance used to approximate the true and forward variance as shown in
            `David M. Blei, John D. Lafferty: "Dynamic Topic Models"
            <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.
        alpha : float
            The prior probability for the model.
        init_suffstats : numpy.ndarray
            Sufficient statistics used for initializing the model, expected shape (`self.vocab_len`, `num_topics`).

        """
        self.alphas = alpha
        for k, chain in enumerate(self.topic_chains):
            sstats = init_suffstats[:, k]
            sslm.sslm_counts_init(chain, topic_obs_variance, topic_chain_variance, sstats)

    def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, chunksize):
        """Fit a LDA Sequence model (DTM).

        This method will iteratively set up LDA models and perform EM steps until the sufficient statistics converge,
        or until the maximum number of iterations is reached. Because the true posterior is intractable, an
        appropriately tight lower bound must be used instead. This function optimizes that bound by minimizing
        its Kullback-Leibler divergence with the true posterior.

        Parameters
        ----------
        corpus : {iterable of list of (int, float), scipy.sparse.csc}
            Stream of document vectors or sparse matrix of shape (`num_documents`, `num_terms`).
        lda_inference_max_iter : int
            Maximum number of iterations for the inference step of LDA.
        em_min_iter : int
            Minimum number of time slices to be inspected.
        em_max_iter : int
            Maximum number of time slices to be inspected.
        chunksize : int
            Number of documents to be processed in each chunk.

        Returns
        -------
        float
            The highest lower bound for the true posterior produced after all iterations.

        """
        LDASQE_EM_THRESHOLD = 1e-4
        # if the bound is low, we increase the number of iterations.
        LOWER_ITER = 10
        ITER_MULT_LOW = 2
        MAX_ITER = 500

        num_topics = self.num_topics
        vocab_len = self.vocab_len
        data_len = self.num_time_slices
        corpus_len = self.corpus_len

        bound = 0
        convergence = LDASQE_EM_THRESHOLD + 1
        iter_ = 0

        while iter_ < em_min_iter or ((convergence > LDASQE_EM_THRESHOLD) and iter_ <= em_max_iter):

            logger.info(" EM iter %i", iter_)
            logger.info("E Step")
            old_bound = bound

            # initiate sufficient statistics
            topic_suffstats = []
            for topic in range(num_topics):
                topic_suffstats.append(np.zeros((vocab_len, data_len)))

            # set up variational parameters
            gammas = np.zeros((corpus_len, num_topics))
            lhoods = np.zeros((corpus_len, num_topics + 1))

            # compute the likelihood of the sequential corpus under the current model
            # and find the evidence lower bound; this is the E-step
            bound, gammas = \
                self.lda_seq_infer(corpus, topic_suffstats, gammas, lhoods, iter_, lda_inference_max_iter, chunksize)
            self.gammas = gammas

            logger.info("M Step")

            # fit the variational distribution; this is the M-step
            topic_bound = self.fit_lda_seq_topics(topic_suffstats)
            bound += topic_bound

            if (bound - old_bound) < 0:
                # if max_iter is too low, increase iterations.
                if lda_inference_max_iter < LOWER_ITER:
                    lda_inference_max_iter *= ITER_MULT_LOW
                logger.info("Bound went down, increasing iterations to %i", lda_inference_max_iter)

            # check for convergence
            convergence = np.fabs((bound - old_bound) / old_bound)

            if convergence < LDASQE_EM_THRESHOLD:
                lda_inference_max_iter = MAX_ITER
                logger.info("Starting final iterations, max iter is %i", lda_inference_max_iter)
                convergence = 1.0

            logger.info("iteration %i iteration lda seq bound is %f convergence is %f", iter_, bound, convergence)

            iter_ += 1

        return bound

    def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, iter_, lda_inference_max_iter, chunksize):
        """Inference (or E-step) for the lower bound EM optimization.

        This is used to set up the gensim :class:`~gensim.models.ldamodel.LdaModel` to be used for each time-slice.
        It also allows for Document Influence Model code to be written in.

        Parameters
        ----------
        corpus : {iterable of list of (int, float), scipy.sparse.csc}
            Stream of document vectors or sparse matrix of shape (`num_documents`, `num_terms`).
        topic_suffstats : numpy.ndarray
            Sufficient statistics for time slice 0, used for initializing the model if `initialize == 'own'`,
            expected shape (`self.vocab_len`, `num_topics`).
        gammas : numpy.ndarray
            Topic weight variational parameters for each document. If not supplied, it will be inferred from the model.
        lhoods : list of float
            The total log probability lower bound for each topic. Corresponds to the phi variational parameters in the
            linked paper.
        iter_ : int
            Current iteration.
        lda_inference_max_iter : int
            Maximum number of iterations for the inference step of LDA.
        chunksize : int
            Number of documents to be processed in each chunk.

        Returns
        -------
        (float, list of float)
            The first value is the highest lower bound for the true posterior.
            The second value is the list of optimized dirichlet variational parameters for the approximation of
            the posterior.

        """
        num_topics = self.num_topics
        vocab_len = self.vocab_len
        bound = 0.0

        lda = ldamodel.LdaModel(num_topics=num_topics, alpha=self.alphas, id2word=self.id2word, dtype=np.float64)
        lda.topics = np.zeros((vocab_len, num_topics))
        ldapost = LdaPost(max_doc_len=self.max_doc_len, num_topics=num_topics, lda=lda)

        model = "DTM"
        if model == "DTM":
            bound, gammas = self.inferDTMseq(
                corpus, topic_suffstats, gammas, lhoods, lda, ldapost,
                iter_, bound, lda_inference_max_iter, chunksize
            )
        elif model == "DIM":
            self.InfluenceTotalFixed(corpus)
            bound, gammas = self.inferDIMseq(
                corpus, topic_suffstats, gammas, lhoods, lda, ldapost,
                iter_, bound, lda_inference_max_iter, chunksize
            )

        return bound, gammas

    def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, ldapost, iter_, bound,
                    lda_inference_max_iter, chunksize):
        """Compute the likelihood of a sequential corpus under an LDA seq model, and reports the likelihood bound.

        Parameters
        ----------
        corpus : {iterable of list of (int, float), scipy.sparse.csc}
            Stream of document vectors or sparse matrix of shape (`num_documents`, `num_terms`).
        topic_suffstats : numpy.ndarray
            Sufficient statistics of the current model, expected shape (`self.vocab_len`, `num_topics`).
        gammas : numpy.ndarray
            Topic weight variational parameters for each document. If not supplied, it will be inferred from the model.
        lhoods : list of float of length `self.num_topics`
            The total log probability bound for each topic. Corresponds to phi from the linked paper.
        lda : :class:`~gensim.models.ldamodel.LdaModel`
            The trained LDA model of the previous iteration.
        ldapost : :class:`~gensim.models.ldaseqmodel.LdaPost`
            Posterior probability variables for the given LDA model. This will be used as the true (but intractable)
            posterior.
        iter_ : int
            The current iteration.
        bound : float
            The LDA bound produced after all iterations.
        lda_inference_max_iter : int
            Maximum number of iterations for the inference step of LDA.
        chunksize : int
            Number of documents to be processed in each chunk.

        Returns
        -------
        (float, list of float)
            The first value is the highest lower bound for the true posterior.
            The second value is the list of optimized dirichlet variational parameters for the approximation of
            the posterior.

        """
        doc_index = 0  # overall doc_index in corpus
        time = 0  # current time-slice
        doc_num = 0  # doc-index in current time-slice
        lda = self.make_lda_seq_slice(lda, time)  # create the initial lda_seq slice

        time_slice = np.cumsum(np.array(self.time_slice))

        for chunk_no, chunk in enumerate(utils.grouper(corpus, chunksize)):
            # iterate over chunks for a constant memory footprint
            for doc in chunk:
                # update the time slice and create a new lda_seq slice every new time_slice
                if doc_index > time_slice[time]:
                    time += 1
                    lda = self.make_lda_seq_slice(lda, time)
                    doc_num = 0

                gam = gammas[doc_index]
                lhood = lhoods[doc_index]

                ldapost.gamma = gam
                ldapost.lhood = lhood
                ldapost.doc = doc

                # TODO: replace fit_lda_post with appropriate ldamodel functions, if possible.
                if iter_ == 0:
                    doc_lhood = LdaPost.fit_lda_post(
                        ldapost, doc_num, time, None, lda_inference_max_iter=lda_inference_max_iter
                    )
                else:
                    doc_lhood = LdaPost.fit_lda_post(
                        ldapost, doc_num, time, self, lda_inference_max_iter=lda_inference_max_iter
                    )

                if topic_suffstats is not None:
                    topic_suffstats = LdaPost.update_lda_seq_ss(ldapost, time, doc, topic_suffstats)

                gammas[doc_index] = ldapost.gamma
                bound += doc_lhood
                doc_index += 1
                doc_num += 1

        return bound, gammas

    def make_lda_seq_slice(self, lda, time):
        """Update the LDA model topic-word values using time slices.

        Parameters
        ----------

        lda : :class:`~gensim.models.ldamodel.LdaModel`
            The stationary model to be updated
        time : int
            The time slice assigned to the stationary model.

        Returns
        -------
        lda : :class:`~gensim.models.ldamodel.LdaModel`
            The stationary model updated to reflect the passed time slice.

        """
        for k in range(self.num_topics):
            lda.topics[:, k] = self.topic_chains[k].e_log_prob[:, time]

        lda.alpha = np.copy(self.alphas)
        return lda

    def fit_lda_seq_topics(self, topic_suffstats):
        """Fit the sequential model topic-wise.

        Parameters
        ----------
        topic_suffstats : numpy.ndarray
            Sufficient statistics of the current model, expected shape (`self.vocab_len`, `num_topics`).

        Returns
        -------
        float
            The sum of the optimized lower bounds for all topics.

        """
        lhood = 0

        for k, chain in enumerate(self.topic_chains):
            logger.info("Fitting topic number %i", k)
            lhood_term = sslm.fit_sslm(chain, topic_suffstats[k])
            lhood += lhood_term

        return lhood

    def print_topic_times(self, topic, top_terms=20):
        """Get the most relevant words for a topic, for each timeslice. This can be used to inspect the evolution of a
        topic through time.

        Parameters
        ----------
        topic : int
            The index of the topic.
        top_terms : int, optional
            Number of most relevant words associated with the topic to be returned.

        Returns
        -------
        list of list of str
            Top `top_terms` relevant terms for the topic for each time slice.
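
        Examples
        --------
        A short sketch, assuming `ldaseq` is the model trained in the module-level example:

        .. sourcecode:: pycon

            >>> evolution = ldaseq.print_topic_times(topic=0, top_terms=5)
            >>> len(evolution) == ldaseq.num_time_slices
            True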

        """
        topics = []
        for time in range(self.num_time_slices):
            topics.append(self.print_topic(topic, time, top_terms))

        return topics

    def print_topics(self, time=0, top_terms=20):
        """Get the most relevant words for every topic.

        Parameters
        ----------
        time : int, optional
            The time slice we are interested in (since topics evolve over time, it is expected that the most
            relevant words will also gradually change).
        top_terms : int, optional
            Number of most relevant words to be returned for each topic.

        Returns
        -------
        list of list of (str, float)
            Representation of all topics. Each of them is represented by a list of pairs of words and their assigned
            probability.

        """
        return [self.print_topic(topic, time, top_terms) for topic in range(self.num_topics)]

    def print_topic(self, topic, time=0, top_terms=20):
        """Get the list of words most relevant to the given topic.

        Parameters
        ----------
        topic : int
            The index of the topic to be inspected.
        time : int, optional
            The time slice we are interested in (since topics evolve over time, it is expected that the most
            relevant words will also gradually change).
        top_terms : int, optional
            Number of words associated with the topic to be returned.

        Returns
        -------
        list of (str, float)
            The representation of this topic. Each element in the list includes the word itself, along with the
            probability assigned to it by the topic.
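
        Examples
        --------
        A sketch, assuming the trained `ldaseq` from the module-level example:

        .. sourcecode:: pycon

            >>> terms = ldaseq.print_topic(topic=0, time=1, top_terms=10)
            >>> word, prob = terms[0]  # most relevant word and its probability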

        """
        topic = self.topic_chains[topic].e_log_prob
        topic = np.transpose(topic)
        topic = np.exp(topic[time])
        topic = topic / topic.sum()
        bestn = matutils.argsort(topic, top_terms, reverse=True)
        beststr = [(self.id2word[id_], topic[id_]) for id_ in bestn]
        return beststr

    def doc_topics(self, doc_number):
        """Get the topic mixture for a document.

        Uses the priors for the dirichlet distribution that approximates the true posterior with the optimal
        lower bound, and therefore requires the model to be already trained.


        Parameters
        ----------
        doc_number : int
            Index of the document for which the mixture is returned.

        Returns
        -------
        list of length `self.num_topics`
            Probability for each topic in the mixture (essentially a point in the `self.num_topics - 1` simplex).
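
        Examples
        --------
        A sketch, assuming the trained `ldaseq` from the module-level example; the returned mixture is a proper
        distribution, so it sums to 1:

        .. sourcecode:: pycon

            >>> mixture = ldaseq.doc_topics(doc_number=0)
            >>> abs(mixture.sum() - 1.0) < 1e-8
            True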

        """
        doc_topic = self.gammas / self.gammas.sum(axis=1)[:, np.newaxis]
        return doc_topic[doc_number]

    def dtm_vis(self, time, corpus):
        """Get the information needed to visualize the corpus model at a given time slice, using the pyLDAvis format.

        Parameters
        ----------
        time : int
            The time slice we are interested in.
        corpus : {iterable of list of (int, float), scipy.sparse.csc}, optional
            The corpus we want to visualize at the given time slice.

        Returns
        -------
        doc_topics : list of length `self.num_topics`
            Probability for each topic in the mixture (essentially a point in the `self.num_topics - 1` simplex).
        topic_term : numpy.ndarray
            The representation of each topic as a multinomial over words in the vocabulary,
            expected shape (`num_topics`, vocabulary length).
        doc_lengths : list of int
            The number of words in each document. These could be fixed, or drawn from a Poisson distribution.
        term_frequency : numpy.ndarray
            The term frequency matrix (denoted as beta in the original Blei paper). This could also be the TF-IDF
            representation of the corpus, expected shape (number of documents, length of vocabulary).
        vocab : list of str
            The set of unique terms existing in the corpus's vocabulary.
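
        Examples
        --------
        A sketch of feeding the output to pyLDAvis (an external package; its exact API may vary between versions):

        .. sourcecode:: pycon

            >>> doc_topic, topic_term, doc_lengths, term_frequency, vocab = ldaseq.dtm_vis(0, common_corpus)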

        """
        doc_topic = self.gammas / self.gammas.sum(axis=1)[:, np.newaxis]

        def normalize(x):
            return x / x.sum()

        topic_term = [
            normalize(np.exp(chain.e_log_prob.T[time]))
            for k, chain in enumerate(self.topic_chains)
        ]

        doc_lengths = []
        term_frequency = np.zeros(self.vocab_len)
        for doc_no, doc in enumerate(corpus):
            doc_lengths.append(len(doc))

            for term, freq in doc:
                term_frequency[term] += freq

        vocab = [self.id2word[i] for i in range(len(self.id2word))]

        return doc_topic, np.array(topic_term), doc_lengths, term_frequency, vocab

    def dtm_coherence(self, time):
        """Get the coherence for each topic.

        Can be used to measure the quality of the model, or to inspect the convergence through training via a callback.

        Parameters
        ----------
        time : int
            The time slice.

        Returns
        -------
        list of list of str
            The word representation for each topic, for each time slice. This can be used to check the time coherence
            of topics as time evolves: If the most relevant words remain the same then the topic has somehow
            converged or is relatively static, if they change rapidly the topic is evolving.
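
        Examples
        --------
        The output is suitable for :class:`~gensim.models.coherencemodel.CoherenceModel`; a sketch, assuming
        `common_corpus` and `common_dictionary` from :mod:`gensim.test.utils`:

        .. sourcecode:: pycon

            >>> from gensim.models.coherencemodel import CoherenceModel
            >>> topics_at_t0 = ldaseq.dtm_coherence(time=0)
            >>> cm = CoherenceModel(topics=topics_at_t0, corpus=common_corpus, dictionary=common_dictionary, coherence='u_mass')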

        """
        coherence_topics = []
        for topics in self.print_topics(time):
            coherence_topic = []
            for word, dist in topics:
                coherence_topic.append(word)
            coherence_topics.append(coherence_topic)

        return coherence_topics

    def __getitem__(self, doc):
        """Get the topic mixture for the given document, using the inferred approximation of the true posterior.

        Parameters
        ----------
        doc : list of (int, float)
            The doc in BOW format. Can be an unseen document.

        Returns
        -------
        list of float
            Probabilities for each topic in the mixture. This is essentially a point in the `num_topics - 1` simplex.

        """
        lda_model = ldamodel.LdaModel(
            num_topics=self.num_topics, alpha=self.alphas, id2word=self.id2word, dtype=np.float64)
        lda_model.topics = np.zeros((self.vocab_len, self.num_topics))
        ldapost = LdaPost(num_topics=self.num_topics, max_doc_len=len(doc), lda=lda_model, doc=doc)

        time_lhoods = []
        for time in range(self.num_time_slices):
            lda_model = self.make_lda_seq_slice(lda_model, time)  # create lda_seq slice
            lhood = LdaPost.fit_lda_post(ldapost, 0, time, self)
            time_lhoods.append(lhood)

        doc_topic = ldapost.gamma / ldapost.gamma.sum()
        return doc_topic


class sslm(utils.SaveLoad):
    """Encapsulate the inner State Space Language Model for DTM.

    Some important attributes of this class:

        * `obs` is a matrix containing the document to topic ratios.
        * `e_log_prob` is a matrix containing the topic to word ratios.
        * `mean` contains the mean values to be used for inference for each word for a time slice.
        * `variance` contains the variance values to be used for inference of word in a time slice.
        * `fwd_mean` and`fwd_variance` are the forward posterior values for the mean and the variance.
        * `zeta` is an extra variational parameter with a value for each time slice.

    """

    def __init__(self, vocab_len=None, num_time_slices=None, num_topics=None, obs_variance=0.5, chain_variance=0.005):
        self.vocab_len = vocab_len
        self.num_time_slices = num_time_slices
        self.obs_variance = obs_variance
        self.chain_variance = chain_variance
        self.num_topics = num_topics

        # setting up matrices
        self.obs = np.zeros((vocab_len, num_time_slices))
        self.e_log_prob = np.zeros((vocab_len, num_time_slices))
        self.mean = np.zeros((vocab_len, num_time_slices + 1))
        self.fwd_mean = np.zeros((vocab_len, num_time_slices + 1))
        self.fwd_variance = np.zeros((vocab_len, num_time_slices + 1))
        self.variance = np.zeros((vocab_len, num_time_slices + 1))
        self.zeta = np.zeros(num_time_slices)

        # the following are class variables which are to be integrated during Document Influence Model
        self.m_update_coeff = None
        self.mean_t = None
        self.variance_t = None
        self.influence_sum_lgl = None
        self.w_phi_l = None
        self.w_phi_sum = None
        self.w_phi_l_sq = None
        self.m_update_coeff_g = None

    def update_zeta(self):
        """Update the Zeta variational parameter.

        Zeta is described in the appendix and is equal to sum (exp(mean[word] + Variance[word] / 2)),
        over every time-slice. It is the value of variational parameter zeta which maximizes the lower bound.

        Returns
        -------
        list of float
            The updated zeta values for each time slice.
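
        Examples
        --------
        Schematically, the update for a single slice `j` is (toy values; `mean` and `variance` have shape
        `(vocab_len, num_time_slices + 1)`):

        .. sourcecode:: pycon

            >>> import numpy as np
            >>> mean, variance = np.zeros((4, 3)), np.ones((4, 3))
            >>> zeta_j = np.sum(np.exp(mean[:, 1] + variance[:, 1] / 2))  # slice j=0 reads column j + 1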

        """
        for j, val in enumerate(self.zeta):
            self.zeta[j] = np.sum(np.exp(self.mean[:, j + 1] + self.variance[:, j + 1] / 2))
        return self.zeta

    def compute_post_variance(self, word, chain_variance):
        """Get the variance, based on the
        `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
        <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.

        This function accepts the word to compute variance for, along with the associated sslm class object,
        and returns the `variance` and the posterior approximation `fwd_variance`.

        Notes
        -----
        This function essentially computes Var[\beta_{t,w}] for t = 1:T

        .. :math::

            fwd\_variance[t] \equiv E((\beta_{t,w} - mean_{t,w})^2 | \beta_{1:t}) =
            (obs\_variance / (fwd\_variance[t - 1] + chain\_variance + obs\_variance)) *
            (fwd\_variance[t - 1] + chain\_variance)

        .. :math::

            variance[t] \equiv E((\beta_{t,w} - mean\_cap_{t,w})^2 | \beta\_cap_{1:T}) =
            (fwd\_variance[t] / (fwd\_variance[t] + chain\_variance))^2 * (variance[t + 1] - chain\_variance) +
            (1 - (fwd\_variance[t] / (fwd\_variance[t] + chain\_variance))^2) * fwd\_variance[t]

        Parameters
        ----------
        word: int
            The word's ID.
        chain_variance : float
            Gaussian parameter defined in the beta distribution to dictate how the beta values evolve over time.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            The first returned value is the variance of each word in each time slice, the second value is the
            inferred posterior variance for the same pairs.
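
        Examples
        --------
        The forward pass is a scalar recursion per word; schematically, with hypothetical values:

        .. sourcecode:: pycon

            >>> obs_variance, chain_variance = 0.5, 0.005
            >>> fwd = [chain_variance * 1000]  # the initial variance is set very high
            >>> for _ in range(3):
            ...     c = obs_variance / (fwd[-1] + chain_variance + obs_variance)
            ...     fwd.append(c * (fwd[-1] + chain_variance))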

        """
        INIT_VARIANCE_CONST = 1000

        T = self.num_time_slices
        variance = self.variance[word]
        fwd_variance = self.fwd_variance[word]
        # forward pass. Set initial variance very high
        fwd_variance[0] = chain_variance * INIT_VARIANCE_CONST
        for t in range(1, T + 1):
            if self.obs_variance:
                c = self.obs_variance / (fwd_variance[t - 1] + chain_variance + self.obs_variance)
            else:
                c = 0
            fwd_variance[t] = c * (fwd_variance[t - 1] + chain_variance)

        # backward pass
        variance[T] = fwd_variance[T]
        for t in range(T - 1, -1, -1):
            if fwd_variance[t] > 0.0:
                c = np.power((fwd_variance[t] / (fwd_variance[t] + chain_variance)), 2)
            else:
                c = 0
            variance[t] = (c * (variance[t + 1] - chain_variance)) + ((1 - c) * fwd_variance[t])

        return variance, fwd_variance

    def compute_post_mean(self, word, chain_variance):
        """Get the mean, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1)
        <https://mimno.infosci.cornell.edu/info6150/readings/dynamic_topic_models.pdf>`_.

        Notes
        -----
        This function essentially computes E[eta_{t,w}] for t = 1:T.

        .. :math::

            fwd\_mean[t] \equiv E(\beta_{t,w} | \beta_{1:t}) =
            (obs\_variance / (fwd\_variance[t - 1] + chain\_variance + obs\_variance)) * fwd\_mean[t - 1] +
            (1 - obs\_variance / (fwd\_variance[t - 1] + chain\_variance + obs\_variance)) * obs[t - 1]

        .. :math::

            mean[t] \equiv E(\beta_{t,w} | \beta_{1:T}) =
            (chain\_variance / (fwd\_variance[t] + chain\_variance)) * fwd\_mean[t] +
            (1 - chain\_variance / (fwd\_variance[t] + chain\_variance)) * mean[t + 1]

        Parameters
        ----------
        word: int
            The word's ID.
        chain_variance : float
            Gaussian parameter defined in the beta distribution to dictate how the beta values evolve over time.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            The first returned value is the mean of each word in each time slice, the second value is the
            inferred posterior mean for the same pairs.

        """
        T = self.num_time_slices
        obs = self.obs[word]
        fwd_variance = self.fwd_variance[word]
        mean = self.mean[word]
        fwd_mean = self.fwd_mean[word]

        # forward pass
        fwd_mean[0] = 0
        for t in range(1, T + 1):
            c = self.obs_variance / (fwd_variance[t - 1] + chain_variance + self.obs_variance)
            fwd_mean[t] = c * fwd_mean[t - 1] + (1 - c) * obs[t - 1]

        # backward pass
        mean[T] = fwd_mean[T]
        for t in range(T - 1, -1, -1):
            if chain_variance == 0.0:
                c = 0.0
            else:
                c = chain_variance / (fwd_variance[t] + chain_variance)
            mean[t] = c * fwd_mean[t] + (1 - c) * mean[t + 1]
        return mean, fwd_mean

    def compute_expected_log_prob(self):
        """Compute the expected log probability given values of m.

        The appendix describes the Expectation of log-probabilities in equation 5 of the DTM paper;
        The below implementation is the result of solving the equation and is implemented as in the original
        Blei DTM code.

        Returns
        -------
        numpy.ndarray of float
            The expected value for the log probabilities for each word and time slice.

        """
        for (w, t), val in np.ndenumerate(self.e_log_prob):
            self.e_log_prob[w][t] = self.mean[w][t + 1] - np.log(self.zeta[t])
        return self.e_log_prob

    def sslm_counts_init(self, obs_variance, chain_variance, sstats):
        """Initialize the State Space Language Model with LDA sufficient statistics.

        Called for each topic-chain and initializes initial mean, variance and Topic-Word probabilities
        for the first time-slice.

        Parameters
        ----------
        obs_variance : float, optional
            Observed variance used to approximate the true and forward variance.
        chain_variance : float
            Gaussian parameter defined in the beta distribution to dictate how the beta values evolve over time.
        sstats : numpy.ndarray
            Sufficient statistics of the LDA model. Corresponds to matrix beta in the linked paper for time slice 0,
            expected shape (`self.vocab_len`, `num_topics`).

        """
        W = self.vocab_len
        T = self.num_time_slices

        log_norm_counts = np.copy(sstats)
        log_norm_counts /= sum(log_norm_counts)
        log_norm_counts += 1.0 / W
        log_norm_counts /= sum(log_norm_counts)
        log_norm_counts = np.log(log_norm_counts)

        # setting variational observations to transformed counts
        self.obs = (np.repeat(log_norm_counts, T, axis=0)).reshape(W, T)
        # set variational parameters
        self.obs_variance = obs_variance
        self.chain_variance = chain_variance

        # compute posterior variance and mean
        for w in range(W):
            self.variance[w], self.fwd_variance[w] = self.compute_post_variance(w, self.chain_variance)
            self.mean[w], self.fwd_mean[w] = self.compute_post_mean(w, self.chain_variance)

        self.zeta = self.update_zeta()
        self.e_log_prob = self.compute_expected_log_prob()

    def fit_sslm(self, sstats):
        """Fits variational distribution.

        This is essentially the m-step.
        Maximizes the approximation of the true posterior for a particular topic using the provided sufficient
        statistics. Updates the values using :meth:`~gensim.models.ldaseqmodel.sslm.update_obs` and
        :meth:`~gensim.models.ldaseqmodel.sslm.compute_expected_log_prob`.

        Parameters
        ----------
        sstats : numpy.ndarray
            Sufficient statistics for a particular topic. Corresponds to matrix beta in the linked paper for the
            current time slice, expected shape (`self.vocab_len`, `num_topics`).

        Returns
        -------
        float
            The lower bound for the true posterior achieved using the fitted approximate distribution.

        """
        W = self.vocab_len
        bound = 0
        old_bound = 0
        sslm_fit_threshold = 1e-6
        sslm_max_iter = 2
        converged = sslm_fit_threshold + 1

        # computing variance, fwd_variance
        self.variance, self.fwd_variance = \
            (np.array(x) for x in zip(*(self.compute_post_variance(w, self.chain_variance) for w in range(W))))

        # column sum of the sufficient statistics
        totals = sstats.sum(axis=0)
        iter_ = 0

        model = "DTM"
        if model == "DTM":
            bound = self.compute_bound(sstats, totals)
        if model == "DIM":
            bound = self.compute_bound_fixed(sstats, totals)

        logger.info("initial sslm bound is %f", bound)

        while converged > sslm_fit_threshold and iter_ < sslm_max_iter:
            iter_ += 1
            old_bound = bound
            self.obs, self.zeta = self.update_obs(sstats, totals)

            if model == "DTM":
                bound = self.compute_bound(sstats, totals)
            if model == "DIM":
                bound = self.compute_bound_fixed(sstats, totals)

            converged = np.fabs((bound - old_bound) / old_bound)
            logger.info("iteration %i iteration lda seq bound is %f convergence is %f", iter_, bound, converged)

        self.e_log_prob = self.compute_expected_log_prob()
        return bound

    def compute_bound(self, sstats, totals):
        """Compute the maximized lower bound achieved for the log probability of the true posterior.

        Uses the formula presented in the appendix of the DTM paper (formula no. 5).

        Parameters
        ----------
        sstats : numpy.ndarray
            Sufficient statistics for a particular topic. Corresponds to matrix beta in the linked paper for the first
            time slice, expected shape (`self.vocab_len`, `num_topics`).
        totals : list of int of length `len(self.time_slice)`
            The totals for each time slice.

        Returns
        -------
        float
            The maximized lower bound.

        """
        vocab_len = self.vocab_len
        num_time_slices = self.num_time_slices
        chain_variance = self.chain_variance

        # computing mean, fwd_mean
        self.mean, self.fwd_mean = \
            (np.array(x) for x in zip(*(self.compute_post_mean(w, chain_variance) for w in range(vocab_len))))
        self.zeta = self.update_zeta()

        val = sum(
            self.variance[w][0] - self.variance[w][num_time_slices] for w in range(vocab_len)
        ) / 2 * chain_variance

        logger.info("Computing bound, all times")

        for t in range(1, num_time_slices + 1):
            term_1 = 0.0
            term_2 = 0.0
            ent = 0.0
            for w in range(vocab_len):

                m = self.mean[w][t]
                prev_m = self.mean[w][t - 1]
                v = self.variance[w][t]

                term_1 += \
                    (np.power(m - prev_m, 2) / (2 * chain_variance)) - (v / chain_variance) - np.log(chain_variance)
                term_2 += sstats[w][t - 1] * m
                ent += np.log(v) / 2  # note the 2pi's cancel with term1 (see doc)

            term_3 = -totals[t - 1] * np.log(self.zeta[t - 1])
            val += term_2 + term_3 + ent - term_1

        return val

    def update_obs(self, sstats, totals):
        """Optimize the bound with respect to the observed variables.

        TODO:
        This is by far the slowest function in the whole algorithm.
        Replacing or improving the performance of this would greatly speed things up.

        Parameters
        ----------
        sstats : numpy.ndarray
            Sufficient statistics for a particular topic. Corresponds to matrix beta in the linked paper for the first
            time slice, expected shape (`self.vocab_len`, `num_topics`).
        totals : list of int of length `len(self.time_slice)`
            The totals for each time slice.

        Returns
        -------
        (numpy.ndarray of float, numpy.ndarray of float)
            The updated optimized values for obs and the zeta variational parameter.

        """
        OBS_NORM_CUTOFF = 2
        STEP_SIZE = 0.01
        TOL = 1e-3

        W = self.vocab_len
        T = self.num_time_slices

        runs = 0
        mean_deriv_mtx = np.zeros((T, T + 1))

        norm_cutoff_obs = None
        for w in range(W):
            w_counts = sstats[w]
            # L2 norm of w_counts
            counts_norm = 0
            for i in range(len(w_counts)):
                counts_norm += w_counts[i] * w_counts[i]
            counts_norm = np.sqrt(counts_norm)

            if counts_norm < OBS_NORM_CUTOFF and norm_cutoff_obs is not None:
                obs = self.obs[w]
                norm_cutoff_obs = np.copy(obs)
            else:
                if counts_norm < OBS_NORM_CUTOFF:
                    w_counts = np.zeros(len(w_counts))

                for t in range(T):
                    mean_deriv_mtx[t] = self.compute_mean_deriv(w, t, mean_deriv_mtx[t])

                deriv = np.zeros(T)
                args = self, w_counts, totals, mean_deriv_mtx, w, deriv
                obs = self.obs[w]
                model = "DTM"

                if model == "DTM":
                    # slowest part of the method
                    obs = optimize.fmin_cg(
                        f=f_obs, fprime=df_obs, x0=obs, gtol=TOL, args=args, epsilon=STEP_SIZE, disp=0
                    )
                if model == "DIM":
                    pass
                runs += 1

                if counts_norm < OBS_NORM_CUTOFF:
                    norm_cutoff_obs = obs

                self.obs[w] = obs

        self.zeta = self.update_zeta()
        return self.obs, self.zeta

    def compute_mean_deriv(self, word, time, deriv):
        """Helper functions for optimizing a function.

        Compute the derivative of:

        .. :math::

            E[eta_{t,w}]/d obs_{s,w} for t = 1:T.

        Parameters
        ----------
        word : int
            The word's ID.
        time : int
            The time slice.
        deriv : list of float
            Derivative for each time slice.

        Returns
        -------
        list of float
            Mean derivative for each time slice.

        """
        T = self.num_time_slices
        fwd_variance = self.variance[word]

        deriv[0] = 0

        # forward pass
        for t in range(1, T + 1):
            if self.obs_variance > 0.0:
                w = self.obs_variance / (fwd_variance[t - 1] + self.chain_variance + self.obs_variance)
            else:
                w = 0.0
            val = w * deriv[t - 1]
            if time == t - 1:
                val += 1 - w
            deriv[t] = val

        # backward pass
        for t in range(T - 1, -1, -1):
            if self.chain_variance == 0.0:
                w = 0.0
            else:
                w = self.chain_variance / (fwd_variance[t] + self.chain_variance)
            deriv[t] = w * deriv[t] + (1 - w) * deriv[t + 1]

        return deriv

    def compute_obs_deriv(self, word, word_counts, totals, mean_deriv_mtx, deriv):
        """Derivation of obs which is used in derivative function `df_obs` while optimizing.

        Parameters
        ----------
        word : int
            The word's ID.
        word_counts : list of int
            Total word counts for each time slice.
        totals : list of int of length `len(self.time_slice)`
            The totals for each time slice.
        mean_deriv_mtx : list of float
            Mean derivative for each time slice.
        deriv : list of float
            Mean derivative for each time slice.

        Returns
        -------
        list of float
            Mean derivative for each time slice.

        """
        init_mult = 1000

        T = self.num_time_slices

        mean = self.mean[word]
        variance = self.variance[word]

        # temp_vect holds temporary zeta values
        self.temp_vect = np.zeros(T)
        for u in range(T):
            self.temp_vect[u] = np.exp(mean[u + 1] + variance[u + 1] / 2)

        for t in range(T):
            mean_deriv = mean_deriv_mtx[t]
            term1 = 0
            term2 = 0
            term3 = 0
            term4 = 0

            for u in range(1, T + 1):
                mean_u = mean[u]
                mean_u_prev = mean[u - 1]
                dmean_u = mean_deriv[u]
                dmean_u_prev = mean_deriv[u - 1]

                term1 += (mean_u - mean_u_prev) * (dmean_u - dmean_u_prev)
                term2 += (word_counts[u - 1] - (totals[u - 1] * self.temp_vect[u - 1] / self.zeta[u - 1])) * dmean_u

                model = "DTM"
                if model == "DIM":
                    # terms 3 and 4 are only computed in DIM mode
                    pass

            if self.chain_variance:
                term1 = -(term1 / self.chain_variance)
                term1 = term1 - (mean[0] * mean_deriv[0]) / (init_mult * self.chain_variance)
            else:
                term1 = 0.0

            deriv[t] = term1 + term2 + term3 + term4

        return deriv


class LdaPost(utils.SaveLoad):
    """Posterior values associated with each set of documents.

    TODO: use **Hoffman, Blei, Bach: Online Learning for Latent Dirichlet Allocation, NIPS 2010.**
    to update phi, gamma. End game would be to somehow replace LdaPost entirely with LdaModel.

    """

    def __init__(self, doc=None, lda=None, max_doc_len=None, num_topics=None, gamma=None, lhood=None):
        """Initialize the posterior value structure for the given LDA model.

        Parameters
        ----------
        doc : list of (int, int)
            A BOW representation of the document. Each element in the list is a pair of a word's ID and its number
            of occurrences in the document.
        lda : :class:`~gensim.models.ldamodel.LdaModel`, optional
            The underlying LDA model.
        max_doc_len : int, optional
            The maximum number of words in a document.
        num_topics : int, optional
            Number of topics discovered by the LDA model.
        gamma : numpy.ndarray, optional
            Topic weight variational parameters for each document. If not supplied, it will be inferred from the model.
        lhood : float, optional
            The log likelihood lower bound.

        """
        self.doc = doc
        self.lda = lda
        self.gamma = gamma
        self.lhood = lhood
        if self.gamma is None:
            self.gamma = np.zeros(num_topics)
        if self.lhood is None:
            self.lhood = np.zeros(num_topics + 1)

        if max_doc_len is not None and num_topics is not None:
            self.phi = np.zeros((max_doc_len, num_topics))
            self.log_phi = np.zeros((max_doc_len, num_topics))

        # the following are class variables which are to be integrated during Document Influence Model
        self.doc_weight = None
        self.renormalized_doc_weight = None

    def update_phi(self, doc_number, time):
        """Update variational multinomial parameters, based on a document and a time-slice.

        This is done based on the original Blei-LDA paper, where:
        log_phi := beta * exp(Ψ(gamma)), over every topic for every word.

        TODO: incorporate lee-sueng trick used in
        **Lee, Seung: Algorithms for non-negative matrix factorization, NIPS 2001**.

        Parameters
        ----------
        doc_number : int
            Document number. Unused.
        time : int
            Time slice. Unused.

        Returns
        -------
        (list of float, list of float)
            Multinomial parameters, and their logarithm, for each word in the document.
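
        Examples
        --------
        The per-word normalization is done in log space; schematically, with toy numbers:

        .. sourcecode:: pycon

            >>> import numpy as np
            >>> log_phi_row = np.array([-1.0, -2.0, -0.5])
            >>> log_norm = np.logaddexp.reduce(log_phi_row)  # log of the summed exponentials
            >>> phi_row = np.exp(log_phi_row - log_norm)  # a proper distribution over topics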

        """
        num_topics = self.lda.num_topics
        # digamma values
        dig = np.zeros(num_topics)

        for k in range(num_topics):
            dig[k] = digamma(self.gamma[k])

        n = 0  # keep track of iterations for phi, log_phi
        for word_id, count in self.doc:
            for k in range(num_topics):
                self.log_phi[n][k] = dig[k] + self.lda.topics[word_id][k]

            log_phi_row = self.log_phi[n]
            phi_row = self.phi[n]

            # log normalize
            v = log_phi_row[0]
            for i in range(1, len(log_phi_row)):
                v = np.logaddexp(v, log_phi_row[i])

            # subtract every element by v
            log_phi_row = log_phi_row - v
            phi_row = np.exp(log_phi_row)
            self.log_phi[n] = log_phi_row
            self.phi[n] = phi_row
            n += 1

        return self.phi, self.log_phi

    def update_gamma(self):
        """Update variational dirichlet parameters.

        This operation is described in the original Blei LDA paper:
        gamma = alpha + sum(phi), over every topic for every word.

        Returns
        -------
        list of float
            The updated gamma parameters for each word in the document.
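
        Examples
        --------
        Schematically, with a toy `phi` of shape `(doc_len, num_topics)` and per-word counts from the BOW doc:

        .. sourcecode:: pycon

            >>> import numpy as np
            >>> alpha, phi = np.full(2, 0.01), np.full((3, 2), 0.5)
            >>> counts = np.array([2, 1, 3])
            >>> gamma = alpha + (phi * counts[:, np.newaxis]).sum(axis=0)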

        """
        self.gamma = np.copy(self.lda.alpha)
        n = 0  # keep track of number of iterations for phi, log_phi
        for word_id, count in self.doc:
            phi_row = self.phi[n]
            for k in range(self.lda.num_topics):
                self.gamma[k] += phi_row[k] * count
            n += 1

        return self.gamma

    def init_lda_post(self):
        """Initialize variational posterior. """
        total = sum(count for word_id, count in self.doc)
        self.gamma.fill(self.lda.alpha[0] + float(total) / self.lda.num_topics)
        self.phi[:len(self.doc), :] = 1.0 / self.lda.num_topics

    def compute_lda_lhood(self):
        """Compute the log likelihood bound.

        Returns
        -------
        float
            The optimal lower bound for the true posterior using the approximate distribution.

        ru   r   r   )rt   r   r4   r2   r   r   r(   r   r   r8   r   r2  rw   r3  )rH   r   	gamma_sumr   digsumr}   rY   e_log_theta_kr   r9  r:  r;  s               r   compute_lda_lhoodzLdaPost.compute_lda_lhood^  s    X(
F4:&&	 tx~..//')2D2DD!&
: ##z"" 	  	 A $DJqM22V;M"TZ]2mC
1&&')01B)C)CD  A"&(  8A;q>A% tA.-$(/RYBZ[\B]2]`d`lmn`opq`r2rstJQ&DJqMZEE r   :0yE>r   c
                    |                                   t          d | j        D                       }
d}|dk    r	 |                                 }d}d}d}|dz  }|}|                                 | _        d}|dk    st          $|                     ||          \  | _        | _	        n9|dk    r3t          ,| 
                    ||t          |||	          \  | _        | _	        |                                 }t          j        ||z
  ||
z  z            }||k    r||k    r|dz  }|}|                                 | _        d}|dk    st          $|                     ||          \  | _        | _	        n9|dk    r3t          ,| 
                    ||t          |||	          \  | _        | _	        |                                 }t          j        ||z
  ||
z  z            }||k    r||k    |S )a  Posterior inference for lda.

        Parameters
        ----------
        doc_number : int
            The documents number.
        time : int
            Time slice.
        ldaseq : object
            Unused.
        LDA_INFERENCE_CONVERGED : float
            Epsilon value used to check whether the inference step has sufficiently converged.
        lda_inference_max_iter : int
            Maximum number of iterations in the inference step.
        g : object
            Unused. Will be useful when the DIM model is implemented.
        g3_matrix: object
            Unused. Will be useful when the DIM model is implemented.
        g4_matrix: object
            Unused. Will be useful when the DIM model is implemented.
        g5_matrix: object
            Unused. Will be useful when the DIM model is implemented.

        Returns
        -------
        float
            The optimal lower bound for the true posterior using the approximate distribution.
        c              3       K   | ]	\  }}|V  
d S r#   r   rC  s      r   r   z'LdaPost.fit_lda_post.<locals>.<genexpr>  rD  r   ru   rv   r   r   )rH  r2   r   rM  r@  r   r9   r>  r2  r3  update_phi_fixedr4   rd   )rH   r   r   ldaseqLDA_INFERENCE_CONVERGEDrL   g	g3_matrix	g4_matrix	g5_matrixrG  r}   r   	lhood_oldr   rl   s                   r   r   zLdaPost.fit_lda_post  s:   > 	9999999E> 	&&((		 	
	&&((
E> 	tT 	t%)__Z%F%F"DHdlle^ 	t 	t%)%:%::tTS\^gir%s%s"DHdl&&((GY.9u3DEFF	11 	Ke?U6U 	KQJEI**,,DJE~ x x)-T)J)J&$,,% xD x)-)>)>z4QUW`bkmv)w)w&$,**,,EU!2y57H IJJI 11 	Ke?U6U 	K r   c                     | j         j        }t          |          D ]M}||         }d}| j        D ]4\  }}	||         |xx         |	| j        |         |         z  z  cc<   |dz  }5|||<   N|S )a  Update lda sequence sufficient statistics from an lda posterior.
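    # Sketch of the stopping rule used above: iteration halts once the relative
    # change in the bound, scaled by the document length, drops below
    # LDA_INFERENCE_CONVERGED. Toy numbers, for illustration only.
    #
    # .. sourcecode:: pycon
    #
    #     >>> import numpy as np
    #     >>> lhood_old, lhood, total = -105.0, -104.999999, 120
    #     >>> converged = np.fabs((lhood_old - lhood) / (lhood_old * total))
    #     >>> bool(converged > 1e-8)  # 1e-8 is the default LDA_INFERENCE_CONVERGED
    #     False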
    def update_lda_seq_ss(self, time, doc, topic_suffstats):
        """Update lda sequence sufficient statistics from an lda posterior.

        This is very similar to the :meth:`~gensim.models.ldaseqmodel.LdaPost.update_gamma` method and uses
        the same formula.

        Parameters
        ----------
        time : int
            The time slice.
        doc : list of (int, float)
            Unused but kept here for backwards compatibility. The document set in the constructor (`self.doc`) is used
            instead.
        topic_suffstats : list of float
            Sufficient statistics for each topic.

        Returns
        -------
        list of float
            The updated sufficient statistics for each topic.

        """
        num_topics = self.lda.num_topics

        for k in range(num_topics):
            topic_ss = topic_suffstats[k]
            n = 0
            for word_id, count in self.doc:
                topic_ss[word_id][time] += count * self.phi[n][k]
                n += 1
            topic_suffstats[k] = topic_ss

        return topic_suffstats
d}d}d}d}| |j        |<   |                    ||j                  \  |j        |<   |j        |<   |j        |         }|j        |         }t          d|	dz             D ]z}||         }||dz
           }||z
  }
||
|
z  z  }|||dz
           |z  ||dz
           t          j	        |||         dz  z             z  |j
        |dz
           z  z
  z  }d}|dk    r	 {|j        dk    r1|d|j        z  z   }||d         |d         z  d|z  |j        z  z  z
  }nd}||z   |z   |z    }|S )a  Function which we are optimising for minimizing obs.

    Parameters
    ----------
    x : list of float
        The obs values for this word.
    sslm : :class:`~gensim.models.ldaseqmodel.sslm`
        The State Space Language Model for DTM.
    word_counts : list of int
        Total word counts for each time slice.
    totals : list of int of length `len(self.time_slice)`
        The totals for each time slice.
    mean_deriv_mtx : list of float
        Mean derivative for each time slice.
    word : int
        The word's ID.
    deriv : list of float
        Mean derivative for each time slice.

    Returns
    -------
    list of float
        The value of the objective function evaluated at point `x`.

    r   r   r   r\   ru   rv   rr   )r$   r   r   r    r   r   r   r8   r4   r   r   )r   r  r9   r#  r   r  r   r  r$  r   r   r'  r(  r)  r*  r   r   r   r   mean_t_prevr}   finals                         r   r  r    s   4 >B:D+v~tUIAA
CEE EEDHTN+/+A+A$H[+\+\(DIdOT]4(9T?D}T"H 1a!e__  a1q5k{"sQU#f,va!e}rvfxXY{]^F^?_?_/_bfbklmpqlqbr/rrrE> 	S  A 3345Q$q')Q]T=P-PQQeme#e+,ELr   c                    |\  }}}}}}| |j         |<   |                    ||j                  \  |j        |<   |j        |<   d}|dk    r|                    |||||          }nR|dk    rL|                    t          j        t          j	        t          j
        t          j        t          j        |          }t          j        |          S )a  Derivative of the objective function which optimises obs.

    Parameters
    ----------
    x : list of float
        The obs values for this word.
    sslm : :class:`~gensim.models.ldaseqmodel.sslm`
        The State Space Language Model for DTM.
    word_counts : list of int
        Total word counts for each time slice.
    totals : list of int of length `len(self.time_slice)`
        The totals for each time slice.
    mean_deriv_mtx : list of float
        Mean derivative for each time slice.
    word : int
        The word's ID.
    deriv : list of float
        Mean derivative for each time slice.

    Returns
    -------
    list of float
        The derivative of the objective function evaluated at point `x`.

    """
    sslm, word_counts, totals, mean_deriv_mtx, word, deriv = args

    sslm.obs[word] = x
    sslm.mean[word], sslm.fwd_mean[word] = sslm.compute_post_mean(word, sslm.chain_variance)

    model = "DTM"
    if model == "DTM":
        deriv = sslm.compute_obs_deriv(word, word_counts, totals, mean_deriv_mtx, deriv)
    elif model == "DIM":
        # dead DIM branch, kept for future work
        deriv = sslm.compute_obs_deriv_fixed(
            p.word, p.word_counts, p.totals, p.sslm, p.mean_deriv_mtx, deriv)  # noqa:F821

    return np.negative(deriv)