
    cf              
       >   d Z ddlZddlZddlmZ ddlZddlm	Z	m
Z
 ddlmZ ddlmZmZmZmZmZ ddlmZ  ej        e          ZdhZh d	Z ed
d          Z eej        ej        ej        ej                   eej        ej        ej         ej                   eej        ej!        ej"        ej                   eej#        ej        ej$        ej                   eej#        ej        ej$        ej                  dZ%ddddddZ& G d de	j'                  Z(dS )a  Calculate topic coherence for topic models. This is the implementation of the four stage topic coherence pipeline
from the paper `Michael Roeder, Andreas Both and Alexander Hinneburg: "Exploring the space of topic coherence measures"
<http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf>`_.
Typically, :class:`~gensim.models.coherencemodel.CoherenceModel` used for evaluation of topic models.

The four stage pipeline is basically:

    * Segmentation
    * Probability Estimation
    * Confirmation Measure
    * Aggregation

Implementation of this pipeline allows for the user to in essence "make" a coherence measure of his/her choice
by choosing a method in each of the pipelines.

See Also
--------
:mod:`gensim.topic_coherence`
    Internal functions for pipelines.

    N)
namedtuple)
interfacesmatutils)utils)segmentationprobability_estimationdirect_confirmation_measureindirect_confirmation_measureaggregation)unique_ids_from_segmentsu_mass>   c_vc_ucic_w2vc_npmiCoherence_Measurezseg, prob, conf, aggr)r   r   r   r   r   n      
   )r   r   r   r   r   c                      e Zd ZdZ	 	 d!dZed"d            Zed"d            Zed	             Z	d
 Z
ed             Zej        d             Zed             Zej        d             Zed             Zed             Zej        d             Zd Zd Zd Zd Zd Zed             Zd Zd#dZd$dZd Zd Zd Zd Zd  ZdS )%CoherenceModelaZ  Objects of this class allow for building and maintaining a model for topic coherence.

    Examples
    ---------
    One way of using this feature is through providing a trained topic model. A dictionary has to be explicitly provided
    if the model does not contain a dictionary already

    .. sourcecode:: pycon

        >>> from gensim.test.utils import common_corpus, common_dictionary
        >>> from gensim.models.ldamodel import LdaModel
        >>> from gensim.models.coherencemodel import CoherenceModel
        >>>
        >>> model = LdaModel(common_corpus, 5, common_dictionary)
        >>>
        >>> cm = CoherenceModel(model=model, corpus=common_corpus, coherence='u_mass')
        >>> coherence = cm.get_coherence()  # get coherence value

    Another way of using this feature is through providing tokenized topics such as:

    .. sourcecode:: pycon

        >>> from gensim.test.utils import common_corpus, common_dictionary
        >>> from gensim.models.coherencemodel import CoherenceModel
        >>> topics = [
        ...     ['human', 'computer', 'system', 'interface'],
        ...     ['graph', 'minors', 'trees', 'eps']
        ... ]
        >>>
        >>> cm = CoherenceModel(topics=topics, corpus=common_corpus, dictionary=common_dictionary, coherence='u_mass')
        >>> coherence = cm.get_coherence()  # get coherence value

    Nr      c                 Z    ||t          d          ||t          d          | _        |||t          d          |;t          |j        t          j                  rt          d          |j         _        n| _        | _        | _         j        t           j                  _        | _
        | _        |t          v rRt	          j        |          d         r| _        nj j
         fd j
        D              _        nJt          d|          |d	k    r|n1|t          v r j
        t          d
|          nt          d|          |	 _        | _        d _        d _        | _        |
dk    r|
n#t)          dt+          j                    dz
             _        dS )aj  

        Parameters
        ----------
        model : :class:`~gensim.models.basemodel.BaseTopicModel`, optional
            Pre-trained topic model, should be provided if topics is not provided.
            Currently supports :class:`~gensim.models.ldamodel.LdaModel`,
            :class:`~gensim.models.ldamulticore.LdaMulticore`.
            Use `topics` parameter to plug in an as yet unsupported model.
        topics : list of list of str, optional
            List of tokenized topics, if this is preferred over model - dictionary should be provided.
        texts : list of list of str, optional
            Tokenized texts, needed for coherence models that use sliding window based (i.e. coherence=`c_something`)
            probability estimator .
        corpus : iterable of list of (int, number), optional
            Corpus in BoW format.
        dictionary : :class:`~gensim.corpora.dictionary.Dictionary`, optional
            Gensim dictionary mapping of id word to create corpus.
            If `model.id2word` is present, this is not needed. If both are provided, passed `dictionary` will be used.
        window_size : int, optional
            Is the size of the window to be used for coherence measures using boolean sliding window as their
            probability estimator. For 'u_mass' this doesn't matter.
            If None - the default window sizes are used which are: 'c_v' - 110, 'c_uci' - 10, 'c_npmi' - 10.
        coherence : {'u_mass', 'c_v', 'c_uci', 'c_npmi'}, optional
            Coherence measure to be used.
            Fastest method - 'u_mass', 'c_uci' also known as `c_pmi`.
            For 'u_mass' corpus should be provided, if texts is provided, it will be converted to corpus
            using the dictionary. For 'c_v', 'c_uci' and 'c_npmi' `texts` should be provided (`corpus` isn't needed)
        topn : int, optional
            Integer corresponding to the number of top words to be extracted from each topic.
        processes : int, optional
            Number of processes to use for probability estimation phase, any value less than 1 will be interpreted as
            num_cpus - 1.

        Nz*One of model or topics has to be provided.z7dictionary has to be provided if topics are to be used.z*One of texts or corpus has to be provided.zThe associated dictionary should be provided with the corpus or 'id2word' for topic model should be set as the associated dictionary.r   c                 D    g | ]}j                             |          S  )
dictionarydoc2bow).0textselfs     <lib/python3.11/site-packages/gensim/models/coherencemodel.py
<listcomp>z+CoherenceModel.__init__.<locals>.<listcomp>   s)    TTTt66t<<TTT    zQEither 'corpus' with 'dictionary' or 'texts' should be provided for %s coherence.r   z,'texts' should be provided for %s coherence.z(%s coherence is not currently supported.   )
ValueErrorkeyed_vectors
isinstanceid2wordr   FakeDictr   	coherencewindow_sizeSLIDING_WINDOW_SIZEStextscorpusBOOLEAN_DOCUMENT_BASED	is_corpusSLIDING_WINDOW_BASED_topn_model_accumulator_topicstopicsmaxmp	cpu_count	processes)r!   modelr7   r.   r/   r   r,   r'   r+   topnr;   s   `          r"   __init__zCoherenceModel.__init__~   s(   J  	XV 	XIJJJ 	XJ 	XVWWW* 	KU 	Kv 	KIJJJ  	)%-88 0 ST T T #(-(DO #& 	D3DNCD
.. 	Tv&&q) @$ @TTTTTTT 45>@ @ @
 '! 	Tm 	T.. 	Tz \ !OQZ[[[\ GSSS
 &/1nT#aRSAS:T:Tr$   c                 X      fd|D             }|d<   |d<     j         |fi |S )a  Initialize a CoherenceModel with estimated probabilities for all of the given models.
        Use :meth:`~gensim.models.coherencemodel.CoherenceModel.for_topics` method.

        Parameters
        ----------
        models : list of :class:`~gensim.models.basemodel.BaseTopicModel`
            List of models to evaluate coherence of, each of it should implements
            :meth:`~gensim.models.basemodel.BaseTopicModel.get_topics` method.
        dictionary : :class:`~gensim.corpora.dictionary.Dictionary`
            Gensim dictionary mapping of id word.
        topn : int, optional
            Integer corresponding to the number of top words to be extracted from each topic.
        kwargs : object
            Sequence of arguments, see :meth:`~gensim.models.coherencemodel.CoherenceModel.for_topics`.

        Return
        ------
        :class:`~gensim.models.coherencemodel.CoherenceModel`
            CoherenceModel with estimated probabilities for all of the given models.

        Example
        -------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import common_corpus, common_dictionary
            >>> from gensim.models.ldamodel import LdaModel
            >>> from gensim.models.coherencemodel import CoherenceModel
            >>>
            >>> m1 = LdaModel(common_corpus, 3, common_dictionary)
            >>> m2 = LdaModel(common_corpus, 5, common_dictionary)
            >>>
            >>> cm = CoherenceModel.for_models([m1, m2], common_dictionary, corpus=common_corpus, coherence='u_mass')
        c                 >    g | ]}                     |          S r   )top_topics_as_word_lists)r   r<   clsr   r=   s     r"   r#   z-CoherenceModel.for_models.<locals>.<listcomp>   s+    \\\E#..uj$GG\\\r$   r   r=   )
for_topics)rB   modelsr   r=   kwargsr7   s   ` ``  r"   
for_modelszCoherenceModel.for_models   sV    F ]\\\\\U[\\\)|vs~f/////r$   c                    j         s(d j                                        D             _         g }|                                 D ]<}t	          j        ||d          }fd|D             }|                    |           =|S )a  Get `topn` topics as list of words.

        Parameters
        ----------
        model : :class:`~gensim.models.basemodel.BaseTopicModel`
            Pre-trained topic model.
        dictionary : :class:`~gensim.corpora.dictionary.Dictionary`
            Gensim dictionary mapping of id word.
        topn : int, optional
            Integer corresponding to the number of top words to be extracted from each topic.

        Return
        ------
        list of list of str
            Top topics in list-of-list-of-words format.

        c                     i | ]\  }}||	S r   r   )r   kvs      r"   
<dictcomp>z;CoherenceModel.top_topics_as_word_lists.<locals>.<dictcomp>  s    "P"P"PDAq1a"P"P"Pr$   Tr=   reversec                 *    g | ]}j         |         S r   )id2token)r   _idr   s     r"   r#   z;CoherenceModel.top_topics_as_word_lists.<locals>.<listcomp>  s!    AAACz*3/AAAr$   )rO   token2iditems
get_topicsr   argsortappend)r<   r   r=   
str_topicstopicbestnbeststrs    `     r"   rA   z'CoherenceModel.top_topics_as_word_lists  s    & " 	Q"P"PJ4G4M4M4O4O"P"P"PJ
%%'' 	' 	'E$UtDDDEAAAA5AAAGg&&&&r$   c                 
   |st          d          t          d |D                       rt          d          d}|D ]$}|D ]}t          |t          |                    } %t	          |                    d|          |          }t          j        |          }t          j	        dt          |          t          |                     t          d|gt          |          d|}|                                 ||_        |S )	a  Initialize a CoherenceModel with estimated probabilities for all of the given topics.

        Parameters
        ----------
        topics_as_topn_terms : list of list of str
            Each element in the top-level list should be the list of topics for a model.
            The topics for the model should be a list of top-N words, one per topic.

        Return
        ------
        :class:`~gensim.models.coherencemodel.CoherenceModel`
            CoherenceModel with estimated probabilities for all of the given models.

        zlen(topics) must be > 0.c              3   <   K   | ]}t          |          d k    V  dS )r   N)len)r   topic_listss     r"   	<genexpr>z,CoherenceModel.for_topics.<locals>.<genexpr>1  s/      MMs;1$MMMMMMr$   z%found empty topic listing in `topics`r   r=   z.Number of relevant terms for all %d models: %d)r7   r=   r   )r&   anyr8   r\   minpopr   flattenlogginginfor   estimate_probabilitiesr=   )rB   topics_as_topn_termsrE   r=   
topic_listrW   super_topiccms           r"   rC   zCoherenceModel.for_topics  s*     $ 	97888MM8LMMMMM 	FDEEE. 	- 	-J# - -4U,,- 6::fd++T22m$899<$%%s;'7'7	9 	9 	9 RK=s;7G7GRR6RR
!!###	r$   c                 *    t          | j                  S N)strmeasurer!   s    r"   __str__zCoherenceModel.__str__D  s    4<   r$   c                     | j         S )zGet `self._model` field.

        Return
        ------
        :class:`~gensim.models.basemodel.BaseTopicModel`
            Used model.

        )r4   rn   s    r"   r<   zCoherenceModel.modelG  s     {r$   c                 |    || _         |2|                                 }|                     |           || _        dS dS )zSet `self._model` field.

        Parameters
        ----------
        model : :class:`~gensim.models.basemodel.BaseTopicModel`
            Input model.

        N)r4   _get_topics_update_accumulatorr6   )r!   r<   
new_topicss      r"   r<   zCoherenceModel.modelS  sM      	&))++J$$Z000%DLLL	& 	&r$   c                     | j         S )zGet number of top words `self._topn`.

        Return
        ------
        int
            Integer corresponding to the number of top words.

        r3   rn   s    r"   r=   zCoherenceModel.topnc  s     zr$   c                     t          | j        d                   }||k     }| j        || _        |r| j        | _        dS dS |rt          d|z            || _        dS )zSet number of top words `self._topn`.

        Parameters
        ----------
        topn : int
            Number of top words.

        r   Nz7Model unavailable and topic sizes are less than topn=%d)r\   r6   r<   r3   r4   r&   )r!   r=   current_topic_lengthrequires_expansions       r"   r=   zCoherenceModel.topno  s~      #4<?331D8: 	DJ! )![


) ) " c !Z]a!abbbDJJJr$   c                 &    t           | j                 S )zMake pipeline, according to `coherence` parameter value.

        Return
        ------
        namedtuple
            Pipeline that contains needed functions/method for calculated coherence.

        )COHERENCE_MEASURESr+   rn   s    r"   rm   zCoherenceModel.measure  s     "$.11r$   c                 ~     t           j        d                    j        k    r fd j        D             S  j        S )zGet topics `self._topics`.

        Return
        ------
        list of list of str
            Topics as list of tokens.

        r   c                 .    g | ]}|d j                  S rk   rv   )r   rW   r!   s     r"   r#   z)CoherenceModel.topics.<locals>.<listcomp>  s$    AAA5E+4:+&AAAr$   )r\   r6   r3   rn   s   `r"   r7   zCoherenceModel.topics  sF     t|A$*, 	 AAAADLAAAA<r$   c                 p   |Yg }|D ],}|                      |          }|                    |           -| j         t                              d| j                   n>| j        5|                                 }t                              d| j                   nd}|                     |           || _        dS )zSet topics `self._topics`.

        Parameters
        ----------
        topics : list of list of str
            Topics.

        NzJThe currently set model '%s' may be inconsistent with the newly set topicsz(Setting topics to those of the model: %s)	_ensure_elements_are_idsrU   r<   loggerwarningrr   debugrs   r6   )r!   r7   rt   rW   topic_token_idss        r"   r7   zCoherenceModel.topics  s      	J 3 3"&"?"?"F"F!!/2222z  `J      Z 	))++JLLCTZPPPPJ  ,,,!r$   c                 *     fd|D             } fd|D             }t          |          t          |          k    rt          j        |          S t          |          t          |          k    rt          j        |          S t          d          )Nc                 P    g | ]"}|j         j        v j         j        |         #S r   )r   rQ   )r   tr!   s     r"   r#   z;CoherenceModel._ensure_elements_are_ids.<locals>.<listcomp>  s2    ggg1doNfIfg4?3A6gggr$   c                 &    g | ]}|j         v |S r   )r   )r   ir!   s     r"   r#   z;CoherenceModel._ensure_elements_are_ids.<locals>.<listcomp>  s%    AAAaA,@AAAAr$   zEunable to interpret topic as either a list of tokens or a list of ids)r\   nparrayr&   )r!   rW   ids_from_tokensids_from_idss   `   r"   r   z'CoherenceModel._ensure_elements_are_ids  s    gggggggAAAA5AAA#l"3"33 	f8O,,,_!5!55 	f8L)))deeer$   c                 v    |                      |          r#t                              d           d | _        d S d S )NzEWiping cached accumulator since it does not contain all relevant ids.)_relevant_ids_will_differr   r   r5   r!   rt   s     r"   rs   z"CoherenceModel._update_accumulator  sC    ))*55 	%LL`aaa $D	% 	%r$   c                     | j         |                     |          sdS t          | j                            |                    }| j         j                            |           S )NF)r5   _topics_differr   rm   segrelevant_ids
issuperset)r!   rt   new_sets      r"   r   z(CoherenceModel._relevant_ids_will_differ  sa     	D,?,?
,K,K 	5*4<+;+;J+G+GHH$1<<WEEEEr$   c                 R    |d uo#| j         d uot          j        || j                    S rk   )r6   r   array_equalr   s     r"   r   zCoherenceModel._topics_differ  s:    $& AL,Az4<@@@	Br$   c                 B    |                      | j        | j                  S )zEInternal helper function to return topics from a trained topic model.)_get_topics_from_modelr<   r=   rn   s    r"   rr   zCoherenceModel._get_topics  s    **4:tyAAAr$   c                     	 fd|                                  D             S # t          $ r t          d          w xY w)a  Internal helper function to return topics from a trained topic model.

        Parameters
        ----------
        model : :class:`~gensim.models.basemodel.BaseTopicModel`
            Pre-trained topic model.
        topn : int
            Integer corresponding to the number of top words.

        Return
        ------
        list of :class:`numpy.ndarray`
            Topics matrix

        c                 >    g | ]}t          j        |d           S )TrL   )r   rT   )r   rW   r=   s     r"   r#   z9CoherenceModel._get_topics_from_model.<locals>.<listcomp>  s8       EJ T4@@@  r$   zmThis topic model is not currently supported. Supported topic models should implement the `get_topics` method.)rS   AttributeErrorr&   )r<   r=   s    `r"   r   z%CoherenceModel._get_topics_from_model  ss    "	>     ""     	> 	> 	>=> > >	>s   # =c                 @    | j                             | j                  S )zSegment topic, alias for `self.measure.seg(self.topics)`.

        Return
        ------
        list of list of pair
            Segmented topics.

        )rm   r   r7   rn   s    r"   segment_topicszCoherenceModel.segment_topics  s     |,,,r$   c                 L   ||                                  }| j        t          v r&| j                            | j        |          | _        nTt          | j        || j	        | j
        | j                  }| j        dk    r
| j        |d<    | j        j        di || _        | j        S )an  Accumulate word occurrences and co-occurrences from texts or corpus using the optimal method for the chosen
        coherence metric.

        Notes
        -----
        This operation may take quite some time for the sliding window based coherence methods.

        Parameters
        ----------
        segmented_topics : list of list of pair, optional
            Segmented topics, typically produced by :meth:`~gensim.models.coherencemodel.CoherenceModel.segment_topics`.

        Return
        ------
        :class:`~gensim.topic_coherence.text_analysis.CorpusAccumulator`
            Corpus accumulator.

        N)r.   segmented_topicsr   r,   r;   r   r<   r   )r   r+   r0   rm   probr/   r5   dictr.   r   r,   r;   r'   )r!   r   rE   s      r"   re   z%CoherenceModel.estimate_probabilities  s    &  	5#2244>33 
	< $ 1 1$+?O P PDj3C?8H.* * *F ~( 5"&"4w 1 1 ; ;F ; ;D  r$   Fc                 Z   | j         }||                    | j                  }| j        |                     |           t          ||          }| j        t          v s| j        dk    rn.| j        dk    r| j        |d<   d|d<   d|d	<   n| j        d
k    |d<    |j        || j        fi |S )a  Get list of coherence values for each topic based on pipeline parameters.

        Parameters
        ----------
        segmented_topics : list of list of (int, number)
            Topics.
        with_std : bool, optional
            True to also include standard deviation across topic segment sets in addition to the mean coherence
            for each topic.
        with_support : bool, optional
            True to also include support across topic segments. The support is defined as the number of pairwise
            similarity comparisons were used to compute the overall topic coherence.

        Return
        ------
        list of float
            Sequence of similarity measure for each topic.

        N)with_stdwith_supportr   r   r7   nlrrm   r%   gammar   	normalize)	rm   r   r7   r5   re   r   r+   r0   conf)r!   r   r   r   rm   rE   s         r"   get_coherence_per_topicz&CoherenceModel.get_coherence_per_topic&  s    ( , 	8&{{4;77 	:''(8999xlCCC>33 	?t~7P 	?^u$ 	?#{F8 %F9F7OO#'>X#=F;w|,d.?JJ6JJJr$   c                 6    | j                             |          S )a  Aggregate the individual topic coherence measures using the pipeline's aggregation function.
        Use `self.measure.aggr(topic_coherences)`.

        Parameters
        ----------
        topic_coherences : list of float
            List of calculated confirmation measure on each set in the segmented topics.

        Returns
        -------
        float
            Arithmetic mean of all the values contained in confirmation measures.

        )rm   aggr)r!   topic_coherencess     r"   aggregate_measuresz!CoherenceModel.aggregate_measuresL  s     |  !1222r$   c                 T    |                                  }|                     |          S )zGet coherence value based on pipeline parameters.

        Returns
        -------
        float
            Value of coherence.

        )r   r   )r!   confirmed_measuress     r"   get_coherencezCoherenceModel.get_coherence]  s+     "99;;&&'9:::r$   c                 J      fd|D             }                      |          S )aQ  Compare topic models by coherence value.

        Parameters
        ----------
        models : :class:`~gensim.models.basemodel.BaseTopicModel`
            Sequence of topic models.

        Returns
        -------
        list of (float, float)
            Sequence of pairs of average topic coherence and average coherence.

        c                 F    g | ]}                     |j                  S r   )r   r=   )r   r<   r!   s     r"   r#   z1CoherenceModel.compare_models.<locals>.<listcomp>w  s+    ZZZ%33E49EEZZZr$   )compare_model_topics)r!   rD   model_topicss   `  r"   compare_modelszCoherenceModel.compare_modelsi  s4     [ZZZSYZZZ((666r$   c                     | j         }| j        }	 |                     |          }|| _        || _        n# || _        || _        w xY w|S )a  Perform the coherence evaluation for each of the models.

        Parameters
        ----------
        model_topics : list of list of str
            list of list of words for the model trained with that number of topics.

        Returns
        -------
        list of (float, float)
            Sequence of pairs of average topic coherence and average coherence.

        Notes
        -----
        This first precomputes the probabilities once, then evaluates coherence for each model.

        Since we have already precomputed the probabilities, this simply involves using the accumulated stats in the
        :class:`~gensim.models.coherencemodel.CoherenceModel` to perform the evaluations, which should be pretty quick.

        )r6   r=   _compare_model_topicsr7   )r!   r   orig_topics	orig_topn
coherencess        r"   r   z#CoherenceModel.compare_model_topicsz  s]    * lI		"33LAAJ%DK!DII &DK!DI!!!!s	   4 Ac                    g }t          | j        dz
  d          }t          t          | j        |d                    }t	          |          D ]\  }}|| _        i }|D ]t}|| _        |                                 }	t          j        |	          }
t          j	        |
          |
t          j
        |
          <   |	|                     |
          f||<   ut          |                                 \  }	}t          j        |	                              d          }t          j        |          }t!          j        d||fz             |                    ||f           |S )a6  Get average topic and model coherences.

        Parameters
        ----------
        model_topics : list of list of str
            Topics from the model.

        Returns
        -------
        list of (float, float)
            Sequence of pairs of average topic coherence and average coherence.

        r%      r   z Avg coherence for model %d: %.5f)r`   r=   listrange	enumerater7   r   r   r   nanmeanisnanr   zipvaluesvstackmeanrc   rd   rU   )r!   r   r   last_topn_value	topn_grid	model_numr7   coherence_at_nnr   filled_coherencesavg_coherencesavg_topic_coherencesmodel_coherences                 r"   r   z$CoherenceModel._compare_model_topics  sl    
di!mQ//ty/2>>??	!*<!8!8 	G 	GIv DK  N 	c 	c	#'#?#?#A#A 
 %'H-=$>$>!ACL]A^A^!"(+<"="=>%5t7N7NO`7a7a$bq!!/2N4I4I4K4K/L,n#%9-=#>#>#C#CA#F#F  gn55OL;y/>ZZ[[[3_EFFFFr$   )
NNNNNNNr   r   r   )r   rk   )NFF)__name__
__module____qualname____doc__r>   classmethodrF   staticmethodrA   rC   ro   propertyr<   setterr=   rm   r7   r   rs   r   r   rr   r   r   re   r   r   r   r   r   r   r   r$   r"   r   r   \   sp          B UY[]ZU ZU ZU ZUx %0 %0 %0 [%0N    \8 " " ["H! ! ! 	 	 X	 \& & \& 	 	 X	 
[  [* 	2 	2 X	2     X  ]" " ]"8f f f% % %
F F FB B B
B B B > > \>4	- 	- 	-"! "! "! "!H$K $K $K $KL3 3 3"
; 
; 
;7 7 7"  @) ) ) ) )r$   r   ))r   rc   multiprocessingr9   collectionsr   numpyr   gensimr   r   r   gensim.topic_coherencer   r   r	   r
   r   -gensim.topic_coherence.probability_estimationr   	getLoggerr   r   r0   r2   _make_pipeline	s_one_prep_boolean_documentlog_conditional_probabilityarithmetic_mean	s_one_setp_boolean_sliding_windowcosine_similarity
p_word2vecword2vec_similarity	s_one_onelog_ratio_measurer{   r-   TransformationABCr   r   r$   r"   <module>r      s:   ,      " " " " " "     ' ' ' ' ' ' ' '                   
 S R R R R R		8	$	$" ::: /1HIIn1#?#	  >7%7#	  ^)%9#	  ^7#5#	  n7#5#	 3  D   g	 g	 g	 g	 g	Z1 g	 g	 g	 g	 g	r$   