
    c                     \   d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZ ddlZddlmZmZ dd	lmZ dd
lmZmZ ddlmZmZ  ej        e           Z!	 ddl"m#Z#m$Z$m%Z% n# e&$ r ej'        w xY w	 ddl(m)Z)m*Z*m+Z+m,Z, n"# e&$ r dZ,	 	 	 d#dZ)	 	 	 d$dZ*	 	 	 d$dZ+Y nw xY w G d d edd                    Z-e G d d                      Z. G d de          Z/ G d dej0                  Z1 G d dej0                  Z2 G d d           Z3 G d! d"          Z4dS )%a  
Introduction
============

Learn paragraph and document embeddings via the distributed memory and distributed bag of words models from
`Quoc Le and Tomas Mikolov: "Distributed Representations of Sentences and Documents"
<http://arxiv.org/pdf/1405.4053v2.pdf>`_.

The algorithms use either hierarchical softmax or negative sampling; see
`Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean: "Efficient Estimation of Word Representations in
Vector Space, in Proceedings of Workshop at ICLR, 2013" <https://arxiv.org/pdf/1301.3781.pdf>`_ and
`Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean: "Distributed Representations of Words
and Phrases and their Compositionality. In Proceedings of NIPS, 2013"
<https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf>`_.

For a usage example, see the `Doc2vec tutorial
<https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html#sphx-glr-auto-examples-tutorials-run-doc2vec-lee-py>`_.

**Make sure you have a C compiler before installing Gensim, to use the optimized doc2vec routines** (70x speedup
compared to plain NumPy implementation, https://rare-technologies.com/parallelizing-word2vec-in-python/).


Usage examples
==============

Initialize & train a model:

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_texts
    >>> from gensim.models.doc2vec import Doc2Vec, TaggedDocument
    >>>
    >>> documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(common_texts)]
    >>> model = Doc2Vec(documents, vector_size=5, window=2, min_count=1, workers=4)

Persist a model to disk:

.. sourcecode:: pycon

    >>> from gensim.test.utils import get_tmpfile
    >>>
    >>> fname = get_tmpfile("my_doc2vec_model")
    >>>
    >>> model.save(fname)
    >>> model = Doc2Vec.load(fname)  # you can continue training with the loaded model!

Infer vector for a new document:

.. sourcecode:: pycon

    >>> vector = model.infer_vector(["system", "response"])

    N)
namedtupledefaultdict)Iterable)default_timer)	dataclass)zerosfloat32vstackintegerdtype)utilsmatutils)
deprecated)Word2VecFAST_VERSION)KeyedVectorspseudorandom_weak_vector)train_document_dbowtrain_document_dmtrain_document_dm_concat)d2v_train_epoch_dbowd2v_train_epoch_dm_concatd2v_train_epoch_dmCORPUSFILE_VERSIONFTc                      t          d          Nz4Training with corpus_file argument is not supported.NotImplementedError)modelcorpus_fileoffsetstart_doctag_cython_vocab
_cur_epoch_expected_examples_expected_wordswork_neu1docvecs_countword_vectors
word_lockstrain_wordslearn_doctagslearn_wordslearn_hiddendoctag_vectorsdoctag_lockss                      5lib/python3.11/site-packages/gensim/models/doc2vec.pyr   r   _        ""XYYY    c                      t          d          r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r.   r/   r0   r1   r2   s                     r3   r   r   e   r4   r5   c                      t          d          r   r   r7   s                     r3   r   r   k   r4   r5   c                       e Zd ZdZd ZdS )TaggedDocumenta  Represents a document along with a tag, input document format for :class:`~gensim.models.doc2vec.Doc2Vec`.

    A single document, made up of `words` (a list of unicode string tokens) and `tags` (a list of tokens).
    Tags may be one or more unicode string tokens, but typical practice (which will also be the most memory-efficient)
    is for the tags list to include a unique integer id as the only tag.

    Replaces "sentence as a list of words" from :class:`gensim.models.word2vec.Word2Vec`.

    c                 @    | j         j        d| j        d| j        dS )zHuman readable representation of the object's state, used for debugging.

        Returns
        -------
        str
           Human readable representation of the object's state (words and tags).

        <z, >)	__class____name__wordstagsselfs    r3   __str__zTaggedDocument.__str__|   s(      $~666


DIIINNr5   N)r?   
__module____qualname____doc__rD    r5   r3   r:   r:   r   s2         	O 	O 	O 	O 	Or5   r:   z
words tagsc                   l    e Zd ZU dZdZeed<   eed<   eed<   ed             Zej	        d             ZdS )	DoctagaM  A dataclass shape-compatible with keyedvectors.SimpleVocab, extended to record
    details of string document tags discovered during the initial vocabulary scan.

    Will not be used if all presented document tags are ints. No longer used in a
    completed model: just used during initial scan, and for backward compatibility.
    )	doc_countindex
word_countrK   rL   rM   c                     | j         S NrK   rB   s    r3   countzDoctag.count   s
    ~r5   c                     || _         d S rO   rP   )rC   new_vals     r3   rQ   zDoctag.count   s     r5   N)
r?   rE   rF   rG   	__slots__int__annotations__propertyrQ   setterrH   r5   r3   rJ   rJ      sx           5INNNJJJOOO  X \! ! \! ! !r5   rJ   c                       e Zd Z	 	 	 d+ fd		Zed
             Zed             Ze ed          d                         Zej	         ed          d                         Zd Z
 fdZd Z	 d,dZd Z	 	 	 d- fd	Zed             Zd Zd Zd.dZd Zd Zd/dZ ed           d0d!            Ze fd"            Zd1 fd#	Z	 	 d2d%Zd3d&Zd' Zd4d)Zd.d*Z xZ S )5Doc2VecNd      r   rH      
   Tc                    |}||| _         t          |          | _        t          |          | _        t          |          | _        |r2|r0|d|z  z   |z  | _        t                              d| j                   || _        |	pt          | j        |
          | _
        t          j        dt                    | j
        _         t          t           |           j        d||| j        d|z   dz  | j        ||||d	| dS )	u/  Class for training, using and evaluating neural networks described in
        `Distributed Representations of Sentences and Documents <http://arxiv.org/abs/1405.4053v2>`_.

        Parameters
        ----------
        documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional
            Input corpus, can be simply a list of elements, but for larger corpora,consider an iterable that streams
            the documents directly from disk/network. If you don't supply `documents` (or `corpus_file`), the model is
            left uninitialized -- use if you plan to initialize it in some other way.
        corpus_file : str, optional
            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
            You may use this argument instead of `documents` to get performance boost. Only one of `documents` or
            `corpus_file` arguments need to be passed (or none of them, in that case, the model is left uninitialized).
            Documents' tags are assigned automatically and are equal to line number, as in
            :class:`~gensim.models.doc2vec.TaggedLineDocument`.
        dm : {1,0}, optional
            Defines the training algorithm. If `dm=1`, 'distributed memory' (PV-DM) is used.
            Otherwise, `distributed bag of words` (PV-DBOW) is employed.
        vector_size : int, optional
            Dimensionality of the feature vectors.
        window : int, optional
            The maximum distance between the current and predicted word within a sentence.
        alpha : float, optional
            The initial learning rate.
        min_alpha : float, optional
            Learning rate will linearly drop to `min_alpha` as training progresses.
        seed : int, optional
            Seed for the random number generator. Initial vectors for each word are seeded with a hash of
            the concatenation of word + `str(seed)`. Note that for a fully deterministically-reproducible run,
            you must also limit the model to a single worker thread (`workers=1`), to eliminate ordering jitter
            from OS thread scheduling.
            In Python 3, reproducibility between interpreter launches also requires use of the `PYTHONHASHSEED`
            environment variable to control hash randomization.
        min_count : int, optional
            Ignores all words with total frequency lower than this.
        max_vocab_size : int, optional
            Limits the RAM during vocabulary building; if there are more unique
            words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM.
            Set to `None` for no limit.
        sample : float, optional
            The threshold for configuring which higher-frequency words are randomly downsampled,
            useful range is (0, 1e-5).
        workers : int, optional
            Use these many worker threads to train the model (=faster training with multicore machines).
        epochs : int, optional
            Number of iterations (epochs) over the corpus. Defaults to 10 for Doc2Vec.
        hs : {1,0}, optional
            If 1, hierarchical softmax will be used for model training.
            If set to 0, and `negative` is non-zero, negative sampling will be used.
        negative : int, optional
            If > 0, negative sampling will be used, the int for negative specifies how many "noise words"
            should be drawn (usually between 5-20).
            If set to 0, no negative sampling is used.
        ns_exponent : float, optional
            The exponent used to shape the negative sampling distribution. A value of 1.0 samples exactly in proportion
            to the frequencies, 0.0 samples all words equally, while a negative value samples low-frequency words more
            than high-frequency words. The popular default value of 0.75 was chosen by the original Word2Vec paper.
            More recently, in https://arxiv.org/abs/1804.04212, Caselles-Dupré, Lesaint, & Royo-Letelier suggest that
            other values may perform better for recommendation applications.
        dm_mean : {1,0}, optional
            If 0, use the sum of the context word vectors. If 1, use the mean.
            Only applies when `dm` is used in non-concatenative mode.
        dm_concat : {1,0}, optional
            If 1, use concatenation of context vectors rather than sum/average;
            Note concatenation results in a much-larger model, as the input
            is no longer the size of one (sampled or arithmetically combined) word vector, but the
            size of the tag(s) and all words in the context strung together.
        dm_tag_count : int, optional
            Expected constant number of document tags per document, when using
            dm_concat mode.
        dbow_words : {1,0}, optional
            If set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW
            doc-vector training; If 0, only trains doc-vectors (faster).
        trim_rule : function, optional
            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
            be trimmed away, or handled using the default (discard if word count < min_count).
            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
            or a callable that accepts parameters (word, count, min_count) and returns either
            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
            The rule, if given, is only used to prune vocabulary during current method call and is not stored as part
            of the model.

            The input parameters are of the following types:
                * `word` (str) - the word we are examining
                * `count` (int) - the word's frequency count in the corpus
                * `min_count` (int) - the minimum count threshold.

        callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`, optional
            List of callbacks that need to be executed/run at specific stages during training.
        shrink_windows : bool, optional
            New in 4.1. Experimental.
            If True, the effective window size is uniformly sampled from  [1, `window`]
            for each target word during training, to match the original word2vec algorithm's
            approximate weighting of context words by distance. Otherwise, the effective
            window size is always fixed to `window` words to either side.

        Some important internal attributes are the following:

        Attributes
        ----------
        wv : :class:`~gensim.models.keyedvectors.KeyedVectors`
            This object essentially contains the mapping between words and embeddings. After training, it can be used
            directly to query those embeddings in various ways. See the module level docstring for examples.

        dv : :class:`~gensim.models.keyedvectors.KeyedVectors`
            This object contains the paragraph vectors learned from the training data. There will be one such vector
            for each unique document tag supplied during training. They may be individually accessed using the tag
            as an indexed-access key. For example, if one of the training documents used a tag of 'doc003':

            .. sourcecode:: pycon

                >>> model.dv['doc003']

        N   z)using concatenative %d-dimensional layer1)mapfile_pathr\   r   )		sentencesr!   vector_sizesg	null_word	callbackswindowepochsshrink_windowsrH   )	cbow_meanrU   
dbow_words	dm_concatdm_tag_countlayer1_sizeloggerinford   r   dvnponesREALvectors_lockfsuperrZ   __init__)rC   	documentsr!   rd   dm_meandmrl   rm   rn   rr   
dv_mapfilecomment	trim_rulerg   rh   ri   rj   kwargscorpus_iterabler>   s                      r3   rx   zDoc2Vec.__init__   s)   n $ 	%$DNj//Y-- 	W) 	W ,F
 ;{JDKKCTEUVVV&OT%5JOOO !# 6 6 6%gt% 	
%#(B!|n)	
 	
 	
 	
 	
 	
 	
r5   c                     | j          S )zIndicates whether 'distributed memory' (PV-DM) will be used, else 'distributed bag of words'
        (PV-DBOW) is used.

        re   rB   s    r3   r{   z
Doc2Vec.dm5  s     7{r5   c                     | j         S )zIndicates whether 'distributed bag of words' (PV-DBOW) will be used, else 'distributed memory'
        (PV-DM) is used.

        r   rB   s    r3   dbowzDoc2Vec.dbow=  s     wr5   z-The `docvecs` property has been renamed `dv`.c                     | j         S rO   rr   rB   s    r3   docvecszDoc2Vec.docvecsE  s     wr5   c                     || _         d S rO   r   )rC   values     r3   r   zDoc2Vec.docvecsJ  s     r5   c                 6    d| j         _        d| j        _        dS )z!Resets the current word vectors. N)wvnormsrr   rB   s    r3   _clear_post_trainzDoc2Vec._clear_post_trainO  s    r5   c                     t          t          |                                            | j                            | j        dz              d S )Ni  )seed)rw   rZ   init_weightsrr   resize_vectorsr   )rC   r>   s    r3   r   zDoc2Vec.init_weightsT  sE    gt))+++DI$455555r5   c                 f   |j         j        | j         _        |j         j        | j         _        |j         j        | j         _        |j        | _        |j        | _        |j        j        | j        _        |j        j        | j        _        |j        j        | j        _        |                                  dS )a  Copy shareable data structures from another (possibly pre-trained) model.

        This specifically causes some structures to be shared, so is limited to
        structures (like those rleated to the known word/tag vocabularies) that
        won't change during training or thereafter. Beware vocabulary edits/updates
        to either model afterwards: the partial sharing and out-of-band modification
        may leave the other model in a broken state.

        Parameters
        ----------
        other_model : :class:`~gensim.models.doc2vec.Doc2Vec`
            Other model whose internal data structures will be copied over to the current object.

        N)r   key_to_indexindex_to_keyexpandos	cum_tablecorpus_countrr   r   )rC   other_models     r3   
reset_fromzDoc2Vec.reset_fromY  s      +~:*~:&>2$.'4*~:*~:&>2r5   c                    |\  }}| j         j        }| j         j        }|	|         }|
|         }| j        r9t	          | |||||||||t          | j                   ||| j                  \  }}}nl| j        r3t          | |||||||||t          | j                   ||          \  }}}n2t          | |||||||||t          | j                   ||          \  }}}|||fS )N)r1   doctags_lockfr-   r1   r   )
rr   vectorsrv   re   r   lenrl   rm   r   r   )rC   r!   	thread_idr"   cython_vocabthread_private_mem	cur_epochtotal_examplestotal_wordsoffsetsstart_doctagsr   r(   neu1r1   r   r#   examplestally	raw_tallys                       r3   _do_train_epochzDoc2Vec._do_train_epochr  s0    (
d-#$Y/7 	L)=k6<yT4TW-]X\Xg*i *i *i&HeYY ^ 		L)Bk6<yT4TW-]*L *L *L&HeYY
 *<k6<yT4TW-]*L *L *L&HeY
 	))r5   c                     |\  }}d}|D ]} fd|j         D             } j        j        }	 j        j        }
 j        r%|t           |j        ||| j        |	|
          z  }Y j        r |t           |j        |||||	|
          z  }|t           |j        |||||	|
          z  }|                     |          fS )a?  Train model using `job` data.

        Parameters
        ----------
        job : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`
            The corpus chunk to be used for training this batch.
        alpha : float
            Learning rate to be used for training this batch.
        inits : (np.ndarray, np.ndarray)
            Each worker threads private work memory.

        Returns
        -------
        (int, int)
             2-tuple (effective word count after ignoring unknown words and sentence length trimming, total word count).

        r   c                 V    g | ]%}|j         v j                             |          &S rH   )rr   	get_index).0tagrC   s     r3   
<listcomp>z)Doc2Vec._do_train_job.<locals>.<listcomp>  s6    [[[CSWSZN[dg//44[[[r5   )r-   r1   r   r   )rA   rr   r   rv   re   r   r@   rl   rm   r   r   _raw_word_count)rC   jobalphainitsr(   r   r   docdoctag_indexesr1   r   s   `          r3   _do_train_jobzDoc2Vec._do_train_job  s   $ 
d 	 	C[[[[[[[N!W_N G1Mw ,#)^UDdo#1     	1#)^UD$#1   
 *#)^UD$#1    d**3////r5   r`         ?c                    ||t          d          ||t          d          |1t          j                            |          st          d|z            |'t	          |t
                    st          d|z            |(|                     || j                  \  }}||d<   ||d<    t          t          |           j
        d	|||||||||	|
|d| dS )
a  Update the model's neural weights.

        To support linear learning-rate decay from (initial) `alpha` to `min_alpha`, and accurate
        progress-percentage logging, either `total_examples` (count of documents) or `total_words` (count of
        raw words in documents) **MUST** be provided. If `documents` is the same corpus
        that was provided to :meth:`~gensim.models.word2vec.Word2Vec.build_vocab` earlier,
        you can simply use `total_examples=self.corpus_count`.

        To avoid common mistakes around the model's ability to do multiple training passes itself, an
        explicit `epochs` argument **MUST** be provided. In the common and recommended case
        where :meth:`~gensim.models.word2vec.Word2Vec.train` is only called once,
        you can set `epochs=self.iter`.

        Parameters
        ----------
        corpus_iterable : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional
            Can be simply a list of elements, but for larger corpora,consider an iterable that streams
            the documents directly from disk/network. If you don't supply `documents` (or `corpus_file`), the model is
            left uninitialized -- use if you plan to initialize it in some other way.
        corpus_file : str, optional
            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
            You may use this argument instead of `documents` to get performance boost. Only one of `documents` or
            `corpus_file` arguments need to be passed (not both of them). Documents' tags are assigned automatically
            and are equal to line number, as in :class:`~gensim.models.doc2vec.TaggedLineDocument`.
        total_examples : int, optional
            Count of documents.
        total_words : int, optional
            Count of raw words in documents.
        epochs : int, optional
            Number of iterations (epochs) over the corpus.
        start_alpha : float, optional
            Initial learning rate. If supplied, replaces the starting `alpha` from the constructor,
            for this one call to `train`.
            Use only if making multiple calls to `train`, when you want to manage the alpha learning-rate yourself
            (not recommended).
        end_alpha : float, optional
            Final learning rate. Drops linearly from `start_alpha`.
            If supplied, this replaces the final `min_alpha` from the constructor, for this one call to
            :meth:`~gensim.models.doc2vec.Doc2Vec.train`.
            Use only if making multiple calls to :meth:`~gensim.models.doc2vec.Doc2Vec.train`, when you want to manage
            the alpha learning-rate yourself (not recommended).
        word_count : int, optional
            Count of words already trained. Set this to 0 for the usual
            case of training on all words in documents.
        queue_factor : int, optional
            Multiplier for size of queue (number of workers * queue_factor).
        report_delay : float, optional
            Seconds to wait before reporting progress.
        callbacks : :obj: `list` of :obj: `~gensim.models.callbacks.CallbackAny2Vec`, optional
            List of callbacks that need to be executed/run at specific stages during training.

        NzCEither one of corpus_file or corpus_iterable value must be providedzJBoth corpus_file and corpus_iterable must not be provided at the same timezDParameter corpus_file must be a valid path to a file, got %r insteadzEcorpus_iterable must be an iterable of TaggedDocument, got %r insteadr   r   )r   r!   r   r   ri   start_alpha	end_alpharM   queue_factorreport_delayrg   rH   )	TypeErrorospathisfile
isinstancer   -_get_offsets_and_start_doctags_for_corpusfileworkersrw   rZ   train)rC   r   r!   r   r   ri   r   r   rM   r   r   rg   r   r   r   r>   s                  r3   r   zDoc2Vec.train  sI   t  	c? 	cabbb 	j 	jhiii 	r27>>++F+F 	rbeppqqq 	wz/8/T/T 	wcfuuvvv 	4%)%W%WXceieq%r%r"G] 'F9&3F?#"gt" 	a+){{iT^%LI		a 	a Z`		a 	a 	a 	a 	ar5   c                 F   t           j                            |          fdt                    D             }g }g }t	          j        |d          5 }d}d}t          |          D ]\  }	}
|t          |          k    r n|t          |
          z   }|t          |          k    rZ||         |k     rN|                    |           |                    |	           |dz  }|t          |          k    r||         |k     N|}ddd           n# 1 swxY w Y   ||fS )a  Get offset and initial document tag in a corpus_file for each worker.

        Firstly, approximate offsets are calculated based on number of workers and corpus_file size.
        Secondly, for each approximate offset we find the maximum offset which points to the beginning of line and
        less than approximate offset.

        Parameters
        ----------
        corpus_file : str
            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
        workers : int
            Number of workers.

        Returns
        -------
        list of int, list of int
            Lists with offsets and document tags with length = number of workers.
        c                 :    g | ]}t          z  |z            S rH   )rU   )r   icorpus_file_sizer   s     r3   r   zIDoc2Vec._get_offsets_and_start_doctags_for_corpusfile.<locals>.<listcomp>  s,    WWW1#.'9A=>>WWWr5   rb)moder   r\   N)	r   r   getsizeranger   open	enumerater   append)clsr!   r   approx_offsetsr   r   fincurr_offset_idxprev_fileposline_nolinecurr_fileposr   s     `         @r3   r   z5Doc2Vec._get_offsets_and_start_doctags_for_corpusfile
  s   ( 7??;77WWWWWgWWWZ$/// 	,3OL!*3 , ,"c.&9&99 E+c$ii7%^)<)<< )P_A`coAo )NN<000!((111#q(O	 &^)<)<< )P_A`coAo )  ,	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	," %%s   B0DDDc                 4    t          d |D                       S )a%  Get the number of words in a given job.

        Parameters
        ----------
        job : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`
            Corpus chunk.

        Returns
        -------
        int
            Number of raw words in the corpus chunk.

        c              3   >   K   | ]}t          |j                  V  d S rO   )r   r@   )r   sentences     r3   	<genexpr>z*Doc2Vec._raw_word_count.<locals>.<genexpr>D  s,      ;;83x~&&;;;;;;r5   )sum)rC   r   s     r3   r   zDoc2Vec._raw_word_count6  s!     ;;s;;;;;;r5   c                 `    dt          | j                  z  dt          | j                  z  z   S )zGet estimated memory for tag lookup, 0 if using pure int tags.

        Returns
        -------
        int
            The estimated RAM required to look up a tag in bytes.

        <      )r   rr   rB   s    r3   estimated_lookup_memoryzDoc2Vec.estimated_lookup_memoryF  s)     CLL 3TW#555r5   c                     t          |t                    rt          d          |p| j        }|p| j        }|p| j        }t          | j        j        d	                    |                    }|
                    d| j        j                  }t          j        dt                    }dg}t          | j        t                    }| j        s t#          j        | j        t                    }	||z
  t'          |dz
  d          z  }
t)          |          D ]a}| j        rt+          | ||||dd||	  	         n:| j        rt/          | |||||	dd||
  
         nt1          | |||||	dd||
  
         ||
z  }b|d         S )	a  Infer a vector for given post-bulk training document.

        Notes
        -----
        Subsequent calls to this function may infer different representations for the same document.
        For a more stable representation, increase the number of epochs to assert a stricter convergence.

        Parameters
        ----------
        doc_words : list of str
            A document for which the vector representation will be inferred.
        alpha : float, optional
            The initial learning rate. If unspecified, value from model initialization will be reused.
        min_alpha : float, optional
            Learning rate will linearly drop to `min_alpha` over all inference epochs. If unspecified,
            value from model initialization will be reused.
        epochs : int, optional
            Number of times to train the new document. Larger values take more time, but may improve
            quality and run-to-run stability of inferred vectors. If unspecified, the `epochs` value
            from model initialization will be reused.

        Returns
        -------
        np.ndarray
            The inferred paragraph vector for the new document.

        zVParameter doc_words of infer_vector() must be a list of strings (not a single string). )seed_stringr\   rb   r   F)r/   r0   r1   r   )r   strr   r   	min_alphari   r   rr   rd   joinreshapers   rt   ru   r   ro   re   r   zeros_alignedmaxr   r   rm   r   r   )rC   	doc_wordsr   r   ri   r1   r   r   r(   r   alpha_deltar   s               r3   infer_vectorzDoc2Vec.infer_vectorQ  s   8 i%% 	vtuuu#/	&4;1$'2ESVS[S[\eSfSfggg'//473FGG...T%T222w 	H)$*:$GGGDy(C
A,>,>>v 	! 	!Aw #)^UD %E.hu      	()^UD$ %E.hu    
 ")^UD$ %E.hu    [ EEa  r5   c                      t          |t          t          t          f          r#| j        vr j        |         S  j        |         S t           fd|D                       S )au  Get the vector representation of (possibly multi-term) tag.

        Parameters
        ----------
        tag : {str, int, list of str, list of int}
            The tag (or tags) to be looked up in the model.

        Returns
        -------
        np.ndarray
            The vector representations of each tag as a matrix (will be 1D if `tag` was a single tag)

        c                      g | ]
}|         S rH   rH   )r   r   rC   s     r3   r   z'Doc2Vec.__getitem__.<locals>.<listcomp>  s    ,,,1tAw,,,r5   )r   r   rU   r   r   rr   r
   )rC   r   s   ` r3   __getitem__zDoc2Vec.__getitem__  sh     cCg/00 	 $'! $ws|#73<,,,,,,,---r5   c                    g }| j         r|                    d| j         z             | j        r3| j        r|                    d           ne|                    d           nO| j        r|                    d           n2| j        r|                    d           n|                    d           |                    d| j        j        z             | j        r|                    d| j        z             | j	        r|                    d	           | j        r| j        r$| j        r|                    d
| j
        z             | j        dk    r|                    d| j        z             | j        dk    r|                    d| j        z             | j        dk    r|                    d| j        z             | j        j        dd                    |          dS )zAbbreviated name reflecting major configuration parameters.

        Returns
        -------
        str
            Human readable representation of the models internal state.

        z"%s"zdbow+wr   zdm/czdm/mzdm/szd%dzn%dhszw%dr\   zmc%dr   zs%gzt%dr<   ,r=   )r}   r   re   rl   rm   rk   rr   rd   negativer   rh   	min_countsampler   r>   r?   r   )rC   segmentss     r3   rD   zDoc2Vec.__str__  s    < 	3OOFT\12227 	, ())))'''' ~ ,''''> ,OOF++++OOF+++ 33444= 	3OOEDM12227 	"OOD!!!w 	147 	1t 	1OOEDK/000>A 	5OOFT^3444;? 	1OOEDK/000<! 	2OOEDL0111>222CHHX4F4F4F4FGGr5   F*dt_c           	          d}|rH|r)t          | j                  t          | j                  z   }| j                            ||||           |r-d}d}	|rd}d}	| j                            ||||||	d           dS dS )a&  Store the input-hidden weight matrix in the same format used by the original C word2vec-tool.

        Parameters
        ----------
        fname : str
            The file path used to save the vectors in.
        doctag_vec : bool, optional
            Indicates whether to store document vectors.
        word_vec : bool, optional
            Indicates whether to store word vectors.
        prefix : str, optional
            Uniquely identifies doctags from word vocab, and avoids collision in case of repeated string in doctag
            and word vocab.
        fvocab : str, optional
            Optional file path used to save the vocabulary.
        binary : bool, optional
            If True, the data will be saved in binary word2vec format, otherwise - will be saved in plain text.

        NTFrK   )prefixfvocabbinarywrite_headerr   	sort_attr)r   r   rr   save_word2vec_format)
rC   fname
doctag_vecword_vecr   r   r   	total_vecr   r   s
             r3   r   zDoc2Vec.save_word2vec_format  s    ( 	 	K 8LL3tw<<7	G((	JJJ 
	'LF $G((fVF)&% ) ' ' ' ' '
	' 
	'r5   zGensim 4.0.0 implemented internal optimizations that make calls to init_sims() unnecessary. init_sims() is now obsoleted and will be completely removed in future versions. See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4c                 <    | j                             |           dS )a  
        Precompute L2-normalized vectors. Obsoleted.

        If you need a single unit-normalized vector for some key, call
        :meth:`~gensim.models.keyedvectors.KeyedVectors.get_vector` instead:
        ``doc2vec_model.dv.get_vector(key, norm=True)``.

        To refresh norms after you performed some atypical out-of-band vector tampering,
        call `:meth:`~gensim.models.keyedvectors.KeyedVectors.fill_norms()` instead.

        Parameters
        ----------
        replace : bool
            If True, forget the original trained vectors and only keep the normalized ones.
            You lose information if you do this.

        )replaceN)rr   	init_sims)rC   r  s     r3   r  zDoc2Vec.init_sims  s#    . 	'*****r5   c                     	  t          t          |           j        |ddi|S # t          $ r!}t                              d           |d}~ww xY w)a  Load a previously saved :class:`~gensim.models.doc2vec.Doc2Vec` model.

        Parameters
        ----------
        fname : str
            Path to the saved file.
        *args : object
            Additional arguments, see `~gensim.models.word2vec.Word2Vec.load`.
        **kwargs : object
            Additional arguments, see `~gensim.models.word2vec.Word2Vec.load`.

        See Also
        --------
        :meth:`~gensim.models.doc2vec.Doc2Vec.save`
            Save :class:`~gensim.models.doc2vec.Doc2Vec` model.

        Returns
        -------
        :class:`~gensim.models.doc2vec.Doc2Vec`
            Loaded model.

        rethrowTzModel load error. Was model saved using code from an older Gensim version? Try loading older model using gensim-3.8.3, then re-saving, to restore compatibility with current code.N)rw   rZ   loadAttributeErrorrp   error)r   argsr   aer>   s       r3   r
  zDoc2Vec.load  sp    0	+5#&&+TJ4J6JJJ 	 	 	LL34 4 4 H	s   !% 
AAAc                     |pi }|                                  |d<   t          | j                  | j        z  t	          t
                    j        z  |d<   t          t          |           	                    ||          S )a  Estimate required memory for a model using current settings.

        Parameters
        ----------
        vocab_size : int, optional
            Number of raw words in the vocabulary.
        report : dict of (str, int), optional
            A dictionary from string representations of the **specific** model's memory consuming members
            to their size in bytes.

        Returns
        -------
        dict of (str, int), optional
            A dictionary from string representations of the model's memory consuming members to their size in bytes.
            Includes members from the base classes as well as weights and tag lookup memory estimation specific to the
            class.

        doctag_lookupdoctag_syn0)report)
r   r   rr   rd   r   ru   itemsizerw   rZ   estimate_memory)rC   
vocab_sizer  r>   s      r3   r  zDoc2Vec.estimate_memory1  so    & 2"&">">"@"@ #DGt/? ?%++BV V}Wd##33Jv3NNNr5   '  c                     |                      ||||          \  }}	|	| _        || _         | j        d|||d|}
|                     |
d                   |
d<   |                     |           dS )	ao	  Build vocabulary from a sequence of documents (can be a once-only generator stream).

        Parameters
        ----------
        documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional
            Can be simply a list of :class:`~gensim.models.doc2vec.TaggedDocument` elements, but for larger corpora,
            consider an iterable that streams the documents directly from disk/network.
            See :class:`~gensim.models.doc2vec.TaggedBrownCorpus` or :class:`~gensim.models.doc2vec.TaggedLineDocument`
        corpus_file : str, optional
            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
            You may use this argument instead of `documents` to get performance boost. Only one of `documents` or
            `corpus_file` arguments need to be passed (not both of them). Documents' tags are assigned automatically
            and are equal to a line number, as in :class:`~gensim.models.doc2vec.TaggedLineDocument`.
        update : bool
            If true, the new words in `documents` will be added to model's vocab.
        progress_per : int
            Indicates how many words to process before showing/updating the progress.
        keep_raw_vocab : bool
            If not true, delete the raw vocabulary after the scaling is done and free up RAM.
        trim_rule : function, optional
            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
            be trimmed away, or handled using the default (discard if word count < min_count).
            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
            or a callable that accepts parameters (word, count, min_count) and returns either
            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
            The rule, if given, is only used to prune vocabulary during current method call and is not stored as part
            of the model.

            The input parameters are of the following types:
                * `word` (str) - the word we are examining
                * `count` (int) - the word's frequency count in the corpus
                * `min_count` (int) - the minimum count threshold.

        **kwargs
            Additional key word arguments passed to the internal vocabulary construction.

        )r   r!   progress_perr~   )updatekeep_raw_vocabr~   num_retained_wordsr  memoryr  NrH   )
scan_vocabr   corpus_total_wordsprepare_vocabr  prepare_weights)rC   r   r!   r  r  r  r~   r   r   r   report_valuess              r3   build_vocabzDoc2Vec.build_vocabI  s    R %)OO+% %4 %
 %
!\ )"-**w&clwwpvww"&"6"6-PdBe"6"f"fhF+++++r5   c                    t                               d           |}t                               dt          |          t          |                                                     |pd| _        || _        |                     |||          }|                     |d                   |d<   | 	                    |           d	S )
a  Build vocabulary from a dictionary of word frequencies.

        Build model vocabulary from a passed dictionary that contains a (word -> word count) mapping.
        Words must be of type unicode strings.

        Parameters
        ----------
        word_freq : dict of (str, int)
            Word <-> count mapping.
        keep_raw_vocab : bool, optional
            If not true, delete the raw vocabulary after the scaling is done and free up RAM.
        corpus_count : int, optional
            Even if no corpus is provided, this argument can set corpus_count explicitly.
        trim_rule : function, optional
            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
            be trimmed away, or handled using the default (discard if word count < min_count).
            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
            or a callable that accepts parameters (word, count, min_count) and returns either
            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
            The rule, if given, is only used to prune vocabulary during
            :meth:`~gensim.models.doc2vec.Doc2Vec.build_vocab` and is not stored as part of the model.

            The input parameters are of the following types:
                * `word` (str) - the word we are examining
                * `count` (int) - the word's frequency count in the corpus
                * `min_count` (int) - the minimum count threshold.

        update : bool, optional
            If true, the new provided words in `word_freq` dict will be added to model's vocab.

        z$processing provided word frequenciesz<collected %i different raw words, with total frequency of %ir   )r  r~   r  r  r  r  r  N)
rp   rq   r   r   valuesr   	raw_vocabr!  r  r"  )rC   	word_freqr  r   r~   r  r'  r#  s           r3   build_vocab_from_freqzDoc2Vec.build_vocab_from_freq}  s    @ 	:;;; 	J	NNC	 0 0 2 233	
 	
 	
 )-A" **.T]fl*mm"&"6"6-PdBe"6"f"fhF+++++r5   c                    d}d}d}t                      dz
  }d}d}	t          t                    }
d}i }g }t          |          D ]\  }}|	sLt	          |j        t                    r-t                              dt          |j                             |	dz  }	||z  dk    r`||z
  t                      |z
  z  }t          
                    d|||t          |
          t          |                     t                      }|}t          |j                  }|j        D ]}t	          |t          t          f          rt          ||          }/||v r-||         xj        dz  c_        ||         xj        |z  c_        `t#          t          |          |d          ||<   |                    |           |j        D ]}|
|xx         dz  cc<   |t          |j                  z  }| j        r4t          |
          | j        k    rt)          j        |
||           |dz  }|dz   }t          |          |k    r)t                              d	t          |          |           ||k    rFt                              d
||||z
  ||z
  | j        z  t/          t0                    j        z             |dk    rF|D ] }||         j        |z   dz   ||         _        !t7          t9          d|dz                       |z   }|| j        _        |                                D ][\  }}|j        | j        j         |<   | j        !                    |d|j                   | j        !                    |d|j                   \|
| _"        ||fS )Nr   r   r\   gh㈵>ziEach 'words' should be a list of words (usually unicode strings). First 'words' here is instead plain %s.zQPROGRESS: at example #%i, processed %i words (%i words/s), %i word types, %i tags)rL   rM   rK   )r~   z*More unique tags (%i) than documents (%i).zHighest int doctag (%i) larger than count of documents (%i). This means at least %i excess, unused slots (%i bytes) will be allocated for vectors.rM   rK   )#r   r   rU   r   r   r@   r   rp   warningtyperq   r   rA   r   r   rK   rM   rJ   r   max_vocab_sizer   prune_vocabrd   r   ru   r  rL   listr   rr   r   itemsr   set_vecattrr'  )rC   r   r  r~   document_nor   
min_reduceinterval_startinterval_countchecked_string_typesvocab
max_rawintdoctags_lookupdoctags_listdocumentinterval_ratedocument_lengthr   wordr   keytdts                          r3   _scan_vocabzDoc2Vec._scan_vocab  s   
&72 C  
%.%?%? %	  %	 !K' *hnc22 NNBX^,,  
 %)$\)Q. -!,~!=-//TbBb cgmSZZ\IZIZ   "/!,!(.11O} 
1 
1cC?33 1!$Z!5!5JJn, 1&s+55:55&s+66/I666.43|;L;LYhtu.v.v.vs+$++C0000  ! !dq 3x~...K"  s5zzD4G'G  !%yIIIIa
"Q||+ 	jNNG\IZIZ\hiii$ 	NN]L*|*Cl*d.>>tAUU	   ? 	I# W W,:3,?,E
,RUV,Vs#))aa 8 899LHL+#))++ 	> 	>EAr&(hDG #G<???G;====L((r5   順 c                    t                               d           |t          |          }|                     |||          \  }}t                               dt	          | j                  t	          | j                  ||           ||fS )aO  Create the model's vocabulary: a mapping from unique words in the corpus to their frequency count.

        Parameters
        ----------
        documents : iterable of :class:`~gensim.models.doc2vec.TaggedDocument`, optional
            The tagged documents used to create the vocabulary. Their tags can be either str tokens or ints (faster).
        corpus_file : str, optional
            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
            You may use this argument instead of `documents` to get performance boost. Only one of `documents` or
            `corpus_file` arguments need to be passed (not both of them).
        progress_per : int
            Progress will be logged every `progress_per` documents.
        trim_rule : function, optional
            Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary,
            be trimmed away, or handled using the default (discard if word count < min_count).
            Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`),
            or a callable that accepts parameters (word, count, min_count) and returns either
            :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`.
            The rule, if given, is only used to prune vocabulary during
            :meth:`~gensim.models.doc2vec.Doc2Vec.build_vocab` and is not stored as part of the model.

            The input parameters are of the following types:
                * `word` (str) - the word we are examining
                * `count` (int) - the word's frequency count in the corpus
                * `min_count` (int) - the minimum count threshold.

        Returns
        -------
        (int, int)
            Tuple of `(total words in the corpus, number of documents)`.

        z%collecting all words and their countsNzTcollected %i word types and %i unique tags from a corpus of %i examples and %i words)rp   rq   TaggedLineDocumentrB  r   r'  rr   )rC   r   r!   r  r~   r   r   s          r3   r  zDoc2Vec.scan_vocab  s    B 	;<<< 	>0==O$($4$4_lT]$^$^!\bTW|[	
 	
 	

 L((r5   c                     |                      ||||          }|                      ||||          }t          j        t          j        |          t          j        |                    S )a  Compute cosine similarity between two post-bulk out of training documents.

        Parameters
        ----------
        model : :class:`~gensim.models.doc2vec.Doc2Vec`
            An instance of a trained `Doc2Vec` model.
        doc_words1 : list of str
            Input document.
        doc_words2 : list of str
            Input document.
        alpha : float, optional
            The initial learning rate.
        min_alpha : float, optional
            Learning rate will linearly drop to `min_alpha` as training progresses.
        epochs : int, optional
            Number of epoch to train the new document.

        Returns
        -------
        float
            The cosine similarity between `doc_words1` and `doc_words2`.

        )r   r   r   ri   )r   rs   dotr   unitvec)rC   
doc_words1
doc_words2r   r   ri   d1d2s           r3   similarity_unseen_docszDoc2Vec.similarity_unseen_docs'  sf    0 5I^dee5I^deevh&r**H,<R,@,@AAAr5   )NNr[   Nr\   r   r   r\   NNNNrH   r]   r^   T)NNNN)NNNNNNNr   r`   r   rH   )NNN)FTr   NF)F)NN)NNFr  FN)FNNF)NNrC  N)!r?   rE   rF   rx   rW   r{   r   r   r   rX   r   r   r   r   r   r   classmethodr   r   r   r   r   rD   r   r  r
  r  r$  r)  rB  r  rM  __classcell__)r>   s   @r3   rZ   rZ      s       qr^`04T
 T
 T
 T
 T
 T
l   X   X Z?@@  A@ X ^Z?@@  A@ ^  
6 6 6 6 6
  6 LP* * * *:'0 '0 '0T X\15BDPa Pa Pa Pa Pa Pad )& )& [)&V< < < 	6 	6 	6@! @! @! @!D. . .('H 'H 'HR%' %' %' %'N Z	^ 
+ + + 
+(     [@O O O O O O2 V[,02, 2, 2, 2,h0, 0, 0, 0,dH) H) H)T,) ,) ,) ,)\B B B B B B B Br5   rZ   c                       e Zd ZdZdS )Doc2VecVocabCObsolete class retained for now as load-compatibility state captureNr?   rE   rF   rG   rH   r5   r3   rQ  rQ  D          MMMMr5   rQ  c                       e Zd ZdZdS )Doc2VecTrainablesrR  NrS  rH   r5   r3   rV  rV  H  rT  r5   rV  c                       e Zd Zd Zd ZdS )TaggedBrownCorpusc                     || _         dS )zReader for the `Brown corpus (part of NLTK data) <http://www.nltk.org/book/ch02.html#tab-brown-sources>`_.

        Parameters
        ----------
        dirname : str
            Path to folder with Brown corpus.

        N)dirname)rC   rZ  s     r3   rx   zTaggedBrownCorpus.__init__M  s     r5   c           	   #     K   t          j        | j                  D ]}t           j                            | j        |          }t           j                            |          sGt          j        |d          5 }t          |          D ]^\  }}t          j	        |          }d |
                                D             }d |D             }|sFt          ||d|g          V  _	 ddd           n# 1 swxY w Y   dS )zIterate through the corpus.

        Yields
        ------
        :class:`~gensim.models.doc2vec.TaggedDocument`
            Document from `source`.

        r   c                     g | ]=}t          |                    d                     dk    (|                    d           >S )/r`   )r   split)r   r@  s     r3   r   z.TaggedBrownCorpus.__iter__.<locals>.<listcomp>j  s@    !_!_!_1AGGTWLLHYHY]^H^!_!''#,,!_!_!_r5   c                     g | ]B\  }}|d d                                          !|                                d|d d         CS )Nr`   r]  )isalphalower)r   tokenr   s      r3   r   z.TaggedBrownCorpus.__iter__.<locals>.<listcomp>l  sU    rrrJE3`cdfefdf`g`o`o`q`qrs2A2ww?rrrr5   _SENT_N)r   listdirrZ  r   r   r   r   r   r   
to_unicoder^  r:   )rC   r  r   item_nor   
token_tagsr@   s          r3   __iter__zTaggedBrownCorpus.__iter__X  sk      Z-- 	S 	SEGLLu55E7>>%(( E4(( 
SC%.s^^ 	S 	SMGT +D11D "`!_

!_!_!_JrrR\rrrE  ! (1P0QRRRRRR	S
S 
S 
S 
S 
S 
S 
S 
S 
S 
S 
S 
S 
S 
S 
S		S 	Ss   8A/C44C8	;C8	Nr?   rE   rF   rx   rh  rH   r5   r3   rX  rX  L  s7        	 	 	S S S S Sr5   rX  c                       e Zd Zd Zd ZdS )rE  c                     || _         dS )aB  Iterate over a file that contains documents:
        one line = :class:`~gensim.models.doc2vec.TaggedDocument` object.

        Words are expected to be already preprocessed and separated by whitespace. Document tags are constructed
        automatically from the document line number (each document gets a unique integer tag).

        Parameters
        ----------
        source : string or a file-like object
            Path to the file on disk, or an already-open file object (must support `seek(0)`).

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>> from gensim.models.doc2vec import TaggedLineDocument
            >>>
            >>> for document in TaggedLineDocument(datapath("head500.noblanks.cor")):
            ...     pass

        N)source)rC   rl  s     r3   rx   zTaggedLineDocument.__init__s  s    . r5   c              #     K   	 | j                             d           t          | j                   D ]<\  }}t          t	          j        |                                          |g          V  =dS # t          $ r t	          j        | j         d          5 }t          |          D ]<\  }}t          t	          j        |                                          |g          V  =	 ddd           Y dS # 1 swxY w Y   Y dS w xY w)zIterate through the lines in the source.

        Yields
        ------
        :class:`~gensim.models.doc2vec.TaggedDocument`
            Document from `source` specified in the constructor.

        r   r   N)	rl  seekr   r:   r   re  r^  r  r   )rC   rf  r   r   s       r3   rh  zTaggedLineDocument.__iter__  s     
	T KQ!*4;!7!7 P P$U%5d%;%;%A%A%C%CgYOOOOOOP P 	T 	T 	TDK.. T#%.s^^ T TMGT()9$)?)?)E)E)G)G'SSSSSSTT T T T T T T T T T T T T T T T T T T T	Ts7   A+A1 1$D AC1#D 1C5	5D 8C5	9D ?D Nri  rH   r5   r3   rE  rE  r  s7          2T T T T Tr5   rE  )NNFTTTNN)NNTTTNN)5rG   loggingr   collectionsr   r   collections.abcr   timeitr   dataclassesr   numpyr   r	   ru   r
   r   r   rs   gensimr   r   gensim.utilsr   gensim.modelsr   r   gensim.models.keyedvectorsr   r   	getLoggerr?   rp   gensim.models.doc2vec_innerr   r   r   ImportError	NO_CYTHON gensim.models.doc2vec_corpusfiler   r   r   r   r:   rJ   rZ   SaveLoadrQ  rV  rX  rE  rH   r5   r3   <module>r     s  4 4l  				 / / / / / / / / $ $ $ $ $ $             ! ! ! ! ! ! @ @ @ @ @ @ @ @ @ @ @ @ @ @     " " " " " " " " # # # # # # 0 0 0 0 0 0 0 0 M M M M M M M M		8	$	$lllllllllll   
/Z              Z Z Z imcg?CZ Z Z Z qufjDHZ Z Z Z gkcg(,Z Z Z Z Z Z!Z.O O O O OZZ 0,?? O O O, ! ! ! ! ! ! ! !*cB cB cB cB cBh cB cB cBLN N N N N5> N N NN N N N N N N N#S #S #S #S #S #S #S #SL-T -T -T -T -T -T -T -T -T -Ts   &
A1 1A>B B.-B.