
    cM                         d Z ddlmZ ddlZ	 ddlZn# e$ r ddlZY nw xY wddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ  ed	          Z G d
 d          ZdS )a  
This module integrates Spotify's `Annoy <https://github.com/spotify/annoy>`_ (Approximate Nearest Neighbors Oh Yeah)
library with Gensim's :class:`~gensim.models.word2vec.Word2Vec`, :class:`~gensim.models.doc2vec.Doc2Vec`,
:class:`~gensim.models.fasttext.FastText` and :class:`~gensim.models.keyedvectors.KeyedVectors` word embeddings.

.. Important::
    To use this module, you must have the ``annoy`` library installed.
    To install it, run ``pip install annoy``.

    )absolute_importN)utils)Doc2Vec)Word2Vec)FastText)KeyedVectorszNAnnoy not installed. To use the Annoy indexer, please run `pip install annoy`.c                   @    e Zd ZdZddZej        fdZd Zd Z	d Z
dS )	AnnoyIndexerac  This class allows the use of `Annoy <https://github.com/spotify/annoy>`_ for fast (approximate)
    vector retrieval in `most_similar()` calls of
    :class:`~gensim.models.word2vec.Word2Vec`, :class:`~gensim.models.doc2vec.Doc2Vec`,
    :class:`~gensim.models.fasttext.FastText` and :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` models.

    Nc                    d| _         d| _        || _        || _        |r|rt	          | j        t
                    r| j        j        }n`t	          | j        t          t          f          r| j        j	        }n2t	          | j        t          f          r| j        }nt          d          |                     |                                |j        |j                   dS dS dS )a  
        Parameters
        ----------
        model : trained model, optional
            Use vectors from this model as the source for the index.
        num_trees : int, optional
            Number of trees for Annoy indexer.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.similarities.annoy import AnnoyIndexer
            >>> from gensim.models import Word2Vec
            >>>
            >>> sentences = [['cute', 'cat', 'say', 'meow'], ['cute', 'dog', 'say', 'woof']]
            >>> model = Word2Vec(sentences, min_count=1, seed=1)
            >>>
            >>> indexer = AnnoyIndexer(model, 2)
            >>> model.most_similar("cat", topn=2, indexer=indexer)
            [('cat', 1.0), ('dog', 0.32011348009109497)]

        NzGOnly a Word2Vec, Doc2Vec, FastText or KeyedVectors instance can be used)indexlabelsmodel	num_trees
isinstancer   dvr   r   wvr   
ValueError_build_from_modelget_normed_vectorsindex_to_keyvector_size)selfr   r   kvs       9lib/python3.11/site-packages/gensim/similarities/annoy.py__init__zAnnoyIndexer.__init__.   s    0 

" 
	]Y 
	]$*g.. lZ]DJ8(<== lZ]DJ88 lZ !jkkk""2#8#8#:#:BOR^\\\\\
	] 
	] 
	] 
	]    c                     | j                             |           | j        j        | j        | j        d}t          j        |dz   d          5 }t          j	        |||           ddd           dS # 1 swxY w Y   dS )a  Save AnnoyIndexer instance to disk.

        Parameters
        ----------
        fname : str
            Path to output. Save will produce 2 files:
            `fname`: Annoy index itself.
            `fname.dict`: Index metadata.
        protocol : int, optional
            Protocol for pickle.

        Notes
        -----
        This method saves **only the index**. The trained model isn't preserved.

        )fr   r   .dictwb)protocolN)
r   saver   r   r   r   r   open_pickledump)r   fnamer!   dfouts        r   r"   zAnnoyIndexer.saveW   s    " 	
*(t~QUQ\]]Z.. 	5$LD84444	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5s   A22A69A6c                 0   |dz   }t           j                            |          rt           j                            |          st          d| d| d          	 ddlm} n# t          $ r t          w xY wt          j	        |d          5 }t          j        |                                          }ddd           n# 1 swxY w Y   |d	         | _         ||d
         d          | _        | j                            |           |d         | _        dS )aa  Load an AnnoyIndexer instance from disk.

        Parameters
        ----------
        fname : str
            The path as previously used by ``save()``.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.similarities.index import AnnoyIndexer
            >>> from gensim.models import Word2Vec
            >>> from tempfile import mkstemp
            >>>
            >>> sentences = [['cute', 'cat', 'say', 'meow'], ['cute', 'dog', 'say', 'woof']]
            >>> model = Word2Vec(sentences, min_count=1, seed=1, epochs=10)
            >>>
            >>> indexer = AnnoyIndexer(model, 2)
            >>> _, temp_fn = mkstemp()
            >>> indexer.save(temp_fn)
            >>>
            >>> new_indexer = AnnoyIndexer()
            >>> new_indexer.load(temp_fn)
            >>> new_indexer.model = model

        r   zCan't find index files 'z' and 'z)' - unable to restore AnnoyIndexer state.r   
AnnoyIndexrbNr   r   angularmetricr   )ospathexistsIOErrorannoyr+   ImportError_NOANNOYr   r#   r$   loadsreadr   r   loadr   )r   r&   
fname_dictr+   r   r'   s         r   r9   zAnnoyIndexer.loadm   s_   8 W_
u%% 	"'..*D*D 	n5nnnnn  	((((((( 	 	 	N	 Z
D)) 	(Qaffhh''A	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(;Z#y999

ks   A" "A4'B??CCc                     	 ddl m} n# t          $ r t          w xY w ||d          }t	          |          D ]\  }}|                    ||           |                    | j                   || _        || _	        d S )Nr   r*   r-   r.   )
r4   r+   r5   r6   	enumerateadd_itembuildr   r   r   )r   vectorsr   num_featuresr+   r   
vector_numvectors           r   r   zAnnoyIndexer._build_from_model   s    	((((((( 	 	 	N	 
<	:::"+G"4"4 	/ 	/JNN:v....DN###
s   	 c                       j                             ||d          \   fdt          t                              D             S )at  Find `num_neighbors` most similar items.

        Parameters
        ----------
        vector : numpy.array
            Vector for word/document.
        num_neighbors : int
            Number of most similar items

        Returns
        -------
        list of (str, float)
            List of most similar items in format [(`item`, `cosine_distance`), ... ]

        T)include_distancesc                 R    g | ]#}j         |                  d |         dz  z
  f$S )      )r   ).0i	distancesidsr   s     r   
<listcomp>z-AnnoyIndexer.most_similar.<locals>.<listcomp>   s7    UUUSV$a)A,*:&:;UUUr   )r   get_nns_by_vectorrangelen)r   rB   num_neighborsrJ   rK   s   `  @@r   most_similarzAnnoyIndexer.most_similar   s]      55MT 6 ; ;Y VUUUUUU3s88__UUUUr   )NN)__name__
__module____qualname____doc__r   r   PICKLE_PROTOCOLr"   r9   r   rQ    r   r   r
   r
   &   s         '] '] '] ']R $)#8 5 5 5 5,+" +" +"Z  V V V V Vr   r
   )rU   
__future__r   r0   cPickler$   r5   picklegensimr   gensim.models.doc2vecr   gensim.models.word2vecr   gensim.models.fasttextr   gensim.modelsr   r6   r
   rW   r   r   <module>r`      s!  	 	 ' & & & & & 				          ) ) ) ) ) ) + + + + + + + + + + + + & & & & & & ;ghhVV VV VV VV VV VV VV VV VV VVs    	