
    c                       d Z ddlZddlZddlmZ ddlZddlZddlmZm	Z	 ddl
Zddl
mZmZ ddlmZ 	 ddlmZ ddlm
Z d	Zn# e$ r d
ZY nw xY wddlmZmZ ddlmZ  ej        e          Z G d dej                  Z  G d d          Z! G d de          Z" G d d          Z# G d d          Z$ G d d          Z% G d d          Z& G d d          Z'dS )u  Python implementation of Poincaré Embeddings.

These embeddings are better at capturing latent hierarchical information than traditional Euclidean embeddings.
The method is described in detail in `Maximilian Nickel, Douwe Kiela -
"Poincaré Embeddings for Learning Hierarchical Representations" <https://arxiv.org/abs/1705.08039>`_.

The main use-case is to automatically learn hierarchical representations of nodes from a tree-like structure,
such as a Directed Acyclic Graph (DAG), using a transitive closure of the relations. Representations of nodes in a
symmetric graph can also be learned.

This module allows training Poincaré Embeddings from a training file containing relations of graph in a
csv-like format, or from a Python iterable of relations.


Examples
--------
Initialize and train a model from a list

.. sourcecode:: pycon

    >>> from gensim.models.poincare import PoincareModel
    >>> relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal'), ('gib', 'cat')]
    >>> model = PoincareModel(relations, negative=2)
    >>> model.train(epochs=50)

Initialize and train a model from a file containing one relation per line

.. sourcecode:: pycon

    >>> from gensim.models.poincare import PoincareModel, PoincareRelations
    >>> from gensim.test.utils import datapath
    >>> file_path = datapath('poincare_hypernyms.tsv')
    >>> model = PoincareModel(PoincareRelations(file_path), negative=2)
    >>> model.train(epochs=50)

    N)Integral)defaultdictCounter)randomfloat32)	spearmanr)grad)numpyTF)utilsmatutils)KeyedVectorsc                       e Zd ZdZddddddddd	ej        d
fdZd"dZd Zd Z	d Z
d Zd Zed#d            Zed             Z fdZe fd            Zd"dZd$dZd Zd"dZed             Zd Zd%d Zd%d!Z xZS )&PoincareModela  Train, use and evaluate Poincare Embeddings.

    The model can be stored/loaded via its :meth:`~gensim.models.poincare.PoincareModel.save`
    and :meth:`~gensim.models.poincare.PoincareModel.load` methods, or stored/loaded in the word2vec format
    via `model.kv.save_word2vec_format` and :meth:`~gensim.models.poincare.PoincareKeyedVectors.load_word2vec_format`.

    Notes
    -----
    Training cannot be resumed from a model loaded via `load_word2vec_format`, if you wish to train further,
    use :meth:`~gensim.models.poincare.PoincareModel.save` and :meth:`~gensim.models.poincare.PoincareModel.load`
    methods instead.

    An important attribute (that provides a lot of additional functionality when directly accessed) are the
    keyed vectors:

    self.kv : :class:`~gensim.models.poincare.PoincareKeyedVectors`
        This object essentially contains the mapping between nodes and embeddings, as well the vocabulary of the model
        (set of unique nodes seen by the model). After training, it can be used to perform operations on the vectors
        such as vector lookup, distance and similarity calculations etc.
        See the documentation of its class for usage examples.

    2   g?
      gh㈵>      ?{Gz?)gMbPgMbP?r   c                    || _         t          |d          | _        g | _        t	          t
                    | _        t          g           | _        d| _	        || _
        || _        |	| _        || _        || _        || _        || _        || _        || _        d| _        || _        || _        t-          j        |          | _        |
| _        d| _        |                     |           dS )a{
  Initialize and train a Poincare embedding model from an iterable of relations.

        Parameters
        ----------
        train_data : {iterable of (str, str), :class:`gensim.models.poincare.PoincareRelations`}
            Iterable of relations, e.g. a list of tuples, or a :class:`gensim.models.poincare.PoincareRelations`
            instance streaming from a file. Note that the relations are treated as ordered pairs,
            i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
            the data should contain both relations (a, b) and (b, a).
        size : int, optional
            Number of dimensions of the trained model.
        alpha : float, optional
            Learning rate for training.
        negative : int, optional
            Number of negative samples to use.
        workers : int, optional
            Number of threads to use for training the model.
        epsilon : float, optional
            Constant used for clipping embeddings below a norm of one.
        regularization_coeff : float, optional
            Coefficient used for l2-regularization while training (0 effectively disables regularization).
        burn_in : int, optional
            Number of epochs to use for burn-in initialization (0 means no burn-in).
        burn_in_alpha : float, optional
            Learning rate for burn-in initialization, ignored if `burn_in` is 0.
        init_range : 2-tuple (float, float)
            Range within which the vectors are randomly initialized.
        dtype : numpy.dtype
            The numpy dtype to use for the vectors in the model (numpy.float64, numpy.float32 etc).
            Using lower precision floats may be useful in increasing training speed and reducing memory usage.
        seed : int, optional
            Seed for random to ensure reproducibility.

        Examples
        --------
        Initialize a model from a list:

        .. sourcecode:: pycon

            >>> from gensim.models.poincare import PoincareModel
            >>> relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal'), ('gib', 'cat')]
            >>> model = PoincareModel(relations, negative=2)

        Initialize a model from a file containing one relation per line:

        .. sourcecode:: pycon

            >>> from gensim.models.poincare import PoincareModel, PoincareRelations
            >>> from gensim.test.utils import datapath
            >>> file_path = datapath('poincare_hypernyms.tsv')
            >>> model = PoincareModel(PoincareRelations(file_path), negative=2)

        See :class:`~gensim.models.poincare.PoincareRelations` for more options.

        r   i  FN)
train_dataPoincareKeyedVectorskvall_relationsr   setnode_relationsNegativesBuffer_negatives_buffer_negatives_buffer_sizesizetrain_alphaburn_in_alphaalphanegativeworkersepsilonregularization_coeffburn_in_burn_in_donedtypeseed	np_randomRandomState
_np_random
init_range
_loss_gradbuild_vocab)selfr   r   r"   r#   r$   r%   r&   r'   r!   r.   r)   r*   s                6lib/python3.11/site-packages/gensim/models/poincare.py__init__zPoincareModel.__init__\   s    r %&tQ//)#..!0!4!4&*#	 *
 $8!"
	#/55$$$$$$    Fc           
          t          | j        j                  }t                              d           |D ]I}t          |          dk    rt          dt          |          z            |D ]}|| j        j        v r9| j                            |d| j        	                    |d          dz              It          | j        j                  | j        j        |<   | j        j        
                    |           | j                            |dd           |\  }}| j        j        |         | j        j        |         }	}| j        |                             |	           ||	f}| j        
                    |           Kt                              dt          | j                  t          | j                             t          t          t          | j        j                                      | _        t#          j        t          t          | j        j                            t&                    | _        |                                  |s|                                  dS |                     |           dS )	am  Build the model's vocabulary from known relations.

        Parameters
        ----------
        relations : {iterable of (str, str), :class:`gensim.models.poincare.PoincareRelations`}
            Iterable of relations, e.g. a list of tuples, or a :class:`gensim.models.poincare.PoincareRelations`
            instance streaming from a file. Note that the relations are treated as ordered pairs,
            i.e. a relation (a, b) does not imply the opposite relation (b, a). In case the relations are symmetric,
            the data should contain both relations (a, b) and (b, a).
        update : bool, optional
            If true, only new nodes's embeddings are initialized.
            Use this when the model already has an existing vocabulary and you want to update it.
            If false, all node's embeddings are initialized.
            Use this when you're creating a new vocabulary from scratch.

        Examples
        --------
        Train a model and update vocab for online training:

        .. sourcecode:: pycon

            >>> from gensim.models.poincare import PoincareModel
            >>>
            >>> # train a new model from initial data
            >>> initial_relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal')]
            >>> model = PoincareModel(initial_relations, negative=1)
            >>> model.train(epochs=50)
            >>>
            >>> # online training: update the vocabulary and continue training
            >>> online_relations = [('striped_skunk', 'mammal')]
            >>> model.build_vocab(online_relations, update=True)
            >>> model.train(epochs=50)

        z#loading relations from train data..   z0Relation pair "%s" should have exactly two itemscountr   z-loaded %d relations from train data, %d nodesr)   N)lenr   index_to_keyloggerinfo
ValueErrorreprkey_to_indexset_vecattrget_vecattrappendr   addr   r   rangeindices_setnpfromiterintindices_array_init_node_probabilities_init_embeddings_update_embeddings)
r1   	relationsupdateold_index_to_key_lenrelationitemnode_1node_2node_1_indexnode_2_indexs
             r2   r0   zPoincareModel.build_vocab   sJ   F  #47#7889:::! 	0 	0H8}}! f !SVZ[cVdVd!deee  : :47// :G''gtw7J7J4QX7Y7Y\]7]^^^^14TW5I1J1JDG(.G(//555G''gq9999%NFF)-)=f)EtwG[\bGc,L-11,???$l3H%%h////CSI[E\E\^abfbi^j^jkkkuS)=%>%>??@@[s473G/H/H)I)IQTUUU%%''' 	:!!#######$899999r4   c                     t          | j        j                  | j        f}| j                            | j        d         | j        d         |                              | j                  | j        _	        dS )z7Randomly initialize vectors for the items in the vocab.r   r   N)
r9   r   r:   r   r-   uniformr.   astyper)   vectors)r1   shapes     r2   rK   zPoincareModel._init_embeddings   s^    TW)**DI6/11$/!2DdoVWFXZ_``gghlhrssr4   c                 4   t          | j        j                  |z
  | j        f}| j                            | j        d         | j        d         |                              | j                  }t          j
        | j        j        |g          | j        _        dS )zBRandomly initialize vectors for the items in the additional vocab.r   r   N)r9   r   r:   r   r-   rW   r.   rX   r)   rF   concatenaterY   )r1   rO   rZ   vs       r2   rL   z PoincareModel._update_embeddings   sz    TW)**-AA49MO##DOA$68JERRYYZ^Zdee.$'/1)=>>r4   c                     | j         j        d                             t          j                  }t          j        |          | _        ||                                z  | _        dS )z"Initialize a-priori probabilities.r7   N)	r   expandosrX   rF   float64cumsum_node_counts_cumsumsum_node_probabilities)r1   countss     r2   rJ   z&PoincareModel._init_node_probabilities   sN    !'*11"*==#%9V#4#4 #)FJJLL#8   r4   c                 B   | j                                         | j        k     r_| j        d         }| j                            d|dz   | j                  }t          j        | j        |          }t          |          | _         | j         
                    | j                  S )zGet candidate negatives of size `self.negative` from the negative examples buffer.

        Returns
        -------
        numpy.array
            Array of shape (`self.negative`,) containing indices of negative nodes.

        r   )r   	num_itemsr#   rb   r-   randintr   rF   searchsortedr   	get_items)r1   max_cumsum_valueuniform_numberscumsum_table_indicess       r2   _get_candidate_negativesz&PoincareModel._get_candidate_negatives   s     !++--= 	K  $7;"o55a9IA9MtOjkkO#%?43K_#]#] %45I%J%JD"%//>>>r4   c                    | j         |         }t          | j                  t          |          z
  }|| j        k     r*t	          d| j        || j        j        |         fz            t          t          |                    t          | j                  z  }|dk     r|                                 }t          |          }d}t          |          t          |          k    s||z  rM|dz  }|                                 }t          |          }t          |          t          |          k    H||z  M|dk    rt          
                    d||           npt          j        t          | j        |z
                      }| j        |         }	|	|	                                z  }	| j                            || j        |	d          }t          |          S )am  Get a sample of negatives for the given node.

        Parameters
        ----------
        node_index : int
            Index of the positive node for which negative samples are to be returned.

        Returns
        -------
        numpy.array
            Array of shape (self.negative,) containing indices of negative nodes for the given node index.

        zFCannot sample %d negative nodes from a set of %d negative nodes for %sr   r   z(sampled %d times, positive fraction %.5fF)r   preplace)r   r9   r   r#   r=   r:   floatro   r   r;   debugrF   arraylistrE   rd   rc   r-   choice)
r1   
node_indexr   num_remaining_nodespositive_fractionindicesunique_indicestimes_sampledvalid_negativesprobss
             r2   _sample_negativeszPoincareModel._sample_negatives  s    ,Z8!$'llS-@-@@. 	X 3TW5I*5UVW  
 "#n"5"566TWEt# 	j 3355G \\NMw<<3~#6#66 .N^<[ ."7799!$W w<<3~#6#66 .N^<[ . q  kGXijjj !htD,<~,M'N'NOOO,_=EUYY[[ Eo,,_4=TYch,iiGG}}r4   c           	      2   | d         }| dd         }t           j                            ||z
  d          }t           j                            |          }t           j                            |d          }t          j        dd|dz  d|dz  z
  d|dz  z
  z  z  z  z             }t          j        |           }|t           j                            |d                   dz  z  }	t          j        |d         |                                z             |	z   S )a  Computes loss value.

        Parameters
        ----------
        matrix : numpy.array
            Array containing vectors for u, v and negative samples, of shape (2 + negative_size, dim).
        regularization_coeff : float, optional
            Coefficient to use for l2-regularization

        Returns
        -------
        float
            Computed loss value.

        Warnings
        --------
        Only used for autograd gradients, since autograd requires a specific function signature.

        r   r   Naxisr6   )grad_nplinalgnormarccoshexplogrc   )
matrixr&   vector_u	vectors_veuclidean_distsr   	all_normspoincare_distsexp_negative_distancesregularization_terms
             r2   _loss_fnzPoincareModel._loss_fn=  s   * !9122J	!.--h.B-KK~""8,,N''	'::	  A%1tqy=Qa=O*PQ 
 

 ")n_!=!=2W^5H5HST5V5VZ[5[[2159O9S9S9U9UVWWWZmmmr4   c                    t          | j                  dk    }d|z
  }|rDt          j                            |           }||k     r| S | |z  t          j        |           |z  z
  S t          j                            | d          }||k                                     r| S | ||k    xx         ||||k             z  ddt          j        f         z  cc<   | ||k    xx         t          j        | ||k                       |z  z  cc<   | S )a  Clip vectors to have a norm of less than one.

        Parameters
        ----------
        vectors : numpy.array
            Can be 1-D, or 2-D (in which case the norm for each row is checked).
        epsilon : float
            Parameter for numerical stability, each dimension of the vector is reduced by `epsilon`
            if the norm of the vector is greater than or equal to 1.

        Returns
        -------
        numpy.array
            Array with norms clipped below 1.

        r   r   N)r9   rZ   rF   r   r   signallnewaxis)rY   r%   one_d	thresholdr   normss         r2   _clip_vectorszPoincareModel._clip_vectors`  s'   $ GM""a'K	 	9>>'**Di E~)9)9G)CDDINN7N33E	!&&(( *+++	E%9BT<U0UWXWXWXZ\ZdWd/ee+++*+++rwwu	?Q7R/S/SV]/]]+++r4   c                     d| _         ddg}t          t          |                    dg                     |z             |d<    t	          t
          |           j        |i | dS )a  Save complete model to disk, inherited from :class:`~gensim.utils.SaveLoad`.

        See also
        --------
        :meth:`~gensim.models.poincare.PoincareModel.load`

        Parameters
        ----------
        *args
            Positional arguments passed to :meth:`~gensim.utils.SaveLoad.save`.
        **kwargs
            Keyword arguments passed to :meth:`~gensim.utils.SaveLoad.save`.

        Nrd   rb   ignore)r/   r   rv   getsuperr   save)r1   argskwargsattrs_to_ignore	__class__s       r2   r   zPoincareModel.save  sl     02GHtFJJx$<$<==OPPx'mT""'888888r4   c                 p     t          t          |           j        |i |}|                                 |S )a  Load model from disk, inherited from :class:`~gensim.utils.SaveLoad`.

        See also
        --------
        :meth:`~gensim.models.poincare.PoincareModel.save`

        Parameters
        ----------
        *args
            Positional arguments passed to :meth:`~gensim.utils.SaveLoad.load`.
        **kwargs
            Keyword arguments passed to :meth:`~gensim.utils.SaveLoad.load`.

        Returns
        -------
        :class:`~gensim.models.poincare.PoincareModel`
            The loaded model.

        )r   r   loadrJ   )clsr   r   modelr   s       r2   r   zPoincareModel.load  s=    * /mS)).???&&(((r4   c                 L   t          |          }g g }}t          ||          D ]I\  }}|\  }	}
|                    |	           |                    |
           |                    |           J| j        j        |         }| j        j        |                             |d| j        z   | j        f          }|	                    dd          	                    dd          }t          ||||| j                  }|                                 |r|                     |||           |S )a  Create a training batch and compute gradients and loss for the batch.

        Parameters
        ----------
        relations : list of tuples
            List of tuples of positive examples of the form (node_1_index, node_2_index).
        all_negatives : list of lists
            List of lists of negative samples for each node_1 in the positive examples.
        check_gradients : bool, optional
            Whether to compare the computed gradients to autograd gradients for this batch.

        Returns
        -------
        :class:`~gensim.models.poincare.PoincareBatch`
            Node indices, computed gradients and loss for the batch.

        r   r   r6   )r9   ziprB   extendr   rY   reshaper#   r   swapaxesPoincareBatchr&   compute_all_check_gradients)r1   rM   all_negativescheck_gradients
batch_size	indices_u	indices_vrP   	negativesur]   	vectors_ur   batchs                 r2   _prepare_training_batchz%PoincareModel._prepare_training_batch  s/   $ ^^
!29	#&y-#@#@ 	( 	(HiDAqQQY''''GOI.	GOI.66
ADUW[W`7abb	&&q!,,55a;;	iIy$Jcdd 	C!!)]EBBBr4   :0yE>c           	         t           s6t                              d           t                              d           dS | j        t	          t
          j                  | _        d}t          t          ||                    D ]\  }\  }}|\  }	}
|                     t          j
        | j        j        |	         | j        j        |
g|z            f          | j                  }t          j
        |j        dd|f         |j        dddd|f         f          }t          j        ||z
                                            }||k    r|}t                              d|           ||k     sJ d||fz              dS )ai  Compare computed gradients for batch to autograd gradients.

        Parameters
        ----------
        relations : list of tuples
            List of tuples of positive examples of the form (node_1_index, node_2_index).
        all_negatives : list of lists
            List of lists of negative samples for each node_1 in the positive examples.
        batch : :class:`~gensim.models.poincare.PoincareBatch`
            Batch for which computed gradients are to be checked.
        tol : float, optional
            The maximum error between our computed gradients and the reference ones from autograd.

        z;autograd could not be imported, cannot do gradient checkingz3please install autograd to enable gradient checkingN        zGmax difference between computed gradients and autograd gradients: %.10fzdMax difference between computed gradients and autograd gradients %.10f, greater than tolerance %.10f)AUTOGRAD_PRESENTr;   warningr/   r	   r   r   	enumerater   rF   vstackr   rY   r&   gradients_ugradients_vabsmaxr<   )r1   rM   r   r   tolmax_diffirP   r   r   r]   auto_gradientscomputed_gradientsdiffs                 r2   r   zPoincareModel._check_gradients  s      	NNXYYYNNPQQQF? 	;"=#9::DO(1#i2O2O(P(P 	  	 $A$)DAq!__	47?1-twsY/OPQQSWSln nN!#E,=aaad,CUEVWXWXWXZ[Z[Z[]^W^E_+`!a!a6.+==>>BBDDDh  ]_ghhh#~ 	> 	>+.6_=	> 	> 	> 	> 	>r4   c                 $      fd|D             }|S )a_  Get negative examples for each node.

        Parameters
        ----------
        nodes : iterable of int
            Iterable of node indices for which negative samples are to be returned.

        Returns
        -------
        list of lists
            Each inner list is a list of negative samples for a single node in the input list.

        c                 :    g | ]}                     |          S  )r   .0noder1   s     r2   
<listcomp>z9PoincareModel._sample_negatives_batch.<locals>.<listcomp>  s'    FFFt--d33FFFr4   r   )r1   nodesall_indicess   `  r2   _sample_negatives_batchz%PoincareModel._sample_negatives_batch  s%     GFFFFFFr4   c                     |                      d |D                       }|                     |||          }|                     |           |S )a  Perform training for a single training batch.

        Parameters
        ----------
        relations : list of tuples of (int, int)
            List of tuples of positive examples of the form (node_1_index, node_2_index).
        check_gradients : bool, optional
            Whether to compare the computed gradients to autograd gradients for this batch.

        Returns
        -------
        :class:`~gensim.models.poincare.PoincareBatch`
            The batch that was just trained on, contains computed loss for the batch.

        c              3   &   K   | ]}|d          V  dS )r   Nr   )r   rP   s     r2   	<genexpr>z0PoincareModel._train_on_batch.<locals>.<genexpr>  s&      4[4[XXa[4[4[4[4[4[4[r4   )r   r   _update_vectors_batch)r1   rM   r   r   r   s        r2   _train_on_batchzPoincareModel._train_on_batch  sW      444[4[QZ4[4[4[[[,,YWW""5)))r4   c                 b   t          |          }t          t                    }t          |          D ] \  }}||                             |           !|                                D ]F\  }}|dk    r||         }| |                             d          | |d         <   d| |dd         <   GdS )aA  Handle occurrences of multiple updates to the same node in a batch of vector updates.

        Parameters
        ----------
        vector_updates : numpy.array
            Array with each row containing updates to be performed on a certain node.
        node_indices : list of int
            Node indices on which the above updates are to be performed on.

        Notes
        -----
        Mutates the `vector_updates` array.

        Required because vectors[[2, 1, 2]] += np.array([-0.5, 1.0, 0.5]) performs only the last update
        on the row at index 2.

        r   r   r   rg   N)r   r   rv   r   rB   itemsrc   )vector_updatesnode_indicesre   	node_dictr   rx   r7   	positionss           r2   _handle_duplicatesz PoincareModel._handle_duplicates   s    & &&%%	&|44 	, 	,MAzj!((++++!' 	/ 	/Jz !*-I,:9,E,I,Iq,I,Q,QN9R=)-.N9SbS>**	/ 	/r4   c                 0   |j         |j        }}|j        |j        }}t	          |          }| j        |j        dz  z  dz  |z  j        }|                     ||           | j        j	        |xx         |z  cc<   | 
                    | j        j	        |         | j                  | j        j	        |<   | j        |j        dz  ddt          j        f         z  dz  |z  }|                    dd                              dd          }|                    d| j        z   |z  | j        f          }|                     ||           | j        j	        |xx         |z  cc<   | 
                    | j        j	        |         | j                  | j        j	        |<   dS )a  Update vectors for nodes in the given batch.

        Parameters
        ----------
        batch : :class:`~gensim.models.poincare.PoincareBatch`
            Batch containing computed gradients and node indices of the batch for which updates are to be done.

        r6      Nr   r   )r   r   r   r   r9   r"   Tr   r   rY   r   r%   betarF   r   r   r   r#   r   )	r1   r   grad_ugrad_vr   r   r   	u_updates	v_updatess	            r2   r   z#PoincareModel._update_vectors_batch?  s    *E,=$9	^^
Z5;!#34q86AD		9555	"""i/"""%)%7%7	8RTXT`%a%a	"J%*/111bj=!AAAEN	&&q!,,55a;;	%%DM(9Z'G&STT		9555	"""i/"""%)%7%7	8RTXT`%a%a	"""r4     Nc                    | j         dk    rt          d          t          j        dd          }t                              d| j        | j         t          | j                  || j	        | j
        | j        | j        	  	         | j	        dk    rr| j        skt                              d| j	                   | j        | _
        |                     | j	        |||           d	| _        t                              d
           | j        | _
        t                              d|           |                     ||||           t                              d           t          j        di | dS )a  Train Poincare embeddings using loaded data and model parameters.

        Parameters
        ----------
        epochs : int
            Number of iterations (epochs) over the corpus.
        batch_size : int, optional
            Number of examples to train on in a single batch.

        print_every : int, optional
            Prints progress and average loss after every `print_every` batches.
        check_gradients_every : int or None, optional
            Compares computed gradients and autograd gradients after every `check_gradients_every` batches.
            Useful for debugging, doesn't compare by default.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.models.poincare import PoincareModel
            >>> relations = [('kangaroo', 'marsupial'), ('kangaroo', 'mammal'), ('gib', 'cat')]
            >>> model = PoincareModel(relations, negative=2)
            >>> model.train(epochs=50)

        r   *Multi-threaded version not implemented yetr   )divideinvalidztraining model of size %d with %d workers on %d relations for %d epochs and %d burn-in epochs, using lr=%.5f burn-in lr=%.5f negative=%dr   zDstarting burn-in (%d epochs)----------------------------------------)epochsr   print_everycheck_gradients_everyTzburn-in finishedzEstarting training (%d epochs)----------------------------------------ztraining finishedNr   )r$   NotImplementedErrorrF   seterrr;   r<   r   r9   r   r'   r"   r!   r#   r(   _train_batchwiser    )r1   r   r   r   r   old_settingss         r2   trainzPoincareModel.trainZ  sx   4 <! 	T%&RSSSy(CCC8It|S);%<%<fdlJ*DM		
 	
 	
 <! 	,D$6 	,KK^`d`lmmm+DJ!!|
&; " = = = "&DKK*+++%
[]cdddjk"7 	 	9 	9 	9 	'(((
	!!L!!!!!r4   c           	      B     j         dk    rt          d          t          d|dz             D ]n}t          t          t	           j                                      } j                            |           d}t          j                    }t          t          dt	          |          |          d          D ]\  }	}
|	|z   }t          |          o|	|z  dk    }||
|
|z            } fd|D             }                     ||          }||j        z  }|r||z  }t          j                    |z
  }||z  |z  }t                              d||
|
|z   |fz             t                              d	||z  ||fz             t          j                    }d}pd
S )ak  Train Poincare embeddings using specified parameters.

        Parameters
        ----------
        epochs : int
            Number of iterations (epochs) over the corpus.
        batch_size : int, optional
            Number of examples to train on in a single batch.
        print_every : int, optional
            Prints progress and average loss after every `print_every` batches.
        check_gradients_every : int or None, optional
            Compares computed gradients and autograd gradients after every `check_gradients_every` batches.
            Useful for debugging, doesn't compare by default.

        r   r   r   r   startc                 *    g | ]}j         |         S r   )r   )r   idxr1   s     r2   r   z2PoincareModel._train_batchwise.<locals>.<listcomp>  s!    NNNT/4NNNr4   )r   z2training on epoch %d, examples #%d-#%d, loss: %.2fz5time taken for %d examples: %.2f s, %.2f examples / sN)r$   r   rD   rv   r9   r   r-   shuffletimer   boolr   lossr;   r<   )r1   r   r   r   r   epochr{   avg_loss	last_time	batch_numr   should_printr   batch_indicesrM   result
time_takenspeeds   `                 r2   r   zPoincareModel._train_batchwise  s     <! 	T%&RSSS1fqj)) 	# 	#E5T%7!8!899::GO##G,,,H	I )%3w<<*L*LTU V V V # #	1$-$;<"&'<"="="j9OdCdijBj '!j.(8 9NNNNNNN	--i-YYFK' #+H!%y!8J'*4zAEKKL !Q^X>?@ @ @ KKO&3ZGHI I I !%	I"H%#	# 	#r4   )Fr   )r   )r   r   N)__name__
__module____qualname____doc__rF   r`   r3   r0   rK   rL   rJ   ro   r   staticmethodr   r   r   classmethodr   r   r   r   r   r   r   r   r   __classcell__r   s   @r2   r   r   E   s        , )+#AW[ru4OSUS]deN% N% N% N%`>: >: >: >:@t t t
? ? ?9 9 9? ? ?&+ + +Z  n  n  n \ nD     \ D9 9 9 9 9(     [0# # # #J#> #> #> #>J  "   * / / \/<b b b66" 6" 6" 6"p)# )# )# )# )# )# )# )#r4   r   c                   8    e Zd ZdZd
dZd Zd Zd Zd Zd Z	d	S )r   zCompute Poincare distances, gradients and loss for a training batch.

    Store intermediate state to avoid recomputing multiple times.

    r   c                 d   |j         t          j        ddddf         | _        || _        || _        || _        || _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        dS )ac  
        Initialize instance with sets of vectors for which distances are to be computed.

        Parameters
        ----------
        vectors_u : numpy.array
            Vectors of all nodes `u` in the batch. Expected shape (batch_size, dim).
        vectors_v : numpy.array
            Vectors of all positively related nodes `v` and negatively sampled nodes `v'`,
            for each node `u` in the batch. Expected shape (1 + neg_size, dim, batch_size).
        indices_u : list of int
            List of node indices for each of the vectors in `vectors_u`.
        indices_v : list of lists of int
            Nested list of lists, each of which is a  list of node indices
            for each of the vectors in `vectors_v` for a specific node `u`.
        regularization_coeff : float, optional
            Coefficient to use for l2-regularization

        NF)r   rF   r   r   r   r   r   r&   r   r   norms_unorms_vr"   r   gammar   distance_gradients_ur   distance_gradients_vr   _distances_computed_gradients_computed_distance_gradients_computed_loss_computed)r1   r   r   r   r   r&   s         r2   r3   zPoincareBatch.__init__  s    ( #RZAAA%56"""$8!"#
	
$(!$(!	#( #( ,1)#r4   c                     |                                   |                                  |                                  |                                  dS )z/Convenience method to perform all computations.N)compute_distancescompute_distance_gradientscompute_gradientscompute_lossr1   s    r2   r   zPoincareBatch.compute_all  sP       '')))   r4   c                 f   | j         rdS t          j                            | j        | j        z
  d          }t          j                            | j        d          }t          j                            | j        d          }d|dz  z
  }d|dz  z
  }dd|dz  ||z  z  z  z   }t          j        |          }t          j        |           }|                    d          }	|| _	        || _
        || _        |	| _        || _        || _        || _        || _        || _        || _        d| _         dS )zZCompute and store norms, euclidean distances and poincare distances between input vectors.Nr   r   r6   r   T)r  rF   r   r   r   r   r   r   rc   r   r   r   Zr  r  r  r"   r   )
r1   r   r  r  r"   r   r  r   r   r#  s
             r2   r  zPoincareBatch.compute_distances  s2   # 	F)..$.)Hq.QQ)..a.88)..a.88GqL 7a<A A%%$,7  E**!#!8!8"&&A&...,&<#

	
#'   r4   c                    | j         rdS |                                  |                                  | j        ddt          j        ddf          | j        z  }|| j        z  }|dxx         | j        d         z  cc<   |dxx         | j        dz  | j	        d         z  z  cc<   | j        ddt          j        ddf          | j
        z  }|| j        z  }|                    d          }|| j
        d         z  }t	          j        |                                          rJ t	          j        |                                          rJ || _        || _        d| _         dS )zCCompute and store gradients of loss function for all input vectors.Nr   r6   r   T)r  r  r  r   rF   r   r  r#  r&   r   r  rc   isnananyr   r   )r1   r   r   s      r2   r  zPoincareBatch.compute_gradients  ss   # 	F   ''))) 2111bj!!!3CDDtG``tvA$3A66A$3a7$.:KKK 2111bj!!!3CDDtG``tv!oo1o--t0338K((,,.....8K((,,.....&&#'   r4   c                    | j         rdS |                                  | j        dz  }d| j        | j        z  t          j        | j        dz  dz
            z  z  ddt
          j        ddf         }|| j        z   | j        z  ddt
          j        ddf         }|| j	        z  | j
        z
  }||z  }| j        dk    }|                                rd|                    dd          |<   || _        || j        z   | j        z  ddt
          j        ddf         }|| j
        z  | j	        z
  }||z  }|                                rd|                    dd          |<   || _        d| _         dS )zYCompute and store partial derivatives of poincare distance d(u, v) w.r.t all u and all v.Nr6   r   r   r   T)r  r  r   r"   r   rF   sqrtr  r   r   r   r&  r   r  r  )r1   euclidean_dists_squaredc_u_coeffsr  nan_gradientsv_coeffsr  s           r2   r  z(PoincareBatch.compute_distance_gradients1  s   , 	F   "&"6!";4:	)BGDJ!Oa4G,H,HHI111bjZ[Z[Z[K[\,tz9TZGBJXYXYXYIYZ'$.84>I"
a 	CAB ))!Q//>$8! -ty8DIEqqq"*VWVWVWGWX'$.84>I" 	CAB ))!Q//>$8!,0)))r4   c                     | j         rdS |                                  t          j        | j        d         | j        z                                             | _        d| _         dS )z=Compute and store loss value for the given batch of examples.Nr   T)r  r  rF   r   r   r#  rc   r   r!  s    r2   r   zPoincareBatch.compute_lossO  sa     	F   VD7:TVCDDHHJJJ	"r4   Nr  )
r	  r
  r  r  r3   r   r  r  r  r   r   r4   r2   r   r     s~         
-$ -$ -$ -$^  ( ( (:( ( (41 1 1<# # # # #r4   r   c                        e Zd ZdZef fd	Z fdZed             Zed             Z	d Z
d Zdd	Zd
 Zd Zd ZddZddZd Zd Z xZS )r   a4  Vectors and vocab for the :class:`~gensim.models.poincare.PoincareModel` training class.

    Used to perform operations on the vectors such as vector lookup, distance calculations etc.

    (May be used to save/load final vectors in the plain word2vec format, via the inherited
    methods save_word2vec_format() and load_word2vec_format().)

    Examples
    --------
    .. sourcecode:: pycon

        >>> from gensim.test.utils import datapath
        >>>
        >>> # Read the sample relations file and train the model
        >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
        >>> model = PoincareModel(train_data=relations)
        >>> model.train(epochs=50)
        >>>
        >>> # Query the trained model.
        >>> wv = model.kv.get_vector('kangaroo.n.01')

    c                 l    t          t          |                               |||           d| _        d S )Nr8   r   )r   r   r3   max_distance)r1   vector_sizevector_countr)   r   s       r2   r3   zPoincareKeyedVectors.__init__p  s8    "D))22;TY2ZZZr4   c                      t          t          |           j        |i | t          | d          s!| j                            d          | _        d S d S )NrY   syn0)r   r   _load_specialshasattr__dict__poprY   )r1   r   r   r   s      r2   r6  z#PoincareKeyedVectors._load_specialst  s]    8"D))8$I&IIItY'' 	5=,,V44DLLL	5 	5r4   c                 l    t                               | |t          j        ddf                   d         S )a~  Compute poincare distance between two input vectors. Convenience method over `vector_distance_batch`.

        Parameters
        ----------
        vector_1 : numpy.array
            Input vector.
        vector_2 : numpy.array
            Input vector.

        Returns
        -------
        numpy.float
            Poincare distance between `vector_1` and `vector_2`.

        Nr   )r   vector_distance_batchrF   r   )vector_1vector_2s     r2   vector_distancez$PoincareKeyedVectors.vector_distancez  s2    " $99(HRZYZYZYZ]D[\\]^__r4   c           	      (   t           j                            | |z
  d          }t           j                            |           }t           j                            |d          }t          j        dd|dz  d|dz  z
  d|dz  z
  z  z  z  z             S )a"  Compute poincare distances between one vector and a set of other vectors.

        Parameters
        ----------
        vector_1 : numpy.array
            vector from which Poincare distances are to be computed, expected shape (dim,).
        vectors_all : numpy.array
            for each row in vectors_all, distance from vector_1 is computed, expected shape (num_vectors, dim).

        Returns
        -------
        numpy.array
            Poincare distance between `vector_1` and each row in `vectors_all`, shape (num_vectors,).

        r   r   r6   )rF   r   r   r   )r<  vectors_allr   r   r   s        r2   r;  z*PoincareKeyedVectors.vector_distance_batch  s    " )..K)?a.HHy~~h''INN;QN77	z A%1tqy=Qa=O*PQ 
 
 	
r4   c                    |                      |          }t          j                            | j        d          }||                     |                   }||k    }|                                rdS t          j                            ||          }t          j        	                    |          }| j
        |         S )a  Get the node closest to `node` that is lower in the hierarchy than `node`.

        Parameters
        ----------
        node : {str, int}
            Key for node for which closest child is to be found.

        Returns
        -------
        {str, None}
            Node closest to `node` that is lower in the hierarchy than `node`.
            If there are no nodes lower in the hierarchy, None is returned.

        r   r   Nmask	distancesrF   r   r   rY   	get_indexr   maru   argminr:   r1   r   all_distancesr   	node_normrC  closest_child_indexs          r2   closest_childz"PoincareKeyedVectors.closest_child       t,,INN4<aN88	dnnT223	I%88:: 	4M== ell=99 !455r4   c                    |                      |          }t          j                            | j        d          }||                     |                   }||k    }|                                rdS t          j                            ||          }t          j        	                    |          }| j
        |         S )a  Get the node closest to `node` that is higher in the hierarchy than `node`.

        Parameters
        ----------
        node : {str, int}
            Key for node for which closest parent is to be found.

        Returns
        -------
        {str, None}
            Node closest to `node` that is higher in the hierarchy than `node`.
            If there are no nodes higher in the hierarchy, None is returned.

        r   r   NrB  rD  rI  s          r2   closest_parentz#PoincareKeyedVectors.closest_parent  rN  r4      c                     d}g }|}||k     r;|                     |                     |                     |d         }|dz  }||k     ;|S )a  Get the list of recursively closest children from the given node, up to a max depth of `max_depth`.

        Parameters
        ----------
        node : {str, int}
            Key for node for which descendants are to be found.
        max_depth : int
            Maximum number of descendants to return.

        Returns
        -------
        list of str
            Descendant nodes from the node `node`.

        r   rg   r   )rB   rM  )r1   r   	max_depthdepthdescendantscurrent_nodes         r2   rU  z PoincareKeyedVectors.descendants  sk      i 	t11,??@@@&r?LQJE i 	 r4   c                     g }|}|                      |          }|2|                    |           |                      |d                   }|2|S )a-  Get the list of recursively closest parents from the given node.

        Parameters
        ----------
        node : {str, int}
            Key for node for which ancestors are to be found.

        Returns
        -------
        list of str
            Ancestor nodes of the node `node`.

        Nrg   )rP  rB   )r1   r   	ancestorsrV  ancestors        r2   rX  zPoincareKeyedVectors.ancestors  sf     	&&|44 	:X&&&**9R=99H  	: r4   c                     |                      |          }|                      |          }|                     ||          S )a%  Calculate Poincare distance between vectors for nodes `w1` and `w2`.

        Parameters
        ----------
        w1 : {str, int}
            Key for first node.
        w2 : {str, int}
            Key for second node.

        Returns
        -------
        float
            Poincare distance between the vectors for nodes `w1` and `w2`.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> # What is the distance between the words 'mammal' and 'carnivore'?
            >>> model.kv.distance('mammal.n.01', 'carnivore.n.01')
            2.9742298803339304

        Raises
        ------
        KeyError
            If either of `w1` and `w2` is absent from vocab.

        )
get_vectorr>  )r1   w1w2r<  r=  s        r2   distancezPoincareKeyedVectors.distance  s=    H ??2&&??2&&##Hh777r4   c                 :    dd|                      ||          z   z  S )aS  Compute similarity based on Poincare distance between vectors for nodes `w1` and `w2`.

        Parameters
        ----------
        w1 : {str, int}
            Key for first node.
        w2 : {str, int}
            Key for second node.

        Returns
        -------
        float
            Similarity between the between the vectors for nodes `w1` and `w2` (between 0 and 1).

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> # What is the similarity between the words 'mammal' and 'carnivore'?
            >>> model.kv.similarity('mammal.n.01', 'carnivore.n.01')
            0.25162107631176484

        Raises
        ------
        KeyError
            If either of `w1` and `w2` is absent from vocab.

        r   )r^  )r1   r\  r]  s      r2   
similarityzPoincareKeyedVectors.similarity0  s#    H Ab"---..r4   r   Nc                     t          |t                    r|dk     rg S |s                     |          n% j        d|         }                     ||          t          |t          t
          f          r                     |          nd|st          j                  }nt          j        d|z             } fd|D             }|r
|d|         }|S )a  Find the top-N most similar nodes to the given node or vector, sorted in increasing order of distance.

        Parameters
        ----------
        node_or_vector : {str, int, numpy.array}
            node key or vector for which similar nodes are to be found.
        topn : int or None, optional
            Number of top-N similar nodes to return, when `topn` is int. When `topn` is None,
            then distance for all nodes are returned.
        restrict_vocab : int or None, optional
            Optional integer which limits the range of vectors which are searched for most-similar values.
            For example, restrict_vocab=10000 would only check the first 10000 node vectors in the vocabulary order.
            This may be meaningful if vocabulary is sorted by descending frequency.

        Returns
        --------
        list of (str, float) or numpy.array
            When `topn` is int, a sequence of (node, distance) is returned in increasing order of distance.
            When `topn` is None, then similarities for all words are returned as a one-dimensional numpy array with the
            size of the vocabulary.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> # Which words are most similar to 'kangaroo'?
            >>> model.kv.most_similar('kangaroo.n.01', topn=2)
            [(u'kangaroo.n.01', 0.0), (u'marsupial.n.01', 0.26524229460827725)]

        r   N)topnc                 d    g | ],}r|k    
j         |         t          |                   f-S r   )r:   rs   )r   indexrJ  rx   r1   s     r2   r   z5PoincareKeyedVectors.most_similar.<locals>.<listcomp>  sU     
 
 

?D
?R
u%u]5-A'B'BC
 
 
r4   )	
isinstancer   rE  r:   strrH   rF  r   argsort)	r1   node_or_vectorrb  restrict_vocabnodes_to_useclosest_indicesr  rJ  rx   s	   `      @@r2   most_similarz!PoincareKeyedVectors.most_similarV  s   L dH%% 	$( 	I 	I NN>::MM,_n_=L NN><HHMnsCk22 	77JJJ 	M&.}==OO&.}1t8LLLO
 
 
 
 
 
(
 
 
  	#ETE]Fr4   r   c                      t          |t                    r                     |          }n|}|s j        }n fd|D             } j        |         }                     ||          S )a  Compute Poincare distances from given `node_or_vector` to all nodes in `other_nodes`.
        If `other_nodes` is empty, return distance between `node_or_vector` and all nodes in vocab.

        Parameters
        ----------
        node_or_vector : {str, int, numpy.array}
            Node key or vector from which distances are to be computed.
        other_nodes : {iterable of str, iterable of int, None}, optional
            For each node in `other_nodes` distance from `node_or_vector` is computed.
            If None or empty, distance of `node_or_vector` from all nodes in vocab is computed (including itself).

        Returns
        -------
        numpy.array
            Array containing distances to all nodes in `other_nodes` from input `node_or_vector`,
            in the same order as `other_nodes`.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> # Check the distances between a word and a list of other words.
            >>> model.kv.distances('mammal.n.01', ['carnivore.n.01', 'dog.n.01'])
            array([2.97422988, 2.83007402])

            >>> # Check the distances between a word and every other word in the vocab.
            >>> all_distances = model.kv.distances('mammal.n.01')

        Raises
        ------
        KeyError
            If either `node_or_vector` or any node in `other_nodes` is absent from vocab.

        c                 :    g | ]}                     |          S r   )rF  r   s     r2   r   z2PoincareKeyedVectors.distances.<locals>.<listcomp>  s%    JJJdT^^D11JJJr4   )re  rf  r[  rY   r;  )r1   rh  other_nodesinput_vectorother_vectorsother_indicess   `     r2   rE  zPoincareKeyedVectors.distances  s|    T nc** 	*??>::LL)L 	8 LMMJJJJkJJJM L7M)),FFFr4   c                     t          |t                    r|                     |          }n|}t          j                            |          S )a  Compute absolute position in hierarchy of input node or vector.
        Values range between 0 and 1. A lower value indicates the input node or vector is higher in the hierarchy.

        Parameters
        ----------
        node_or_vector : {str, int, numpy.array}
            Input node key or vector for which position in hierarchy is to be returned.

        Returns
        -------
        float
            Absolute position in the hierarchy of the input vector or node.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> # Get the norm of the embedding of the word `mammal`.
            >>> model.kv.norm('mammal.n.01')
            0.6423008703542398

        Notes
        -----
        The position in hierarchy is based on the norm of the vector for the node.

        )re  rf  r[  rF   r   r   )r1   rh  rp  s      r2   r   zPoincareKeyedVectors.norm  sC    D nc** 	*??>::LL)Ly~~l+++r4   c                 X    |                      |          |                      |          z
  S )ax  Compute relative position in hierarchy of `node_or_vector_1` relative to `node_or_vector_2`.
        A positive value indicates `node_or_vector_1` is higher in the hierarchy than `node_or_vector_2`.

        Parameters
        ----------
        node_or_vector_1 : {str, int, numpy.array}
            Input node key or vector.
        node_or_vector_2 : {str, int, numpy.array}
            Input node key or vector.

        Returns
        -------
        float
            Relative position in hierarchy of `node_or_vector_1` relative to `node_or_vector_2`.

        Examples
        --------
        .. sourcecode:: pycon

            >>> from gensim.test.utils import datapath
            >>>
            >>> # Read the sample relations file and train the model
            >>> relations = PoincareRelations(file_path=datapath('poincare_hypernyms_large.tsv'))
            >>> model = PoincareModel(train_data=relations)
            >>> model.train(epochs=50)
            >>>
            >>> model.kv.difference_in_hierarchy('mammal.n.01', 'dog.n.01')
            0.05382517902410999

            >>> model.kv.difference_in_hierarchy('dog.n.01', 'mammal.n.01')
            -0.05382517902410999

        Notes
        -----
        The returned value can be positive or negative, depending on whether `node_or_vector_1` is higher
        or lower in the hierarchy than `node_or_vector_2`.

        )r   )r1   node_or_vector_1node_or_vector_2s      r2   difference_in_hierarchyz,PoincareKeyedVectors.difference_in_hierarchy  s*    N yy)**TYY7G-H-HHHr4   )rQ  )r   N)r   )r	  r
  r  r  REALr3   r6  r  r>  r;  rM  rP  rU  rX  r^  r`  rl  rE  r   rw  r  r  s   @r2   r   r   Y  sS        , 9=      5 5 5 5 5 ` ` \`$ 
 
 \
26 6 626 6 62   2  ,&8 &8 &8P$/ $/ $/L= = = =~3G 3G 3G 3Gj&, &, &,P'I 'I 'I 'I 'I 'I 'Ir4   r   c                        e Zd ZdZddZd ZdS )PoincareRelationsz:Stream relations for `PoincareModel` from a tsv-like file.utf8	c                 0    || _         || _        || _        dS )a  Initialize instance from file containing a pair of nodes (a relation) per line.

        Parameters
        ----------
        file_path : str
            Path to file containing a pair of nodes (a relation) per line, separated by `delimiter`.
            Since the relations are asymmetric, the order of `u` and `v` nodes in each pair matters.
            To express a "u is v" relation, the lines should take the form `u delimeter v`.
            e.g: `kangaroo	mammal` is a tab-delimited line expressing a "`kangaroo is a mammal`" relation.

            For a full input file example, see `gensim/test/test_data/poincare_hypernyms.tsv
            <https://github.com/RaRe-Technologies/gensim/blob/master/gensim/test/test_data/poincare_hypernyms.tsv>`_.
        encoding : str, optional
            Character encoding of the input file.
        delimiter : str, optional
            Delimiter character for each relation.

        N)	file_pathencoding	delimiter)r1   r~  r  r  s       r2   r3   zPoincareRelations.__init__  s    ( # "r4   c              #   l   K   t          j         j        d          5 }t          j        d         dk     r|}n fd|D             }t          j        | j                  }|D ]7}t          j        d         dk     r fd|D             }t          |          V  8	 ddd           dS # 1 swxY w Y   dS )zStream relations from self.file_path decoded into unicode strings.

        Yields
        -------
        (unicode, unicode)
            Relation from input file.

        rbr      c              3   L   K   | ]}|                     j                  V  d S Ndecoder  )r   liner1   s     r2   r   z-PoincareRelations.__iter__.<locals>.<genexpr>D  s1      IIT]33IIIIIIr4   r  c                 D    g | ]}|                     j                  S r   r  )r   valuer1   s     r2   r   z.PoincareRelations.__iter__.<locals>.<listcomp>I  s'    HHH55<<66HHHr4   N)	r   openr~  sysversion_infocsvreaderr  tuple)r1   file_objlinesr  rows   `    r2   __iter__zPoincareRelations.__iter__7  s"      Z-- 
	!"Q& J IIIIIIIZ@@@F ! !#A&* IHHHHCHHHCCjj    !
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	! 
	!s   A=B))B-0B-N)r{  r|  )r	  r
  r  r  r3   r  r   r4   r2   rz  rz    s=        DD# # # #0! ! ! ! !r4   rz  c                   $    e Zd ZdZd Zd Zd ZdS )r   z#Buffer and return negative samples.c                 "    || _         d| _        dS )zInitialize instance from list or numpy array of samples.

        Parameters
        ----------
        items : list/numpy.array
            List or array containing negative samples.

        r   N)_items_current_index)r1   r   s     r2   r3   zNegativesBuffer.__init__P  s     r4   c                 :    t          | j                  | j        z
  S )zGet the number of items remaining in the buffer.

        Returns
        -------
        int
            Number of items in the buffer that haven't been consumed yet.

        )r9   r  r  r!  s    r2   rh   zNegativesBuffer.num_items\  s     4;$"555r4   c                 X    | j         }||z   }| xj         |z  c_         | j        ||         S )a  Get the next `num_items` from buffer.

        Parameters
        ----------
        num_items : int
            Number of items to fetch.

        Returns
        -------
        numpy.array or list
            Slice containing `num_items` items from the original data.

        Notes
        -----
        No error is raised if less than `num_items` items are remaining,
        simply all the remaining items are returned.

        )r  r  )r1   rh   start_index	end_indexs       r2   rk   zNegativesBuffer.get_itemsg  s<    & ))+	y({;y011r4   N)r	  r
  r  r  r3   rh   rk   r   r4   r2   r   r   M  sG        --
  
  
 	6 	6 	62 2 2 2 2r4   r   c                   >    e Zd ZdZd Zed             ZddZddZdS )ReconstructionEvaluation=Evaluate reconstruction on given network for given embedding.c                    t                      }t          t                     }t          j        |d          5 }t	          j        |d          }|D ]}t          |          dk    s
J d            |                    |d                   }|                    |d                   }	||                             |	           |	                    ||	g           	 ddd           n# 1 swxY w Y   || _
        || _        || _        dS )	aW  Initialize evaluation instance with tsv file containing relation pairs and embedding to be evaluated.

        Parameters
        ----------
        file_path : str
            Path to tsv file containing relation pairs.
        embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors`
            Embedding to be evaluated.

        rr|  r  r6   %Hypernym pair has more than two itemsr   r   N)r   r   r   r  r  r  r9   rF  rC   rN   r   rM   	embedding)
r1   r~  r  r   rM   fr  r  item_1_indexitem_2_indexs
             r2   r3   z!ReconstructionEvaluation.__init__  sA    $$	Z	3'' 	;1ZT222F ; ;3xx1}MM&MMMM(223q6::(223q6::,'++L999lL9::::;	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 
""s   B!C&&C*-C*c                    | |         }t           j                            | d          }d|j        |<   ||ddt           j        f         k                         d          dz   }t          j        |          t          j        t          |                    z   }t          j        dt          |          dz             t          j        |          z  	                                }t          |          |fS )a:  Compute ranks and Average Precision of positive relations.

        Parameters
        ----------
        all_distances : numpy.array of float
            Array of all distances (floats) for a specific item.
        positive_relations : list
            List of indices of positive relations for the item.

        Returns
        -------
        (list of int, float)
            The list contains ranks of positive relations in the same order as `positive_relations`.
            The float is the Average Precision of the ranking, e.g. ([1, 2, 3, 20], 0.610).

        FrB  TNr   r   rF   rG  ru   rC  r   rc   sortaranger9   meanrv   )rJ  positive_relationspositive_relation_distancesnegative_relation_distancesranks	map_ranksavg_precisions          r2   (get_positive_relation_ranks_and_avg_preczAReconstructionEvaluation.get_positive_relation_ranks_and_avg_prec  s    $ '44F&G#&(ekk-ek&L&L#?C#();<,/J111bj=/YY^^de^ffijjGENNRYs5zz%:%::	)As9~~'9::RWY=O=OOUUWWE{{M))r4   Nc                 <    |                      |          \  }}||dS )ar  Evaluate all defined metrics for the reconstruction task.

        Parameters
        ----------
        max_n : int, optional
            Maximum number of positive relations to evaluate, all if `max_n` is None.

        Returns
        -------
        dict of (str, float)
            (metric_name, metric_value) pairs, e.g. {'mean_rank': 50.3, 'MAP': 0.31}.

        	mean_rankMAPevaluate_mean_rank_and_mapr1   max_nr  map_s       r2   evaluatez!ReconstructionEvaluation.evaluate  )     99%@@	4&t444r4   c                    g }g }t          | j        d          D ]\  }}|| j        vrt          | j        |                   }| j        j        |         }| j                            |          }|                     ||          \  }	}
||	z  }|                    |
           |||k    r nt          j
        |          t          j
        |          fS )a;  Evaluate mean rank and MAP for reconstruction.

        Parameters
        ----------
        max_n : int, optional
            Maximum number of positive relations to evaluate, all if `max_n` is None.

        Returns
        -------
        (float, float)
            (mean_rank, MAP), e.g (50.3, 0.31).

        r   r   )r   r   rM   rv   r  r:   rE  r  rB   rF   r  )r1   r  r  avg_precision_scoresr   rQ   item_relations	item_termitem_distancespositive_relation_ranksr  s              r2   r  z3ReconstructionEvaluation.evaluate_mean_rank_and_map  s     ! 1555 	 	GAt4>) !$."677N3D9I!^55i@@N==nn]] 3#],,E ''666 QY wu~~rw';<<<<r4   r  )	r	  r
  r  r  r3   r  r  r  r  r   r4   r2   r  r    sl        GG# # #2 * * \*45 5 5 5"= = = = = =r4   r  c                   >    e Zd ZdZd Zed             ZddZddZdS )LinkPredictionEvaluationr  c                    t                      }t          t                     t          t                     d}||d}|                                D ]\  }}t          j        |d          5 }	t          j        |	d          }
|
D ]}t          |          dk    s
J d            |                    |d                   }|                    |d                   }||         |         	                    |           |
                    ||g           	 d	d	d	           n# 1 swxY w Y   || _        || _        || _        d	S )
a  Initialize evaluation instance with tsv file containing relation pairs and embedding to be evaluated.

        Parameters
        ----------
        train_path : str
            Path to tsv file containing relation pairs used for training.
        test_path : str
            Path to tsv file containing relation pairs to evaluate.
        embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors`
            Embedding to be evaluated.

        )knownunknownr  r|  r  r6   r  r   r   N)r   r   r   r   r  r  r  r9   rF  rC   rN   rM   r  )r1   
train_path	test_pathr  r   rM   
data_filesrelation_type	data_filer  r  r  r  r  s                 r2   r3   z!LinkPredictionEvaluation.__init__  s    )#..;s;K;KLL	)i@@
(2(8(8(:(: 	? 	?$M9Is++ ?qA666! ? ?Cs88q=QQ*QQQQ#,#6#6s1v#>#>L#,#6#6s1v#>#>Lm,\:>>|LLLLL,!=>>>>?? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
""s   +B'DD#	&D#	c                    | |         }t           j                            | d          }d|j        |<   d|j        |<   ||ddt           j        f         k                         d          dz   }t          j        |          t          j        t          |                    z   }t          j        dt          |          dz             t          j        |          z  	                                }t          |          |fS )a  Compute ranks and Average Precision of unknown positive relations.

        Parameters
        ----------
        all_distances : numpy.array of float
            Array of all distances for a specific item.
        unknown_relations : list of int
            List of indices of unknown positive relations.
        known_relations : list of int
            List of indices of known positive relations.

        Returns
        -------
        tuple (list of int, float)
            The list contains ranks of positive relations in the same order as `positive_relations`.
            The float is the Average Precision of the ranking, e.g. ([1, 2, 3, 20], 0.610).

        FrB  TNr   r   r  )rJ  unknown_relationsknown_relationsunknown_relation_distancesr  r  r  r  s           r2   'get_unknown_relation_ranks_and_avg_precz@LinkPredictionEvaluation.get_unknown_relation_ranks_and_avg_prec  s    ( &33D%E"&(ekk-ek&L&L#>B#():;<@#(9,/I!!!RZ-/XX]]cd]eehiiGENNRYs5zz%:%::	)As9~~'9::RWY=O=OOUUWWE{{M))r4   Nc                 <    |                      |          \  }}||dS )as  Evaluate all defined metrics for the link prediction task.

        Parameters
        ----------
        max_n : int, optional
            Maximum number of positive relations to evaluate, all if `max_n` is None.

        Returns
        -------
        dict of (str, float)
            (metric_name, metric_value) pairs, e.g. {'mean_rank': 50.3, 'MAP': 0.31}.

        r  r  r  s       r2   r  z!LinkPredictionEvaluation.evaluate%  r  r4   c                    g }g }t          | j        d          D ]\  }}|| j        d         vrt          | j        d         |                   }t          | j        d         |                   }| j        j        |         }| j                            |          }	|                     |	||          \  }
}||
z  }|                    |           |||k    r nt          j
        |          t          j
        |          fS )aB  Evaluate mean rank and MAP for link prediction.

        Parameters
        ----------
        max_n : int, optional
            Maximum number of positive relations to evaluate, all if `max_n` is None.

        Returns
        -------
        tuple (float, float)
            (mean_rank, MAP), e.g (50.3, 0.31).

        r   r   r  r  )r   r   rM   rv   r  r:   rE  r  rB   rF   r  )r1   r  r  r  r   rQ   r  r  r  r  unknown_relation_ranksr  s               r2   r  z3LinkPredictionEvaluation.evaluate_mean_rank_and_map6  s    ! 1555 	 	GAt4>)44  $T^I%>t%D E E"4>'#:4#@AAO3D9I!^55i@@N<<^M^`opp 2"M++E ''666 QY wu~~rw';<<<<r4   r  )	r	  r
  r  r  r3   r  r  r  r  r   r4   r2   r  r    sl        GG# # #: * * \*:5 5 5 5"= = = = = =r4   r  c                   P    e Zd ZdZd Zd Zed             Zed             Zd Z	dS )LexicalEntailmentEvaluationz;Evaluate reconstruction on given network for any embedding.c                    i }t          j        |d          5 }t          j        |d          }|D ],}|d         |d         }}t	          |d                   |||f<   -	 ddd           n# 1 swxY w Y   || _        d| _        dS )	zInitialize evaluation instance with HyperLex text file containing relation pairs.

        Parameters
        ----------
        filepath : str
            Path to HyperLex text file.

        r   r  WORD1WORD2	AVG_SCORENr   )r   r  r  
DictReaderrs   scoresr"   )r1   filepathexpected_scoresr  r  r  word_1word_2s           r2   r3   z$LexicalEntailmentEvaluation.__init__Y  s     Z#&& 	L!^A555F L L!$Ws7|49#k:J4K4K 011L	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L 	L
 &


s   AA++A/2A/c                 &   	 |                      ||          }|                      ||          }n## t          $ r t          d|d|          w xY wt          j        }d\  }}	|D ])}|D ]$}|                    ||          }
|
|k     r||}	}|
}%*||	J |                    |          |                    |	          }}t          j                            |          t          j                            |          }}dd| j	        ||z
  z  z   z  |z  S )aJ  Compute predicted score - extent to which `term_1` is a type of `term_2`.

        Parameters
        ----------
        embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors`
            Embedding to use for computing predicted score.
        trie : :class:`pygtrie.Trie`
            Trie to use for finding matching vocab terms for input terms.
        term_1 : str
            Input term.
        term_2 : str
            Input term.

        Returns
        -------
        float
            Predicted score (the extent to which `term_1` is a type of `term_2`).

        z#No matching terms found for either z or )NNNrg   r   )
find_matching_termsKeyErrorr=   rF   infr^  r[  r   r   r"   )r1   r  trieterm_1term_2word_1_termsword_2_termsmin_distance
min_term_1
min_term_2r^  r<  r=  norm_1norm_2s                  r2   score_functionz*LexicalEntailmentEvaluation.score_functionk  sS   (	_33D&AAL33D&AALL 	_ 	_ 	_*fffV\V\]^^^	_v!+
J" 	, 	,F& , ,$--ff==l* ,-3V
J#+L	,
 @*@@@&11*==y?S?ST^?_?_(1129>>(3K3KQv778<GGs	   ,/  Ac                 N    |                      d|z            }d |D             }|S )aZ  Find terms in the `trie` beginning with the `word`.

        Parameters
        ----------
        trie : :class:`pygtrie.Trie`
            Trie to use for finding matching terms.
        word : str
            Input word to use for prefix search.

        Returns
        -------
        list of str
            List of matching terms.

        z%s.c                 >    g | ]\  }}d                      |          S ) )join)r   	key_charsr  s      r2   r   zCLexicalEntailmentEvaluation.find_matching_terms.<locals>.<listcomp>  s)    MMM1AE"''),,MMMr4   )r   )r  wordmatchesmatching_termss       r2   r  z/LexicalEntailmentEvaluation.find_matching_terms  s2    " **UT\**MMWMMMr4   c                     	 ddl m} n# t          $ r t          d          w xY w |            }| j        D ]}d||<   |S )a  Create trie with vocab terms of the given embedding to enable quick prefix searches.

        Parameters
        ----------
        embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors`
            Embedding for which trie is to be created.

        Returns
        -------
        :class:`pygtrie.Trie`
            Trie containing vocab terms of the input embedding.

        r   )Triezapygtrie could not be imported, please install pygtrie in order to use LexicalEntailmentEvaluationT)pygtrier  ImportErrorr?   )r  r  
vocab_triekeys       r2   create_vocab_triez-LexicalEntailmentEvaluation.create_vocab_trie  s    	u$$$$$$$ 	u 	u 	usu u u	u TVV
) 	# 	#C"JsOOs   	 #c                    g }g }d}d}|                      |          }| j                                        D ]f\  \  }}}		 |                     ||||          }
n# t          $ r |dz  }Y 3w xY w|dz  }|                    |
           |                    |	           gt                              d|t          | j                  fz             t          ||          }|j
        S )aq  Evaluate spearman scores for lexical entailment for given embedding.

        Parameters
        ----------
        embedding : :class:`~gensim.models.poincare.PoincareKeyedVectors`
            Embedding for which evaluation is to be done.

        Returns
        -------
        float
            Spearman correlation score for the task for input embedding.

        r   r   zskipped pairs: %d out of %d)r  r  r   r  r=   rB   r;   r<   r9   r   correlation)r1   r  predicted_scoresr  skippedr7   r  r  r  expected_scorepredicted_scorespearmans               r2   evaluate_spearmanz-LexicalEntailmentEvaluation.evaluate_spearman  s    ++I66
040A0A0C0C 	3 	3,VVn"&"5"5iVU["\"\   1 QJE##O444"">22221Wc$+>N>N4OOPPP_.>??##s    AA+*A+N)
r	  r
  r  r  r3   r  r  r  r  r	  r   r4   r2   r  r  V  s        EE  $$H $H $HL   \(   \2$ $ $ $ $r4   r  )(r  r  loggingnumbersr   r  r   collectionsr   r   r
   rF   r   r+   r   rx  scipy.statsr   autogradr	   r   r   r  gensimr   r   gensim.models.keyedvectorsr   	getLoggerr	  r;   SaveLoadr   r   r   rz  r   r  r  r  r   r4   r2   <module>r     s  # #J 


        



  , , , , , , , ,     6 6 6 6 6 6 6 6 ! ! ! ! ! !))))))    # " " " " " " " 3 3 3 3 3 3 
	8	$	$v	# v	# v	# v	# v	#EN v	# v	# v	#rX# X# X# X# X# X# X# X#v@I @I @I @I @I< @I @I @IF.! .! .! .! .! .! .! .!b02 02 02 02 02 02 02 02fd= d= d= d= d= d= d= d=Nl= l= l= l= l= l= l= l=^H$ H$ H$ H$ H$ H$ H$ H$ H$ H$s   A AA