U
    Xe                  
   @   s   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	 e
 dd Zej
dddejjejjejjejjdd	d
d Zdd Zdd Zd%ddZdd ZdddejddddfddZdd ZdddejddddfddZdd Zd&d#d$ZdS )'    N)tau_rand_int)tqdmc                 C   s    | dkrdS | dk rdS | S dS )zStandard clamping of a value into a fixed range (in this case -4.0 to
    4.0)

    Parameters
    ----------
    val: float
        The value to be clamped.

    Returns
    -------
    The clamped value, now fixed to be in the range -4.0 to 4.0.
    g      @g      N )valr   r   +lib/python3.8/site-packages/umap/layouts.pyclip   s
    r   zf4(f4[::1],f4[::1])T)resultdiffdimi)fastmathcachelocalsc                 C   s<   d}| j d }t|D ] }| | ||  }||| 7 }q|S )zReduced Euclidean distance.

    Parameters
    ----------
    x: array of shape (embedding_dim,)
    y: array of shape (embedding_dim,)

    Returns
    -------
    The squared euclidean distance between x and y
            r   )shaperange)xyr   r
   r   r	   r   r   r   rdist   s    
r   c           0      C   s(  t |jd D ]}|| |kr|| }|| }| | }|| }t||} |r\dd|t| |   }!|| t| |d  d|t| |   }"|!||  }#|!||  }$|#d|d|!   t||  |"  }%|$d|d|!   t||  |"  }&|| }'|| ||| |  |'  }(|| ||| |  |'  })|| |(|% |)|&   || |  | }*| dkrd| | t| |d  }+|+|t| | d  }+nd}+t|
D ]t},t|+||, ||,   }-|r|-td|* ||, ||,   7 }-||,  |-| 7  < |r||,  |- | 7  < q||  || 7  < t	|||  ||  }.t|.D ]}/t
|| }|| }t||} | dkrd|	 | }+|+d|  |t| | d   }+n||krqNnd}+t|
D ]B},|+dkrt|+||, ||,   }-nd}-||,  |-| 7  < qqN||  |.||  7  < qd S )	Nr         ?   r                    @MbP?)numbapranger   r   pownpexpr   r   intr   )0head_embeddingtail_embeddingheadtail
n_verticesepochs_per_sampleab	rng_stategammar
   
move_otheralphaepochs_per_negative_sampleepoch_of_next_negative_sampleepoch_of_next_samplendensmap_flagdens_phi_sumdens_re_sumdens_re_covdens_re_stddens_re_meandens_lambdadens_Rdens_mudens_mu_totr   jkcurrentotherdist_squaredphiZ	dphi_termZq_jkZq_kjZdrkZdrjZ	re_std_sqZweight_kZweight_jZgrad_cor_coeff
grad_coeffdgrad_dn_neg_samplespr   r   r   '_optimize_layout_euclidean_single_epoch>   s    
&  

 




rF   c                 C   s   | d | d t|jD ]}|| }	|| }
| |	 }||
 }t||}dd|t||   }||	  || 7  < ||
  || 7  < ||	  |7  < ||
  |7  < q d}t|jD ]"}t||| ||   ||< qd S )Nr   r   g:0yE>)	Zfillr   r   sizer   r   r   r   log)r!   r"   r#   r$   r'   r(   Zre_sumZphi_sumr   r;   r<   r=   r>   r?   r@   epsilonr   r   r   -_optimize_layout_euclidean_densmap_epoch_init   s     



rJ   r         @Fc           (      C   s  | j d }|}|| }| }| }tjtd|d}|dkrBi }|dkrNi }|rtjtd|d}t|d d }|d }|d }|d	 }tj|tj	d
}tj|tj	d
}|d } nHd}d}tjdtj	d
}tjdtj	d
}tjdtj	d
}tjdtj	d
}d}!g }"t
|tr|}!t|!}d|kr2| |d< tt|f|D ]4}#|ov|d dkov|#d t| d|d  k}$|$r|| ||||||| tt||  }%t|}&t|||d  }'nd}%d}&d}'|| ||||||||	|
|||||||#|$|||'|%|&|||| |dt|#t|   }|rT|#t|d  dkrTtd|#d|d |!dk	rB|#|!krB|"|   qB|!dk	r|"|   |!dkr| S |"S )a^  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).
    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.
    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.
    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.
    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.
    n_epochs: int, or list of int
        The number of training epochs to use in optimization, or a list of
        epochs at which to save the embedding. In case of a list, the optimization
        will use the maximum number of epochs in the list, and will return a list
        of embedding in the order of increasing epoch, regardless of the order in
        the epoch list.
    n_vertices: int
        The number of vertices (0-simplices) in the dataset.
    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.
    a: float
        Parameter of differentiable approximation of right adjoint functor
    b: float
        Parameter of differentiable approximation of right adjoint functor
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.
    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.
    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.
    parallel: bool (optional, default False)
        Whether to run the computation using numba parallel.
        Running in parallel is non-deterministic, and is not used
        if a random seed has been set, to ensure reproducibility.
    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.
    densmap: bool (optional, default False)
        Whether to use the density-augmented densMAP objective
    densmap_kwds: dict (optional, default None)
        Auxiliary data for densMAP
    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.
    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding
    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   parallelNZmu_sumr   lambdaRZmu)ZdtypeZ	var_shiftr   disableZfracr   
   z	completed z / Zepochs)r   copyr   njitrF   rJ   r   sumZzerosfloat32
isinstancelistmaxr   r   floatZsqrtvarZmeandotr    printappend)(r!   r"   r#   r$   n_epochsr%   r&   r'   r(   r)   r*   initial_alphanegative_sample_raterM   verboseZdensmapZdensmap_kwds	tqdm_kwdsr+   r
   r,   r-   r.   r/   optimize_fnZdens_init_fnr:   r7   r8   r9   r2   r3   Zdens_var_shiftZepochs_listZembedding_listr0   r1   r5   r6   r4   r   r   r   optimize_layout_euclidean   s    R
  





rd   c           "   	   C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||f| \}}|||f| \}}|dkrtd|t|d|   d}nd}d| |d  |d  }t |D ]P}t|||  }||  ||	 7  < |
rt|||  }||  ||	 7  < q||  | | 7  < t|||  ||  } t | D ]}!t|| }|| }|||f| \}}|dkrtd|t|d|   d}n||krq8nd}|d | | |d  }t |D ]*}t|||  }||  ||	 7  < qq8||  | ||  7  < q||fS )Nr   r   r   r   r   ư>)r   r   r   r   r    r   )"r&   r/   r#   r$   r!   r"   output_metricoutput_metric_kwdsr
   r,   r+   r0   r.   r-   r)   r%   r'   r(   r*   r   r;   r<   r=   r>   dist_outputgrad_dist_output_Zrev_grad_dist_outputw_lrA   rB   rC   rD   rE   r   r   r   %_optimize_layout_generic_single_epoch  sb     
 



rm   r   c                 C   s   | j d }|}|| }| }| }tjtdd}|dkr@i }d|krR| |d< tt|f|D ]H}|||||| ||||||||||	||||
 |dt|t|   }qb| S )a	  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   NrP   r   )r   rR   r   rS   rm   r   r   rY   )r!   r"   r#   r$   r^   r%   r&   r'   r(   r)   r*   r_   r`   rg   rh   ra   rb   r+   r
   r,   r-   r.   r/   rc   r0   r   r   r   optimize_layout_generic  sJ    V

ro   c           "      C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||f| \}}|| }d||	|  d   }t |
D ]B}t|||  }||  || 7  < |rz||  | | 7  < qz||  | | 7  < t|||  ||  }t |D ]} t|| }|| }|||f| \}}tt|||  d |	| d  }!| d|! d|! |	|  d   }t |
D ]*}t|||  }||  || 7  < qnq||  |||  7  < qd S )Nr   r   rf   )r   r   r   r    r   r   r   rX   )"r&   r/   r#   r$   r!   r"   rg   rh   weightsigmasr
   r,   r+   r0   r.   r-   r)   r%   rhosr*   r   r;   r<   r=   r>   ri   rj   rl   rA   rB   rC   rD   rE   Zw_hr   r   r   %_optimize_layout_inverse_single_epoch~  sP     
 
&"
rs   c                 C   s   | j d }|}|	| }| }|	 }tjtdd}|dkr@i }d|krR| |d< tt|f|D ]J}||	|||| ||||||||||||||| |dt|t|   }qb| S )a
  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    weight: array of shape (n_1_simplices)
        The membership weights of the 1-simplices.

    sigmas:

    rhos:

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Trn   NrP   r   )r   rR   r   rS   rs   r   r   rY   )r!   r"   r#   r$   rp   rq   rr   r^   r%   r&   r'   r(   r)   r*   r_   r`   rg   rh   ra   rb   r+   r
   r,   r-   r.   r/   rc   r0   r   r   r   optimize_layout_inverse  sL    `

rt   c           (      C   s  t |}|jd d d }d}|D ]}|jd |kr"|jd }q"t|tj}tjt|	d  tj	| t
|D ]t}|D ]h}||| jd k r|| | |kr|| | }|| | }| | | }|| | }t||}|dkr"d| | t||d  }||t|| d  }nd}t
|D ]} t|||  ||    }!t
| |D ]}"||" }#||#  krd  kr|"krXn nh|||"| |f }$|$dkrX|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!qX||   t|!| 7  < |r.t|||  ||    }%t
| |D ]}"||" }#||#  krjd  krj|"kr:n nh|||"| |f }$|$dkr:|%t|tt|"d   |||"| |f  ||  | |# |$| f   8 }%q:||   t|%| 7  < q.|| |  || | 7  < || | dkrFt||| |  || |  }&nd}&t
|&D ]x}'t|	|| jd  }|| | }t||}|dkrd|
 | }|d| |t|| d   }n||krʐqRnd}t
|D ]} |dkrt|||  ||    }!nd}!t
| |D ]}"||" }#||#  kr>d  kr>|"krn nh|||"| |f }$|$dkr|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!q||   t|!| 7  < q֐qR|| |  |&|| |  7  < qqzd S )	Nr   r   r   r   r   r   r   r   )lenr   r   ZarangeastypeZint32ZrandomZseedabsZshuffler   r   r   r   r   r    r   )(head_embeddingstail_embeddingsheadstailsr&   r'   r(   regularisation_weights	relationsr)   r*   lambda_r
   r+   r,   r-   r.   r/   r0   Zn_embeddingsZwindow_sizeZmax_n_edgesZe_p_sZembedding_orderr   mr;   r<   r=   r>   r?   rA   rB   rC   offsetZ
neighbor_mZidentified_indexZother_grad_drD   rE   r   r   r   /_optimize_layout_aligned_euclidean_single_epochT  s    
"

(

(






(

 r   P);?V?{Gzt?c                 C   sJ  | d j d }|}tjjtjjd d d }tjjtjjd d d }tjjtjjd d d }tt|D ]J}|	|| 
tj|  |	|| 
tj |	|| 
tj qrtjtd|d}|d kri }d|kr| |d< tt|f|D ]H}|| |||||	|
|||||||||||| |dt|t|   }q| S )Nr   r   TrL   rP   r   )r   r   typedZListZ
empty_listtypesrU   r   ru   r]   rv   r   rS   r   r   rY   )rx   ry   rz   r{   r^   r&   r|   r}   r)   r'   r(   r*   r~   r_   r`   rM   ra   rb   r+   r
   r,   r-   r.   r/   r   rc   r0   r   r   r   !optimize_layout_aligned_euclidean  s`    
r   )	r   r   rK   FFFNNF)
r   r   r   r   r   rK   TFNF)Znumpyr   r   Zumap.distancesZ	distancesZdistZ
umap.utilsr   Z	tqdm.autor   rS   r   r   rU   Zintpr   rF   rJ   rd   rm   Z	euclideanro   rs   rt   r   r   r   r   r   r   <module>   s   

~,         
 M]
 V
  "          