U
    [e`                  3   @   s  d dl Zd dlZd dlmZmZmZmZmZm	Z	m
Z
mZ ejdejdZejdejdZejdejdZeejjZeejjZejdddd	 Zejd
ejejjejjddddejjejjddddgdejjejjejjejjdddd ZejddefddZ ejdddd Z!ejdddd Z"ejdddoddZ#ejddedfddZ$ejddefddZ%ejdddd Z&ejddd d! Z'ejddd"d# Z(ejddd$d% Z)ejd
ejejjejjddddejjejjddddgdejjejjejjejj*ejj*ejjejjd&dd'd( Z+ej,ddd)d* Z-ejddd+d, Z.ejddd-d. Z/ejddd/d0 Z0ejddd1d2 Z1ejddd3d4 Z2ejddd5d6 Z3ejddd7d8 Z4ejddd9d: Z5ejddd;d< Z6ejddd=d> Z7ejd
ejejjejjddddejjejjddddgdejjejjejjejjejjd?dd@dA Z8ejd
dejjejjejjdBddCdD Z9ejd
ejejjejjddddejjejjddddgdejjejjejjdBddEdF Z:ej,dddGdH Z;ejdddIdJ Z<ejdddKdL Z=ej,dddMdN Z>ejdddOdP Z?ejd
ejejjejjddddejjejjddddgdejjejjejjejjejjdQddRdS Z@ejd
ejejjejjddddejjejjddddgdejjejjejjejjejjdQddTdU ZAej,dddVdW ZBe dpdYdZZCejddd[d\ ZDejdd]ed^fd_d`ZEejddedafdbdcZFe ddde ZGe dqdfdgZHe drdhdiZIe djdk ZJeeee!e!e!e"e"e"e"e#e e e$e$e%e'e7e9e?e5e(eDe<e=e@eEeEeHeHeHeHeIeIeFeGeGeJeJeJe&e)e/e.e0e1e2e4e3e6dl2ZKeejLdmeejLdme8e;dme:e;dme8e>dmeAeBdme+e-dmdnZMdS )s    N)allocate_graph_structuresinitialize_graph_structuresinitialize_supplyinitialize_costnetwork_simplex_core
total_costProblemStatussinkhorn_transport_plan   dtype)r
   r
   T)fastmathc                 C   s:   d}t | jd D ]}|| | ||  d 7 }qt|S )z_Standard euclidean distance.

    .. math::
        D(x, y) = \\sqrt{\sum_i (x_i - y_i)^2}
            r   r
   rangeshapenpsqrtxyresulti r   t/mounts/lovelace/software/anaconda3/envs/qiime2-amplicon-2024.2/lib/python3.8/site-packages/pynndescent/distances.py	euclidean   s    r   zf4(f4[::1],f4[::1])   C)readonly)r   diffdimr   )r   localsc                 C   s<   d}| j d }t|D ] }| | ||  }||| 7 }q|S )zVSquared euclidean distance.

    .. math::
        D(x, y) = \sum_i (x_i - y_i)^2
    r   r   r   r   )r   r   r   r    r   r   r   r   r   squared_euclidean'   s    
r#   c                 C   sB   d}t | jd D ]$}|| | ||  d ||  7 }qt|S )zEuclidean distance standardised against a vector of standard
    deviations per coordinate.

    .. math::
        D(x, y) = \sqrt{\sum_i \frac{(x_i - y_i)**2}{v_i}}
    r   r   r
   r   )r   r   sigmar   r   r   r   r   standardised_euclideanF   s    "r%   c                 C   s6   d}t | jd D ]}|t| | ||  7 }q|S )z\Manhattan, taxicab, or l1 distance.

    .. math::
        D(x, y) = \sum_i |x_i - y_i|
    r   r   r   r   r   absr   r   r   r   	manhattanU   s    r(   c                 C   s8   d}t | jd D ] }t|t| | ||  }q|S )zZChebyshev or l-infinity distance.

    .. math::
        D(x, y) = \max_i |x_i - y_i|
    r   r   )r   r   maxr   r'   r   r   r   r   	chebyshevc   s    r*   c                 C   sB   d}t | jd D ]"}|t| | ||  | 7 }q|d|  S )ah  Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    This is a general distance. For p=1 it is equivalent to
    manhattan distance, for p=2 it is Euclidean distance, and
    for p=infinity it is Chebyshev distance. In general it is better
    to use the more specialised functions for those distances.
    r   r         ?r&   )r   r   pr   r   r   r   r   	minkowskiq   s     r-   c                 C   sJ   d}t | jd D ]*}||| t| | ||  |  7 }q|d|  S )aW  A weighted version of Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i w_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    If weights w_i are inverse standard deviations of graph_data in each dimension
    then this represented a standardised Minkowski distance (and is
    equivalent to standardised Euclidean distance for p=1).
    r   r   r+   r&   )r   r   wr,   r   r   r   r   r   weighted_minkowski   s    (r/   c                 C   s   d}t j| jd t jd}t| jd D ]}| | ||  ||< q(t| jd D ]D}d}t| jd D ]}||||f ||  7 }qf||||  7 }qPt |S )Nr   r   r   )r   emptyr   float32r   r   )r   r   vinvr   r   r   tmpjr   r   r   mahalanobis   s    r5   c                 C   sB   d}t | jd D ]}| | || kr|d7 }qt|| jd  S Nr   r   r+   r   r   floatr   r   r   r   hamming   s
    
r9   c                 C   s^   d}t | jd D ]F}t| | t||  }|dkr|t| | ||  | 7 }q|S Nr   r   r&   )r   r   r   r   denominatorr   r   r   canberra   s     r<   c                 C   sl   d}d}t | jd D ]8}|t| | ||  7 }|t| | ||  7 }q|dkrdt|| S dS d S r:   )r   r   r   r'   r8   )r   r   	numeratorr;   r   r   r   r   bray_curtis   s    r>   c                 C   sl   d}d}t | jd D ]4}| | dk}|| dk}||p:|7 }||oF|7 }q|dkrXdS t|| | S d S r:   r7   )r   r   num_non_zero	num_equalr   x_truey_truer   r   r   jaccard   s    rC   )r   r?   r@   rA   rB   r    r   c                 C   sp   d}d}| j d }t|D ]4}| | dk}|| dk}||p>|7 }||oJ|7 }q|dkr\dS t||  S d S r:   )r   r   r   log2)r   r   r?   r@   r    r   rA   rB   r   r   r   alternative_jaccard   s    
rE   c                 C   s   dt d|   S Nr+          @pow)vr   r   r   correct_alternative_jaccard   s    rK   c                 C   sN   d}t | jd D ](}| | dk}|| dk}|||k7 }qt|| jd  S r:   r7   r   r   num_not_equalr   rA   rB   r   r   r   matching  s    rN   c                 C   sl   d}d}t | jd D ]4}| | dk}|| dk}||o:|7 }|||k7 }q|dkrXdS |d| |  S d S Nr   r   rG   r   r   r   r   num_true_truerM   r   rA   rB   r   r   r   dice  s    rS   c                 C   s   d}d}t | jd D ]4}| | dk}|| dk}||o:|7 }|||k7 }q|dkrXdS t|| | jd  || jd   S d S r:   r7   rQ   r   r   r   	kulsinski  s    rT   c                 C   sR   d}t | jd D ](}| | dk}|| dk}|||k7 }qd| | jd |  S rO   rP   rL   r   r   r   rogers_tanimoto0  s    rU   c                 C   s   d}t | jd D ](}| | dk}|| dk}||o6|7 }q|t| dkkrd|t|dkkrddS t| jd | | jd  S d S r:   )r   r   r   sumr8   )r   r   rR   r   rA   rB   r   r   r   
russellrao;  s    $rW   c                 C   sR   d}t | jd D ](}| | dk}|| dk}|||k7 }qd| | jd |  S rO   rP   rL   r   r   r   sokal_michenerI  s    rX   c                 C   sl   d}d}t | jd D ]4}| | dk}|| dk}||o:|7 }|||k7 }q|dkrXdS |d| |  S d S Nr   r         ?rP   rQ   r   r   r   sokal_sneathT  s    r[   c                 C   s   | j d dkrtdtd| d |d   }td| d |d   }t|d t| d t|d  |d   }dt| S )Nr   r
   z6haversine is only defined for 2 dimensional graph_datarZ   r   rG   )r   
ValueErrorr   sinr   cosarcsin)r   r   sin_latsin_longr   r   r   r   	haversined  s    2rb   c           	      C   s   d}d}d}t | jd D ]D}| | dk}|| dk}||o>|7 }||oL| 7 }|| oZ|7 }q| jd | | | }|dks|dkrdS d| | || ||   S d S rO   rP   )	r   r   rR   num_true_falsenum_false_truer   rA   rB   num_false_falser   r   r   yulen  s    
rf   c                 C   s   d}d}d}t | jd D ]8}|| | ||  7 }|| | d 7 }||| d 7 }q|dkrh|dkrhdS |dksx|dkr|dS d|t||   S d S Nr   r   r
   r+   r   )r   r   r   norm_xnorm_yr   r   r   r   cosine  s    rj   )r   rh   ri   r    r   c                 C   s   d}d}d}| j d }t|D ]@}|| | ||  7 }|| | | |  7 }||| ||  7 }q|dkrt|dkrtdS |dks|dkrtS |dkrtS t|| | }t|S d S r:   )r   r   FLOAT32_MAXr   r   rD   r   r   r   rh   ri   r    r   r   r   r   alternative_cosine  s     
rm   )r   r    r   c                 C   sH   d}| j d }t|D ]}|| | ||  7 }q|dkr<dS d| S d S r6   r"   r   r   r   r    r   r   r   r   dot  s    

ro   c                 C   sL   d}| j d }t|D ]}|| | ||  7 }q|dkr<tS t| S d S r:   )r   r   rk   r   rD   rn   r   r   r   alternative_dot  s    
rp   c                 C   s   dt d|   S rF   rH   dr   r   r   correct_alternative_cosine  s    rs   c                 C   s   d}d}d}d}| j d }t|D ]\}| | ||  }||| 7 }|| | ||  7 }|| | | |  7 }||| ||  7 }q"t|}t|}t|| }	|||  }t|td }
t||	 d |
 }|| t|
 d }|| S )Nr   r   
   r
   rG   )r   r   r   r   r'   arccosradiansr]   )r   r   Zd_euc_squaredZd_cosrh   ri   r    r   r   Zmagnitude_differencethetaZsectorZtriangler   r   r   tsss  s&    


rx   c                 C   s   d}d}d}| j d }t|D ]@}|| | ||  7 }|| | | |  7 }||| ||  7 }q|dkrt|dkrtdS |dks|dkrtS |dkrtS |t||  }dt|tj  S d S r6   )r   r   rk   r   r   ru   pirl   r   r   r   true_angular  s     
rz   c                 C   s   dt td|  t j  S rF   )r   ru   rI   ry   rq   r   r   r   true_angular_from_alt_cosine!  s    r{   c           
      C   s   d}d}d}d}d}t | jd D ]}|| | 7 }||| 7 }q"|| jd  }|| jd  }t | jd D ]@}| | | }|| | }	||d 7 }||	d 7 }|||	 7 }qj|dkr|dkrdS |dkrdS d|t||   S d S rg   r   )
r   r   mu_xmu_yrh   ri   dot_productr   	shifted_x	shifted_yr   r   r   correlation&  s*    r   )r   	l1_norm_x	l1_norm_yr    r   c                 C   s   d}d}d}| j d }t|D ]6}|t| | ||  7 }|| | 7 }||| 7 }q|dkrj|dkrjdS |dksz|dkr~dS td|t||   S d S )Nr   r   r+   r   )r   r   r   r   r   r   r   r   r   r    r   r   r   r   	hellingerD  s    
r   c                 C   s   d}d}d}| j d }t|D ]6}|t| | ||  7 }|| | 7 }||| 7 }q|dkrj|dkrjdS |dksz|dkr~tS |dkrtS t|| | }t|S d S r:   )r   r   r   r   rk   rD   r   r   r   r   alternative_hellingerh  s     
r   c                 C   s   t dtd|   S rF   )r   r   rI   rq   r   r   r   correct_alternative_hellinger  s    r   averagec           	      C   sH  t t | }|dkr&|jdd}n|jdd}t j|jt jd}t |j||< |dkrl|d t j	S || }t 
|jt j}|dd  |d d k|dd < | | }|dkr|t j	S t |d	 }t |t t|g|jf}|d
kr
|| t j	S |dkr,||d  d t j	S d|| ||d   d  S )Nordinal	mergesort)kind	quicksortr   r   denser   r)   minrZ   )r   ravelasarrayargsortr0   sizeintparangeastypefloat64onesbool_cumsumnonzeroconcatenatearraylenr   )	amethodarrsorterinvobsr   r   countr   r   r   rankdata  s*     

r   c                 C   s   t | }t |}t||S )N)r   r   )r   r   x_ranky_rankr   r   r   	spearmanr  s    r   )nogili c                 C   s  | dk}|dk}| |  tj}||  tj}| }| }	|| }||	 }||d d f d d |f }
t|jd |jd d\}}}t|| ||j t|
||j	 t
|||}|dkrtdt||||}|tjkrtdn|tjkrtdt|j|j	}|S )Nr   FzDKantorovich distance inputs must be valid probability distributions.z>Optimal transport problem was INFEASIBLE. Please check inputs.z=Optimal transport problem was UNBOUNDED. Please check inputs.)r   r   r   rV   r   r   r   Zsupplyr   costr   r\   r   r   
INFEASIBLEZ	UNBOUNDEDr   flow)r   r   r   max_iterrow_maskcol_maskr   ba_sumb_sumsub_costZnode_arc_dataZspanning_treegraphinit_statusZsolve_statusr   r   r   r   kantorovich  s@      


r   r+   c                 C   s   | dk}|dk}| |  tj}||  tj}| }| }	|| }||	 }||d d f d d |f }
t| ||
|d}|jd }|jd }d}t|D ].}t|D ] }||||f |||f  7 }qq|S )Nr   )r   regularizationr   r   )r   r   r   rV   r	   r   r   )r   r   r   r   r   r   r   r   r   r   r   Ztransport_planZdim_iZdim_jr   r   r4   r   r   r   sinkhorn  s,       

 r   c           
   
   C   s   d}d}d}| j d }t|D ]}|| | 7 }||| 7 }q|t| 7 }|t| 7 }| t | }|t | }d||  }	t|D ]H}|d|| t|| |	|   || t|| |	|     7 }q|S rY   r   r   FLOAT32_EPSr   log)
r   r   r   r   r   r    r   pdf_xpdf_ymr   r   r   jensen_shannon_divergence  s"    
:r   c                 C   s   d}d}t | jd D ]}|| | 7 }||| 7 }q| | }|| }t d|jd D ]4}||  ||d  7  < ||  ||d  7  < qTt|||S )Nr   r   r   )r   r   r-   )r   r   r,   x_sumy_sumr   x_cdfy_cdfr   r   r   wasserstein_1d(  s    r   c                 C   s  d}d}t | jd D ]}|| | 7 }||| 7 }q| | }|| }t d|jd D ]4}||  ||d  7  < ||  ||d  7  < qTt|| | }d}	|dkrt |jd D ]&}|	t|| ||  | | 7 }	q|	d|  S |dkr4t |jd D ]&}|| ||  | }
|	|
|
 7 }	qt|	S |dkrvt |jd D ]$}|	t|| ||  | 7 }	qL|	S tdd S )Nr   r   r   r
   r+   z)Invalid p supplied to Kantorvich distance)r   r   r   medianr'   r   r\   )r   r   r,   r   r   r   r   r   mur   valr   r   r   circular_kantorovich:  s4    $


"r   c           	   	   C   s   d}d}d}| j d }t|D ]}|| | 7 }||| 7 }q|t| 7 }|t| 7 }| t | }|t | }t|D ]D}||| t|| ||   || t|| ||    7 }qt|S r:   r   )	r   r   r   r   r   r    r   r   r   r   r   r   symmetric_kl_divergenced  s     
(r   )2r   l2sqeuclideanr(   taxicabl1r*   	linfinitylinftylinfr-   
seuclideanr%   
wminkowskir/   r5   r<   rj   ro   r   rb   
braycurtisr   rx   rz   r   r   wassersteinr   zwasserstein-1dzkantorovich-1dZkantorovich_1dr   Zcircular_wassersteinr   zjensen-shannonjensen_shannonzsymmetric-klsymmetric_klsymmetric_kullback_lieblerr9   rC   rS   rN   rT   rogerstanimotorW   sokalsneathsokalmichenerrf   )dist
correction)r   r   rj   ro   rz   r   rC   )r
   )r   )r   )r   )Nnumpyr   numbaZpynndescent.optimal_transportr   r   r   r   r   r   r   r	   eyer1   _mock_identityr   
_mock_oneszerosr   Z_dummy_costfinfoepsr   r)   rk   njitr   typesArrayr   uint16r#   r%   r(   r*   r-   r/   r5   r9   r<   r>   rC   uint8rE   	vectorizerK   rN   rS   rT   rU   rW   rX   r[   rb   rf   rj   rm   ro   rp   rs   rx   rz   r{   r   r   r   r   r   r   r   r   r   r   r   r   named_distancesr   fast_distance_alternativesr   r   r   r   <module>   s  (












	




























	




	















#


0

)
A

