U
    aac	~                  %   @   s,  d dl mZ d dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZ eejd eejjZeejjZejddd~d
dZejdddd Zejdddd Zejdddd Zejdejejjejjddddejjejjddddgejejdddd Zejejejejjddejejjddfejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjddd ejjejjejjejjejjddddd Z ejdddd Z!ejejej"ejjej"ejjfejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjdddd d! Z#ejejejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjejjd"ddd#d$ Z$e d%d& Z%ejdd'd(d) Z&ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjejjejj'ejjd+d,d-d. Z(e d/d0 Z)e d1d2 Z*e dd4d5Z+e d6d7 Z,e d8d9 Z-ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdd'd:d; Z.e d<d= Z/ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejj'ejj'd>d,d?d@ Z0ej1dd'dAdB Z2e dCdD Z3e dEdF Z4e dGdH Z5e dIdJ Z6e dKdL Z7e dMdN Z8e dOdP Z9e dQdR Z:ejdejjejjejjejj'ejjdSd,dTdU Z;ej1dddVdWdX Z<e dYdZ Z=ejdd[ejjid,d\d] Z>e d^d_ Z?e d`da Z@ejdejjejjejjejj'ejjdbd,dcdd ZAej1dddVdedf ZBe dgdh ZCdidj ZDe eCfdkdlZEe ddmdnZFe dodp ZGe dqdr ZHejddsdtddvdwZIejddsdtddxdyZJe&e&e(e)e)e)e*e*e*e*e+e-e.e,e/e4e3e5e6e7e8e9e:e?eEeEeFeFeFeFe@eGeGeHeHeHdz$ZKd{ZLe(ejMd|e(ejMd|e;e<d|e>e<d|eAeBd|e0e2d|d}ZNdS )    )print_functionN)normtau_rand)kantorovichjensen_shannon_divergencesymmetric_kl_divergenceCT)cacheh㈵>:0yE>c                 C   s$   t | | }|||t |  kS N)npabs)abrtolatoldiff r   h/mounts/lovelace/software/anaconda3/envs/qiime2-2023.2/lib/python3.8/site-packages/pynndescent/sparse.pyisclose   s    r   c                 C   s@   t | }t t jdt jd|dd  |d d kf}|| S )N   dtype)r   sortconcatenateonesbool_)arrauxflagr   r   r   
arr_unique   s    
.r"   c                 C   s:   | j d dkr|S |j d dkr$| S tt| |fS d S Nr   )shaper"   r   r   )ar1ar2r   r   r   	arr_union&   s
    r'   c                 C   s:   t | |f}|  |d d |dd  |d d k S )Nr   r   )r   r   r   )r%   r&   r    r   r   r   arr_intersect2   s    r(   zi4(i4[:],i4[:])r   )readonly)i1i2)localsc           	      C   s   | j d dks|j d dkr dS d}d}| j d d }|j d d }| | }|| }d}||kr|d7 }||k r|d7 }| | }nq||k r|d7 }|| }qqqX||k r||k r|d7 }| | }qX||k r||k r|d7 }|| }qXqqX|S Nr   r   r$   )	r%   r&   r*   r+   Zlimit1Zlimit2j1j2resultr   r   r   fast_intersection_size:   s6    



r2   )
result_indresult_datavalr*   r+   r/   r0   )fastmathr,   r	   c                 C   s  | j d |j d  }tj|tjd}tj|tjd}d}d}d}	|| j d k r(||j d k r(| | }
|| }|
|kr|| ||  }|dkr|
||	< |||	< |	d7 }	|d7 }|d7 }q@|
|k r|| }|dkr|
||	< |||	< |	d7 }	|d7 }q@|| }|dkr|||	< |||	< |	d7 }	|d7 }q@|| j d k rv| | }
|| }|dkrj|
||	< |||	< |	d7 }	|d7 }q(||j d k r|| }|| }|dkr|||	< |||	< |	d7 }	|d7 }qv|d |	 }|d |	 }||fS Nr   r   r   )r$   r   zerosint32float32)ind1data1ind2data2result_sizer3   r4   r*   r+   nnzr/   r0   r5   r   r   r   
sparse_sump   sb     





rA   c                 C   s   t | ||| S r   )rA   )r;   r<   r=   r>   r   r   r   sparse_diff   s    rB   )r5   r*   r+   r/   r0   c                 C   s   t jjt jj}t jjt jj}d}d}|| jd k r||jd k r| | }|| }	||	kr|| ||  }
|
dkr|| ||
 |d7 }|d7 }q,||	k r|d7 }q,|d7 }q,||fS r-   )	numbatypedList
empty_listtypesr9   r:   r$   append)r;   r<   r=   r>   r3   r4   r*   r+   r/   r0   r5   r   r   r   
sparse_mul   s$    




rI   )r1   r5   r*   r+   r/   r0   c                 C   s   | j d }|j d }d}d}d}| | }	|| }
|	|
kr|| ||  }||7 }|d7 }||krd|S | | }	|d7 }||kr|S || }
q0|	|
k r|d7 }||kr|S | | }	q0|d7 }||kr|S || }
q0|S )Nr           r   r.   )r;   r<   r=   r>   dim1dim2r1   r*   r+   r/   r0   r5   r   r   r   sparse_dot_product  s8    




rM   c                 C   s  t | |}tj|jd tjd}tj|jd tjd}d}d}d}	|| jd k r*||jd k r*| | }
|| }|
|kr|| ||  }|dkr|| ||	< || ||	< |	d7 }	|d7 }|d7 }qB|
|k r|| }|dkr|| ||	< |	d7 }	|d7 }qB|| }|dkr || ||	< |	d7 }	|d7 }qB|| jd k rl|| }|dkr`|| ||	< |	d7 }	|d7 }q*||jd k r|| }|dkr|| ||	< |	d7 }	|d7 }ql|d |	 }|d |	 }||fS r7   )r'   r   r8   r$   r:   )r;   r<   r=   r>   r3   Zresult_data1Zresult_data2r*   r+   r@   r/   r0   r5   r   r   r   dense_union>  sV    
 





rN   )r6   c                 C   sD   t | |||\}}d}t|jd D ]}||| d 7 }q$t|S )NrJ   r      )rB   ranger$   r   sqrtr;   r<   r=   r>   _aux_datar1   ir   r   r   sparse_euclideanx  s
    rV   z#f4(i4[::1],f4[::1],i4[::1],f4[::1]))rT   r1   r   dimrU   )r6   r,   c           	      C   sD   t | |||\}}d}t|}t|D ]}||| ||  7 }q&|S NrJ   )rB   lenrP   )	r;   r<   r=   r>   rS   rT   r1   rW   rU   r   r   r   sparse_squared_euclidean  s    rZ   c                 C   s@   t | |||\}}d}t|jd D ]}|t|| 7 }q$|S NrJ   r   rB   rP   r$   r   r   rR   r   r   r   sparse_manhattan  s
    r]   c                 C   sB   t | |||\}}d}t|jd D ]}t|t|| }q$|S r[   )rB   rP   r$   maxr   r   rR   r   r   r   sparse_chebyshev  s
    r_          @c           	      C   sL   t | |||\}}d}t|jd D ]}|t|| | 7 }q$|d|  S )NrJ   r         ?r\   )	r;   r<   r=   r>   prS   rT   r1   rU   r   r   r   sparse_minkowski  s
    rc   c                 C   s$   t | |||d jd }t|| S r#   )rB   r$   float)r;   r<   r=   r>   
n_featuresnum_not_equalr   r   r   sparse_hamming  s    rg   c                 C   s~   t |}t |}t| |||\}}d| t j}t| |||\}}	t |	}	t||	||\}
}d}|D ]}||7 }ql|S )Nra   rJ   )r   r   rA   astyper:   rB   rI   )r;   r<   r=   r>   	abs_data1	abs_data2
denom_inds
denom_data
numer_inds
numer_datarS   val_datar1   r5   r   r   r   sparse_canberra  s    



rp   c           	      C   sv   t | |||\}}t|}|jd dkr.dS t|}|dkrDdS t| |||\}}t|}t|}t|| S Nr   rJ   )rA   r   r   r$   sumrB   rd   )	r;   r<   r=   r>   rS   rl   denominatorrn   	numeratorr   r   r   sparse_bray_curtis  s    



ru   c                 C   sB   t | |}| jd |jd  | }|dkr.dS t|| | S d S rq   r2   r$   rd   r;   r<   r=   r>   	num_equalnum_non_zeror   r   r   sparse_jaccard  s
    
rz   )ry   rx   c                 C   sN   t | |}| jd |jd  | }|dkr.dS |dkr:tS t||  S d S rq   )r2   r$   FLOAT32_MAXr   log2rw   r   r   r   sparse_alternative_jaccard  s    
r}   c                 C   s   dt d|   S )Nra   r`   )pow)vr   r   r   correct_alternative_jaccard  s    r   c                 C   s6   t | |}| jd |jd  | }|| }t|| S r#   rv   r;   r<   r=   r>   re   num_true_truery   rf   r   r   r   sparse_matching  s    
r   c                 C   sJ   t | |}| jd |jd  | }|| }|dkr6dS |d| |  S d S )Nr   rJ   r`   r2   r$   r;   r<   r=   r>   r   ry   rf   r   r   r   sparse_dice#  s    
r   c                 C   sR   t | |}| jd |jd  | }|| }|dkr6dS t|| | ||  S d S rq   rv   r   r   r   r   sparse_kulsinski/  s    
r   c                 C   s:   t | |}| jd |jd  | }|| }d| ||  S Nr   r`   r   r   r   r   r   sparse_rogers_tanimoto=  s    
r   c                 C   sl   | j d |j d kr&t| |kr&dS t| |}|t|dkkrX|t|dkkrXdS t|| | S d S rq   )r$   r   allr2   rr   rd   )r;   r<   r=   r>   re   r   r   r   r   sparse_russellraoF  s    "
$r   c                 C   s:   t | |}| jd |jd  | }|| }d| ||  S r   r   r   r   r   r   sparse_sokal_michenerS  s    
r   c                 C   sJ   t | |}| jd |jd  | }|| }|dkr6dS |d| |  S d S )Nr   rJ   g      ?r   r   r   r   r   sparse_sokal_sneath\  s    
r   c           
      C   st   t | |||\}}d}t|}t|}|D ]}	||	7 }q*|dkrL|dkrLdS |dks\|dkr`dS d|||   S d S NrJ   ra   )rI   r   )
r;   r<   r=   r>   rS   rT   r1   norm1norm2r5   r   r   r   sparse_cosineh  s    
r   )r1   norm_xnorm_yrW   rU   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|||
 7 }q6|dkr\|dkr\dS |dksl|dkrptS |dkr|tS || | }t|S d S rX   )rI   r   rY   rP   r{   r   r|   )r;   r<   r=   r>   rS   rT   r1   r   r   rW   rU   r   r   r   sparse_alternative_cosinez  s    r   )r6   r	   c                 C   s2   t dt| dds| dk rdS dtd|   S d S NrJ   gHz>)r   ra   r`   )r   r   r~   dr   r   r   !sparse_correct_alternative_cosine  s    r   c                 C   s   t | |||}d| S )Nra   )rM   r;   r<   r=   r>   r1   r   r   r   
sparse_dot  s    r   r1   c                 C   s*   t | |||}|dkrtS t| S d S rX   )rM   r{   r   r|   r   r   r   r   sparse_alternative_dot  s    r   c                 C   sT  d}d}d}| j d dkr,|j d dkr,dS | j d dksH|j d dkrLdS t|j d D ]}||| 7 }qZt|j d D ]}||| 7 }qz|| }|| }tj|j d tjd}	tj|j d tjd}
t|j d D ]}|| | |	|< qt|j d D ]}|| | |
|< qtt|	d || j d  |d   }tt|
d ||j d  |d   }t| |	||
\}}t|}|D ]}||7 }q~t| j d D ]$}| | |kr||	| | 8 }qt|j d D ]$}|| |kr||
| | 8 }qt	| |}||| ||j d   7 }|dkr2|dkr2dS |dkr@dS d|||   S d S )NrJ   r   ra   r   rO   )
r$   rP   r   emptyr:   rQ   r   rI   setr'   )r;   r<   r=   r>   re   mu_xmu_ydot_productrU   shifted_data1shifted_data2r   r   dot_prod_indsdot_prod_datacommon_indicesr5   all_indicesr   r   r   sparse_correlation  sT      

r   c                 C   s   t | |||\}}d}t|}t|}t|| }	|D ]}
|t|
7 }q<|dkrd|dkrddS |dkst|dkrxdS ||	krdS td||	  S d S r   )rI   r   rr   rQ   )r;   r<   r=   r>   aux_indsrT   r1   r   r   sqrt_norm_prodr5   r   r   r   sparse_hellinger  s    

r   )r1   	l1_norm_x	l1_norm_yrW   rU   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|t||
 7 }q:|dkrf|dkrfdS |dksv|dkrztS |dkrtS t|| | }t|S d S r[   )rI   r   rr   rY   rP   rQ   r{   r|   )r;   r<   r=   r>   r   rT   r1   r   r   rW   rU   r   r   r   sparse_alternative_hellinger	  s    

r   c                 C   s8   t dt| dds| dk rdS tdtd|   S d S r   )r   r   r   rQ   r~   r   r   r   r   $sparse_correct_alternative_hellinger)  s    r   c                 C   s   t | |k S r   )r   r:   )xyr   r   r   dummy_ground_metric1  s    r   c                    s   t   fdd}|S )a  Generate a "ground_metric" suitable for passing to a ``sparse_kantorovich``
    distance function. This should be a metric that, given indices of the data,
    should produce the ground distance between the corresponding vectors. This
    allows the construction of a cost_matrix or ground_distance_matrix between
    sparse samples on the fly -- without having to compute an all pairs distance.
    This is particularly useful for things like word-mover-distance.

    For example, to create a suitable ground_metric for word-mover distance one
    would use:

    ``wmd_ground_metric = create_ground_metric(word_vectors, cosine)``

    Parameters
    ----------
    ground_vectors: array of shape (n_features, d)
        The set of vectors between which ground_distances are measured. That is,
        there should be a vector for each feature of the space one wishes to compute
        Kantorovich distance over.

    metric: callable (numba jitted)
        The underlying metric used to cpmpute distances between feature vectors.

    Returns
    -------
    ground_metric: callable (numba jitted)
        A ground metric suitable for passing to ``sparse_kantorovich``.
    c                    s    |   | S r   r   )Zindex1index2ground_vectorsmetricr   r   ground_metricS  s    z+create_ground_metric.<locals>.ground_metric)rC   njit)r   r   r   r   r   r   create_ground_metric6  s    r   c                 C   sh   t | jd |jd f}t| jd D ]2}t|jd D ]}|| | || |||f< q:q(t|||S r#   )r   r   r$   rP   r   )r;   r<   r=   r>   r   cost_matrixrU   jr   r   r   sparse_kantorovichZ  s
    r   c                 C   s  d}d}d}d}d}	d}
d}t |}t |}dd }|| jd k rF|	|jd k rF| | }||	 }||kr||||  7 }|
|| | 7 }
|||	 | 7 }||
| |}|}|d7 }|	d7 }	q8||k r
||||  7 }|
|| | 7 }
||
| |}|}|d7 }q8||||  7 }|||	 | 7 }||
| |}|}|	d7 }	q8|| jd k r| | }||||  7 }|
|| | 7 }
||
| |}|}|d7 }qF|	|jd k r||	 }||||  7 }|||	 | 7 }||
| |}|}|	d7 }	qt |d| S )NrJ   r   c                 S   s   t t | |S r   )r   powerr   )r   rb   r   r   r   <lambda>q      z'sparse_wasserstein_1d.<locals>.<lambda>r   ra   )r   rr   r$   r   )r;   r<   r=   r>   rb   r1   Zold_inddeltar*   r+   cdf1cdf2r   r   r   r/   r0   r   r   r   sparse_wasserstein_1de  s^    

 



r   c                 C   s   t | |||\}}t||S r   )rN   r   r;   r<   r=   r>   Zdense_data1Zdense_data2r   r   r    sparse_jensen_shannon_divergence  s    r   c                 C   s   t | |||\}}t||S r   )rN   r   r   r   r   r   sparse_symmetric_kl_divergence  s    r   F)parallelr	   ra   c              	   C   s  t | jd D ]}| |df g}	||df g}
td| jd D ] }| ||f dk r^ qfd}tt|	D ]}|	| }||| ||f  || ||f d   }||| ||f  || ||f d   }||| ||d   }||| ||d   }|||||}|
| tkrn||||f k rnt||k rnd} q<qn|rB|	| ||f  |
|||f  qBt| jd D ]P}|t|	k r|	| | ||f< |
| |||f< nd| ||f< tj	|||f< qtq| |fS )Nr   r   TFr   )
rC   pranger$   rP   rY   FLOAT32_EPSr   rH   r   inf)indices	distancesdata_indicesdata_indptr	data_datadist	rng_stateprune_probabilityrU   new_indicesnew_distancesr   r!   kcZfrom_ind	from_dataZto_indto_datar   r   r   r   	diversify  sD      r   c	                 C   s  | j d d }	t|	D ]~}
|| |
 | |
d   }|| |
 | |
d   }t|}tj|j d tjd}td|j d D ]}|| }t|D ]}|| }|| dkr|| }|| }||| ||d   }||| ||d   }||| ||d   }||| ||d   }|||||}|| tkr||| k rt	||k rd||<  q~qq~t|j d D ],}|| }|| dkrjd|| |
 | < qjqd S )Nr   r   r   )
r$   rC   r   r   argsortr   int8rP   r   r   )graph_indptrgraph_indices
graph_datar   r   r   r   r   r   n_nodesrU   current_indicescurrent_dataorderretainedidxr   r   lrb   q	from_indsr   Zto_indsr   r   r   r   r   diversify_csr  s6    
r   )$	euclideanl2sqeuclidean	manhattanl1taxicab	chebyshevlinflinfty	linfinity	minkowskicanberra
braycurtishammingjaccarddicematching	kulsinskirogerstanimoto
russellraosokalmichenersokalsneathcosinecorrelationr   ZwassersteinZwasserstein_1dzwasserstein-1dkantorovich-1dr   	hellingerzjensen-shannonjensen_shannonzsymmetric-klsymmetric_klZsymmetric_kullback_liebler)r   r   r   r   r   r   r   )r   
correction)r   r   r   dotr  r   )r
   r   )r`   )r   )ra   )ra   )O
__future__r   localenumpyr   rC   pynndescent.utilsr   r   pynndescent.distancesr   r   r   	setlocale
LC_NUMERICfinfor:   epsr   r^   r{   r   r   r"   r'   r(   rG   r9   Arrayuint16r2   TuplerA   rB   ListTyperI   rM   rN   rV   intprZ   r]   r_   rc   rg   rp   ru   rz   r}   	vectorizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sparse_named_distancessparse_need_n_featuresrQ   !sparse_fast_distance_alternativesr   r   r   r   <module>   s.  







)	
?


	
'
9

	
	



	


	













 
	
<



$
?

	 6
 8,

