U
    e@c                     @   s   d!ddZ d"ddZd#ddZd$d	d
Zd%ddZd&ddZd'ddZd(ddZd)ddZd*ddZ	d+ddZ
d,ddZd-ddZdd Zd.dd Zd S )/Nc                 C   sj   ddl }ddl}ddl}|dkr4| }|j|}|j| | || dd}|j|\}}}}t	|S )a  Fetch the unique tags within a context

    Parameters
    ----------
    context : str
        The context to obtain tags from.

    Returns
    -------
    set
        The set of sample identifers within a context.

    Raises
    ------
    ValueError
        If the requested context is not known.

    Redis Command Summary
    ---------------------
    SMEMBERS <context>:samples-represented
        NSMEMBERSsamples-represented
redbiomredbiom._requestsredbiom.util
get_config	_requestsmake_getvalidutilpartition_samples_by_tagsset)contextgetr   configobs_tags r   l/mounts/lovelace/software/anaconda3/envs/qiime2-amplicon-2023.9/lib/python3.8/site-packages/redbiom/fetch.pytags_in_context   s    r   c                 C   sz   ddl }ddl}ddl}|dkr4| }|j|}|j| | || dd}|sn|j|\}}}}t	|S t	|S dS )aK  Fetch samples in a context

    Parameters
    ----------
    context : str
        The context to obtain samples from.
    unambiguous : bool
        If True, return unambiguous identifiers, if false return ambiguated
        identifiers.
    get : a make_get instance, optional
        A constructed get method.

    Returns
    -------
    set
        The set of sample identifers within a context.

    Raises
    ------
    ValueError
        If the requested context is not known.

    Redis Command Summary
    ---------------------
    SMEMBERS <context>:samples-represented
    r   Nr   r   r   )r   Zunambiguousr   r   r   r   r   tagged_cleanr   r   r   samples_in_context(   s    r   c                 C   sV   ddl }ddl}ddl}|dkr4| }|j|}|j| | || dd}t|S )a  Features in a context

    Parameters
    ----------
    context : str
        The context to obtain samples from.
    get : a make_get instance, optional
        A constructed get method.

    Returns
    -------
    set
        The set of features within a context.

    Raises
    ------
    ValueError
        If the requested context is not known.

    Redis Command Summary
    ---------------------
    SMEMBERS <context>:features-represented
    r   Nr   zfeatures-represented)r   r   r   r   r	   r
   r   r   )r   r   r   r   r   r   r   r   features_in_contextV   s    r   TFc           "      C   s  ddl }ddlm} ddl}ddl}ddl}| }	|j|	}
|j	
| \}}}}|| } i }|dk	r|j	|| |
\}}}}|rdd |D }dd |D }qdd | D }ndd | D }|std	g }g }|jjt|d
dd|
dd}|D ]@\} }||  |D ](}|dk	r||}|t| qqt|d }|dd D ],}|dk	sf|sr||}n
||}qR|dk	rt||stdt|t|  |}|t}|D ]"}|| D ]}||| d< qΐq|D ]8}t||D ]&\}}|| D ]}||| |< qqq||j}|dk	rxg }|d D ](}|dd\} }!|d|!| f  qF||d< ||fS )aL  Fetch metadata for the corresponding samples

    Parameters
    ----------
    samples : iterable of str
        The samples to obtain the metadata for.
    common : bool, optional
        If True (default), only the columns of the metadata common across all
        samples is returned. If False, all columns for all samples are
        returned. If value is missing for a given column and sample, None is
        stored in its place in the resulting DataFrame.
    context : str, optional
        If provided, resolve possible ambiguities in the sample identifiers
        relative to a context.
    restrict_to : Iterable of str, optional
        Restrict the retrieval of metadata to a subset of columns. If this
        parameter is specified, it will override the use of `common`.
    tagged : bool, optional
        Retrieve tagged metadata (e.g., preparation information).

    Returns
    -------
    pandas.DataFrame
        A DataFrame indexed by the sample IDs, with the sample metadata
    dict
        ambiguous associations {sample_id: [tagged_sample_ids]}

    Raises
    ------
    KeyError
        If a key in `restrict_to` is not found.

    Redis command summary
    ---------------------
    MGET metadata:categories:<sample_id> ... metadata:categories:<sample_id>
    HMGET metadata:category:<column> <sample_id> ... <sample_id>
    r   Ndefaultdictc                 S   s   i | ]}||gqS r   r   .0Zrbidr   r   r   
<dictcomp>   s      z#sample_metadata.<locals>.<dictcomp>c                 S   s   i | ]
}||qS r   r   r   r   r   r   r      s      c                 S   s"   i | ]\}}|| d dd qS )r      )split)r   kvr   r   r   r      s      c                 S   s   i | ]}||gqS r   r   )r   r"   r   r   r   r      s      z-None of the samples were found in the context
categoriesMGETmetadata   r   buffer_sizer    z+The following columns were not observed: %s	#SampleIDr   z%s.%s)jsoncollectionsr   pandasr   r   r   r	   r
   r   r   resolve_ambiguitiesitems
ValueErrorbufferedlistextendloadsappendr   unionintersectionissubsetKeyErrordictget_sample_values	DataFrameTr!   )"samplescommonr   restrict_toZtaggedr+   r   pdr   r   r   untaggedr   r   	ambig_mapambig_assocZrbid_mapZall_columnsZall_samplesgetterZcolumns_by_sample
column_setZcolumns_to_getcolumnsr&   sampleZsample_ambiguitycategoryvaluemdZnew_idsitagid_r   r   r   sample_metadata}   s|    '


  




rO   c                 C   sb   ddl }ddl}ddl}| }|j|}|j| | |j||d| g}t	| t
|||dS )a  Fetch sample data from an iterable of features.

    Parameters
    ----------
    context : str
        The name of the context to retrieve sample data from.
    features : Iterable of str
        The features of interest.
    exact : bool
        If True, only samples in which all features exist are obtained.
        Otherwise, all samples with at least one feature are obtained.
    skip_taxonomy : bool, optional
        If true, do not resolve taxonomy. This greatly reduces fetch time.
        Default is false.

    Returns
    -------
    biom.Table
        A Table populated with the found samples.
    dict
        A map of {sample_id_in_table: original_id}. This map can be used to
        identify what samples are ambiguous based off their original IDs.
    r   Nfeature)r   skip_taxonomy)r   r   r   r   r	   r
   r   r   Zids_from_biom_from_samplesiter)r   featuresexactrQ   r   r   r   r>   r   r   r   data_from_features   s    rV   c                 C   s   t | ||dS )a`  Fetch sample data from an iterable of samples.

    Paramters
    ---------
    context : str
        The name of the context to retrieve sample data from.
    samples : Iterable of str
        The samples of interest.
    skip_taxonomy : bool, optional
        If true, do not resolve taxonomy. This greatly reduces fetch time.
        Default is false.

    Returns
    -------
    biom.Table
        A Table populated with the found samples.
    dict
        A map of {sample_id_in_table: original_id}. This map can be used to
        identify what samples are ambiguous based off their original IDs.
    )rQ   )rR   )r   r>   rQ   r   r   r   data_from_samples  s    rW   c           $      C   s  ddl m} ddlm} ddl}ddl}ddl}ddl}ddl}|	 }	|dkr\|j
|	}|j
|	}
|j
| | t|}|j| ||\}}}}g }t }|jjd}|D ]*}|
|d| |}|||f || qdd t|D }dd t| |d	d
D }dd |D }|t|t|f}t|D ]2\}\}}| D ]\}}|||| |f< qNq:|rzd}nt| |||d}|dk	rdd |D }nd}|||||}|| i } | D ]"\}!}"|!dd	\}#}|| |"< q|| fS )a  Create a BIOM table from an iterable of samples

    Parameters
    ----------
    context : str
        The context to obtain sample data from.
    samples : iterable of str
        The samples to fetch.
    get : a make_get instance, optional
        A constructed get method.
    normalize_taxonomy : list, optional
        The ranks to normalize a lineage too (e.g., [k, p, c, o, f, g, s])
    skip_taxonomy : bool, optional
        If true, do not resolve taxonomy. This greatly reduces fetch time.
        Default is false.

    Returns
    -------
    biom.Table
        A Table populated with the found samples.
    dict
        A map of {sample_id_in_table: original_id}. This map can be used to
        identify what samples are ambiguous based off their original IDs.

    Redis command summary
    ---------------------
    HMGET <context>:feature-index-inverted
    EVALSHA <fetch-sample-sha1> 0 context <redbiom-id>
    r   )
itemgetterNzfetch-samplec                 S   s   i | ]\}}||qS r   r   )r   indexZobservedr   r   r   r   u  s    z&_biom_from_samples.<locals>.<dictcomp>c                 S   s   g | ]\}}|qS r   r   r   rN   r   r   r   r   
<listcomp>y  s     z&_biom_from_samples.<locals>.<listcomp>r    )keyc                 S   s   g | ]\}}|qS r   r   rZ   r   r   r   r[   {  s     )	normalizec                 S   s   g | ]}d |iqS )Ztaxonomyr   )r   lineager   r   r   r[     s     r   ) operatorrX   Zscipy.sparsesparsebiomZredbiom.adminr   r   r   r   r	   r
   Zmake_script_execr   r2   r   r.   r   ZadminZScriptManagerr   r5   update	enumeratesortedr/   Z
lil_matrixlentaxon_ancestorsTable
update_idsr!   )$r   r>   r   Znormalize_taxonomyrQ   rX   ssra   r   r   seZ
stable_idsZ
unobservedrD   ZrimapZ
table_dataZunique_indicesZfetch_samplerN   dataZunique_indices_mapZobs_ids
sample_idsmatcolrH   Zcol_dataZobs_idrJ   lineagesZobs_mdtableZambiguity_mapr"   r#   rM   r   r   r   rR   5  s`    



rR   c              	   C   sf  ddl m} ddl}|dkr8ddl}| }|j|}|jj}|t|dd| |ddd}dd	 |D }	t	|	
 }
i }|
rd
}|t|
dd| |d|d}t }|D ]2}t| D ]$\}}|dkrq|||< || qq|}
qx|sdS g }|D ]r}g }|	| }|dk	r&||}|dk	r|| q|ddd }|dk	rVdd |||ddD }|| q|S )a4  Fetch the taxonomy information for a set of IDs

    Parameters
    ----------
    context : str
        The context to operate in
    ids : list or tuple of str
        The IDs to retreive
    get : function, optional
        A get method
    normalize : list, optional
        The ranks to normalize a lineage too (e.g., [k, p, c, o, f, g, s])

    Returns
    -------
    list of list
        The lineage information for each ID in order with ids

    Redis Command Summary
    ---------------------
    HMGET <context>:taxonomy-parents <child> ... <child>
    r   )zip_longestNHMGETr'   zfeature-indexr   r)   Zmultikeyc                 S   s6   i | ].\}}t ||D ]\}}||d k	r,|n|qqS Nzip)r   namesidxnamerN   r   r   r   r     s
     z#taxon_ancestors.<locals>.<dictcomp>ztaxonomy-parentsc                 S   s    g | ]\}}|r|nd | qS )z%s__r   )r   ZlinZrnkr   r   r   r[     s   z#taxon_ancestors.<locals>.<listcomp>F)	fillvalue)Zfuture.moves.itertoolsrq   r   r   r   r	   r
   r1   rS   r2   valuesr   rv   addr   r5   )r   idsr   r]   rq   r   r   hmgetterremapped_bulkremappedto_getZchild_parentr\   rE   
new_to_getblockchildparentro   rN   r^   currentr   r   r   rf     sh       


rf   c              	      s   ddl }|dkr,ddl}| }|j|}|jj}d fg}t }|rg }|D ]X\}	  dkr|| dd|	 }
|t|
 qP|| dd  }| fdd|D  qP|}qD||dd	| |d
dd}dd |D }d|krt	d|S )a  Get tips associated with a taxon

    Parameters
    ----------
    context : str
        The context to operate in
    taxon : str
        The taxon to search for
    get : function, optional
        A get method

    Returns
    -------
    set
        The set of feature IDs found

    Redis Command Summary
    ---------------------
    SMEMBERS <context>:taxonomy-children:<taxon>
    r   Nzhas-terminalr   zterminal-of:%sztaxonomy-children:%sc                    s   g | ]} |fqS r   r   )r   r   taxonr   r   r[     s     z%taxon_descendents.<locals>.<listcomp>rr   r'   zfeature-index-invertedrs   c                 S   s(   h | ] \}}t ||D ]\}}|qqS r   ru   )r   rx   rw   rN   ry   r   r   r   	<setcomp>   s    z$taxon_descendents.<locals>.<setcomp>z$An unassociated index has been found)
r   r   r   r	   r
   r1   r   rb   r3   r0   )r   r   r   r   r   r   r   to_keepr   r   ZtipsZgottenr   r   r   r   r   taxon_descendents  s<    

 r   c                 C   s   ddl }ddl}ddl}|j| }|dk	rL|j|\}}}}|| }t|| |d}dd |D }	dd |D }
|j	|
|	dS )aZ  Obtain the samples and their corresponding category values

    Parameters
    ----------
    category : str
        A metadata column of interest.
    samples : Iterable of str, optional
        If provided, only the specified samples and their values are obtained.

    Returns
    -------
    pandas.Series
        A Series indexed by the Sample ID and valued by the metadata value for
        that sample for the specified category.

    Redis command summary
    ---------------------
    HGETALL metadata:category:<category>
    HMGET metadata:category:<category> <sample_id> ... <sample_id>
    r   N)r   c                 s   s   | ]}|d  V  qdS )r   Nr   r   r#   r   r   r   	<genexpr>M  s     z)category_sample_values.<locals>.<genexpr>c                 s   s   | ]}|d  V  qdS )r    Nr   r   r   r   r   r   N  s     )rk   rY   )
r   r   r-   r	   r
   r   r   r   r;   Series)rI   r>   r   rA   r   rB   r   r   Z	keys_valsrY   rk   r   r   r   category_sample_values+  s    

r   c                 C   sz   ddl }ddl}ddl}|j| }| dkr@t|ddd} g }| D ]"}d| }|t|dd| qH|j	|| dS )	a(  Get the number of samples with usable metadata per category

    Paramaters
    ----------
    categories : list of str, optional
        The specific categories to summarize. If None, then summarize
        all of the categories in the database.

    Returns
    -------
    pandas.Series
        A series keyed by the category and valued by the number of samples
        which have metadata for that category.

    Redis command summary
    ---------------------
    SMEMBERS metadata:categories-represented
    HLEN metadata:category:<category>
    r   Nr&   r   categories-representedcategory:%sZHLENrY   )
r   r   r-   r	   r
   r   r2   r5   intr   )r$   r   rA   r   resultsrI   r\   r   r   r   sample_counts_per_categoryR  s    r   c                    s  ddl }ddlm} ddl}ddl}ddl}|dkr<td| }|j	|}t
|ddd}	|dk	rt
||	st
||	 }
tdd	|
 nt
|}	t
|ddd
} dkrdd |D }n fdd|D }|jj|ddd|dd}g }|D ]P}t| D ]B\}}||kr|dk	rt
||}||	r|| qq|t}|D ]}||| d< qV|	D ]*}t|||D ]\}}||| |< q~qn||j}t|jdkrt
 S |j|d}|j| dS dS )a.  Find samples from metadata

    Parameters
    ----------
    where : str, optional
        SQLite WHERE clause specifying criteria IDs must meet to be
        included in the results. All IDs are included by default.
    tag : str, optional
        A tag specific search. Defaults to sample metadata.
    restrict_to : list of str
        Restrict the retrieval of metadata to a subset of columns.

    Raises
    ------
    KeyError
        If a `restrict_to` column does not appear to be valid
    ValueError
        `restrict_to` must be specified

    Returns
    -------
    list
        A list of sample IDs

    Redis command summary
    ---------------------
    MGET metadata:categories:<sample_id> ... metadata:categories:<sample_id>
    HMGET metadata:category:<column> <sample_id> ... <sample_id>
    r   Nr   zrestrict_to must be setr&   r   r   z8The following requested categories are not not found: %s,r   c                 S   s   h | ]}d |kr|qS )r   r   r   sr   r   r   r     s      zmetadata.<locals>.<setcomp>c                    s   h | ]}| d   r|qS )z%s_)
startswithr   rM   r   r   r     s      r$   r%   r'   r(   r*   )where)r+   r,   r   r-   r   r   r0   r   r	   r
   r   r8   r9   joinr1   rv   r4   r7   r5   r:   r;   r<   r=   re   rG   r&   ZMetadataZ	set_indexr~   )r   rM   r@   r+   r   rA   r   r   r   r$   diffr>   rE   Zsamples_to_getchunkrH   rF   r&   rI   rJ   rK   r   r   r   r&   w  s\    
  

r&   c              	   C   sl   ddl }|dkr$| }|j|}d| }| dkr@|dd|} |jjt| ddd|d|d}d	d
 |D S )a  Obtain the metadata values associated with the requested samples

    Parameters
    ----------
    samples : Iterable of str or None
        The samples to obtain
    category : str
        The category to obtain values for.
    get : function, optional
        A get method

    Returns
    -------
    [(str, str), ...]
        A list of (sample, value) tuples

    Redis command summary
    ---------------------
    HMGET metadata:category:<column> <sample_id> ... <sample_id>
    HMKEYS metadata:category:<column>
    r   Nr   r&   ZHKEYSrr   r'   rs   c                 S   s   g | ]}t | D ]}|qqS r   ru   )r   r   itemr   r   r   r[     s     
  z%get_sample_values.<locals>.<listcomp>)r   r   r	   r
   r1   rS   )r>   rI   r   r   r   r\   rE   r   r   r   r;     s     r;   c                    st   t  t |  krtd fdd}| j|ddd}t  }g }  D ]$\}}||krJ|| || qJ|S )a  Merge ambiguous samples

    Parameters
    ----------
    table : biom.Table
        The table obtained from redbiom
    collapse_map : dict
        A mapping of a sample ID in the table to its collapse
        target name.

    Raises
    ------
    ValueError
        If the IDs present in the table are not a perfect match to the keys
        of the collapse map.

    Returns
    -------
    biom.Table
        A table of the merged data with updated sample identifiers
    IDs are inconsistentc                    s    |  S rt   r   )rL   mcollapse_mapr   r   	collapser  s    z#_ambiguity_merge.<locals>.collapserrH   F)ZaxisZnorm)r   r~   r0   Zcollapser/   r5   r}   )rp   r   r   Zcollapsed_tableseenZkeepr"   r#   r   r   r   _ambiguity_merge  s    
r   c                 C   s  ddl }t|t|  kr$td|j| d|  d }i }| D ]&\}}||krdg ||< || | qLg }| D ]h\}	}
t	|
dkr|
d }|| }|
dd D ]}|| }||kr|}|}q|| q||
d  q| j
t|dd }|s|j|d	d |S )
a  Keep the ambiguous sample with the most reads

    Parameters
    ----------
    table : biom.Table
        The table obtained from redbiom
    ambig_map : dict
        A mapping of a sample ID in the table to its ambiguous form.
    retain_artifact_id : boolean, default False
        If True, do not strip the artifact ID

    Returns
    -------
    biom.Table
        A table of the most volumous data with updated sample identifiers
    r   Nr   rH   r   r    F)ZinplaceT)r-   r   r~   r0   r   sumto_dictr/   r5   re   filterZremove_emptyrh   )rp   rC   Zretain_artifact_idrA   Zsample_countsZambigsr"   r#   r   Zsample_namerl   bestZbest_cntrL   ZcntZsubset_tabler   r   r   _ambiguity_keep_most_reads   s2    r   )N)N)N)TNNF)F)F)NNF)NN)N)N)N)NNN)N)F)r   r   r   rO   rV   rW   rR   rf   r   r   r   r&   r;   r   r   r   r   r   r   <module>   s$   
'
.
'  
x
(
  
d
X
:
'
%
X
)(