U
    ¦ºeZ-  ã                   @   s¨   d dl Z d dlZdddddejdddd	d
dddddddddhZdd„ Zd-dd„Zd.dd„Zdd„ Zd/dd„Z	dd „ Z
d!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ ZdS )0é    NzNot applicableÚUnknownZUnspecifiedzMissing: Not collectedzMissing: Not providedzMissing: Not ProvidedÚmissingÚ zMissing: Restricted accesszMissing:Not reportedzMissing: Not applicableZNAÚnullZNULLZno_dataÚNoneÚnanÚNaNc                 C   sd   ddl }| dks| dkr"|s"|j} | dk	rF|rFtjddd | d¡ | dk	rR| }tdd	„ |D ƒƒS )
z?In support of buffered: determine whether to use from_ or nargsr   Nú-z0Unable to handle --from as well as cmdline itemsT)Úerré   c                 s   s   | ]}|  ¡ V  qd S ©N)Ústrip)Ú.0Ús© r   úk/mounts/lovelace/software/anaconda3/envs/qiime2-amplicon-2023.9/lib/python3.8/site-packages/redbiom/util.pyÚ	<genexpr>   s     z from_or_nargs.<locals>.<genexpr>)ÚsysÚstdinÚclickZechoÚexitÚiter)Zfrom_Znargs_variabler   r   r   r   Úfrom_or_nargs   s    ÿ
r   r   c                    sø   ddl }ddl}ddl}| ¡ }|j |¡}tƒ }|dkrFtd| ƒ‚t|t	tt
fƒs\|g}‡ fdd„}	t	| ƒ} |jj d| ¡}
|D ]l}d}| D ]P}|	||
d||ƒƒ}|sÆ|dkrºtƒ }| |¡ q’|dkrØt|ƒ}q’| |¡}q’|r†| |¡}q†|S )a²  Grab samples from an iterable of IDs

    Parameters
    ----------
    it : iteraable of str
        The IDs to search for
    exact : boolean
        If True, compute the intersection of results per context. If False,
        compute the union of results per context.
    axis : {'feature', 'sample'}
        The axis to operate over.
    contexts : list of str
        The contexts to search in
    min_count : int, optional
        The minimum count (inclusive) to retain an observation.

    Notes
    -----
    Contexts are evaluated independently, and the results of each context are
    unioned.

    Returns
    -------
    set
        The IDs associated with the search IDs.

    r   N>   ÚsampleÚfeaturezUnknown axis: %sc                    s   ‡ fdd„|   ¡ D ƒS )Nc                    s   i | ]\}}|ˆ kr||“qS r   r   )r   ÚkÚv©Ú	min_countr   r   Ú
<dictcomp>M   s       z6ids_from.<locals>.min_count_filter.<locals>.<dictcomp>)Úitems)Zdatr   r   r   Úmin_count_filterL   s    z"ids_from.<locals>.min_count_filterzfetch-%s)ÚredbiomÚredbiom._requestsZredbiom.adminÚ
get_configÚ	_requestsZmake_script_execÚsetÚ
ValueErrorÚ
isinstanceÚlistÚtupleZadminZScriptManagerÚgetÚupdateÚintersectionÚunion)ÚitÚexactÚaxisZcontextsr   r"   ÚconfigÚseZ	retrievedr!   ZfetcherÚcontextZcontext_idsZid_Úblockr   r   r   Úids_from"   s6    
r6   c                 C   s<   |dkr,ddl }ddl}| ¡ }|j |¡}|ddd|  ƒS )a^  Test if a category exists

    Parameters
    ----------
    category : str
        The category to test for
    get : function
        A get method

    Returns
    -------
    bool
        True if the category exists, False otherwise

    Redis Command Summary
    ---------------------
    SISMEMBER <category> metadata:catetories-represented
    Nr   ÚmetadataZ	SISMEMBERzcategories-represented/%s)r"   r#   r$   r%   Úmake_get)Úcategoryr+   r"   r2   r   r   r   Úcategory_existse   s    r:   c                 C   s*   z
t | ƒW S  tk
r$   tj Y S X d S r   )ÚfloatÚ	ExceptionÚnpr   )Útr   r   r   Úfloat_or_nanƒ   s    
r?   c           	      C   st   ddl }|dkr,ddl}| ¡ }|j |¡}t| ƒ\}}}}t|dddƒƒ}t|ƒ |¡s^dS t|ƒ |¡spdS dS )z(Test if all samples have sample metadatar   Nr7   ÚSMEMBERSúsamples-representedFT)r#   r"   r$   r%   r8   Úpartition_samples_by_tagsr&   Úissubset)	Úsamplesr+   r"   r2   ÚuntaggedÚtaggedÚ_Útagged_cleanZrepresentedr   r   r   Úhas_sample_metadataŠ   s    rI   c           	      C   sp   g }g }g }g }| D ]N}|  dd¡}t|ƒdkrX|\}}| |¡ | |¡ | |¡ q| |¡ q||||fS )z1Partition samples by the presence of a sample tagrG   r   é   )ÚsplitÚlenÚappend)	rD   rF   rH   ÚtagsrE   r   ÚpartsÚtagZsample_splitr   r   r   rB   ž   s    

rB   c                 C   s(  ddl m} t|ƒ\}}}}|| ddƒ}t|ƒ\}}	}}
|tƒ}t|	|
ƒD ]\}}||  |¡ qJt|	ƒ}g }i }|D ]$}||kr|| ||< qv| |¡ qvt|ƒ\}}g }t||ƒD ]D\}}||krð| |¡ ||kràg ||< ||  |¡ q¶| |¡ q¶t|ƒ\}}| 	|¡ | 	|¡ ||||fS )aÑ  Determine mappings for requested samples

    This method accepts samples in the form of "sampleid" or "rid_sampleid". It
    then attempts to resolve any sample ambiguities which may exist in the
    context. For a "sampleid" there may be multiple "rid_sampleid" values which
    exists, for instance, the same sample may have multiple preps within the
    same study and datatype (e.g., biological replicates).

    Parameters
    ----------
    context : str
        The context to search within
    samples : Iterable of str
        The samples to resolve. The samples must be in the form of "sampleid"
        or "rid_sampleid". The former is an ambiguous association as it does
        not have a tag affixed (e.g., a sample preparation ID). The latter is
        fully specified and assured to be unique within the context.
    get : redbiom._requests.make_get instance
        A getter

    Returns
    -------
    stable
        A dict of stable QIIME compatible sample IDs, keyed by the QIIME
        compatible ID and valued by the redbiom ID.
    unobserved
        A list of any requested ID which was not observed in the context.
    ambituities
        A dict of untagged sample IDs (e.g., "sampleid") to a list of the
        observed "rid_sampleid" values within the context. In other words,
        this dict associated an unspecific ID to a unique redbiom ID.
    redbiomids
        A dict keyed by "rid_sampleid" and valued by a QIIME compatible sample
        ID.
    r   ©Údefaultdictr@   rA   )
ÚcollectionsrR   rB   r)   ÚziprM   r&   Ú_stable_ids_from_ambigÚ_stable_ids_from_unambigr,   )r4   rD   r+   rR   rE   rF   rG   rH   ÚctxZ
ctx_taggedZctx_tagged_cleanZctx_with_ambigZwith_tagZwithout_tagZctx_known_stableZ
unobservedZknown_ambiguousÚiZstableÚriZunambiguousr>   ZtcZstable_unambZri_unambr   r   r   Úresolve_ambiguities²   s6    $


rZ   c           	      C   sX   i }i }|   ¡ D ]>\}}|D ]0}| dd¡\}}d||f }|||< |||< qq||fS )z'Create stable IDs from an ambiguity maprG   r   ú%s.%s)r    rK   )	Z	ambig_mapZambig_assocrY   r   r   ÚunambigrP   rE   Ústabr   r   r   rU     s    rU   c                 C   sF   i }i }| D ]0}|  dd¡\}}d||f }|||< |||< q||fS )z)Create stable IDs from an unambiguous IDsrG   r   r[   )rK   )r\   ÚassocrY   r   rP   rE   r]   r   r   r   rV     s    
rV   c                 C   sä   ddl m}m} ddlm} ddl}ddl}| |jj¡}||t	ƒddƒ}|j
jd |krl|g|j
j |j
_t|jj d¡ƒ}| t||¡}	|tƒ}
|  ¡ D ]>\}}|| ¡   }|jD ] }|	|ƒD ]}|
|  |¡ qÄq¸qœt|
ƒS )zÒConvert a DataFrame to stem -> index associations

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame to index

    Returns
    -------
    dict
        {stem: {set of indices}}
    r   ©ÚjoinÚdirnamerQ   NÚassetsÚ	nltk_dataÚenglish)Úos.pathr`   ra   rS   rR   Ú	functoolsÚnltkZPorterStemmerZMARTIN_EXTENSIONSÚ__file__ÚdataÚpathÚ	frozensetZcorpusZ	stopwordsÚwordsÚpartialÚstemsr&   ZiterrowsZisnullÚvaluesÚaddÚdict)Údfr`   ra   rR   rf   rg   ÚstemmerÚnltk_data_pathÚstopsZstem_fÚdr   ÚrowÚvalueÚstemr   r   r   Údf_to_stems$  s"    
rz   c              	   c   s  ddl m}m} ddl}ddl}tdƒ}| t¡ | d¡}| d¡}	||krRdS ||t	ƒddƒ}
|j
jd |
kr„|
g|j
j |j
_|j |¡D ]|}||kst|ƒd	krªq|| ksd
|kr¼q| |¡dk	rÌq|	 |¡dk	rÜqz| |¡ ¡ V  W q tk
r
   Y qY qX qdS )zGather stems from stringr   r_   Nz()!@#$%^&*-+=|{}[]<>./?;:z(^-?\d+\.\d+$)|(^-?\d+$)z^\d+:\d+(am|AM|pm|PM)?$rb   rc   r   ú/)re   r`   ra   Úrerg   r&   r,   ÚNULL_VALUESÚcompilerh   ri   rj   ÚtokenizeZword_tokenizerL   Úmatchry   Úlowerr<   )ru   rs   Ústringr`   ra   r|   rg   Zto_skipZnumeric_regexZ
time_regexrt   Úwordr   r   r   rn   J  s2    


rn   c                 C   sZ   ddl }tƒ }| jddD ](}| ¡ }| | d¡¡ | ¡ ||< q| j|dd | |fS )zÙconvert biom feature ids to md5 and return new table

    Parameters
    ----------
    table : BIOM table

    Returns
    -------
    BIOM table
        The new BIOM table
    dict
        {original_id: new_id}
    r   NZobservation)r1   zutf-8)Úhashlibrq   ZidsÚmd5r,   ÚencodeÚ	hexdigestZ
update_ids)Útabler„   Znew_idsZ_idÚmr   r   r   Úconvert_biom_ids_to_md5u  s    rŠ   )r   )N)N)r   Únumpyr=   r   r}   r   r6   r:   r?   rI   rB   rZ   rU   rV   rz   rn   rŠ   r   r   r   r   Ú<module>   s@            ø
C

P&+