U
    ef                     @   s   d dl mZ d dlmZ d dlZdd ZG dd dZdd	 Zd
d Z	dd Z
dd Zd#ddZdd Zd$ddZd%ddZdd ZG dd deZd&ddZd'd!d"ZdS )(    )
quote_plus)ceilNc                 C   s   t | ddS )N.z%2E)_quote_plusreplace)s r   l/mounts/lovelace/software/anaconda3/envs/qiime2-amplicon-2023.9/lib/python3.8/site-packages/redbiom/admin.pyr      s    r   c                   @   sL   e Zd ZdZdddddZdZi Zedd	d
Zedd Z	edd Z
dS )ScriptManagerz>Static singleton for managing Lua scripts in the Redis backenda  
                    local indices = {}
                    local kid = nil

                    -- for each index and identifier (like python's enumerate)
                    for position, name in ipairs(ARGV) do
                        kid = redis.call('HGET', KEYS[1], name)

                        -- if an identifier was not observed, add it
                        if not kid then
                          kid = redis.call('HINCRBY',
                                           KEYS[1],
                                           'current_id', 1) - 1
                          redis.call('HSET', KEYS[1], name, kid)
                          redis.call('HSET', KEYS[1] .. '-inverted', kid, name)
                        end

                        -- store store the mapping for return
                        indices[position] = tonumber(kid)
                    end
                    return cjson.encode(indices)a  
                    -- Redis has a compile time stack limit for Lua calls
                    -- so rather than recompiling with an arbitrary limit,
                    -- we're going to instead chunk calls where there are a
                    -- large number of arguments. The default is 8000 for the
                    -- stack size, so we'll use 7900 to be close without
                    -- going over
                    -- https://stackoverflow.com/a/39959618/19741
                    local call_in_chunks = function (command, key, args)
                        local step = 7900
                        for i = 1, #args, step do
                            redis.call(command,
                                       key,
                                       unpack(args,
                                              i,
                                              math.min(i + step - 1, #args)))
                        end
                    end

                    -- Lua does not have a natural split, for various reasons
                    -- outlined in the URL below, so we need to do this
                    -- manually. We'll split on "|" which should be safe
                    -- as the values sent are only ever expected to be integers
                    -- http://lua-users.org/wiki/SplitJoin
                    for idx, arg in ipairs(ARGV) do
                        local items = {}
                        for item in string.gmatch(arg, "([^|]+)") do
                            table.insert(items, item)
                        end
                        call_in_chunks('LPUSH', KEYS[idx], items)
                    end
                    return redis.status_reply("OK")a  
                    local context = ARGV[1]
                    local key = ARGV[2]
                    local result = {}
                    local formedkey = context .. ':' .. 'feature' .. ':' .. key

                    local items = redis.call('LRANGE',
                                             formedkey,
                                             '0', '-1')

                    -- adapted from https://gist.github.com/klovadis/5170446
                    local resultkey
                    local ii = context .. ':' .. 'sample' .. '-index-inverted'
                    for idx, v in ipairs(items) do
                        if idx % 2 == 1 then
                            -- it is likely possible to issue a HMGET
                            resultkey = redis.call('HGET', ii, v)
                        else
                            result[resultkey] = tonumber(v)
                        end
                    end

                    return cjson.encode(result)a  
                    local context = ARGV[1]
                    local key = ARGV[2]
                    local result = {}
                    local formedkey = context .. ':' .. 'sample' .. ':' .. key

                    local items = redis.call('LRANGE',
                                             formedkey,
                                             '0', '-1')

                    -- adapted from https://gist.github.com/klovadis/5170446
                    local resultkey
                    local ii = context .. ':' .. 'feature' .. '-index-inverted'
                    for idx, v in ipairs(items) do
                        if idx % 2 == 1 then
                            -- it is likely possible to issue a HMGET
                            resultkey = redis.call('HGET', ii, v)
                        else
                            result[resultkey] = tonumber(v)
                        end
                    end

                    return cjson.encode(result))	get-index	load-datazfetch-featurezfetch-sample)r   r   Tc                 C   s   ddl }ddl}ddl}| }|j }|j|}|j|}tj	
 D ]v\}}| rd|tjkrdqL||d }	d||	f }
|j|d d |d |dd	|
 |dd
d| }||	ksLtqLdS )zLoad scripts into Redis

        Parameters
        ----------
        read_only : bool, optional
            If True, only load read-only scripts. If False, load writable
            scripts
        r   Nasciizscripts/%s/%shostnamez/SCRIPT/LOAD)datastateHSETHGET
scripts/%s)redbiomredbiom._requestshashlib
get_config	_requestsget_session	make_postmake_getr
   _scriptsitems_admin_scriptssha1encode	hexdigestputAssertionError)Z	read_onlyr   r   configr   postgetnamescriptr   Zkeypairobsr   r   r	   load_scriptss   s     

zScriptManager.load_scriptsc                 C   sf   | t jkrt j|  S ddl}ddl}| }|j|}|ddd|  }|dkrXtd|t j| < |S )zRetreive the SHA1 of a script

        Parameters
        ----------
        name : str
            The name of the script to fetch

        Raises
        ------
        ValueError
            If the script name is not recognized
        r   Nr   r   r   zUnknown script)r
   _cacher   r   r   r   r   
ValueError)r'   r   r$   r&   shar   r   r	   r&      s    


zScriptManager.getc                  C   sP   ddl } ddl} |  }| j }||d d  ||d d  i t_dS )z.Flush the loaded scripts in the redis databaser   Nr   z/SCRIPT/FLUSHz/DEL/state:scripts)r   r   r   r   r   r&   r
   r+   )r   r$   r   r   r   r	   drop_scripts   s    
zScriptManager.drop_scriptsN)T)__name__
__module____qualname____doc__r   r   r+   staticmethodr*   r&   r.   r   r   r   r	   r
   
   s    c#
r
   c                  C   sP   ddl } ddl} ddl}|  }| j|}|j d}|ddd|  dS )a  Create a new timestamp in the database

    Notes
    -----
    Time is represented as "%d.%b.%Y" (e.g., 25.Jul.2019).

    Timestamps are pushed into an array such that index 0 is the latest
    timestamp. A reasonable interpretation of this field, and the use of
    this method, is to obtain the timestamps of when the database was
    last updated.

    Redis command summary
    ---------------------
    LPUSH state:timestamps <current_time>
    r   Nz%d.%b.%Yr   ZLPUSHztimestamps/%s)r   r   datetimer   r   r   nowstrftime)r   r4   r$   r%   fmtr   r   r	   create_timestamp   s    r8   c                  C   s0   ddl } ddl} |  }| j|}|dddS )zwObtain the stored timestamps

    Redis command summary
    ---------------------
    LRANGE state:timestamps 0 -1
    r   Nr   ZLRANGEztimestamps/0/-1)r   r   r   r   r   )r   r$   r&   r   r   r	   get_timestamps   s
    r9   c                 C   s   ddl }ddl}| }|j|}z*|ddd| |f  || dd|j  W n(   ddl}td|  |jd  Y nX t	
  dS )	a  Create a context within the cache

    Parameters
    ----------
    name : str
        The name of the context, e.g., deblur@150nt
    description : str
        A brief description about the context, e.g., "Default quality
        filtering, followed by application of Deblur with a trim length of
        150nt."

    Redis commmand summary
    ----------------------
    HSET state:context <name> <description>
    HSET <context>:state db-version <current-db-version>
    r   Nr   r   zcontexts/%s/%szstate/db-version/%szUnable to create context: %s)file)r   r   r   r   r   Z__db_version__sysprintstderrr
   r*   )r'   descriptionr   r$   r%   r;   r   r   r	   create_context   s    r?   c              	      s|  ddl }ddl}|dkrd}n|dkr,d}ntd| | }	|j|	}
td}tdt	t
|| }t||D ]}g }g }|D ]}| j||d	d
}t|jt|jd dstd|t} fdd||j D }ddd t||jD }|| d| d|  || qtt
|}||g| | }|
ddd| | dd| }|
|d|d	d qxdS )a  Manage the loading of data for a particular axis

    Parameters
    ----------
    table : biom.Table
        The table to obtain data from
    ids : iterable of str
        The IDs to obtain data for
    opposite_ids : iterable of str
        The IDs of the opposite axis in the table
    opposite_id_index : dict
        The index which maps an opposite ID to the index value within
        the Redis database for the identifier
    axis_label : str
        The biom.Table axis label of ids
    context : str
        The context to load the data into
    batchsize : int
        The number of identifiers to group into a single request

    Notes
    -----
    This method only supports count data.

    Data are loaded through the "load-data" Lua script managed in the
    ScriptsManager. This method in effect packs the data into a structure
    compatible with Webdis, and the EVALSHA command structure of Redis. The
    "load-data" script then iterates over the "KEYS" and "ARGV"s, parsing
    the respective entries into values that can be directly loaded.

    Redis command summary
    ---------------------
    EVALSHA <load-data-sha1> N <context>:<axis_label>:<id> ... <packeddata> ...

    Note that "N" refers to the number of "KEYS". The "load-data" Lua script
    assumes that there are "N" "KEYS" as well as "N" "ARGV"s. For the call,
    "KEYS" are the prefixed identifiers (e.g., "<context>:<axis_label>:<id>")
    and "ARGV" are the "packeddata". "KEYS" and "ARGV" are expected to be in
    index order with each other.
    r   Nfeatureobservationsamplez%s is unrecognized as an axisr      F)axisZdenseg        zData do not appear to be countsc                    s   g | ]} | qS r   r   .0iopposite_id_indexr   r	   
<listcomp>L  s   z#_load_axis_data.<locals>.<listcomp>|c                 S   s   g | ]\}}d ||f qS )z%d|%dr   )rF   rG   vr   r   r	   rJ   O  s   :EVALSHA/zs-represented/%sSADD)verbose)r   r   r,   r   r   r   r
   r&   maxr   lennparray_splitr   ZallcloseroundZastypeintindicesjoinzipappendstr)tableidsZopposite_idsrI   Z
axis_labelcontext	batchsizer   rD   r$   r%   Z
loader_shasplitsbatchkeysargvZid_valuesZ
int_valuesZremappedpackednkeyspayloadr   rH   r	   _load_axis_data  s@    *




ri   F  c              	      s@  ddl }ddl}ddl}| }|j|}|j|}|j|| t| |||} | 	 dd }	| j	dddd }
dd t
|
t||
dD }dd t
|	t||	d	D }t| |	|
|d	|d
d t| |
|	|d|dd t| j	dd| jdd}|dk	r8||dd |jj}dd | D }||dd||ddd}|D ]$}t
| D ]\}}||| _qLq@|jddD ]ĉ   srg }g } jD ]4}| r|d ||j n||j qd|}||dd j|f   fdd jD }||ddd|  |rrd|}||dd j|f  qrt|	S )a  Load nonzero sample data.

    Parameters
    ----------
    table : biom.Table
        The BIOM table to load.
    context : str
        The context to load into.
    tag : str
        A tag to associated the samples with (e.g., a preparation ID).
    redis_protocol : bool, optional
        Generate commands for bulk load instead of HTTP requests.
    batchsize : int, optional
        The number of samples or features to load at once

    Raises
    ------
    ValueError
        If the context to load into does not exist.
        If a samples metadata has not already been loaded.
        If a table is empty.

    Notes
    -----
    This method does not support non count data.

    The feature IDs are remapped into an integer space to reduce memory
    consumption as sOTUs are large. The index is maintained in Redis under
    <context>:feature-index and <context>:feature-index-inverted.

    The data are stored per sample with keys of the form "data:<sample_id>".
    The string stored is tab delimited, where the even indices (i.e .0, 2, 4,
    etc) correspond to the unique index value for an feature ID, and the
    odd indices correspond to the counts associated with the sample/feature
    combination.

    Redis command summary
    ---------------------
    EVALSHA <get-index-sha1> 1 <context>:feature-index <feature_id>
    EVALSHA <get-index-sha1> 1 <context>:sample-index <redbiom_id>
    LPUSH <context>:samples:<redbiom_id> <count> <feature_id> ...
    LPUSH <context>:features:<redbiom_id> <count> <redbiom_id> ...
    SADD <context>:samples-represented <redbiom_id> ... <redbiom_id>
    SADD <context>:features-represented <feature_id> ... <feature_id>

    Returns
    -------
    int
        The number of samples loaded.
    r   NrA   rD   c                 S   s   i | ]\}}||qS r   r   rF   rG   jr   r   r	   
<dictcomp>  s      z$load_sample_data.<locals>.<dictcomp>r@   c                 S   s   i | ]\}}||qS r   r   rl   r   r   r	   rn     s      rB   
   )r`   i  r   zstate/has-taxonomy/1c                 S   s   i | ]}|j |qS r   r'   )rF   nr   r   r	   rn     s      ZHMGETd   zfeature-index)r&   buffer_sizeZmultikeyF)Zinclude_selfzhas-terminalrO   rP   ztaxonomy-children:%s/%sc                    s   g | ]}d |j  j f qS z%s/%srp   rF   cnoder   r	   rJ     s   z$load_sample_data.<locals>.<listcomp>HMSETztaxonomy-parents/%szterminal-of:%s/%s)r   r   redbiom.utilr   r   r   r   valid_stage_for_loadr^   rZ   	get_indexri   _metadata_to_taxonomy_treemetadatabufferedZtipsr'   	postorderis_tipchildrenr[   rY   rS   )r]   r_   tagZredis_protocolr`   r   r$   r&   r%   samplesr)   Z	obs_indexZ
samp_indextaxonomyZhmgetterZ	tip_namesZids_ZblkentityidxpackZterminal_packrv   rf   Zid_packr   rw   r	   load_sample_data`  st    4


 






r   c                 C   s   |dkrdS ddl }|jdd t| |D }t| D ]N}| rJq<| rTq<t|j	dkr<|j	
dr<|j}||j || q<|S )a  Cast the taxonomy into a tree

    Parameters
    ----------
    ids : list of str
        The feature IDs
    metadata : list of dict
        Feature metadata in index order with the ids.

    Notes
    -----
    Children of unclassified nodes (e.g., s__) are migrated to the parent
    so that no unclassified nodes exist in the tree.

    Returns
    -------
    skbio.TreeNode
        A hierarchy of the taxonomy.
    Nr   c                 S   s   g | ]\}}||d  fqS )r   r   )rF   rG   mr   r   r	   rJ     s   z._metadata_to_taxonomy_tree.<locals>.<listcomp>   __)skbioZTreeNodeZfrom_taxonomyrZ   listr   r   is_rootrS   r'   endswithparentextendr   remove)r^   r   r   trx   r   r   r   r	   r~     s     r~   c                    s  ddl }ddl}ddl}ddl}| }|j|}|j|}|j|}|j	j
 |  } | jd dkrr|  } dk	r| | jd  dd }|j	|stdfdd| | jd  D | | jd < | j| jd dd |d	d
d}	| jtt| jt|	  } t| dkrdS | j}
| j}|  D ]B\}} fddt| j|jD }d| }|d	d||| q,|D ]B} fddt| j| | D }d|d|f }|d	d| qtdd| j }|d	d| dd| j }|d	d| t|
S )a@  Load sample metadata.

    Parameters
    ----------
    md : pd.DataFrame
        QIIME or Qiita compatible metadata.
    tag : str, optional
        A tag associated with the information being loaded such as a
        preparation ID.

    Notes
    -----
    Values considered to be non-informative are omitted from load.

    TODO: expose a stable list of the nullables, see #19

    Returns
    -------
    int
        The number of samples loaded.

    Redis command summary
    ---------------------
    SMEMBERS metadata:samples-represented
    SET metadata:categories:<sample_id> <JSON-of-informative-columns>
    HMSET metadata:category:<column> <sample_id> <val> ... <sample_id> <val>
    SADD metadata:samples-represented <sample_id> ... <sample_id> ...
    SADD metadata:categories-represented <column> ... <column>
    r   Nz	#SampleIDZsample_name%Sample metadata must be loaded first.c                    s   g | ]}d  |f qS z%s_%sr   rE   r   r   r	   rJ   8  s     z(load_sample_metadata.<locals>.<listcomp>TZinplacer   SMEMBERSsamples-representedc                    s   g | ]\}}t | r|qS r   )
_indexable)rF   rv   rG   null_valuesr   r	   rJ   F  s    
zcategories:%sZSETc                    s.   g | ]&\}}t | rd |tt|f qS rt   )r   r   r\   )rF   r   rL   r   r   r	   rJ   N  s   
zcategory:%s/%srO   ry   zsamples-represented/%srP   zcategories-represented/%s)jsonr   r   rz   r   r   r   Zmake_putr   utilZNULL_VALUEScopycolumnsreset_indexhas_sample_metadatar,   	set_indexlocr   setindexrS   ZiterrowsrZ   re   dumpsrY   )mdr   r   r   r$   r%   r"   r&   Zoriginal_idsrepresentedr   Zindexed_columnsr   rowr   keycolZbulk_setrh   r   )r   r   r	   load_sample_metadata  sN    &
r   c                    s\  ddl }ddl}ddl}ddl}| }|j|}|  } | jd dkrR| 	 }  dk	r fdd| | jd  D | | jd < | j
| jd dd |jt| jstd|j| }| D ]&\}}d	|d
|f }	|dd|	 qt|}
dd | jD }|j|j|| jd}| D ](\}}d|d
|f }	|dd|	 q"t|}|
|fS )a  Load stem -> sample associations

    Parameters
    ----------
    md : pd.DataFrame
        QIIME or Qiita compatible metadata.
    tag : str, optional
        A tag associated with the information being loaded such as a
        preparation ID.

    Notes
    -----
    Values considered to be non-informative are omitted from load.

    Returns
    -------
    int
        The number of stems based on metadata values found.
    int
        The number of stems based on the categories found.

    Redis command summary
    ---------------------
    SADD metadata:text-search:<stem> <sample-id> ... <sample-id>
    SADD metadata:category-search:<stem> <category> ... <category>
    r   Nr   c                    s   g | ]}d  |f qS r   r   rE   r   r   r	   rJ     s     z4load_sample_metadata_full_search.<locals>.<listcomp>Tr   r   ztext-search:%s/%srO   r   rP   c                 S   s   g | ]}| d dqS )_ )r   ru   r   r   r	   rJ     s     )r   zcategory-search:%s/%s)r   r   rz   Zpandasr   r   r   r   r   r   r   r   r   r   r   r,   Zdf_to_stemsr   rY   rS   Z	DataFrame)r   r   r   pdr$   r%   Zstemsstemr   rh   Zvalue_stems
categoriesZcatsZ	cat_stemsr   r   r	    load_sample_metadata_full_search]  s8    &r   c                 C   s   | |kS )z?Returns true if the value appears to be something that storabler   )valueZ	nullablesr   r   r	   r     s    r   c                   @   s   e Zd ZdS )AlreadyLoadedN)r/   r0   r1   r   r   r   r	   r     s   r   c                    s   ddl } dkrd | j fdd|  D dd} t|  }|sLtd||d	d
}t|}|| }|sx|rxtd|j|std| | | jdd ddS )a  Tag samples, reduce to only those relevant to load

    Parameters
    ----------
    table : biom.Table
        The table to operate on
    context : str
        The context to load into
    get : make_get instance
        A getter
    tag : str, optional
        The tag to apply to the samples

    Raises
    ------
    ValueError
        If a samples metadata has not already been loaded.
    ValueError
        If the table is empty.
    AlreadyLoaded
        If the table appears to already be loaded.

    Returns
    -------
    biom.Table
        A copy of the input table, filtered to only those samples which are
        novel to the context. Sample IDs reflect tag.
    r   NZUNTAGGEDc                    s   i | ]}|d  |f qS r   r   rE   r   r   r	   rn     s      z#_stage_for_load.<locals>.<dictcomp>Fr   zThe table is empty.r   r   z'The table appears to already be loaded.r   c                 S   s   |   dkS )Nr   )sum)rL   rG   r   r   r   r	   <lambda>      z!_stage_for_load.<locals>.<lambda>rA   rk   )	rz   Z
update_idsr^   r   r,   r   r   r   filter)r]   r_   r&   r   r   r   r   Zto_loadr   r   r	   r|     s$    
r|   rr   c              	   C   s   ddl }ddl}ddl}| }|j|}td}d| |f }	g }
tdt	t
|| }t||D ]<}d}|||	gt| }||ddd|}|
| qh|
S )	a  Get a unique integer value for a key within a context

    Parameters
    ----------
    context : str
        The context to operate in
    keys : list or tuple of str
        The keys to get a unique index for
    axis : str
        Either feature or sample
    batchsize : int, optional
        The number of IDs to query at once

    Notes
    -----
    This method is an atomic equivalent of:

        def get_or_set(d, item):
            if item not in d:
                d[item] = len(d)
            return d[item]

    Returns
    -------
    tuple of int
        The unique integer indices within the context for the keys. This is
        returned in index order with keys.
    r   Nr   z%s:%s-indexrC   1rN   rO   )r   r   r   r   r   r   r
   r&   rR   r   rS   rT   rU   r   loadsrY   r   )r_   rc   rD   r`   r   r   r$   r%   Zindexer_shaZcontext_axisrX   ra   rb   rg   rh   r   r   r   r	   r}     s    
r}   )NFrj   )N)N)N)rr   )urllib.parser   r   mathr   numpyrT   r
   r8   r9   r?   ri   r   r~   r   r   r   r,   r   r|   r}   r   r   r   r	   <module>   s&    8 X  
x)
\
C
7