
    чoe                    8   d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
ZmZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZmZmZm Z m!Z!m"Z"m#Z# ddl$Z%ddl&Z&ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl7m7Z7 ddl8m8Z8 ddl9m9Z9 dZ:dZ;g dZ<dZ=dZ>dZ?dZ@eAeBeCeAeBeCdZD G d de          ZE eeeE          Z
d ZFd ZGd ZHd  ZId! ZJ G d" d#          ZKejL        dfd$ZMeBdfd%ZNeBfd&ZOeBfd'ZPeBfd(ZQeBfd)ZReBdfd*ZSdS )+aj  
BIOM Table (:mod:`biom.table`)
==============================

The biom-format project provides rich ``Table`` objects to support use of the
BIOM file format. The objects encapsulate matrix data (such as OTU counts) and
abstract the interaction away from the programmer.

.. currentmodule:: biom.table

Classes
-------

.. autosummary::
   :toctree: generated/

   Table

Examples
--------
First, let's create a toy table to play around with. For this example, we're
going to construct a 10x4 `Table`, or one that has 10 observations and 4
samples. Each observation and sample will be given an arbitrary but unique
name. We'll also add on some metadata.

>>> import numpy as np
>>> from biom.table import Table
>>> data = np.arange(40).reshape(10, 4)
>>> sample_ids = ['S%d' % i for i in range(4)]
>>> observ_ids = ['O%d' % i for i in range(10)]
>>> sample_metadata = [{'environment': 'A'}, {'environment': 'B'},
...                    {'environment': 'A'}, {'environment': 'B'}]
>>> observ_metadata = [{'taxonomy': ['Bacteria', 'Firmicutes']},
...                    {'taxonomy': ['Bacteria', 'Firmicutes']},
...                    {'taxonomy': ['Bacteria', 'Proteobacteria']},
...                    {'taxonomy': ['Bacteria', 'Proteobacteria']},
...                    {'taxonomy': ['Bacteria', 'Proteobacteria']},
...                    {'taxonomy': ['Bacteria', 'Bacteroidetes']},
...                    {'taxonomy': ['Bacteria', 'Bacteroidetes']},
...                    {'taxonomy': ['Bacteria', 'Firmicutes']},
...                    {'taxonomy': ['Bacteria', 'Firmicutes']},
...                    {'taxonomy': ['Bacteria', 'Firmicutes']}]
>>> table = Table(data, observ_ids, sample_ids, observ_metadata,
...               sample_metadata, table_id='Example Table')

Now that we have a table, let's explore it at a high level first.

>>> table
10 x 4 <class 'biom.table.Table'> with 39 nonzero entries (97% dense)
>>> print(table) # doctest: +NORMALIZE_WHITESPACE
# Constructed from biom file
#OTU ID S0  S1  S2  S3
O0  0.0 1.0 2.0 3.0
O1  4.0 5.0 6.0 7.0
O2  8.0 9.0 10.0    11.0
O3  12.0    13.0    14.0    15.0
O4  16.0    17.0    18.0    19.0
O5  20.0    21.0    22.0    23.0
O6  24.0    25.0    26.0    27.0
O7  28.0    29.0    30.0    31.0
O8  32.0    33.0    34.0    35.0
O9  36.0    37.0    38.0    39.0
>>> print(table.ids()) # doctest: +NORMALIZE_WHITESPACE
['S0' 'S1' 'S2' 'S3']
>>> print(table.ids(axis='observation')) # doctest: +NORMALIZE_WHITESPACE
['O0' 'O1' 'O2' 'O3' 'O4' 'O5' 'O6' 'O7' 'O8' 'O9']
>>> print(table.nnz)  # number of nonzero entries
39

While it's fun to just poke at the table, let's dig deeper. First, we're going
to convert `table` into relative abundances (within each sample), and then
filter `table` to just the samples associated with environment 'A'. The
filtering gets fancy: we can pass in an arbitrary function to determine what
samples we want to keep. This function must accept a sparse vector of values,
the corresponding ID and the corresponding metadata, and should return ``True``
or ``False``, where ``True`` indicates that the vector should be retained.

>>> normed = table.norm(axis='sample', inplace=False)
>>> filter_f = lambda values, id_, md: md['environment'] == 'A'
>>> env_a = normed.filter(filter_f, axis='sample', inplace=False)
>>> print(env_a) # doctest: +NORMALIZE_WHITESPACE
# Constructed from biom file
#OTU ID S0  S2
O0  0.0 0.01
O1  0.0222222222222 0.03
O2  0.0444444444444 0.05
O3  0.0666666666667 0.07
O4  0.0888888888889 0.09
O5  0.111111111111  0.11
O6  0.133333333333  0.13
O7  0.155555555556  0.15
O8  0.177777777778  0.17
O9  0.2 0.19

But, what if we wanted individual tables per environment? While we could just
perform some fancy iteration, we can instead just rely on `Table.partition` for
these operations. `partition`, like `filter`, accepts a function. However, the
`partition` method only passes the corresponding ID and metadata to the
function. The function should return what partition the data are a part of.
Within this example, we're also going to sum up our tables over the partitioned
samples. Please note that we're using the original table (ie, not normalized)
here.

>>> part_f = lambda id_, md: md['environment']
>>> env_tables = table.partition(part_f, axis='sample')
>>> for partition, env_table in env_tables:
...     print(partition, env_table.sum('sample'))
A [ 180.  200.]
B [ 190.  210.]

For this last example, and to highlight a bit more functionality, we're going
to first transform the table such that all multiples of three will be retained,
while all non-multiples of three will get set to zero. Following this, we'll
then collpase the table by taxonomy, and then convert the table into
presence/absence data.

First, let's setup the transform. We're going to define a function that takes
the modulus of every value in the vector, and see if it is equal to zero. If it
is equal to zero, we'll keep the value, otherwise we'll set the value to zero.

>>> transform_f = lambda v,i,m: np.where(v % 3 == 0, v, 0)
>>> mult_of_three = tform = table.transform(transform_f, inplace=False)
>>> print(mult_of_three) # doctest: +NORMALIZE_WHITESPACE
# Constructed from biom file
#OTU ID S0  S1  S2  S3
O0  0.0 0.0 0.0 3.0
O1  0.0 0.0 6.0 0.0
O2  0.0 9.0 0.0 0.0
O3  12.0    0.0 0.0 15.0
O4  0.0 0.0 18.0    0.0
O5  0.0 21.0    0.0 0.0
O6  24.0    0.0 0.0 27.0
O7  0.0 0.0 30.0    0.0
O8  0.0 33.0    0.0 0.0
O9  36.0    0.0 0.0 39.0

Next, we're going to collapse the table over the phylum level taxon. To do
this, we're going to define a helper variable for the index position of the
phylum (see the construction of the table above). Next, we're going to pass
this to `Table.collapse`, and since we want to collapse over the observations,
we'll need to specify 'observation' as the axis.

>>> phylum_idx = 1
>>> collapse_f = lambda id_, md: '; '.join(md['taxonomy'][:phylum_idx + 1])
>>> collapsed = mult_of_three.collapse(collapse_f, axis='observation')
>>> print(collapsed) # doctest: +NORMALIZE_WHITESPACE
# Constructed from biom file
#OTU ID S0  S1  S2  S3
Bacteria; Firmicutes  7.2 6.6 7.2 8.4
Bacteria; Proteobacteria  4.0 3.0 6.0 5.0
Bacteria; Bacteroidetes   12.0    10.5    0.0 13.5

Finally, let's convert the table to presence/absence data.

>>> pa = collapsed.pa()
>>> print(pa) # doctest: +NORMALIZE_WHITESPACE
# Constructed from biom file
#OTU ID S0  S1  S2  S3
Bacteria; Firmicutes  1.0 1.0 1.0 1.0
Bacteria; Proteobacteria  1.0 1.0 1.0 1.0
Bacteria; Bacteroidetes   1.0 1.0 0.0 1.0

    N)deepcopy)datetime)dumpsJSONEncoder)reducepartial)
itemgetteror_)defaultdict)HashableIterable)ndarrayasarrayzerosnewaxis)
coo_matrix
csc_matrix
csr_matrix
isspmatrixvstackhstack
dok_matrix)TableExceptionUnknownAxisErrorUnknownIDErrorDisjointIDError)get_biom_format_version_stringget_biom_format_url_stringflattennatsortprefer_self
index_listH5PY_VLEN_STR__format_version__)errcheck   )_filter)
_transform)
_subsampleDaniel McDonaldz5Copyright 2011-2020, The BIOM Format Development Team)
r*   zJai Ram RideoutzGreg CaporasozJose ClementezJustin KuczynskizAdam Robbins-PiankazJoshua ShorensteinzJose Antonio Navas Molinau   Jorge Cañardo AlastueyzSteven BrownBSDhttp://biom-format.orgzdaniel.mcdonald@colorado.edu)intfloatunicoder-   r.   r/   c                        e Zd Z fdZ xZS )	NpEncoderc                 T   t          |t          j                  rt          |          S t          |t          j                  rt          |          S t          |t          j                  r|                                S t          t          |           
                    |          S N)
isinstancenpintegerr-   floatingr.   r   tolistsuperr1   default)selfobj	__class__s     _/mounts/lovelace/software/anaconda3/envs/kraken-biom/lib/python3.11/site-packages/biom/table.pyr:   zNpEncoder.default   s    c2:&& 	s88Oc2;'' 	::c2:&& 	 ::<<Y%%--c222    )__name__
__module____qualname__r:   __classcell__)r=   s   @r>   r1   r1      s8        3 3 3 3 3 3 3 3 3r?   r1   )clsc                 l    d}d}t          |          D ]\  }}	  | |           #  |}|}Y  nxY w||fS )aR  Identify the first value which cannot be cast

    Paramters
    ---------
    dtype : type
        A type to cast to
    fields : Iterable of str
        A series of str to cast into dtype

    Returns
    -------
    str or None
        A value that cannot be cast
    int or None
        The index of the value that cannot be cast
    N)	enumerate)dtypefieldsbadvalbadidxidxvs         r>   _identify_bad_valuerM      sb    " FFF##  Q	E!HHHH	FFEEFs   &/c                 Z    t          | t                    r|                     d          } | S Nutf8r4   bytesdecodexs    r>   general_parserrV     s*    !U HHVHr?   c                     g }| D ]C}|r?t          |t                    r|                    d          }|                    |           D|r|ndS )zParses the taxonomy valuerP   N)r4   rR   rS   append)value	new_valuerL   s      r>   vlen_list_of_str_parserr[     se    I     	 !U## %HHV$$Q!+99t+r?   c                 J   t          |          f}fd|D             }                    dd          }d|z  }t          |                              t          h          r-|                     ||t          fd|D             |           dS t          |                              t          t          h          rt          | ||           dS g }g }	t          ||          D ]b\  }
}|         }|	d}t          }
|
t          k    r|                    d	          }|                    |           |	                    |
           ct          |	                              t          h          rt          }nd}|                     |t          |          f|||           dS )
z4Creates a dataset for a general atomic type categoryc                 :    g | ]}t          |                   S  type.0mheaders     r>   
<listcomp>z%general_formatter.<locals>.<listcomp>  s#    ***!d1V9oo***r?   /	@@SLASH@@metadata/%sc                 F    g | ]}|                              d           S rP   encodera   s     r>   re   z%general_formatter.<locals>.<listcomp>&  s+     F F Fa6!1!1&!9!9 F F Fr?   shaperG   datacompressionN rP   )lenreplacesetissubsetstrcreate_datasetr#   listtuplevlen_list_of_str_formatterziprl   rX   )grprd   mdrp   rn   dtypes	sanitizedname	formatteddtypes_useddtrc   valdtype_to_uses    `            r>   general_formatterr     s   WWJE****r***F
 sK00I9$D
6{{SE""  %4u!. F F F F2 F F F'2 	 	4 	4 	4 	4 	4 
V		tUm	,	, %"3K@@@@@	__ 
	# 
	#EBF)C{Syyjj((S!!!r""""{$$cU++ 	 (LLL 	R
#	 	 	% 	% 	% 	% 	%r?   c           	         g }g }|D ]}||         |                     d            t          |                    |          t                    r|                     d           ^|                     t          |                    |g           t                               |                     t          ||                              t          j        |          s|dk    rd }	 g }g }|D ]L} |||                   }	|                     ||	i           |                     t          |	                     M|}n8#  t          d|d|d         |                   xY wt          d	|z            t          |          }
t          |          |
f}t          j
        |t          
          }t          |          D ]H\  }}||         t          j        ||                   }d |D             ||dt          |          f<   It          j        |t          j        d          k    d|          }|                     d|z  |t"          ||           dS )z!Creates a (N, ?) vlen str datasetNTFtaxonomyc                 D    |                      d          }d |D             S )N;c                 6    g | ]}|                                 S r^   )strip)rb   ps     r>   re   zGvlen_list_of_str_formatter.<locals>.split_and_strip.<locals>.<listcomp>^  s     111a		111r?   )split)ipartss     r>   split_and_stripz3vlen_list_of_str_formatter.<locals>.split_and_strip\  s%    1151111r?   z
Category 'a   ' is not formatted properly. The most common issue is when 'taxonomy' is represented as a flat string instead of a list. An attempt was made to split this field on a ';' to coerce it into a list but it failed. An example entry (which is not assured to be the problematic entry) is below:
r   zCategory %s not formatted correctly. Did you pass --process-obs-metadata taxonomy when converting  from tsv? Please see Table.to_hdf5 docstring for more informationrG   c                 8    g | ]}|                     d           S rj   rk   rb   rL   s     r>   re   z.vlen_list_of_str_formatter.<locals>.<listcomp>~  s$    @@@Q 0 0@@@r?   rq   rh   rm   )rX   r4   getrv   r   rr   r5   all	TypeErrormaxemptyobjectrF   r   wherearrayrw   r#   )r|   rd   r}   rp   iterable_checkslengthsrc   r   new_mdr   max_list_lenrn   ro   r   rY   s                  r>   rz   rz   F  s   
 OG + +V9""4((((fs++ 	+""5))))""155,,h779 9 9NN3qy>>****6/"" .Z2 2 2H / /A+OAfI66EMM65/222NN3u::....Hi 171f!G H H H $ '--. . . w<<LWWl#E8E(((D" A A1V9
1V9%%@@%@@@QU^8DBHTNN*B55De$  ! ! ! ! !s   (AD> >#E!c                   d   e Zd ZdZ	 	 	 	 dedZd ZdfdZdgd	Zed
             Z	ede
ddfd            Zd Zed             Zed             Zed             Zed             ZdfdZdfdZdhdZdfdZd Zd Zd ZdfdZdidZd ZdjdZd Zdkd Zdfd!Zdfd"Z dld#Z!dfd$Z"dmd%Z#d& Z$d' Z%d( Z&d) Z'dfd*Z(d+dde)d,dfd-Z*d. Z+d/ Z,d0 Z-d1 Z.d2 Z/d3 Z0d4 Z1d5 Z2d6 Z3dnd7Z4d8 Z5dod9Z6dod:Z7dpd;Z8dfd<Z9e:dfd=Z;dqd>Z<dfd?Z=	 	 	 	 drdCZ>dD Z?dE Z@dfdFZAdfdGZB	 	 dsdHZCdtdIZDdndJZEdudLZFdndMZGdN ZHdtdOZIdP ZJdQ ZKdvdRZLdwdTZMdfdUZNdV ZOdWdWePePfdXZQeR	 	 dxdY            ZSdydZZTdzd\ZUd] ZV	 	 d{d^ZWeR	 	 d|d_            ZXd}d`ZYeda             ZZedb             Z[ed+e
dfdc            Z\dde)d,dfddZ]dS )~Tablea
  The (canonically pronounced 'teh') Table.

    Give in to the power of the Table!

    Creates an in-memory representation of a BIOM file. BIOM version 1.0 is
    based on JSON to provide the overall structure for the format while
    versions 2.0 and 2.1 are based on HDF5. For more information see [1]_
    and [2]_

    Paramaters
    ----------
    data : array_like
        An (N,M) sample by observation matrix represented as one of these
        types:
        * An 1-dimensional array of values
        * An n-dimensional array of values
        * An empty list
        * A list of numpy arrays
        * A list of dict
        * A list of sparse matrices
        * A dictionary of values
        * A list of lists
        * A sparse matrix of values
    observation_ids : array_like of str
        A (N,) dataset of the observation IDs, where N is the total number
        of IDs
    sample_ids : array_like of str
        A (M,) dataset of the sample IDs, where M is the total number of IDs
    observation_metadata : list of dicts, optional
        per observation dictionary of annotations where every key represents a
        metadata field that contains specific metadata information,
        ie taxonomy, KEGG pathway, etc
    sample_metadata : array_like of dicts, optional
        per sample dictionary of annotations where every key represents a
        metadata field that contains sample specific metadata information, ie
    table_id : str, optional
        A field that can be used to identify the table
    type : str, see notes
        The type of table represented
    create_date : str, optional
        Date that this table was built
    generated_by : str, optional
        Individual who built the table
    observation_group_metadata : list, optional
        group that contains observation specific group metadata information
        (e.g., phylogenetic tree)
    sample_group_metadata : list, optional
        group that contains sample specific group metadata information
        (e.g., relationships between samples)

    Attributes
    ----------
    shape
    dtype
    nnz
    matrix_data
    type
    table_id
    create_date
    generated_by
    format_version

    Notes
    -----
    Allowed table types are None, "OTU table", "Pathway table", "Function
    table", "Ortholog table", "Gene table", "Metabolite table", "Taxon table"

    Raises
    ------
    TableException
        When an invalid table type is provided.

    References
    ----------
    .. [1] http://biom-format.org/documentation/biom_format.html
    .. [2] D. McDonald, et al. "The Biological Observation Matrix (BIOM) format
       or: how I learned to stop worrying and love the ome-ome"
       GigaScience 2012 1:7
    NTc                    || _         || _        || _        |	| _        t          | _        t          |          sWt          |          t          |          f}|                    dd          }t          
                    |||          | _        n|                                | _        | j                            t                    | _        t          j        |          | _        t          j        |          | _        |.d |D             dhk    rd | _        nt)          |          | _        nd | _        |.d |D             dhk    rd | _        nt)          |          | _        nd | _        || _        |
| _        |rt1          |            d | _        d | _        |                                  |                     ||           d S )Ninput_is_denseF)r   rn   c                     h | ]}| S r^   r^   rb   rc   s     r>   	<setcomp>z!Table.__init__.<locals>.<setcomp>  s    ///!A///r?   Tc                     h | ]}| S r^   r^   r   s     r>   r   z!Table.__init__.<locals>.<setcomp>  s    444!A444r?   )r`   table_idcreate_dategenerated_byr$   format_versionr   rr   r   r   
_to_sparse_datatocsrastyper.   r5   r   _sample_ids_observation_ids_sample_metadatary   _observation_metadata_sample_group_metadata_observation_group_metadatar%   _sample_index
_obs_index_cast_metadata
_index_ids)r;   ro   observation_ids
sample_idsobservation_metadatasample_metadatar   r`   r   r   observation_group_metadatasample_group_metadatavalidateobservation_indexsample_indexkwargsrn   r   s                     r>   __init__zTable.__init__  s    	 &(0$ 	&))3z??;E#ZZ(8%@@N))$~05 * 7 7DJJ DJZ&&u--
:j11 "
? ; ;& 0////D8;;(,%%(-o(>(>%%$(D!+ 543444@@-1**-23G-H-H**)-D&&;#+E( 	TNNN ")<88888r?   c                     |t          | j                  | _        n|| _        |t          | j                  | _        dS || _        dS )zpSets lookups {id:index in _data}.

        Should only be called in constructor as this modifies state.
        N)r"   r   r   r   r   )r;   r   r   s      r>   r   zTable._index_ids  sO    
 !+D,<!=!=D!-D$()>??DOOO/DOOOr?   samplec                 T    |dk    r| j         S |dk    r| j        S t          |          )a}  Return the index lookups of the given axis

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            Axis to get the index dict. Defaults to 'sample'

        Returns
        -------
        dict
            lookups {id:index}

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.
        r   observation)r   r   r   r;   axiss     r>   _indexzTable._index"  s9    $ 8%%]""?""4(((r?   Fc                 n    || j         }t          |          r|S t                              |||          S )z0For converting vectors to a compatible self type)rG   r   r   r   )r;   vals	transposerG   s       r>   _conv_to_self_typezTable._conv_to_self_type;  s;    =JEd 	<K##D)U;;;r?   c                     |                                  }| j        dk    r|                    d          S t          j        |          S )zConverts a row/col vector to a dense numpy array.

        Always returns a 1-D row vector for consistency with numpy iteration
        over arrays.
        )r&   r&   r&   )toarrayrn   reshaper5   squeeze)vec	dense_vecs     r>   	_to_densezTable._to_denseE  sD     KKMM	9 $$Q''':i(((r?   c                 .   t          | t                    rLt          | j                  dk    r4|r t	          | ddt
          f         |          }nt	          | |          }|S t          | t                    r*|rt	          | j        |          }nt	          | |          }|S t          | t                    r"t          |           dk    rt          d          S t          | t                    r6t          | d         t                    rt          | |          }|r|j        }|S t          | t                    r6t          | d         t                    rt          | |          }|r|j        }|S t          | t                    r0t          | d                   rt          | |          }|r|j        }|S t          | t                    rt          | ||          }|r|j        }|S t          | t                    rft          | d         t                    rK|r5t          |           }t          |j        |j        |j        ff||          }nt'          | ||          }|S t          |           r| }|r|                                }|S t+          d          )zTry to return a populated scipy.sparse matrix.

        NOTE: assumes the max value observed in row and col defines the size of
        the matrix.
        r&   Nr   r   r   )rG   rn   rn   zUnknown input type)r4   r   rr   rn   nparray_to_sparser   Trx   r   list_nparray_to_sparsedictlist_dict_to_sparser   list_sparse_to_sparsedict_to_sparsecoo_arrays_to_sparsero   rowcollist_list_to_sparser   r   )valuesr   rG   r   rn   matds          r>   r   zTable._to_sparseV  s    fg&& 	3v|+<+<+A+A 7'qqq'z(:EBB'66Jfg&& .	7 7'%88'66J%% '	7#f++*:*:f%%%%% $	7*VAY*H*H $	7(77C eJ%% 	7*VAY*E*E 	7%fe44C eJ%% 	7*VAY*?*? 	7'66C eJ%% 	7 66C eJ%% 	7*VAY*E*E 	7 Fv&&*AFQUAEN+C16eE E E *&%uEEEJ 	7C &mmooJ !5666r?   c                     d } || j                   | _          || j                  | _        | j        r| j        nd| _        | j        r| j        nd| _        dS )zCasts all metadata to defaultdict to support default values.

        Should be called after any modifications to sample/observation
        metadata.
        c                 n   g }| (|                      d          t          |           k    rdS | | D ]t}t          d           }t          |t                    r|                    |           n"|nt          dt          |          z            |                    |           ut          |          S | S )zDo the actual castingNc                      d S r3   r^   r^   r?   r>   <lambda>z=Table._cast_metadata.<locals>.cast_metadata.<locals>.<lambda>  s    D r?   zUnable to cast metadata: %s)
countrr   r   r4   r   updater   reprrX   ry   )r}   
default_mditemr   s       r>   cast_metadataz+Table._cast_metadata.<locals>.cast_metadata  s    J~88D>>SWW,,4~ 
) 
)D#LL11A!$-- 9,-J-1$ZZ.8 9 9 9%%a((((Z(((Ir?   N)r   r   r   r   )r;   r   s     r>   r   zTable._cast_metadata  s    	 	 	, !.d.C D D%2]43M%N%N" *5D''04 	#
 /:D,,59 	(((r?   c                     | j         j        S )z.The shape of the underlying contingency matrix)r   rn   r;   s    r>   rn   zTable.shape       zr?   c                     | j         j        S )z<The type of the objects in the underlying contingency matrix)r   rG   r   s    r>   rG   zTable.dtype  r   r?   c                 L    | j                                          | j         j        S )z@Number of non-zero elements of the underlying contingency matrix)r   eliminate_zerosnnzr   s    r>   r   z	Table.nnz  s"     	
""$$$z~r?   c                     | j         S )zThe sparse matrix object)r   r   s    r>   matrix_datazTable.matrix_data  s     zr?   c                 h    |dvrt          |          |dk    r| j        d         n| j        d         S )a  Return the length of an axis

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            The axis to operate on

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> from biom import example_table
        >>> print(example_table.length(axis='sample'))
        3
        >>> print(example_table.length(axis='observation'))
        2
        r   r   r   r&   r   )r   rn   r   s     r>   lengthzTable.length  s>    * 000"4((( $ 0 0tz!}}djmCr?   c                     |dk    r,| j         | j                             |           dS || _         dS |dk    r,| j        | j                            |           dS || _        dS t          |          )a  Take a dict of group metadata and add it to an axis

        Parameters
        ----------
        group_md : dict of tuples
            `group_md` should be of the form ``{category: (data type, value)``
        axis : {'sample', 'observation'}, optional
            The axis to operate on

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.
        r   Nr   )r   r   r   r   )r;   group_mdr   s      r>   add_group_metadatazTable.add_group_metadata  s     8*6+228<<<<<.6+++]""/;077AAAAA3;000"4(((r?   wholec                    |dk    rddg}n|dv r|g}nt          d|z            |,|dk    rd| _        d| _        n|dk    rd| _        nd| _        dS |D ]}|                     |          t	          |                     |          |                     |                    D ]\  }}|D ]	}||v r||= 
d |                     |          D             }|d	hk    r|dk    rd| _        d| _        dS )
aI  Remove metadata from an axis

        Parameters
        ----------
        keys : list of str, optional
            The keys to remove from metadata. If None, all keys from the axis
            are removed.
        axis : {'sample', 'observation', 'whole'}, optional
            The axis to operate on. If 'whole', the operation is applied to
            both the sample and observation axes.

        Raises
        ------
        UnknownAxisError
            If the requested axis does not exist.

        Examples
        --------
        >>> from biom import Table
        >>> import numpy as np
        >>> tab = Table(np.array([[1, 2], [3, 4]]),
        ...             ['O1', 'O2'],
        ...             ['S1', 'S2'],
        ...             sample_metadata=[{'barcode': 'ATGC', 'env': 'A'},
        ...                              {'barcode': 'GGTT', 'env': 'B'}])
        >>> tab.del_metadata(keys=['env'])
        >>> for id, md in zip(tab.ids(), tab.metadata()):
        ...     print(id, list(md.items()))
        S1 [('barcode', 'ATGC')]
        S2 [('barcode', 'GGTT')]
        r  r   r   r   z%s is not recognizedNr   c                     h | ]}|sd nd	S )TFr^   )rb   r}   s     r>   r   z%Table.del_metadata.<locals>.<setcomp>A  s1     9 9 9 $&0tt5 9 9 9r?   T)r   r   r   metadatar{   ids)	r;   keysr   axesaxr   r}   kemptiess	            r>   del_metadatazTable.del_metadata  sk   @ 7??m,DD...6DD"#9D#@AAA<w(,%-1**!!(,%%-1*F 	6 	6B}}"}%%-TXX2X..20F0FGG " "2 " "ABwwqE"
9 9!%B!7!79 9 9G4("">>,0D))15D.!	6 	6r?   c                    |                      |          }|d                                D ]N\  }}|                     ||          r2|                     ||          }||                             |           Ons|                     |          }|dk    r!t          fd|D                       | _        n6|dk    r!t          fd|D                       | _        nt          |          | 
                                 dS )a  Take a dict of metadata and add it to an axis.

        Parameters
        ----------
        md : dict of dict
            `md` should be of the form ``{id: {dict_of_metadata}}``
        axis : {'sample', 'observation'}, optional
            The axis to operate on
        r  Nr   c              3   4   K   | ]}|v r|         nd V  d S r3   r^   rb   id_r}   s     r>   	<genexpr>z%Table.add_metadata.<locals>.<genexpr>\  sL       .C .C7:sbyyBsGGd.C .C .C .C .C .Cr?   r   c              3   4   K   | ]}|v r|         nd V  d S r3   r^   r  s     r>   r  z%Table.add_metadata.<locals>.<genexpr>_  sL       3C 3C7:sbyyBsGGd3C 3C 3C 3C 3C 3Cr?   )r  itemsexistsindexr   r  ry   r   r   r   r   )r;   r}   r   r  r  md_entryrK   r  s    `      r>   add_metadatazTable.add_metadataI  s_    ==d=++!# 3 3X;;s;.. 3**St*44CSM((2223
 (((%%Cx(- .C .C .C .C>A.C .C .C )C )C%%&&-2 3C 3C 3C 3C>A3C 3C 3C .C .C** 't,,,r?   c                    |                                  rt          d          	 |\  }}n#  t          d          xY wt          |t                    r$t          |t                    rt          d          t          |t                    r2|j        |j        |                     |          S t          d          t          |t                    r2|j        |j        |                     |          S t          d          | j        	                                dk    r| j        
                                | _        | j        ||f         S )a?  Handles row or column slices

        Slicing over an individual axis is supported, but slicing over both
        axes at the same time is not supported. Partial slices, such as
        `foo[0, 5:10]` are not supported, however full slices are supported,
        such as `foo[0, :]`.

        Parameters
        ----------
        args : tuple or slice
            The specific element (by index position) to return or an entire
            row or column of the data.

        Returns
        -------
        float or spmatrix
            A float is return if a specific element is specified, otherwise a
            spmatrix object representing a vector of sparse data is returned.

        Raises
        ------
        IndexError
            - If the matrix is empty
            - If the arguments do not appear to be a tuple
            - If a slice on row and column is specified
            - If a partial slice is specified

        Notes
        -----
        Switching between slicing rows and columns is inefficient.  Slicing of
        rows requires a CSR representation, while slicing of columns requires a
        CSC representation, and transforms are performed on the data if the
        data are not in the required representation. These transforms can be
        expensive if done frequently.

        .. shownumpydoc
        z4Cannot retrieve an element from an empty/null table.zMust specify (row, col).zCan only slice a single axis.Nz'Can only handle full : slices per axis.coo)is_empty
IndexErrorr4   slicestartstop_get_col_get_rowr   	getformatr   )r;   argsr   r   s       r>   __getitem__zTable.__getitem__e  sI   L ==?? 	' & ' ' '	9HC	97888c5!! 	>je&<&< 	><===c5!! 	(y SX%5}}S))) !JKKKU## 		(y SX%5}}S))) !JKKKz##%%..!Z--//
:c3h''s   + <c                 r    | j                                         | _         | j                             |          S )a  Return the row at ``row_idx``.

        A row vector will be returned as a scipy.sparse matrix in csr format.

        Notes
        -----
        Switching between slicing rows and columns is inefficient.  Slicing of
        rows requires a CSR representation, while slicing of columns requires a
        CSC representation, and transforms are performed on the data if the
        data are not in the required representation. These transforms can be
        expensive if done frequently.

        )r   r   getrow)r;   row_idxs     r>   r!  zTable._get_row  s/     Z%%''
z  )))r?   c                 r    | j                                         | _         | j                             |          S )a   Return the column at ``col_idx``.

        A column vector will be returned as a scipy.sparse matrix in csc
        format.

        Notes
        -----
        Switching between slicing rows and columns is inefficient.  Slicing of
        rows requires a CSR representation, while slicing of columns requires a
        CSC representation, and transforms are performed on the data if the
        data are not in the required representation. These transforms can be
        expensive if done frequently.

        )r   tocscgetcol)r;   col_idxs     r>   r   zTable._get_col  s/     Z%%''
z  )))r?   c                 X   t          |                     |                    t          |j                  z  }t          |          dk    rt	          d          |                     ||d          }|                                 |j        |                    |                   }||fS )a8   Aligns dataframe against biom table, only keeping common ids.

        Parameters
        ----------
        metadata : pd.DataFrame
            The metadata, either respect to the sample metadata
            or observation metadata.
        axis : {'sample', 'observation'}
            The axis on which to operate.

        Returns
        -------
        biom.Table
            A filtered biom table.
        pd.DataFrame
            A filtered metadata table.

        Examples
        --------
        >>> from biom import Table
        >>> import numpy as np
        >>> import pandas as pd
        >>> table = Table(np.array([[0, 0, 1, 1],
        ...                         [2, 2, 4, 4],
        ...                         [5, 5, 3, 3],
        ...                         [0, 0, 0, 1]]),
        ...               ['o1', 'o2', 'o3', 'o4'],
        ...               ['s1', 's2', 's3', 's4'])
        >>> metadata = pd.DataFrame([['a', 'control'],
        ...                          ['c', 'diseased'],
        ...                          ['b', 'control']],
        ...                         index=['s1', 's3', 's2'],
        ...                         columns=['Barcode', 'Treatment'])
        >>> res_table, res_metadata = table.align_to_dataframe(metadata)
        >>> print(res_table)
        # Constructed from biom file
        #OTU ID	s1	s2	s3
        o1	0.0	0.0	1.0
        o2	2.0	2.0	4.0
        o3	5.0	5.0	3.0
        >>> print(res_metadata)
           Barcode Treatment
        s1       a   control
        s2       b   control
        s3       c  diseased
        r  r   z*No common ids between table and dataframe.Fr   inplace)rt   r  r  rr   r   filterremove_emptyloc)r;   r  r   r  tr}   s         r>   align_to_dataframezTable.align_to_dataframe  s    ^ $(((%%&&X^)<)<<s88q== !MNNNKK$K66	\!%%T%**+"ur?   r   c                    d |                                 D             }|t          |                     |                    z  }t          |          dk    rt	          d          |                    |          }|                     ||d          }|                                 |                                 d |                                 D             }|	                    ||          }||fS )	al   Aligns biom table against tree, only keeping common ids.

        Parameters
        ----------
        tree : skbio.TreeNode
            The tree object, either respect to the sample metadata
            or observation metadata.
        axis : {'sample', 'observation'}
            The axis on which to operate.

        Returns
        -------
        biom.Table
            A filtered biom table.
        skbio.TreeNode
            A filtered skbio TreeNode object.

        Examples
        --------
        >>> from biom import Table
        >>> import numpy as np
        >>> from skbio import TreeNode
        >>> table = Table(np.array([[0, 0, 1, 1],
        ...                         [2, 2, 4, 4],
        ...                         [5, 5, 3, 3],
        ...                         [0, 0, 0, 1]]),
        ...               ['o1', 'o2', 'o3', 'o4'],
        ...               ['s1', 's2', 's3', 's4'])
        >>> tree = TreeNode.read([u"((o1,o2)f,o3)r;"])
        >>> res_table, res_tree = table.align_tree(tree)
        >>> print(res_table)
        # Constructed from biom file
        #OTU ID	s1	s2	s3	s4
        o1	0.0	0.0	1.0	1.0
        o2	2.0	2.0	4.0	4.0
        o3	5.0	5.0	3.0	3.0
        >>> print(res_tree.ascii_art())
                            /-o1
                  /f-------|
        -r-------|          \-o2
                 |
                  \-o3
        c                     h | ]	}|j         
S r^   r   )rb   rU   s     r>   r   z#Table.align_tree.<locals>.<setcomp>.  s    ,,,1,,,r?   r  r   z%No common ids between table and tree.)namesFr-  c                     g | ]	}|j         
S r^   r6  )rb   ns     r>   re   z$Table.align_tree.<locals>.<listcomp>6  s    ...A...r?   )
tipsrt   r  rr   r   shearr/  r0  prune
sort_order)r;   treer   r:  common_tips_tree_tableorders           r>   
align_treezTable.align_tree  s    X -,		,,,St!4!4555{q   !HIII


--[tUCC.....""5t"44u}r?   c                     |                                  rt          d          t          fd|                     |          D                       S )a  Reduce over axis using function `f`

        Parameters
        ----------
        f : function
            The function to use for the reduce operation
        axis : {'sample', 'observation'}
            The axis on which to operate

        Returns
        -------
        numpy.array
            A one-dimensional array representing the reduced rows
            (observations) or columns (samples) of the data matrix

        Raises
        ------
        UnknownAxisError
            If `axis` is neither "sample" nor "observation"
        TableException
            If the table's data matrix is empty

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 table

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'foo': 'bar'}, {'x': 'y'}], None)

        Create a reduce function

        >>> func = lambda x, y: x + y

        Reduce table on samples

        >>> table.reduce(func, 'sample') # doctest: +NORMALIZE_WHITESPACE
        array([  1.,   3.,  43.])

        Reduce table on observations

        >>> table.reduce(func, 'observation') # doctest: +NORMALIZE_WHITESPACE
        array([  1.,  46.])
        zCannot reduce an empty tablec                 0    g | ]}t          |          S r^   )r   )rb   rL   fs     r>   re   z Table.reduce.<locals>.<listcomp>o  s!    HHHq!HHHr?   r  )r  r   r   	iter_data)r;   rF  r   s    ` r>   r   zTable.reduce:  sY    ` ==?? 	A !?@@@ HHHHdnn$n.G.GHHHIIIr?   c                    |dk    rd}n!|dk    rd}n|dk    rd}nt          |          t          j        t          j        | j                            |                              }| |j        dk    r|                    d          }|S )	a_  Returns the sum by axis

        Parameters
        ----------
        axis : {'whole', 'sample', 'observation'}, optional
            The axis on which to operate.

        Returns
        -------
        numpy.array or float
            If `axis` is "whole", returns an float representing the whole
            table sum. If `axis` is either "sample" or "observation", returns a
            numpy.array that holds a sum for each sample or observation,
            respectively.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Add all values in the table:

        >>> table.sum()
        array(47.0)

        Add all values per sample:

        >>> table.sum(axis='sample') # doctest: +NORMALIZE_WHITESPACE
        array([  1.,  3.,  43.])

        Add all values per observation:

        >>> table.sum(axis='observation') # doctest: +NORMALIZE_WHITESPACE
        array([  1.,  46.])
        r  Nr   r   r   r&   r  r^   )r   r5   r   r   r   sumrn   r   )r;   r   
matrix_sums      r>   rI  z	Table.sumq  s    R 7??DDXDD]""DD"4(((Z
4:>>t>+D+D E EFF
 
 0B 6 6#++A..Jr?   c                    t          |                                           }t          |                     d                    }| j                                        dk    r| j                                        | _        |                     | j                            d          |                                 dd         |                     d          dd         ||| j                  S )a0  Transpose the contingency table

        The returned table will be an entirely new table, including copies of
        the (transposed) data, sample/observation IDs and metadata.

        Returns
        -------
        Table
            Return a new table that is the transpose of caller table.
        r   r  lilTcopyN)	r   r  r   r"  r   r=   r   r  r   )r;   sample_md_copyobs_md_copys      r>   r   zTable.transpose  s     "$--//22t}}-}@@AA:!!U** ))++DJ ~~dj222=="hhjjmTXX=X-I-I!!!-L,k4=J J 	Jr?      c                 ,   |dk    rt          d          |dk    rt          d          |                     d          d|         }|                     d          d|         }|                     |dd	          }|                    |d          S )
av  Get the first n rows and m columns from self

        Parameters
        ----------
        n : int, optional
            The number of rows (observations) to get. This number must be
            greater than 0. If not specified, 5 rows will be retrieved.

        m : int, optional
            The number of columns (samples) to get. This number must be
            greater than 0. If not specified, 5 columns will be
            retrieved.

        Notes
        -----
        Like `head` for Linux like systems, requesting more rows (or columns)
        than exists will silently work.

        Raises
        ------
        IndexError
            If `n` or `m` are <= 0.

        Returns
        -------
        Table
            The subset table.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table
        >>> data = np.arange(100).reshape(5, 20)
        >>> obs_ids = ['O%d' % i for i in range(1, 6)]
        >>> samp_ids = ['S%d' % i for i in range(1, 21)]
        >>> table = Table(data, obs_ids, samp_ids)
        >>> print(table.head())  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3  S4  S5
        O1  0.0 1.0 2.0 3.0 4.0
        O2  20.0 21.0 22.0 23.0 24.0
        O3  40.0 41.0 42.0 43.0 44.0
        O4  60.0 61.0 62.0 63.0 64.0
        O5  80.0 81.0 82.0 83.0 84.0

        r   zn cannot be <= 0.zm cannot be <= 0.r   r  Nr   Fr-  )r  r  r/  )r;   r9  rc   row_idscol_idstables         r>   headz
Table.head  s    ^ 660111660111(((..rr2((())"1"-G-GG||G(|333r?   c                 T    |dk    r| j         S |dk    r| j        S t          |          )az  Return the group metadata of the given axis

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            Axis to search for the group metadata. Defaults to 'sample'

        Returns
        -------
        dict
            The corresponding group metadata for the given axis

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table, with group observation metadata and no group
        sample metadata:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> group_observation_md = {'tree': ('newick', '(O1:0.3,O2:0.4);')}
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               observation_group_metadata=group_observation_md)

        Get the observation group metadata:

        >>> table.group_metadata(axis='observation')
        {'tree': ('newick', '(O1:0.3,O2:0.4);')}

        Get the sample group metadata:

        >> table.group_metadata()
        None
        r   r   )r   r   r   r   s     r>   group_metadatazTable.group_metadata  s;    R 8..]""33"4(((r?   c                 T    |dk    r| j         S |dk    r| j        S t          |          )ak  Return the ids along the given axis

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            Axis to return ids from. Defaults to 'sample'

        Returns
        -------
        1-D numpy array
            The ids along the given axis

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Get the ids along the observation axis:

        >>> print(table.ids(axis='observation'))
        ['O1' 'O2']

        Get the ids along the sample axis:

        >>> print(table.ids())
        ['S1' 'S2' 'S3']
        r   r   )r   r   r   r   s     r>   r  z	Table.ids.  s;    L 8##]""(("4(((r?   c                    dt          d |                                D                       z  }t          |                     |          j        |          }t          |                     |                    D ]:\  }}|r||vrt          d|d|d          |                    ||          ||<   ;|r<t          |          t          t          |                    k    rt          d          |r| n| 
                                }	|d	k    r||	_        n||	_        |	                    d
d
           t          |	           |	S )a  Update the ids along the given axis.

        Parameters
        ----------
        id_map : dict
            Mapping of old to new ids. All keys and values in this dict should
            be strings.
        axis : {'sample', 'observation'}, optional
            Axis to search for `id`. Defaults to 'sample'
        strict : bool, optional
            If ``True``, raise an error if an id is present in the given axis
            but is not a key in ``id_map``. If False, retain old identifier
            for ids that are present in the given axis but are not keys in
            ``id_map``.
        inplace : bool, optional
            If ``True`` the ids are updated in ``self``; if ``False`` the ids
            are updated in a new table is returned.

        Returns
        -------
        Table
            Table object where ids have been updated.

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.
        TableException
            If an id from ``self`` is not in ``id_map`` and ``strict`` is
            ``True``.

        Examples
        --------
        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Define a mapping of old to new sample ids:

        >>> id_map = {'S1':'s1.1', 'S2':'s2.2', 'S3':'s3.3'}

        Get the ids along the sample axis in the table:

        >>> print(table.ids(axis='sample'))
        ['S1' 'S2' 'S3']

        Update the sample ids and get the ids along the sample axis in the
        updated table:

        >>> updated_table = table.update_ids(id_map, axis='sample')
        >>> print(updated_table.ids(axis='sample'))
        ['s1.1' 's2.2' 's3.3']
        U%dc                 ,    g | ]}t          |          S r^   rr   r   s     r>   re   z$Table.update_ids.<locals>.<listcomp>  s     A A AAQ A A Ar?   r  r   zMapping not provided for z identifier: z>. If this identifier should not be updated, pass strict=False.zDuplicate IDs observedr   N)r   r   r   r  sizerF   r   r   rr   rt   rN  r   r   r   r%   )
r;   id_mapr   strictr.  	str_dtypeupdated_idsrK   old_idresults
             r>   
update_idszTable.update_ids[  sn   n C A A A A ABBB	DHH$H//4IFFF$TXX4X%8%899 	: 	:KC &&..$n ttVVV%& & &
  &zz&&99K  	?;3s;'7'7#8#888$%=>>> !1diikk8!,F&1F#$%%% 	r?   c                     |dk    r| j                                         S |dk    r| j                                         S t          |          )au  Returns the internal data in the correct sparse representation

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            Axis to search for `id`. Defaults to 'sample'

        Returns
        -------
        sparse matrix
            The data in csc (axis='sample') or csr (axis='observation')
            representation
        r   r   )r   r)  r   r   r   s     r>   _get_sparse_datazTable._get_sparse_data  sP     8:##%%%]"":##%%%"4(((r?   c                     |dk    r| j         }n|dk    r| j        }nt          |          ||S |                     ||          }|||         ndS )a9  Return the metadata of the identified sample/observation.

        Parameters
        ----------
        id : str
            ID of the sample or observation whose index will be returned.
        axis : {'sample', 'observation'}
            Axis to search for `id`.

        Returns
        -------
        defaultdict or None
            The corresponding metadata ``defaultdict`` or ``None`` of that axis
            does not have metadata.

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.
        UnknownIDError
            If provided an unrecognized sample/observation ID.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table, with observation metadata and no sample
        metadata:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'foo': 'bar'}, {'x': 'y'}], None)

        Get the metadata of the observation with ID "O2":

        >>> # casting to `dict` as the return is `defaultdict`
        >>> dict(table.metadata('O2', 'observation'))
        {'x': 'y'}

        Get the metadata of the sample with ID "S1":

        >>> table.metadata('S1', 'sample') is None
        True
        r   r   Nr  )r   r   r   r  )r;   idr   r}   rK   s        r>   r  zTable.metadata  sn    \ 8&BB]""+BB"4(((:Ijj$j''.r#wwd2r?   c                 f    |                      |          }||vrt          ||          ||         S )a  Return the index of the identified sample/observation.

        Parameters
        ----------
        id : str
            ID of the sample or observation whose index will be returned.
        axis : {'sample', 'observation'}
            Axis to search for `id`.

        Returns
        -------
        int
            Index of the sample/observation identified by `id`.

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.
        UnknownIDError
            If provided an unrecognized sample/observation ID.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Get the index of the observation with ID "O2":

        >>> table.index('O2', 'observation')
        1

        Get the index of the sample with ID "S1":

        >>> table.index('S1', 'sample')
        0
        r  )r   r   )r;   ri  r   
idx_lookups       r>   r  zTable.index  s=    T [[d[++
Z T***"~r?   c                 f    | |                      |d          |                      |d          f         S )a)  Return value in the matrix corresponding to ``(obs_id, samp_id)``

        Parameters
        ----------
        obs_id : str
            The ID of the observation
        samp_id : str
            The ID of the sample

        Returns
        -------
        float
            The data value corresponding to the specified matrix position

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'Z3'])

        Retrieve the number of counts for observation `O1` in sample `Z3`.

        >>> print(table.get_value_by_ids('O2', 'Z3'))
        42.0

        See Also
        --------
        Table.data
        r   r   r  )r;   obs_idsamp_ids      r>   get_value_by_idszTable.get_value_by_ids5  s8    D DJJv}55JJw112 3 	3r?   c                 *    |                                  S )zcStringify self

        Default str output for a Table is just row/col ids and data values
        delimited_selfr   s    r>   __str__zTable.__str__Z  s    
 ""$$$r?   c                     | j         \  }}d||t          | j                  | j        |                                 dz  fz  S )zReturns a high-level summary of the table's properties

        Returns
        -------
        str
            A string detailing the shape, class, number of nonzero entries, and
            table density
        z/%d x %d %s with %d nonzero entries (%d%% dense)d   )rn   r   r=   r   get_table_density)r;   rowscolss      r>   __repr__zTable.__repr__a  sK     Z
d@$T^,,dh""$$s*D
 
 	
r?   c                 2    ||                      |          v S )a  Returns whether id exists in axis

        Parameters
        ----------
        id: str
            id to check if exists
        axis : {'sample', 'observation'}, optional
            The axis to check

        Returns
        -------
        bool
            ``True`` if `id` exists, ``False`` otherwise

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Check whether sample ID is in the table:

        >>> table.exists('S1')
        True
        >>> table.exists('S4')
        False

        Check whether an observation ID is in the table:

        >>> table.exists('O1', 'observation')
        True
        >>> table.exists('O3', 'observation')
        False
        r  )r   )r;   ri  r   s      r>   r  zTable.existsp  s    N T[[d[++++r?   	#OTU IDc           	      2   d |                                  rt          d          |                    fd|                                 D                       }||t          d          ||t          d          |rd| | | d| g}n
d| | | g}||                    d |D                        |                     d	
          }	|                     d	
          }
|dnd}t          |
|                                           D ]\  }}|                    t          t          | 
                    |                              } |          }|ro|	m|	| j        |                  } ||                    |d                    }|||d||}||                    |           |                    |           ||||}||                    |           |                    |           d                    |          S )a  Return self as a string in a delimited form

        Default str output for the Table is just row/col ids and table data
        without any metadata

        Including observation metadata in output: If ``header_key`` is not
        ``None``, the observation metadata with that name will be included
        in the delimited output. If ``header_value`` is also not ``None``, the
        observation metadata will use the provided ``header_value`` as the
        observation metadata name (i.e., the column header) in the delimited
        output.

        ``metadata_formatter``: a function which takes a metadata entry and
        returns a formatted version that should be written to file

        ``observation_column_name``: the name of the first column in the output
        table, corresponding to the observation IDs. For example, the default
        will look something like:

            #OTU ID	Sample1	Sample2
            OTU1	10	2
            OTU2	4	8
        c                 t    t          | t                    r|                     d          S t          |           S rO   )r4   rR   rS   rv   )r   s    r>   to_utf8z%Table.delimited_self.<locals>.to_utf8  s1    !U## xx'''1vvr?   z+Cannot delimit self if I don't have data...c                 &    g | ]} |          S r^   r^   )rb   r   r  s     r>   re   z(Table.delimited_self.<locals>.<listcomp>  s!    >>>awwqzz>>>r?   Nz4You need to specify both header_key and header_valuez# Constructed from biom filer|  c                     g | ]}|d z   S )
r^   rb   r   s     r>   re   z(Table.delimited_self.<locals>.<listcomp>  s    !9!9!9Q!D&!9!9!9r?   r   r  rq   r  )r  r   joinr  
writelinesr  r{   	_iter_obsmaprv   r   r   r   rX   write)r;   delim
header_keyheader_valuemetadata_formatterobservation_column_name	direct_iosamp_idsoutputobs_metadataiterableend_linern  
obs_valuesstr_obs_valsr}   md_out
output_rowr  s                     @r>   rs  zTable.delimited_self  s   4	 	 	 ==?? 	P !NOOO::>>>>488::>>>??!#$JL L L #!$JL L L  	E.*MEM8MM|MMFF
 50C%CCCEF    !9!9&!9!9!9:::}}-}88888//"*22"%h&*nn&6&6#8 #8 	0 	0FJ ::c#t~~j/I/I&J&JKKLWV__F 0l6!$/&"9:++BFF:t,D,DEEVUULLL&&((D
 $MM*----OOJ//// $VUULL((D
$MM*----OOJ////yy   r?   c                 r    |                                  j        r|                      d          j        sdS dS )zCheck whether the table is empty

        Returns
        -------
        bool
            ``True`` if the table is empty, ``False`` otherwise
        r   r  TF)r  r^  r   s    r>   r  zTable.is_empty  s8     xxzz 	dhhMh&B&B&G 	45r?   c                 *    |                                  S )zSee ``biom.table.Table.iter``)iterr   s    r>   __iter__zTable.__iter__  s    yy{{r?   c              #      K   t          | j        d                   D ]/}|                     |          }|                    d          V  0dS )z,Return sample vectors of data matrix vectorsr&   TrM  N)rangern   r   r   )r;   ccolvecs      r>   
_iter_sampzTable._iter_samp  s`      tz!}%% 	. 	.A ]]1%%F"""------		. 	.r?   c              #   r   K   t          | j        d                   D ]}|                     |          V  dS )z)Return observation vectors of data matrixr   N)r  rn   r!  )r;   rs     r>   r  zTable._iter_obs	  sH      tz!}%% 	# 	#A--""""""	# 	#r?   c                     d}|                                  sM| j        t          |                                           t          |                     d                    z  z  }|S )zReturns the fraction of nonzero elements in the table.

        Returns
        -------
        float
            The fraction of nonzero elements in the table
                r   r  )r  r   rr   r  )r;   densitys     r>   rw  zTable.get_table_density  sZ     }} 	NxDHHJJ#dhhMh.J.J*K*KKMG r?   c                 x   t          || j                  sdS | j        |j        k    sdS t          j        |                     d          |                    d                    sdS t          j        |                                 |                                          sdS t          j        |                     d          |                    d                    sdS t          j        |                                 |                                          sdS |                     |j                  sd	S d
S )z9For use in testing, describe how the tables are not equalz$Tables are not of comparable classeszTables are not the same typer   r  z Observation IDs are not the samezSample IDs are not the samez%Observation metadata are not the samez Sample metadata are not the samezData elements are not the samezTables appear equal	r4   r=   r`   r5   array_equalr  r  _data_equalityr   r;   others     r>   descriptive_equalityzTable.descriptive_equality  s#   %00 	:99yEJ&&11~dhhMh::#ii]i;;= = 	655~dhhjj%))++66 	100~dmmm??#nn-n@@B B 	;::~dmmoou~~/?/?@@ 	655""5;// 	433$$r?   c                 x   t          || j                  sdS | j        |j        k    rdS t          j        |                     d          |                    d                    sdS t          j        |                                 |                                          sdS t          j        |                     d          |                    d                    sdS t          j        |                                 |                                          sdS |                     |j                  sdS dS )z<Equality is determined by the data matrix, metadata, and IDsFr   r  Tr  r  s     r>   __eq__zTable.__eq__3  s   %00 	59
""5~dhhMh::#ii]i;;= = 	5~dhhjj%))++66 	5~dmmm??#nn-n@@B B 	5~dmmoou~~/?/?@@ 	5""5;// 	5tr?   c                     | j         j        |j        k    rdS | j         j        |j        k    rdS | j         j        |j        k    rdS | j                                         | _         |                                }| j         |k    j        dk    rdS dS )a  Return ``True`` if both matrices are equal.

        Matrices are equal iff the following items are equal:
        - shape
        - dtype
        - size (nnz)
        - matrix data (more expensive, so checked last)

        The sparse format does not need to be the same between the two
        matrices. ``self`` and ``other`` will be converted to csr format if
        necessary before performing the final comparison.

        Fr   T)r   rn   rG   r   r   r  s     r>   r  zTable._data_equalityH  s     :u{**5:u{**5:>UY&&5Z%%''
J%$q((5tr?   c                     | |k     S r3   r^   r  s     r>   __ne__zTable.__ne__g  s    EM""r?   c                     |dk    r!| dd|                      |d          f         }n6|dk    r!| |                      |d          ddf         }nt          |          |r|                     |          S |S )a   Returns data associated with an `id`

        Parameters
        ----------
        id : str
            ID of the sample or observation whose data will be returned.
        axis : {'sample', 'observation'}
            Axis to search for `id`.
        dense : bool, optional
            If ``True``, return data as dense

        Returns
        -------
        np.ndarray or scipy.sparse.spmatrix
            np.ndarray if ``dense``, otherwise scipy.sparse.spmatrix

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> from biom import example_table
        >>> example_table.data('S1', axis='sample')
        array([ 0.,  3.])

        See Also
        --------
        Table.get_value_by_ids

        r   Nr   )r  r   r   )r;   ri  r   densero   s        r>   ro   z
Table.dataj  s    B 84::b(3334DD]""

2}55qqq89DD"4((( 	>>$'''Kr?   c           
         |                      | j                                        |                     d                                          |                                                                 t	          |                     d                    t	          |                                           | j        | j                  S )zReturns a copy of the tabler   r  r_   )r=   r   rN  r  r   r  r   r`   r   s    r>   rN  z
Table.copy  s    ~~djoo//"hhMh::??AA"hhjjoo//&t}}-}'H'HII&t}}77"m#'9  . . 	.r?   c              #     K   |dk    r7|                                  D ] }|r|                     |          V  |V  !dS |dk    r7|                                 D ] }|r|                     |          V  |V  !dS t          |          )a  Yields axis values

        Parameters
        ----------
        dense : bool, optional
            Defaults to ``True``. If ``False``, yield compressed sparse row or
            compressed sparse columns if `axis` is 'observation' or 'sample',
            respectively.
        axis : {'sample', 'observation'}, optional
            Axis to iterate over.

        Returns
        -------
        generator
            Yields list of values for each value in `axis`

        Raises
        ------
        UnknownAxisError
            If axis other than 'sample' or 'observation' passed

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table
        >>> data = np.arange(30).reshape(3,10) # 3 X 10 OTU X Sample table
        >>> obs_ids = ['o1', 'o2', 'o3']
        >>> sam_ids = ['s%i' %i for i in range(1,11)]
        >>> bt = Table(data, observation_ids=obs_ids, sample_ids=sam_ids)

        Lets find the sample with the largest sum

        >>> sample_gen = bt.iter_data(axis='sample')
        >>> max_sample_count = max([sample.sum() for sample in sample_gen])
        >>> print(max_sample_count)
        57.0
        r   r   N)r  r   r  r   )r;   r  r   samp_vobs_vs        r>   rG  zTable.iter_data  s      L 8//++ ! ! !..000000 LLLL	! !
 ]""))      ..//////KKKK	    #4(((r?   c                 \   |                      |          }|                     |          }|dk    r|                                 }n*|dk    r|                                 }nt	          |          |dt          |          z  }|                     ||          }t          |||          S )a  Yields ``(value, id, metadata)``


        Parameters
        ----------
        dense : bool, optional
            Defaults to ``True``. If ``False``, yield compressed sparse row or
            compressed sparse columns if `axis` is 'observation' or 'sample',
            respectively.
        axis : {'sample', 'observation'}, optional
            The axis to iterate over.

        Returns
        -------
        GeneratorType
            A generator that yields (values, id, metadata)

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'Z3'])

        Iter over samples and keep those that start with an Z:

        >>> [(values, id, metadata)
        ...     for values, id, metadata in table.iter() if id[0]=='Z']
        [(array([  1.,  42.]), 'Z3', None)]

        Iter over observations and add the 2nd column of the values

        >>> col = [values[1] for values, id, metadata in table.iter()]
        >>> sum(col)
        46.0
        r  r   r   Nr3   r   r  )r  r  r  r  r   rr   rG  r{   )r;   r  r   r  r  iter_s         r>   r  z
Table.iter  s    P hhDh!!==d=++8OO%%EE]""NN$$EE"4(((S)HD665#x(((r?   c              #     K   |                      |          }|                     |          }|dt          |          z  }t          j        t          |                    d|z
  |rfd}nfd}t                    D ]o\  }}	||	         }
||	         }|                     |
||          } ||          D ]6}||         }||         }|                     |||          }||
|f|||ffV  7pdS )a  Pairwise iteration over self

        Parameters
        ----------
        dense : bool, optional
            Defaults to ``True``. If ``False``, yield compressed sparse row or
            compressed sparse columns if `axis` is 'observation' or 'sample',
            respectively.
        axis : {'sample', 'observation'}, optional
            The axis to iterate over.
        tri : bool, optional
            If ``True``, just yield [i, j] and not [j, i]
        diag : bool, optional
            If ``True``, yield [i, i]

        Returns
        -------
        GeneratorType
            Yields [(val_i, id_i, metadata_i), (val_j, id_j, metadata_j)]

        Raises
        ------
        UnknownAxisError

        Examples
        --------
        >>> from biom import example_table

        By default, only the upper triangle without the diagonal  of the
        resulting pairwise combinations is yielded.

        >>> iter_ = example_table.iter_pairwise()
        >>> for (val_i, id_i, md_i), (val_j, id_j, md_j) in iter_:
        ...     print(id_i, id_j)
        S1 S2
        S1 S3
        S2 S3

        The full pairwise combinations can also be yielded though.

        >>> iter_ = example_table.iter_pairwise(tri=False, diag=True)
        >>> for (val_i, id_i, md_i), (val_j, id_j, md_j) in iter_:
        ...     print(id_i, id_j)
        S1 S1
        S1 S2
        S1 S3
        S2 S1
        S2 S2
        S2 S3
        S3 S1
        S3 S2
        S3 S3

        r  Nr3   r&   c                     | z   d          S r3   r^   rK   diag_vinds    r>   tri_fz"Table.iter_pairwise.<locals>.tri_fO  s    3v:;;''r?   c                 V    t          j        d |          | z   d          g          S r3   )r5   r   r  s    r>   r  z"Table.iter_pairwise.<locals>.tri_fR  s-    y#dsd)SV-=!>???r?   r  )r  r  rr   r5   arangerF   ro   )r;   r  r   tridiagr  r  r  rK   r   id_imd_idata_ijid_jmd_jdata_jr  r  s                    @@r>   iter_pairwisezTable.iter_pairwise  sq     n ==d=++hhDh!!S)HiC!!T 	@( ( ( ( ( ( (@ @ @ @ @ @  nn 
	C 
	CFCq6DA;DYYt$eY<<FU3ZZ C C1v{4d%@@d+fdD-ABBBBBC
	C 
	Cr?   c           	          t          j         fd|D             t                    }                               }|t          j        |          |         }dk    rn j        dd|f         }                     |                     d          dd         |dd                              d          | j         j                  S dk    rj j        |ddf         }                     ||dd                                          dd         |                                  j         j                  S t                    )a  Return a new table with `axis` in `order`

        Parameters
        ----------
        order : iterable
            The desired order for axis
        axis : {'sample', 'observation'}, optional
            The axis to operate on

        Returns
        -------
        Table
            A table where the observations or samples are sorted according to
            `order`

        Examples
        --------

        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[1, 0, 4], [1, 3, 0]])
        >>> table = Table(data, ['O2', 'O1'], ['S2', 'S1', 'S3'])
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S2  S1  S3
        O2  1.0 0.0 4.0
        O1  1.0 3.0 0.0

        Sort the table using a list of samples:

        >>> sorted_table = table.sort_order(['S2', 'S3', 'S1'])
        >>> print(sorted_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S2	S3	S1
        O2	1.0	4.0	0.0
        O1	1.0	0.0	3.0


        Additionally you could sort the table's observations:

        >>> sorted_table = table.sort_order(['O1', 'O2'], axis="observation")
        >>> print(sorted_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S2	S1	S3
        O1	1.0	3.0	0.0
        O2	1.0	0.0	4.0

        c                 >    g | ]}                     |           S r  rm  )rb   r   r   r;   s     r>   re   z$Table.sort_order.<locals>.<listcomp>  s)    BBBq$**QT*22BBBr?   r   r  Nr   r   )
r5   r   r-   r  r   r=   r  r   r`   r   )r;   rB  r   fancyr  r   s   ` `   r>   r=  zTable.sort_ordera  sU   h BBBBBEBBB#NNN==d=++x))%0H8"111e8,C>>#"&(((">">qqq"A58"&--]-"C"CX"&-< < <
 ]"""5!!!8,C>>#"'(DHHJJqqqM"*DMMOOT]"&)- - -
 #4(((r?   c                 j    |                       ||                     |                    |          S )af  Return a table sorted along axis

        Parameters
        ----------
        sort_f : function, optional
            Defaults to ``biom.util.natsort``. A function that takes a list of
            values and sorts it
        axis : {'sample', 'observation'}, optional
            The axis to operate on

        Returns
        -------
        biom.Table
            A table whose samples or observations are sorted according to the
            `sort_f` function

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table:

        >>> data = np.asarray([[1, 0, 4], [1, 3, 0]])
        >>> table = Table(data, ['O2', 'O1'], ['S2', 'S1', 'S3'])
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S2  S1  S3
        O2  1.0 0.0 4.0
        O1  1.0 3.0 0.0

        Sort the order of samples in the table using the default natural
        sorting:

        >>> new_table = table.sort()
        >>> print(new_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O2  0.0 1.0 4.0
        O1  3.0 1.0 0.0

        Sort the order of observations in the table using the default natural
        sorting:

        >>> new_table = table.sort(axis='observation')
        >>> print(new_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S2  S1  S3
        O1  1.0 3.0 0.0
        O2  1.0 0.0 4.0

        Sort the samples in reverse order using a custom sort function:

        >>> sort_f = lambda x: list(sorted(x, reverse=True))
        >>> new_table = table.sort(sort_f=sort_f)
        >>> print(new_table)  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S3  S2  S1
        O2  4.0 1.0 0.0
        O1  0.0 1.0 3.0
        r  )r=  r  )r;   sort_fr   s      r>   sortz
Table.sort  s3    | vvdhhDh&9&9::FFFr?   c           	      Z   |r| n|                                  }|                    |          }|                    |          }|                     |          }|                    |          }|j        }	t          |	||||||          \  }	}}|	|_        |dk    r<||_        ||_        |	                    | j
                                         d           nA|dk    r;||_        ||_        |	                    d| j                                                    t          |           |S )a  Filter a table based on a function or iterable.

        Parameters
        ----------
        ids_to_keep : iterable, or function(values, id, metadata) -> bool
            If a function, it will be called with the values of the
            sample/observation, its id (a string) and the dictionary
            of metadata of each sample/observation, and must return a
            boolean. If it's an iterable, it must be a list of ids to
            keep.
        axis : {'sample', 'observation'}, optional
            It controls whether to filter samples or observations and
            defaults to "sample".
        invert : bool, optional
            Defaults to ``False``. If set to ``True``, discard samples or
            observations where `ids_to_keep` returns True
        inplace : bool, optional
            Defaults to ``True``. Whether to return a new table or modify
            itself.

        Returns
        -------
        biom.Table
            Returns itself if `inplace`, else returns a new filtered table.

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table, with observation metadata and sample
        metadata:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'full_genome_available': True},
        ...                {'full_genome_available': False}],
        ...               [{'sample_type': 'a'}, {'sample_type': 'a'},
        ...                {'sample_type': 'b'}])

        Define a function to keep only samples with sample_type == 'a'. This
        will drop sample S3, which has sample_type 'b':

        >>> filter_fn = lambda val, id_, md: md['sample_type'] == 'a'

        Get a filtered version of the table, leaving the original table
        untouched:

        >>> new_table = table.filter(filter_fn, inplace=False)
        >>> print(table.ids())
        ['S1' 'S2' 'S3']
        >>> print(new_table.ids())
        ['S1' 'S2']

        Using the same filtering function, discard all samples with sample_type
        'a'. This will keep only sample S3, which has sample_type 'b':

        >>> new_table = table.filter(filter_fn, inplace=False, invert=True)
        >>> print(table.ids())
        ['S1' 'S2' 'S3']
        >>> print(new_table.ids())
        ['S3']

        Filter the table in-place using the same function (drop all samples
        where sample_type is not 'a'):

        >>> table.filter(filter_fn)
        2 x 2 <class 'biom.table.Table'> with 2 nonzero entries (50% dense)
        >>> print(table.ids())
        ['S1' 'S2']

        Filter out all observations in the table that do not have
        full_genome_available == True. This will filter out observation O2:

        >>> filter_fn = lambda val, id_, md: md['full_genome_available']
        >>> table.filter(filter_fn, axis='observation')
        1 x 2 <class 'biom.table.Table'> with 0 nonzero entries (0% dense)
        >>> print(table.ids(axis='observation'))
        ['O1']

        r  )invertr&   Nr   )rN  r  r  r   _axis_to_numr   r'   r   r   r   r   r   r   r   r%   )
r;   ids_to_keepr   r  r.  rU  r  r  r  arrs
             r>   r/  zTable.filter  s7   n  0TYY[[>>t>,,iiTi""&&!!t!,,k$S%(%-%*%0%),24 4 4S( 199 #E%-E"T_1133T::::QYY%(E"*2E'T4#5#:#:#<#<===r?   c           
   #     K   i }|                      d|          D ]\  }}} |||          }t          |t                    st          |          }||vrg g g g||<   ||         d                             |           ||         d                             |           ||         d                             |           |                     |                     |                    }|                                D ]\  }\  }}	}
|dk    rc|                     |	d	          }|}|
}| 	                    d
          dd         }|
|dd         nd}d| j
                                        i}nf|d
k    r`|                     |	d	          }|}|
}| 	                                dd         }|
|dd         nd}d| j                                        i}|t          |||||| j        f| j        dd|fV  dS )a  Yields partitions

        Parameters
        ----------
        f : function
            `f` is given the ID and metadata of the vector and must return
            what partition the vector is part of.
        axis : {'sample', 'observation'}, optional
            The axis to iterate over

        Returns
        -------
        GeneratorType
            A generator that yields (partition, `Table`)

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table
        >>> from biom.util import unzip

        Create a 2x3 BIOM table, with observation metadata and sample
        metadata:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'full_genome_available': True},
        ...                {'full_genome_available': False}],
        ...               [{'sample_type': 'a'}, {'sample_type': 'a'},
        ...                {'sample_type': 'b'}])

        Define a function to bin by sample_type

        >>> f = lambda id_, md: md['sample_type']

        Partition the table and view results

        >>> bins, tables = table.partition(f)
        >>> print(bins[1]) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  0.0 0.0
        O2  1.0 3.0
        >>> print(tables[1]) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S3
        O1  1.0
        O2  42.0
        Fr  r   r   r&      r  r   Tr   r   Nr   r   )r`   r   )r  r4   r   ry   rX   r  _invert_axisr  r   r  r   rN  r   r   r   r`   )r;   rF  r   
partitionsr   r  r}   partr  r   r  ro   r  samp_mdobs_idsobs_mdindicess                    r>   	partitionzTable.partition^	  se     d 
 "YYUY>> 	+ 	+MD#r1S"::D dH-- #T{{:%%$&B<
4 tQ&&s+++tQ&&t,,,tQ&&r****]] 1 1$ 7 7]88-7-=-=-?-? 	) 	))D)3x..v.FF"(((66qqq9"$.AAAd.0D0D0F0FG&&..v.GG!88::aaa=#%>"QQQ%%t)4+=+B+B+D+DEdGXvw"m)26)e) ) ') ) ) ) ) ) )#	) 	)r?   r&   addPathc           	      *
  
 g }g }|rg }nd}|dvrt          d|z            
fd}
dk    rd}d }n
dk    rd	}d
 }nt          
          |r|rt          d          i }i }t           ||            D ]l\  }} |||          }d}	 	 t	          |          \  }}n4# t
          $ r |	rd|d|}t          |          Y :t          $ r Y nw xY w|||<   |dz  }S|||<   md t          t          |                    D             }|dk    rt          j
        n| j        }
dk    rBt          t          |                     d                    t          |          f|          }nAt          t          |                     d                    t          |          f|          }|                     
d	          D ]\  }}} |||          }	 	 t	          |          \  }}n4# t
          $ r |	rd|d|}t          |          Y :t          $ r Y nw xY w||         }|dk    r3t          |j        |j                  D ]\  } }!|| |fxx         |!z  cc<   n?||         }"||"z  }#t          |#j        |#j                  D ]\  } }!|| |fxx         |!z  cc<   Ɍ|rSt          |                                t'          d                    D ]"\  }$}%|                    |||$         i           #d t          |                                t'          d                    D             }
dk    rt+          |j                  }nt/          |          }|                     |          }&n|d }|                     |
          D ]\  }}' ||'          \  }(})t          |(          |k     r' ||'|                     
                    }*|r|*t          |(          z  }*|                    |                     |*                     |                    |           |r)|                    d|(                                i           |                     ||          }&t9          | d           |                     |                     
                    }
dk    r)|}+|},|                     d          dd         }-||nd}.n&|                                 dd         }+|}-|}.||nd},t=          |&|-|+|.|,| j        | j                   S )a  Collapse partitions in a table by metadata or by IDs

        Partition data by metadata or IDs and then collapse each partition into
        a single vector.

        If `include_collapsed_metadata` is ``True``, the metadata for the
        collapsed partition will be a category named 'collapsed_ids', in which
        a list of the original ids that made up the partition is retained

        The remainder is only relevant to setting `one_to_many` to ``True``.

        If `one_to_many` is ``True``, allow vectors to collapse into multiple
        bins if the metadata describe a one-many relationship. Supplied
        functions must allow for iteration support over the metadata key and
        must return a tuple of (path, bin) as to describe both the path in the
        hierarchy represented and the specific bin being collapsed into. The
        uniqueness of the bin is _not_ based on the path but by the name of the
        bin.

        The metadata value for the corresponding collapsed column may include
        more (or less) information about the collapsed data. For example, if
        collapsing "FOO", and there are vectors that span three associations A,
        B, and C, such that vector 1 spans A and B, vector 2 spans B and C and
        vector 3 spans A and C, the resulting table will contain three
        collapsed vectors:

        - A, containing original vectors 1 and 3
        - B, containing original vectors 1 and 2
        - C, containing original vectors 2 and 3

        If a vector maps to the same partition multiple times, it will be
        counted multiple times.

        There are two supported modes for handling one-to-many relationships
        via `one_to_many_mode`: ``add`` and `divide`. ``add`` will add the
        vector counts to each partition that the vector maps to, which may
        increase the total number of counts in the output table. ``divide``
        will divide a vectors's counts by the number of metadata that the
        vector has before adding the counts to each partition. This will not
        increase the total number of counts in the output table.

        If `one_to_many_md_key` is specified, that becomes the metadata
        key that describes the collapsed path. If a value is not specified,
        then it defaults to 'Path'.

        If `strict` is specified, then all metadata pathways operated on
        must be indexable by `metadata_f`.

        `one_to_many` and `norm` are not supported together.

        `one_to_many` and `collapse_f` are not supported together.

        `one_to_many` and `min_group_size` are not supported together.

        A final note on space consumption. At present, the `one_to_many`
        functionality requires a temporary dense matrix representation.

        Parameters
        ----------
        f : function
            Function that is used to determine what partition a vector belongs
            to
        collapse_f : function, optional
            Function that collapses a partition in a one-to-one collapse. The
            expected function signature is:

                dense or sparse_vector <- collapse_f(Table, axis)

            Defaults to a pairwise add.

        norm : bool, optional
            Defaults to ``True``. If ``True``, normalize the resulting table
        min_group_size : int, optional
            Defaults to ``1``. The minimum size of a partition when performing
            a one-to-one collapse
        include_collapsed_metadata : bool, optional
            Defaults to ``True``. If ``True``, retain the collapsed metadata
            keyed by the original IDs of the associated vectors
        one_to_many : bool, optional
            Defaults to ``False``. Perform a one-to-many collapse
        one_to_many_mode : {'add', 'divide'}, optional
            The way to reduce two vectors in a one-to-many collapse
        one_to_many_md_key : str, optional
            Defaults to "Path". If `include_collapsed_metadata` is ``True``,
            store the original vector metadata under this key
        strict : bool, optional
            Defaults to ``False``. Requires full pathway data within a
            one-to-many structure
        axis : {'sample', 'observation'}, optional
            The axis to collapse

        Returns
        -------
        Table
            The collapsed table

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a ``Table``

        >>> dt_rich = Table(
        ...    np.array([[5, 6, 7], [8, 9, 10], [11, 12, 13]]),
        ...    ['1', '2', '3'], ['a', 'b', 'c'],
        ...    [{'taxonomy': ['k__a', 'p__b']},
        ...     {'taxonomy': ['k__a', 'p__c']},
        ...     {'taxonomy': ['k__a', 'p__c']}],
        ...    [{'barcode': 'aatt'},
        ...     {'barcode': 'ttgg'},
        ...     {'barcode': 'aatt'}])
        >>> print(dt_rich) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID a   b   c
        1   5.0 6.0 7.0
        2   8.0 9.0 10.0
        3   11.0    12.0    13.0

        Create Function to determine what partition a vector belongs to

        >>> bin_f = lambda id_, x: x['taxonomy'][1]
        >>> obs_phy = dt_rich.collapse(
        ...    bin_f, norm=False, min_group_size=1,
        ...    axis='observation').sort(axis='observation')
        >>> print(obs_phy) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID a   b   c
        p__b    5.0 6.0 7.0
        p__c    19.0    21.0    23.0
        N)r  dividezEUnrecognized one-to-many mode '%s'. Must be either 'add' or 'divide'.c                 \    |                                |                               fS )Nr  )r  r  r2  r   s    r>   axis_ids_mdz#Table.collapse.<locals>.axis_ids_mdN
  s*    EEtE$$ajjdj&;&;<<r?   r   Tc                 
    | |fS r3   r^   offaxisonaxiss     r>   axis_updatez#Table.collapse.<locals>.axis_updateT
  s    ((r?   r   Fc                 
    || fS r3   r^   r  s     r>   r  z#Table.collapse.<locals>.axis_updateZ
  s    ((r?   z/norm and one_to_many are not supported togetherr   zIncomplete pathway, ID: z, metadata: r&   c                     i | ]\  }}||	S r^   r^   )rb   r   r  s      r>   
<dictcomp>z"Table.collapse.<locals>.<dictcomp>
  s    KKKga$KKKr?   r  r  r   r  r  keyc                     g | ]\  }}|S r^   r^   )rb   r  r   s      r>   re   z"Table.collapse.<locals>.<listcomp>
  s.     F F F41aQ F F Fr?   c                 ,    |                      |          S r3   rI  r  s     r>   
collapse_fz"Table.collapse.<locals>.collapse_f
  s    55;;&r?   collapsed_idsr  r   r_   )!
ValueErrorr   AttributeErrorr{   nextr  StopIterationrF   sortedr5   float64rG   r   rr   r  r  r  ro   r  r	   rX   r   r   r   r   r  r  r8   r%   r  r   r   r`   )/r;   rF  r  normmin_group_sizeinclude_collapsed_metadataone_to_manyone_to_many_modeone_to_many_md_keyr`  r   collapsed_datar  collapsed_mdr  r   r  r   md_countr  r}   md_iternum_mdpathwayr  errrk  rG   new_datar   r  columnvidxrL   dvtmpr  r   ro   rU  axis_idsaxis_md
redux_datar   	sample_mdr  r  s/             `                                    r>   collapsezTable.collapse	  s   N % 	 LLL#444 9;KL M M M	= 	= 	= 	= 	= 8I) ) ) ) ]""I) ) ) ) #4((( ~	P G$EG G G
 FHD 1 12 ' 'R!C** -1']]*% 	% 	% 	%! % % $'33#,C",S//1 %H(    )0F9%aKF# & !'KK6&>>1J1JKKKJ #3h">">BJJDJE}$$%s4888+B+B'C'C'*6{{'4,13 3 3 &s4888+G+G'H'H'*6{{'4;@B B B "&E!B!B #8 #8c2!C** 8(,W% 	% 	% 	%! % % $'33#,C",S//1 %H(    (-F'500'*4<'C'C 8 8GD!$T6\222a722228 &c]"Ri'*3;'A'A 8 8GD!$T6\222a72222A 8 ( * I":#3#3#5#5:a==III I IDAq '');VAY(GHHHHF F6*2B2B2D2D6@mm,E ,E ,E F F FM }$$%hj11%h//**844DD!' ' '  $~~ad~;; N Ne$/K$6$6!'x==>11'Zt/@/@/F/FGG
 0#h--/J%%d&=&=j&I&IJJJ$$T***- N ''(//:K:K(LMMM**>Y*OOD 	w]] 1 1$ 7 7]888&J$IhhMh221115G>RRtFFAAAJ#G!F n$IT7J	]4 4 4 	4s0   B!!#C	CCG,,#H	HHc                 @    |dk    rdS |dk    rdS t          |          S )zInvert an axisr   r   r   r   s     r>   r  zTable._invert_axis
  s0    8 =]""8#D)))r?   c                 @    |dk    rdS |dk    rdS t          |          )z"Convert str axis to numerical axisr   r&   r   r   r  r   s     r>   r  zTable._axis_to_num
  s0    81]""1"4(((r?   c                    |dvrt          |          |dk    rMt          j        }|                     d          D ])}t	          ||j                                                  }*n}t          t          |                     |                    | j	                  }t          |                     d|                    D ]!\  }}|j                                        ||<   "|S )a8  Get the minimum nonzero value over an axis

        Parameters
        ----------
        axis : {'sample', 'observation', 'whole'}, optional
            Defaults to "sample". The axis over which to calculate minima.

        Returns
        -------
        scalar of self.dtype or np.array of self.dtype

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> from biom import example_table
        >>> print(example_table.min(axis='sample'))
        [ 3.  1.  2.]

        r   r   r  r  Fr  r  r   r  )r   r5   infrG  minro   r   rr   r  rG   rF   )r;   r   min_valro   rK   s        r>   r   z	Table.min  s    0 999"4(((7??fGU33 8 8gty}}778 Cd 3 344DJGGGG&t~~E~'M'MNN / /	T#y}}r?   c                    |dvrt          |          |dk    rNt          j         }|                     d          D ])}t	          ||j                                                  }*nt          j        t          |                     |                    | j	                  }t          |                     d|                    D ]!\  }}|j                                        ||<   "|S )a9  Get the maximum nonzero value over an axis

        Parameters
        ----------
        axis : {'sample', 'observation', 'whole'}, optional
            Defaults to "sample". The axis over which to calculate maxima.

        Returns
        -------
        scalar of self.dtype or np.array of self.dtype

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> from biom import example_table
        >>> print(example_table.max(axis='observation'))
        [ 2.  5.]

        r  r  Fr  r  r   r  )r   r5   r  rG  r   ro   r   rr   r  rG   rF   )r;   r   max_valro   rK   s        r>   r   z	Table.max,  s    0 999"4(((7??vgGU33 8 8gty}}778 hs4888#6#677tzJJJG&t~~E~'M'MNN / /	T#y}}r?   c                    |dk     rt          d          |r|rt          d          |                                 }t          j                            |          }|ro|                    |                                          }|                    |           t          |d|                   |                    fd|           nE|	                                }	t          |	|||           |	|_        |                    d |           |                     |          }
|                    d |
           |S )	a	  Randomly subsample without replacement.

        Parameters
        ----------
        n : int
            Number of items to subsample from `counts`.
        axis : {'sample', 'observation'}, optional
            The axis to sample over
        by_id : boolean, optional
            If `False`, the subsampling is based on the counts contained in the
            matrix (e.g., rarefaction). If `True`, the subsampling is based on
            the IDs (e.g., fetch a random subset of samples). Default is
            `False`.
        with_replacement : boolean, optional
            If `False` (default), subsample without replacement. If `True`,
            resample with replacement via the multinomial distribution.
            Should not be `True` if `by_id` is `True`.
        seed : int, optional
            If provided, set the numpy random seed with this value

        Returns
        -------
        biom.Table
            A subsampled version of self

        Raises
        ------
        ValueError
            - If `n` is less than zero.
            - If `by_id` and `with_replacement` are both True.

        Notes
        -----
        Subsampling is performed without replacement. If `n` is greater than
        the sum of a given vector, that vector is omitted from the result.

        Adapted from `skbio.math.subsample`, see biom-format/licenses for more
        information about scikit-bio.

        This code assumes absolute abundance if `by_id` is False.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table
        >>> table = Table(np.array([[0, 2, 3], [1, 0, 2]]), ['O1', 'O2'],
        ...               ['S1', 'S2', 'S3'])

        Subsample 1 item over the sample axis by value (e.g., rarefaction):

        >>> print(table.subsample(1).sum(axis='sample'))
        [ 1.  1.  1.]

        Subsample 2 items over the sample axis, note that 'S1' is filtered out:

        >>> ss = table.subsample(2)
        >>> print(ss.sum(axis='sample'))
        [ 2.  2.]
        >>> print(ss.ids())
        ['S2' 'S3']

        Subsample by IDs over the sample axis. For this example, we're going to
        randomly select 2 samples and do this 100 times, and then print out the
        set of IDs observed.

        >>> ids = set([tuple(table.subsample(2, by_id=True).ids())
        ...            for i in range(100)])
        >>> print(sorted(ids))
        [('S1', 'S2'), ('S1', 'S3'), ('S2', 'S3')]

        r   zn cannot be negative.z.by_id and with_replacement cannot both be Truer  Nc                     |v S r3   r^   )rL   r   r}   subsets      r>   r   z!Table.subsample.<locals>.<lambda>  s    !v+ r?   c                 2    |                                  dk    S Nr   r  rL   r   r}   s      r>   r   z!Table.subsample.<locals>.<lambda>  s    !%%''A+ r?   c                 2    |                                  dk    S r(  r  r)  s      r>   r   z!Table.subsample.<locals>.<lambda>  s    aeeggk r?   )r  rN  r5   randomdefault_rngr  shufflert   r/  rg  r)   r   r  )r;   r9  r   by_idwith_replacementseedrU  rngr  ro   inv_axisr&  s              @r>   	subsamplezTable.subsampleT  sI   R q554555 	O 	OMNNN		i##D)) 
	B)))&&++--CKKRaR\\FLL5555DLAAAA))++DtQ 0#666EKLL55DLAAA$$T**11AAAr?   c                 6    d }|                      ||          S )a  Convert the table to presence/absence data

        Parameters
        ----------
        inplace : bool, optional
            Defaults to ``True``

        Returns
        -------
        Table
            Returns itself if `inplace`, else returns a new presence/absence
            table.

        Examples
        --------
        >>> from biom.table import Table
        >>> import numpy as np

        Create a 2x3 BIOM table

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'])

        Convert to presence/absence data

        >>> _ = table.pa()
        >>> print(table.data('O1', 'observation'))
        [ 0.  0.  1.]
        >>> print(table.data('O2', 'observation'))
        [ 1.  1.  1.]
        c                 6    t          j        | dk    dd          S )Nr   g      ?r  )r5   r   )ro   r  r  s      r>   transform_fzTable.pa.<locals>.transform_f  s    8DAIr2...r?   )r.  	transform)r;   r.  r6  s      r>   pazTable.pa  s*    @	/ 	/ 	/ ~~k7~;;;r?   c                 @   |r| n|                                  }|                    |          }|                    |          }|                    |          }|                    |          }t          |||||           |                                 ||_        |S )a
  Iterate over `axis`, applying a function `f` to each vector.

        Only non null values can be modified and the density of the
        table can't increase. However, zeroing values is fine.

        Parameters
        ----------
        f : function(data, id, metadata) -> new data
            A function that takes three values: an array of nonzero
            values corresponding to each observation or sample, an
            observation or sample id, and an observation or sample
            metadata entry. It must return an array of transformed
            values that replace the original values.
        axis : {'sample', 'observation'}, optional
            The axis to operate on. Can be "sample" or "observation".
        inplace : bool, optional
            Defaults to ``True``. Whether to return a new table or modify
            itself.

        Returns
        -------
        biom.Table
            Returns itself if `inplace`, else returns a new transformed table.

        Raises
        ------
        UnknownAxisError
            If provided an unrecognized axis.

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 table

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'foo': 'bar'}, {'x': 'y'}], None)
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O1  0.0 0.0 1.0
        O2  1.0 3.0 42.0

        Create a transform function

        >>> f = lambda data, id_, md: data / 2

        Transform to a new table on samples

        >>> table2 = table.transform(f, 'sample', False)
        >>> print(table2) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O1  0.0 0.0 0.5
        O2  0.5 1.5 21.0

        `table` hasn't changed

        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O1  0.0 0.0 1.0
        O2  1.0 3.0 42.0

        Tranform in place on observations

        >>> table3 = table.transform(f, 'observation', True)

        `table` is different now

        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O1  0.0 0.0 0.5
        O2  0.5 1.5 21.0

        but the table returned (`table3`) is the same as `table`

        >>> print(table3) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2  S3
        O1  0.0 0.0 0.5
        O2  0.5 1.5 21.0

        r  )rN  r  r  rg  r  r(   r   r   )r;   rF  r   r.  rU  r  r  r  s           r>   r8  zTable.transform  s    p  0TYY[[>>t>,,iiTi""$$$$//!!$''3Xq$///r?   averagec                 >    fd}|                      |||          S )a  Convert values to rank abundances from smallest to largest

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            The axis to use for ranking.
        inplace : bool, optional
            Defaults to ``True``. If ``True``, performs the ranking in
            place. Otherwise, returns a new table with ranking applied.
        method : str, optional
            The method for handling ties in counts. This can be any valid
            string that can be passed to `scipy.stats.rankdata`.

        Returns
        -------
        biom.Table
            The rank-abundance-transformed table.

        Raises
        ------
        ValueError
            If unknown ``method`` is provided.

        See Also
        --------
        scipy.stats.rankdata

        Examples
        --------
        >>> import numpy as np
        >>> from biom import Table
        >>> data = np.array([[ 99,  12,   8], [  0,  42,   7],
        ...                  [112,  42,   6], [  5,  75,   5]])
        >>> t = Table(data, sample_ids=['s1', 's2', 's3'],
        ...           observation_ids=['o1', 'o2', 'o3', 'o4'])

        Convert observation counts to their ranked abundance, from smallest
        to largest.

        >>> print(t.rankdata())  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	s1	s2	s3
        o1	2.0	1.0	4.0
        o2	0.0	2.5	3.0
        o3	3.0	2.5	2.0
        o4	1.0	4.0	1.0

        c                 F    t           j                            |           S )N)method)scipystatsrankdata)r   r  _r>  s      r>   rF  zTable.rankdata.<locals>.fu  s    ;''F';;;r?   r-  r7  )r;   r   r.  r>  rF  s      ` r>   rA  zTable.rankdataD  s5    b	< 	< 	< 	< 	<~~adG~<<<r?   c                 8    d }|                      |||          S )aj  Normalize in place sample values by an observation, or vice versa.

        Parameters
        ----------
        axis : {'sample', 'observation'}, optional
            The axis to use for normalization.
        inplace : bool, optional
            Defaults to ``True``. If ``True``, performs the normalization in
            place. Otherwise, returns a new table with the normalization
            applied.

        Returns
        -------
        biom.Table
            The normalized table

        Examples
        --------
        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x2 table:

        >>> data = np.asarray([[2, 0], [6, 1]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2'])

        Get a version of the table normalized on the 'sample' axis, leaving the
        original table untouched:

        >>> new_table = table.norm(inplace=False)
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  2.0 0.0
        O2  6.0 1.0
        >>> print(new_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  0.25    0.0
        O2  0.75    1.0

        Get a version of the table normalized on the 'observation' axis,
        again leaving the original table untouched:

        >>> new_table = table.norm(axis='observation', inplace=False)
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  2.0 0.0
        O2  6.0 1.0
        >>> print(new_table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  1.0 0.0
        O2  0.857142857143  0.142857142857

        Do the same normalization on 'observation', this time in-place:

        >>> table.norm(axis='observation')
        2 x 2 <class 'biom.table.Table'> with 3 nonzero entries (75% dense)
        >>> print(table) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID S1  S2
        O1  1.0 0.0
        O2  0.857142857143  0.142857142857
        c                 J    | t          |                                           z  S r3   )r.   rI  )r   r  rB  s      r>   rF  zTable.norm.<locals>.f  s    swwyy))))r?   r-  r7  )r;   r   r.  rF  s       r>   r  z
Table.normy  s,    F	* 	* 	* ~~adG~<<<r?   c              #   H  K   | j                                         }|                                 }|                     d          }|j        }|j        }t          |j        dz
            D ]6}||         }||dz            }||         }	|||         D ]}
|	||
         fV  7dS )zYields locations of nonzero elements within the data matrix

        Returns
        -------
        generator
            Yields ``(observation_id, sample_id)`` for each nonzero element
        r   r  r&   N)r   r   r  indptrr  r  r^  )r;   csrr  r  rF  r  r'  r  endrn  r+  s              r>   nonzerozTable.nonzero  s       j  88::(((..+V[1_-- 	2 	2G7OE#CW%F"59- 2 2x0111112	2 	2r?   c                    |rd}d }n
| j         }d }|dv rjt          t          |                     |                    |          }t	          |                     |                    D ]\  }} ||          ||<   nAt          d|          }|                                 D ]}|dxx          ||          z  cc<   |S )	a  Get nonzero summaries about an axis

        Parameters
        ----------
        axis : {'sample', 'observation', 'whole'}
            The axis on which to count nonzero entries
        binary : bool, optional
            Defaults to ``True``. If ``True``, return number of nonzero
            entries. If ``False``, sum the values of the entries.

        Returns
        -------
        numpy.array
            Counts in index order to the axis
        r-   c                 @    |                                  d         j        S r(  )rI  r^  rT   s    r>   opz Table.nonzero_counts.<locals>.op  s    yy{{1~**r?   c                 *    |                                  S r3   r  rT   s    r>   rL  z Table.nonzero_counts.<locals>.op  s    uuwwr?   r   r  r   r&   r   )rG   r   rr   r  rF   rG  )r;   r   binaryrG   rL  rd  rK   r   s           r>   nonzero_countszTable.nonzero_counts  s      		E+ + + + JE   ,,,3txxTx22335AAAF&t~~4~'@'@AA ' '	T bhhs' 1E***F(( & &q			RRXX%				r?   c                     t          |dd                   }|                    |dd                    i }d}|D ]}||vr
|||<   |dz  }|S )z+Determines merge order for id lists A and BNr   r&   )rx   extend)r;   aball_ids	new_orderrK   r  s          r>   _union_id_orderzTable._union_id_order  sl    qt**qt	 	 	C)##!$	#qr?   c                 b    t          |dd                   }i }d}|D ]}||v r
|||<   |dz  }|S )z/Determines the merge order for id lists A and BNr   r&   )rt   )r;   rR  rS  all_brU  rK   r  s          r>   _intersect_id_orderzTable._intersect_id_order
  sP    AaaaD			 	 	Ce||!$	#qr?   c                    |dvrt          |          |r| }n|                                 }|dk    rddg}n|g}|D ]K}|                    |                    |          |                    |          dk             |           L|S )ax  Remove empty samples or observations from the table

        Parameters
        ----------
        axis : {'whole', 'sample', 'observation'}, optional
            The axis on which to operate.
        inplace : bool, optional
            If ``True`` vectors are removed in ``self``; if ``False`` the
            vectors are removed in a new table is returned.

        Raises
        ------
        UnknownAxisError
            If the axis is not recognized.

        Returns
        -------
        Table
            A table object with the zero'd rows, or columns removed as
            specified by the `axis` parameter.
        r  r  r   r   r  r   )r   rN  r/  r  rI  )r;   r   r.  rU  r	  r
  s         r>   r0  zTable.remove_empty  s    , 999"4((( 	 EEIIKKE7??m,DD6D 	N 	NBLL++EII2I,>,>,BC"LMMMMr?   detectc                 "   t          |                     d                    }t          |                                           }t          |                    d                    }t          |                                          }||k    }||k    }|dk    r|r|st          d          |dk    r|st          d          |dk    r|st          d          |dk    r|s|st          d	          |dk    rddg}	n]|dk    r1g }	|r|	                    d           |r|	                    d           n&|dk    rdg}	n|dk    rdg}	nt	          d
|z            | }
|	D ]-}|
                    |                    |          |          }
.|
S )ad  Align self to other over a requested axis

        Parameters
        ----------
        other : biom.Table
            The table to align too
        axis : str, optional, {sample, observation, both, detect}
            If 'sample' or 'observation', align to that axis. If 'both', align
            both axes. If 'detect', align what can be aligned.

        Raises
        ------
        DisjointIDError
            If the requested axis can't be aligned.
        UnknownAxisError
            If an unrecognized axis is specified.

        Examples
        --------
        Align one table to another, for instance a table of 16S data to a table
        of metagenomic data. In this example, we're aligning the samples of the
        two tables.

        >>> from biom import Table
        >>> import numpy as np
        >>> amplicon = Table(np.array([[0, 1, 2], [3, 4, 5]]),
        ...                  ['Ecoli', 'Staphylococcus'],
        ...                  ['S1', 'S2', 'S3'])
        >>> metag = Table(np.array([[6, 7, 8], [9, 10, 11]]),
        ...               ['geneA', 'geneB'],
        ...               ['S3', 'S2', 'S1'])
        >>> amplicon = amplicon.align_to(metag)
        >>> print(amplicon)  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S3	S2	S1
        Ecoli	2.0	1.0	0.0
        Staphylococcus	5.0	4.0	3.0
        r   r  bothzCannot align both axesr   zCannot align sampleszCannot align observationsr[  zNeither axis appears alignablezUnrecognized axis: %s)rt   r  r   rX   r   r=  )r;   r  r   self_oself_sother_oother_salignable_oalignable_srB  rU  aln_axiss               r>   align_tozTable.align_to=  s   N TXX=X1122TXXZZeii]i3344eiikk""''6>>;>;>!":;;;Xk!"8999]"";"!"=>>>X{k!"BCCC6>>"H-EEXE 'X&&& ,]+++XJEE]"""OEE"#:T#ABBB 	4 	4H$$UYYHY%=%=*2 % 4 4EE r?   c           	      4	  " t          || j                  r|g}|                               }dk    rt          d          }t          }t
          }nt          d          }t
          }t          }t                      }t                      }i "|dd         }	|	                    d|            |	D ]}
|
                              }|
                    |          }t          |          }|	                    |          st          d          |                    |           t          |          |z
  rDt          |          |z
  D ]}|
                    ||          "|<   |                    |           t          |          }g }|	D ]%}
t          |t          |
                    |                    z
            }|r}t          |          }t          |
                                        }dk    r||f}n||f}t!          |          } ||
j        |g          }t          |
                    |                    }|                    |           |
                    |          }|(dgt          |
                    |                    z  }nt          |          }|                    "fd|D                        t          |
                                        }|
                              }dk    r|                     |||||          }n|                     |||||          }n|
}|                    |          |k                                    r|                    |           |                    |                    ||                     ' |d |D                       }t-          j        fd	|D                       }g }|D ]C}
|
                              }|dg ||
j                  z  }|                    |           D|d                             |          } dk    r!|                     |||| || j        
          }!n |                     ||||| | j        
          }!|!S )a  Concatenate tables if axis is disjoint

        Parameters
        ----------
        others : iterable of biom.Table, or a single biom.Table instance
            Tables to concatenate
        axis : {'sample', 'observation'}, optional
            The axis to concatenate on. i.e., if axis is 'sample', then tables
            will be joined such that the set of sample IDs in the resulting
            table will be the union of sample IDs across all tables in others.

        Raises
        ------
        DisjointIDError
            If IDs over the axis are not disjoint.

        Notes
        -----
        The type of the table is inherited from self.

        Examples
        --------
        Concatenate three tables in which the sample IDs are disjoint. Note
        the observation IDs in this example are not disjoint (although they
        can be):

        >>> from biom import Table
        >>> import numpy as np
        >>> a = Table(np.array([[0, 1, 2], [3, 4, 5]]), ['O1', 'O2'],
        ...                     ['S1', 'S2', 'S3'],
        ...                     [{'taxonomy': 'foo'}, {'taxonomy': 'bar'}])
        >>> b = Table(np.array([[6, 7, 8], [9, 10, 11]]), ['O3', 'O4'],
        ...                     ['S4', 'S5', 'S6'],
        ...                     [{'taxonomy': 'baz'}, {'taxonomy': 'foobar'}])
        >>> c = Table(np.array([[12, 13, 14], [15, 16, 17]]), ['O1', 'O5'],
        ...                     ['S7', 'S8', 'S9'],
        ...                     [{'taxonomy': 'foo'}, {'taxonomy': 'biz'}])
        >>> d = a.concat([b, c])
        >>> print(d)  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S1	S2	S3	S4	S5	S6	S7	S8	S9
        O1	0.0	1.0	2.0	0.0	0.0	0.0	12.0	13.0	14.0
        O2	3.0	4.0	5.0	0.0	0.0	0.0	0.0	0.0	0.0
        O3	0.0	0.0	0.0	6.0	7.0	8.0	0.0	0.0	0.0
        O4	0.0	0.0	0.0	9.0	10.0	11.0	0.0	0.0	0.0
        O5	0.0	0.0	0.0	0.0	0.0	0.0	15.0	16.0	17.0

        r   r&   r   Nr  zIDs are not disjointc                      g | ]
}|         S r^   r^   )rb   r   invaxis_metadatas     r>   re   z Table.concat.<locals>.<listcomp>   s    "L"L"L1#3A#6"L"L"Lr?   c                     g | ]	}|j         
S r^   )r   rb   r2  s     r>   re   z Table.concat.<locals>.<listcomp>  s    AAAaAMAAAr?   c                 <    g | ]}|                                S r  )r  )rb   r2  r   s     r>   re   z Table.concat.<locals>.<listcomp>  s'    $M$M$M!QUUU%5%5$M$M$Mr?   r_   )r4   r=   r  r	   r   r   rt   insertr  
isdisjointr   r   r  r   rx   rr   r   r   rQ  r   rX   r=  r5   concatenatern   r`   )#r;   othersr   invaxis
dim_getterstackinvstackr  invaxis_ids
all_tablesrU  table_axis_idstable_invaxis_ordertable_invaxisr   invaxis_orderpadded_tablesmissing_ids	n_invaxisn_axisrn   zerodtmp_mattmp_inv_ids
tmp_inv_mdtmp_idstmp_md	tmp_table
concat_mat
concat_ids	concat_mdr  inv_mdconcatrh  s#     `                               @r>   r  zTable.concat  s'   b fdn-- 	 ZF ##D))8#AJEHH#AJEH55eeAAAY
!T"""   	2 	2E"YYDY11N"')))"9"9 344M &&~66 >%&<===OON+++=!!K/ 2m,,{: J JA*/...*I*I$Q'' ""=111{++  .	I .	IE{S1H1H-I-IIJJK $",,	UYYDY11228##&/EE#Y/E #5))"(E$5u#=>> #599'9#:#:;;"";///"^^^99
%"&#eiiWi.E.E*F*F!FJJ!%j!1!1J!!"L"L"L"L"L"L"LMMM uyydy3344T22 8## $wW/96!C !CII !%w/5z!C !CII "	 7++}<AACC I$$Y////$$Y%9%9-?F &: &H &H I I I I UAA=AAABB
^$M$M$M$M}$M$M$MNN
	" 	' 	'E~~4~00H 6JJu{$;$;;X&&&& q!***888^^Jz$*IDI $ G GFF ^^J
M$-vDI $ G GF r?   c                    | g|z   }t          t          d |D                       }t          t          d |D                       }d t          t          |                    D             d t          t          |                    D             d }d t                                          |          D             }d t                                          |          D             }g }g }	|D ]}
|
j                                        }|
                    d	
          }|
                                }fdt          |          D             }fdt          |          D             }|                                D ]@\  \  }}}|                    ||         ||         f           |	                    |           At          j
                            |          }t          j        |	|          }|                    ddg                                          }d |                                D             }|                     |||          S )a  For simple merge operations it is faster to aggregate using pandas

        Parameters
        ----------
        others : Table, or Iterable of Table
            If a Table, then merge with that table. If an iterable, then merge
            all of the tables
        c                 T    g | ]%}t          |                    d                     &S )r   r  rt   r  rj  s     r>   re   z%Table._fast_merge.<locals>.<listcomp>9  s>     $5 $5 $5() %(=(A(A$B$B $5 $5 $5r?   c                 P    g | ]#}t          |                                          $S r^   r  rj  s     r>   re   z%Table._fast_merge.<locals>.<listcomp>;  s&    "@"@"@A3quuww<<"@"@"@r?   c                     i | ]\  }}||	S r^   r^   rb   rK   r   s      r>   r  z%Table._fast_merge.<locals>.<dictcomp>?  s    LLL&#qq#LLLr?   c                     i | ]\  }}||	S r^   r^   r  s      r>   r  z%Table._fast_merge.<locals>.<dictcomp>@  s    JJJaaJJJr?   c                     | d         S )Nr&   r^   rT   s    r>   r   z#Table._fast_merge.<locals>.<lambda>C  s
    1 r?   c                     g | ]\  }}|S r^   r^   rb   r  rL   s      r>   re   z%Table._fast_merge.<locals>.<listcomp>D  s    MMMtq!MMMr?   r  c                     g | ]\  }}|S r^   r^   r  s      r>   re   z%Table._fast_merge.<locals>.<listcomp>E  s    KKKdaKKKr?   r   r  c                 (    i | ]\  }}||         S r^   r^   )rb   rK   r   feature_maps      r>   r  z%Table._fast_merge.<locals>.<dictcomp>Q  s9     A A A"(#q ";q> A A Ar?   c                 (    i | ]\  }}||         S r^   r^   )rb   rK   r   
sample_maps      r>   r  z%Table._fast_merge.<locals>.<dictcomp>S  s9     @ @ @!'a !*Q- @ @ @r?   rm  r   r&   )levelc                 $    g | ]\  \  }}}|||gS r^   r^   )rb   r  r  rL   s       r>   re   z%Table._fast_merge.<locals>.<listcomp>f  s&    FFF96Aq1aAYFFFr?   )r   r
   rF   r   r  r   todokr  rX   pd
MultiIndexfrom_tuplesSeriesgroupbyrI  r=   )r;   ro  tablesall_featuresall_samplesget1feature_ordersample_ordermir   rU  data_as_dokfeat_idsr  table_featurestable_samplesrF  srL   groupedcollapsed_rcv	list_listr  r  s                         @@r>   _fast_mergezTable._fast_merge-  s    & c $5 $5-3$5 $5 $5 6 6S"@"@"@"@"@AA MLIf\6J6J,K,KLLLJJ9VK5H5H+I+IJJJ
 ~MMvk.?.?.A.At'L'L'LMMMKKfZ-=-=-?-?T&J&J&JKKK 	! 	!E+1133K yymy44Hyy{{HA A A A,5h,?,?A A AN@ @ @ @+4X+>+>@ @ @M )..00 ! !	A 		>!,mA.>?@@@a    	! ]&&r**)F"---  q!f5599;; GF0C0C0E0EFFF	~~iEEEr?   unionc                 X   |                                  }|                      d          }|du o|du }|du o|du }	|s|	rY|dk    rS|dk    rMt          |t          t          t          f          r|                     |          S |                     |g          S |dk    r;|                     |                                 |                                          }
nS|dk    r;|                     |                                 |                                          }
nt          d|z            |dk    r?|                     |                     d          |                    d                    }nW|dk    r?|                     |                     d          |                    d                    }nt          d|z            t          |
                                t          d          	          }
t          |                                t          d          	          }|
st          d
          |st          d          |j        }| j        }|j        }| j        }g }g }|
D ][\  }}|                    ||                    |d          f           |                    ||                    |d          f           \d t#          t%          |                    D             }g }g }|                                  }|                                 }|
D ]\  }}|                    |           ||                     |          sd}n|||                  }||                    |          sd}n|||                  }|                     |||                     g }g }|                      d          }|                     d          } |D ]\  }}|                    |           ||                     |d          sd}n|||                  }| |                    |d          sd}n| ||                  }|                     |||                     t%          |
          }!|D ]\  }"}#t)          |!d          }$|                    |"d          r|                    |"d          }%nd}%|                     |"d          r|                     |"d          }&nd}&|%|D ]\  }'}(|(|&|(         |$|'<   nR|&|D ]\  }'})|)|%|)         |$|'<   n:|
D ]7\  }}*||vrd}+n|&||                  }+||vrd},n|%||                  },|+|,z   |$|*<   8|                     |$          ||#<   |                     |                     |          |dd         |dd         ||          S )a`	  Merge two tables together

        The axes, samples and observations, can be controlled independently.
        Both can work on either "union" or "intersection".

        `sample_metadata_f` and `observation_metadata_f` define how to
        merge metadata between tables. The default is to just keep the metadata
        associated to self if self has metadata otherwise take metadata from
        other. These functions are given both metadata dicts and must return
        a single metadata dict

        Parameters
        ----------
        other : biom.Table or Iterable of Table
            The other table to merge with this one. If an iterable, the tables
            are expected to not have metadata.
        sample : 'union', 'intersection', optional
            How the sample axis is handled
        observation : 'union', 'intersection', optional
            How the observation axis is handled
        sample_metadata_f : function, optional
            Defaults to ``biom.util.prefer_self``. Defines how to handle sample
            metadata during merge.
        obesrvation_metadata_f : function, optional
            Defaults to ``biom.util.prefer_self``. Defines how to handle
            observation metdata during merge.

        Returns
        -------
        biom.Table
            The merged table

        Notes
        -----
        - If ``sample_metadata_f`` and ``observation_metadata_f`` are None,
            then a fast merge is applied.
        - There is an implicit type conversion to ``float``.
        - The return type is always that of ``self``

        Examples
        --------

        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x2 table and a 3x2 table:

        >>> d_a = np.asarray([[2, 0], [6, 1]])
        >>> t_a = Table(d_a, ['O1', 'O2'], ['S1', 'S2'])
        >>> d_b = np.asarray([[4, 5], [0, 3], [10, 10]])
        >>> t_b = Table(d_b, ['O1', 'O2', 'O3'], ['S1', 'S2'])

        Merging the table results in the overlapping samples/observations (see
        `O1` and `S2`) to be summed and the non-overlapping ones to be added to
        the resulting table (see `S3`).

        >>> merged_table = t_a.merge(t_b)
        >>> print(merged_table)  # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S1	S2
        O1	6.0	5.0
        O2	6.0	4.0
        O3	10.0	10.0

        r   r  Nr  intersectionzUnknown sample merge type: %sz"Unknown observation merge type: %sr&   r  zNo samples in resulting table!z#No observations in resulting table!c                     g | ]}d S r3   r^   r  s     r>   re   zTable.merge.<locals>.<listcomp>  s    888888r?   r.   r   r   )r  r4   rx   rt   ry   r  rV  r  rY  r   r   r  r	   r   r   rX   r   r  rr   r  r   ro   r   r=   )-r;   r  r   r   sample_metadata_fobservation_metadata_fs_mdo_mdno_md	ignore_mdnew_samp_ordernew_obs_orderother_obs_idxself_obs_idxother_samp_idxself_samp_idxother_samp_orderself_samp_orderro  nsir   r   r  self_sample_mdother_sample_mdr  rK   self_mdother_mdr  r  self_obs_mdother_obs_md
vec_lengthrn  new_obs_idxnew_vec	other_vecself_vecn_idxs_idxo_idxnew_samp_idxself_vec_valueother_vec_values-                                                r>   mergezTable.mergej  s   H }}}}-}001DDL&$. -#t+ 	  	7I 	7  [G%;%;edC%788 7++E222++UI666 W!11$((**eiikkJJNN~%%!55dhhjj%))++NNNN !@6!IJJJ '!! 00m,,eii]i.K.KM MMMN** 44m,,eii]i.K.KM MMM !4     4 4 6 6JqMMJJJ}2244*Q--HHH  	C !ABBB 	H !FGGG (,*
 * 	L 	LLGS##S.*<*<Wd*K*K$LMMM""C):):7D)I)I#JKKKK 98eC$6$677888 
	..**& 	C 	CHCc""" %T[[-=-=%(s);< &ell3.?.?&*>#+>?..wAABBBB mmm77~~=~99% 	E 	EHCNN3 "$++c+*N*N"%l3&78 #S}== $'c(:;MM00(CCDDDD ((
 $1 =	A =	AFKJg666G ||F|77 !!JJv}==		 	 {{6{66  99V];;  &5 9 9NUE()1%9 !&6 : :NUE()25)9: .< M M)G\
 m33)*)1-2H)In44*+*3N74K*L,:_,LGL)) !% 7 7 @ @D~~d55d;;WQQQZ(mVY@ @ 	@r?   c                 d  *+,-./0 t          |t          j        t          j        f          st	          d          |dvrt          |          i |sB?t                    |d|z           /|d|z           0|d|z           .|d|z           dd         }t          j        fdt          |          D             t          	          }0fd
|D             }t          j        t          |          dz   t          j        	          }	d|	d<   t          j        d |D                                                       |	dd<   t          j        .fd|D                       }
t          j        /fd|D                       }|dk    rK|d         dd         }||         }t          |          t          |          f}t!          |
||	f|          }nJ|d         dd         }||         }t          |          t          |          f}t#          |
||	f|          }dt%          d |D                       z  }dt%          d |D                       z  }t          j        ||	          }t          j        ||	          }t)          |||          S |j        d         }|j        d         }|j        d         }t-          t.          d          r-	 t/          j        |          }n# t2          t          f$ r Y nw xY w|j        d         }|j        d         dk    rdn|j        d         }t          |t4                    r|                    d          }t          |t4                    r|                    d          }d **fd } ||d!                   \  }}} ||d                   \  }}}||         d"         }|d#         +|d$         ,|d%         -Wd& }|dk    rdfndf\  }} |||          \  }} |||          \  }} t          |          t          |          f}d' }! |!||          } |!||           }|dk    r| n|}"t          j        |"          d         }#t;          -fd(|#D                       }$t          j        d) |$D                       }%t          j        t          |#          dz   t          j        	          }	d|	d<   |%                                |	dd<   t          j        +fd*|$D                       }
t          j        ,fd+|$D                       }n+}
,}-}	|
||	f}&|dk    rt!          |&|          }'nt#          |&|          }'t)          |'|||pd|pd||||||,          }($d- })|dk    rd!nd}|(                    |)|.           |(S )/a.  Parse an HDF5 formatted BIOM table

        If ids is provided, only the samples/observations listed in ids
        (depending on the value of axis) will be loaded

        The expected structure of this group is below. A few basic definitions,
        N is the number of observations and M is the number of samples. Data
        are stored in both compressed sparse row (for observation oriented
        operations) and compressed sparse column (for sample oriented
        operations).

        Notes
        -----
        The expected HDF5 group structure is below. An example of an HDF5 file
        in DDL can be found here [1]_.

        - ./id                                                  : str, an arbitrary ID  # noqa
        - ./type                                                : str, the table type (e.g, OTU table)  # noqa
        - ./format-url                                          : str, a URL that describes the format  # noqa
        - ./format-version                                      : two element tuple of int32, major and minor  # noqa
        - ./generated-by                                        : str, what generated this file  # noqa
        - ./creation-date                                       : str, ISO format  # noqa
        - ./shape                                               : two element tuple of int32, N by M  # noqa
        - ./nnz                                                 : int32 or int64, number of non zero elems  # noqa
        - ./observation                                         : Group  # noqa
        - ./observation/ids                                     : (N,) dataset of str or vlen str  # noqa
        - ./observation/matrix                                  : Group  # noqa
        - ./observation/matrix/data                             : (nnz,) dataset of float64  # noqa
        - ./observation/matrix/indices                          : (nnz,) dataset of int32  # noqa
        - ./observation/matrix/indptr                           : (M+1,) dataset of int32  # noqa
        - ./observation/metadata                                : Group  # noqa
        - [./observation/metadata/foo]                          : Optional, (N,) dataset of any valid HDF5 type in index order with IDs.  # noqa
        - ./observation/group-metadata                          : Group  # noqa
        - [./observation/group-metadata/foo]                    : Optional, (?,) dataset of group metadata that relates IDs  # noqa
        - [./observation/group-metadata/foo.attrs['data_type']] : attribute of the foo dataset that describes contained type (e.g., newick)  # noqa
        - ./sample                                              : Group  # noqa
        - ./sample/ids                                          : (M,) dataset of str or vlen str  # noqa
        - ./sample/matrix                                       : Group  # noqa
        - ./sample/matrix/data                                  : (nnz,) dataset of float64  # noqa
        - ./sample/matrix/indices                               : (nnz,) dataset of int32  # noqa
        - ./sample/matrix/indptr                                : (N+1,) dataset of int32  # noqa
        - ./sample/metadata                                     : Group  # noqa
        - [./sample/metadata/foo]                               : Optional, (M,) dataset of any valid HDF5 type in index order with IDs.  # noqa
        - ./sample/group-metadata                               : Group  # noqa
        - [./sample/group-metadata/foo]                         : Optional, (?,) dataset of group metadata that relates IDs  # noqa
        - [./sample/group-metadata/foo.attrs['data_type']]      : attribute of the foo dataset that describes contained type (e.g., newick)  # noqa

        The '?' character on the dataset size means that it can be of arbitrary
        length.

        The expected structure for each of the metadata datasets is a list of
        atomic type objects (int, float, str, ...), where the index order of
        the list corresponds to the index order of the relevant axis IDs.
        Special metadata fields have been defined, and they are stored in a
        specific way. Currently, the available special metadata fields are:

        - taxonomy: (N, ?) dataset of str or vlen str
        - KEGG_Pathways: (N, ?) dataset of str or vlen str
        - collapsed_ids: (N, ?) dataset of str or vlen str

        Parameters
        ----------
        h5grp : a h5py ``Group`` or an open h5py ``File``
            The object to load from
        ids : iterable
            The sample/observation ids of the samples/observations that we need
            to retrieve from the hdf5 biom table
        axis : 'sample', 'observation', optional
            The axis to subset on
        parse_fs : dict, optional
            Specify custom parsing functions for metadata fields. This dict is
            expected to be {'metadata_field': function}, where the function
            signature is (object) corresponding to a single row in the
            associated metadata dataset. The return from this function an
            object as well, and is the parsed representation of the metadata.
        subset_with_metadata : bool, optional
            When subsetting (i.e., `ids` is `not None`), whether to also parse
            the metadata. By default, the metadata are also subset. The reason
            for exposing this functionality is that, for large tables, there
            exists a very large overhead for this metadata manipulation.

        Returns
        -------
        biom.Table
            A BIOM ``Table`` object

        Raises
        ------
        ValueError
            If `ids` are not a subset of the samples or observations ids
            present in the hdf5 biom table
            If h5grp is not a HDF5 file or group

        References
        ----------
        .. [1] http://biom-format.org/documentation/format_versions/biom-2.1.html

        See Also
        --------
        Table.to_hdf5

        Examples
        --------
        >>> from biom.table import Table
        >>> from biom.util import biom_open
        >>> with biom_open('rich_sparse_otu_table_hdf5.biom') as f # doctest: +SKIP
        >>>     t = Table.from_hdf5(f) # doctest: +SKIP

        Parse a hdf5 biom table subsetting observations
        >>> from biom.util import biom_open # doctest: +SKIP
        >>> from biom.parse import parse_biom_table
        >>> with biom_open('rich_sparse_otu_table_hdf5.biom') as f # doctest: +SKIP
        >>>     t = Table.from_hdf5(f, ids=["GG_OTU_1"],
        ...                         axis='observation') # doctest: +SKIP
        z1h5grp does not appear to be an HDF5 file or groupr   Nz%s/matrix/indicesz%s/matrix/indptrz%s/matrix/dataz%s/idsc                 "    g | ]\  }}|v 	|S r^   r^   )rb   r   r  r  s      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s-      /  /  /fa#&#:: !"#-::r?   r   c                 6    g | ]}|         |d z            fS r&   r^   )rb   r   
raw_indptrs     r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s*    KKKa*Q-AaC9KKKr?   r&   r   c                     g | ]
\  }}||z
  S r^   r^   )rb   r  es      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s     "?"?"?TQ1q5"?"?"?r?   c                 *    g | ]\  }}||         S r^   r^   )rb   r  r  raw_datas      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s%    "G"G"GTQ8AaC="G"G"Gr?   c                 *    g | ]\  }}||         S r^   r^   )rb   r  r  raw_indicess      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s&    %M%M%M41ak!A#&6%M%M%Mr?   r   zobservation/idsr   z
sample/idsr[  c                 ,    g | ]}t          |          S r^   r]  r   s     r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s    (A(A(AAQ(A(A(Ar?   c                 ,    g | ]}t          |          S r^   r]  r   s     r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s    )C)C)CQ#a&&)C)C)Cr?   ri  creation-dategenerated-byfromisoformatrn   r`   rq   asciic                 Z    t          | t                    r|                     d          S d S rO   rQ   rT   s    r>   ensure_utf8z$Table.from_hdf5.<locals>.ensure_utf8  s+    !U## xx'''r?   c                    | d         dd         }|j         dk    r2dt          d |D                       z  }t          j        ||          }t	          d           }t
          |d<   t
          |d	<   t
          |d
<   |                               d t          t          |                    D             }| d         	                                D ]Q\  }}|
                    dd          }||         }|dd         }t          ||          D ]\  }	}
 ||
          |	|<   Rt          |          r|nd}fd| d         	                                D             }|||fS )z%Loads all the data of the given groupr  Nr   r[  c                 ,    g | ]}t          |          S r^   r]  r   s     r>   re   z6Table.from_hdf5.<locals>.axis_load.<locals>.<listcomp>)  s    (=(=(=AQ(=(=(=r?   r   c                      t           S r3   )rV   r^   r?   r>   r   z4Table.from_hdf5.<locals>.axis_load.<locals>.<lambda>,  s     r?   r   KEGG_Pathwaysr  c                     g | ]}i S r^   r^   r  s     r>   re   z6Table.from_hdf5.<locals>.axis_load.<locals>.<listcomp>3  s    ..."...r?   r  rg   rf   c                 :    i | ]\  }}| |d                    S r   r^   )rb   catr   r  s      r>   r  z6Table.from_hdf5.<locals>.axis_load.<locals>.<dictcomp>?  sC     E E E"#s ;;s1v.. E E Er?   group-metadata)r^  r   r5   r   r   r[   r   r  rr   r  rs   r{   any)r|   r  	ids_dtypeparserr}   categorydsetparse_fro   md_dictdata_rowgrp_mdr  parse_fss               r>   	axis_loadz"Table.from_hdf5.<locals>.axis_load#  s    e*QQQ-Cx!||!C(=(=(=(=(=$>$>>	jI666 !7!788F!8F:&=F?#&=F?#MM(### /.eCHHoo...B"%j/"7"7"9"9 : :$#++K== *AAAw),R : :%GX(/(9(9GH%%: 2ww(DBE E E E&)*:&;&A&A&C&CE E EFF?"r?   r   matrixro   r  rF  c                 D   |+| dd         }t          j        | j        t                    }npt          j        |          }t          j        | |          }| |         }|j        |j        k    r/t          dt          |          t          |          z
  z            ||fS )zIf desired_ids is not None, makes sure that it is a subset
                of source_ids and returns the desired_ids array-like and a
                boolean array indicating where the desired_ids can be found in
                source_idsNr   z:The following ids could not be found in the biom table: %s)r5   onesrn   boolr   in1dr  rt   )
source_idsdesired_idsr  rK   s       r>   _get_idsz!Table.from_hdf5.<locals>._get_idsN  s    
 &$QQQ-C'*"2$???CC"$*["9"9K'*k::C$S/C yK$555( *G*-k*:*:SXX*E*G H H H Cxr?   c                 |    | r9t          t          j        |           t          j        |                             } | S )zTIf md has data, returns the subset indicated by idx, a
                boolean array)rx   r5   r   r   )r}   rK   s     r>   _subset_metadataz)Table.from_hdf5.<locals>._subset_metadatam  s3      =bjnnRXc]];<<B	r?   c              3   >   K   | ]}|         |d z            fV  dS )r&   Nr^   )rb   r   	h5_indptrs     r>   r  z"Table.from_hdf5.<locals>.<genexpr>z  sE       $ $341y1~.$ $ $ $ $ $r?   c                     g | ]
\  }}||z
  S r^   r^   )rb   r  rH  s      r>   re   z#Table.from_hdf5.<locals>.<listcomp>~  s6     &H &H &H*4% '*Ek &H &H &Hr?   c                 *    g | ]\  }}||         S r^   r^   )rb   r  rH  h5_datas      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s<     @ @ @",% &eCi0 @ @ @r?   c                 *    g | ]\  }}||         S r^   r^   )rb   r  rH  
h5_indicess      r>   re   z#Table.from_hdf5.<locals>.<listcomp>  s<     !C !C !C%/UC ",E#I!6 !C !C !Cr?   )r`   r   r   r   r   r   c                 *    t          j        |           S r3   )r5   r  )r   r  r}   s      r>   	any_valuez"Table.from_hdf5.<locals>.any_value  s    vd||#r?   r  ) r4   h5pyGroupFiler  r   rt   r5   r   rF   r-   r   rr   int32cumsumrn  r   r   r   r   r   attrshasattrr   r  r   rR   rS   r   r   r   r/  )1rD   h5grpr  r   r  subset_with_metadatar  to_keep	start_endrF  ro   r  r  r  rn   r   obs_ids_dtypesamp_ids_dtyper  r   r   type_r  r  
obs_grp_mdr  samp_grp_mddata_grpr  sampobsobs_idxsamp_idxr  rK   keepindptr_indicesindptr_subsetcsr  r2  r  r  r  r  r  r  r  r  s1     ` `                                     @@@@@@@r>   	from_hdf5zTable.from_hdf5c  s6   r %$*di!899 	& % & & & 000"4(((H# "	1c((C 3d :;K1D89J-45HX_-aaa0Hh  /  /  /  /	(0C0C  /  /  /69; ; ;GKKKK7KKKIXc'llQ.bh???FF1I"?"?Y"?"?"?@@GGIIF122J>"G"G"G"GY"G"G"GHHDn%M%M%M%M9%M%M%MNNGx 121115#G,Ws7||4 $!8FFF .qqq1"7+Ws8}}5 $!8FFF "C(A(A(A(A(A$B$BBM"S)C)C()C)C)C%D%DDNj>>>Gz(.AAAHgx000k$k/2{>28_-- 	&4[AAz*    G$F+r11u{67Jc5!! 	&**W%%CeU## 	*LL))E	 	 		# 	# 	# 	# 	# 	#@ '0im0D&E&E#)25?)C)C&'; ;x(6"i(
X&	 ?     . (,x'7'7ddC[ID#'x55GW!)(D!9!9Hh \\3x==1E   &%fg66F&&w99G #h..((GC8C==#D# $ $ $ $8<$ $ $  N H &H &H8F&H &H &H I IMXc$ii!m28<<<FF1I&--//F122J9 @ @ @ @0>@ @ @ A ADi !C !C !C !C3A!C !C !C D DGG D GFGV$8%000FF%000F&'8V^t/T;+c-7(3	5 5 5 ?$ $ $ %)H$4$4==(DHHYTH***s   K- -L Lc                 :   |                      d          }|                                  }|r&| j                                        }t          j        }n<| j                                        }t          t          j        j        j                  } ||||          S )a  Convert matrix data to a Pandas SparseDataFrame or DataFrame

        Parameters
        ----------
        dense : bool, optional
            If True, return pd.DataFrame instead of pd.SparseDataFrame.

        Returns
        -------
        pd.DataFrame or pd.SparseDataFrame
            A DataFrame indexed on the observation IDs, with the column
            names as the sample IDs.

        Notes
        -----
        Metadata are not included.

        Examples
        --------
        >>> from biom import example_table
        >>> df = example_table.to_dataframe()
        >>> df
             S1   S2   S3
        O1  0.0  1.0  2.0
        O2  3.0  4.0  5.0
        r   r  r  columns)	r  r   r   r  	DataFramerN  r   sparsefrom_spmatrix)r;   r  r  r,  r   constructors         r>   to_dataframezTable.to_dataframe  s    6 m,,((** 	E"**,,C,KK"''))C!","5"CDDK{3eW====r?   float32c                 4   	 ddl }n# t          $ r t          d          w xY w| j        }|r|                                }|                     d          }|                     d          }|                    ||||          }|                                }|S )ug  Convert Table to AnnData format

        Parameters
        ----------
        dense : bool, optional
            If True, set adata.X as np.ndarray instead of sparse matrix.
        dtype: str, optional
            dtype used for storage in anndata object.
        tranpose: bool, optional
            If True, transpose the anndata so that observations are columns

        Returns
        -------
        anndata.AnnData
            AnnData with matrix data and associated observation and
            sample metadata.

        Notes
        -----
        Nested metadata are not included.

        Examples
        --------
        >>> from biom import example_table
        >>> adata = example_table.to_anndata()
        >>> adata
        AnnData object with n_obs × n_vars = 3 × 2
            obs: 'environment'
            var: 'taxonomy_0', 'taxonomy_1'
        r   Nz7Please install anndata package -- `pip install anndata`r   r   )r"  varrG   )anndataImportErrorr   r   metadata_to_dataframeAnnDatar   )	r;   r  rG   r   r5  r   r4  r"  adatas	            r>   
to_anndatazTable.to_anndata  s    >	NNNN 	 	 	I  	  	 ++--C((22((77#UCC!!s    !c                    |                      |          }|t          d|z            g }|D ]}g }i }|                                D ]z\  }}t          |t          t
          f          r?d||<   t          t          |                    D ]}	|                    d||	fz             `d||<   |                    |           {t          |          t          |          k    r|}g }
|D ]k}g }|                                D ]=\  }}||         r|D ]}|                    |           (|                    |           >|
                    |           lt          j
        |
|                     |          |          S )a  Convert axis metadata to a Pandas DataFrame

        Parameters
        ----------
        axis : {'sample', 'observation'}
            The axis to operate on.

        Returns
        -------
        pd.DataFrame
            A DataFrame indexed by the ids of the desired axis, columns by the
            metadata keys over that axis.

        Raises
        ------
        UnknownAxisError
            If the requested axis isn't recognized
        KeyError
            IF the requested axis does not have metadata
        TypeError
            If a metadata column is a list or tuple, but is jagged over the
            axis.

        Notes
        -----
        Nested metadata (e.g., KEGG_Pathways) is not supported.

        Metadata which are lists or tuples (e.g., taxonomy) are expanded such
        that each index position is a unique column. For instance, the key
        taxonomy will become "taxonomy_0", "taxonomy_1", etc where "taxonomy_0"
        corresponds to the 0th index position of the taxonomy.

        Examples
        --------
        >>> from biom import example_table
        >>> example_table.metadata_to_dataframe('observation')
           taxonomy_0     taxonomy_1
        O1   Bacteria     Firmicutes
        O2   Bacteria  Bacteroidetes
        r  Nz%s does not have metadataTz%s_%dFr+  )r  KeyErrorr  r4   ry   rx   r  rr   rX   r  r-  r  )r;   r   r}   mcolstestr,  expandr  rY   rK   rx  rc   r   rL   s                 r>   r7  zTable.metadata_to_dataframe   s   R ]]]%%:6=>>> 	  	 DGF"jjll ( (
UeeT]33 ("&F3K$SZZ00 = =w#s';<<<<= #(F3KNN3''''7||c%jj(( 	 	ACggii & &
U#; &" & &

1& JJu%%%%KK|Dd(;(;UKKKKr?   c                    |i }| j         }| j        r| j        nd|j        d<   | j        r| j        nd|j        d<   d|j        d<   | j        |j        d<   ||j        d	<   |.t          j                                                    |j        d
<   n|                                |j        d
<   | j        |j        d<   ||j        d<   d}|du rd}t          d           }t          |d<   t          |d<   t          |d<   |                    |           t          ddgddg          D ]\  }	}
|                    |	          }| j                            |
          | _        |                     |	          }t#          |          }t#          | j        j                  }|}|                     |	          }|                    d           |rt)          |d                   }t          |dd         |dd                   D ]^\  }}t)          |          |k    rFt+          |d|d         d|dt-          |          d|d         dt-          |                    _t-          |d                   D ]} ||         ||||           |                     |	          }|                    d           |rK|                                D ]6\  }}|\  }}|                    d |z  d!t4          ||"          }||j        d#<   7|                    d$           |                    d%|ft6          j        | j        j        |"           |                    d&|ft6          j        | j        j        |"           |                    d'|ft6          j        | j        j        |"           |dk    r,|                    d(|ft4          d) |D             |"           |                    d(d*g |+           dS ),ap  Store CSC and CSR in place

        The resulting structure of this group is below. A few basic
        definitions, N is the number of observations and M is the number of
        samples. Data are stored in both compressed sparse row [1]_ (CSR, for
        observation oriented operations) and compressed sparse column [2]_
        (CSC, for sample oriented operations).

        Notes
        -----
        This method does not return anything and operates in place on h5grp.

        The expected HDF5 group structure is below. An example of an HDF5 file
        in DDL can be found here [3]_.

        - ./id                                                  : str, an arbitrary ID
        - ./type                                                : str, the table type (e.g, OTU table)
        - ./format-url                                          : str, a URL that describes the format
        - ./format-version                                      : two element tuple of int32, major and minor
        - ./generated-by                                        : str, what generated this file
        - ./creation-date                                       : str, ISO format
        - ./shape                                               : two element tuple of int32, N by M
        - ./nnz                                                 : int32 or int64, number of non zero elems
        - ./observation                                         : Group
        - ./observation/ids                                     : (N,) dataset of str or vlen str
        - ./observation/matrix                                  : Group
        - ./observation/matrix/data                             : (nnz,) dataset of float64
        - ./observation/matrix/indices                          : (nnz,) dataset of int32
        - ./observation/matrix/indptr                           : (M+1,) dataset of int32
        - ./observation/metadata                                : Group
        - [./observation/metadata/foo]                          : Optional, (N,) dataset of any valid HDF5 type in index order with IDs.
        - ./observation/group-metadata                          : Group
        - [./observation/group-metadata/foo]                    : Optional, (?,) dataset of group metadata that relates IDs
        - [./observation/group-metadata/foo.attrs['data_type']] : attribute of the foo dataset that describes contained type (e.g., newick)
        - ./sample                                              : Group
        - ./sample/ids                                          : (M,) dataset of str or vlen str
        - ./sample/matrix                                       : Group
        - ./sample/matrix/data                                  : (nnz,) dataset of float64
        - ./sample/matrix/indices                               : (nnz,) dataset of int32
        - ./sample/matrix/indptr                                : (N+1,) dataset of int32
        - ./sample/metadata                                     : Group
        - [./sample/metadata/foo]                               : Optional, (M,) dataset of any valid HDF5 type in index order with IDs.
        - ./sample/group-metadata                               : Group
        - [./sample/group-metadata/foo]                         : Optional, (?,) dataset of group metadata that relates IDs
        - [./sample/group-metadata/foo.attrs['data_type']]      : attribute of the foo dataset that describes contained type (e.g., newick)

        The '?' character on the dataset size means that it can be of arbitrary
        length.

        The expected structure for each of the metadata datasets is a list of
        atomic type objects (int, float, str, ...), where the index order of
        the list corresponds to the index order of the relevant axis IDs.
        Special metadata fields have been defined, and they are stored in a
        specific way. Currently, the available special metadata fields are:

        - taxonomy: (N, ?) dataset of str or vlen str
        - KEGG_Pathways: (N, ?) dataset of str or vlen str
        - collapsed_ids: (N, ?) dataset of str or vlen str

        Parameters
        ----------
        h5grp : `h5py.Group` or `h5py.File`
            The HDF5 entity in which to write the BIOM formatted data.
        generated_by : str
            A description of what generated the table
        compress : bool, optional
            Defaults to ``True`` means fields will be compressed with gzip,
            ``False`` means no compression
        format_fs : dict, optional
            Specify custom formatting functions for metadata fields. This dict
            is expected to be {'metadata_field': function}, where the function
            signature is (h5py.Group, str, dict, bool) corresponding to the
            specific HDF5 group the metadata dataset will be associated with,
            the category being operated on, the metadata for the entire axis
            being operated on, and whether to enable compression on the
            dataset.  Anything returned by this function is ignored.
        creation_date : datetime, optional
            If provided, use this specific datetime on write as the creation
            timestamp

        See Also
        --------
        Table.from_hdf5

        References
        ----------
        .. [1] http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.sparse.csr_matrix.html
        .. [2] http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.sparse.csc_matrix.html
        .. [3] http://biom-format.org/documentation/format_versions/biom-2.1.html

        Examples
        --------
        >>> from biom.util import biom_open  # doctest: +SKIP
        >>> from biom.table import Table
        >>> from numpy import array
        >>> t = Table(array([[1, 2], [3, 4]]), ['a', 'b'], ['x', 'y'])
        >>> with biom_open('foo.biom', 'w') as f:  # doctest: +SKIP
        ...     t.to_hdf5(f, "example")

        NzNo Table IDri  rq   r`   r,   z
format-urlzformat-versionr  r  rn   r   Tgzipc                      t           S r3   )r   r^   r?   r>   r   zTable.to_hdf5.<locals>.<lambda>  s    (9 r?   r   r  r  r   r   rG  cscr  r  r   r&   z+ has inconsistent metadata categories with z:
z: r  r  zgroup-metadata/%sr  rm   	data_typer  zmatrix/datazmatrix/indiceszmatrix/indptrr  c                 8    g | ]}|                     d           S rj   rk   r  s     r>   re   z!Table.to_hdf5.<locals>.<listcomp>#  s$    (G(G(Ga&)9)9(G(G(Gr?   r  )rn   ro   rp   ) r   r   r  r`   r   r   now	isoformatrn   r   rz   r   r{   create_groupr   asformatr  rr   rF  r  rt   r  rx   rX  r  rw   r#   r5   r  ro   r  r  )r;   r  r   compress	format_fscreation_dater   rp   	formatterr   rB  r|   r  len_ids
len_indptrlen_datar}   expother_idr  r  r   r  rY   datatyper   grp_datasets                              r>   to_hdf5zTable.to_hdf5I  s   ~ I h-1]MDMMD+/9<dii"F$<L!(,(;$%&2N# +3<>>+C+C+E+EEK((+8+B+B+D+DEK(#zG Et K 9 9::	 :	*%?	/"%?	/"###x85%.II D	< D	<KD%$$T**C,,U33DJ(((%%C#hhGTZ.//JHD))B Z((( H"Q%jj*-c!""gr!""v*>*> I I&Hh8}}++( 6>XXs1vvv5=XXtH~~~~58VVVT#YYY*H I I I , !%RU H HH (Ih'Xr;GGGG**400H -... >"*.."2"2 > >JC$)MHc"%"4"4+c1"- k #5 #; #;K 6>K%k22X&&&}XK%'Z$(JO+6  8 8 8 /{%'X$(J$6+6  8 8 8 zm%'X$(J$5+6  8 8 8
 {{ ""5
)6(G(G3(G(G(G/: # < < < < ""5B/: # < < < <GD	< D	<r?   c                    d |d         D             }d |d         D             }d |d         D             }d |d         D             }t           |d                  }d|v r|d         d	k    rd
}nd}|d         }	|	|d         }
n|}
d}t          t          d          r3	 t          j        |d                   }n# t          t
          f$ r Y nw xY wt          |
|||||d         ||	||d         |          }|S )ah	  Parse a biom otu table type

        Parameters
        ----------
        json_table : dict
            A JSON object or dict that represents the BIOM table
        data_pump : tuple or None
            A secondary source of data
        input_is_dense : bool
            If `True`, the data contained will be interpretted as dense

        Returns
        -------
        Table

        Examples
        --------
        >>> from biom import Table
        >>> json_obj = {"id": "None",
        ...             "format": "Biological Observation Matrix 1.0.0",
        ...             "format_url": "http://biom-format.org",
        ...             "generated_by": "foo",
        ...             "type": "OTU table",
        ...             "date": "2014-06-03T14:24:40.884420",
        ...             "matrix_element_type": "float",
        ...             "shape": [5, 6],
        ...             "data": [[0,2,1.0],
        ...                      [1,0,5.0],
        ...                      [1,1,1.0],
        ...                      [1,3,2.0],
        ...                      [1,4,3.0],
        ...                      [1,5,1.0],
        ...                      [2,2,1.0],
        ...                      [2,3,4.0],
        ...                      [2,5,2.0],
        ...                      [3,0,2.0],
        ...                      [3,1,1.0],
        ...                      [3,2,1.0],
        ...                      [3,5,1.0],
        ...                      [4,1,1.0],
        ...                      [4,2,1.0]],
        ...             "rows": [{"id": "GG_OTU_1", "metadata": None},
        ...                      {"id": "GG_OTU_2", "metadata": None},
        ...                      {"id": "GG_OTU_3", "metadata": None},
        ...                      {"id": "GG_OTU_4", "metadata": None},
        ...                      {"id": "GG_OTU_5", "metadata": None}],
        ...             "columns": [{"id": "Sample1", "metadata": None},
        ...                         {"id": "Sample2", "metadata": None},
        ...                         {"id": "Sample3", "metadata": None},
        ...                         {"id": "Sample4", "metadata": None},
        ...                         {"id": "Sample5", "metadata": None},
        ...                         {"id": "Sample6", "metadata": None}]
        ...             }
        >>> t = Table.from_json(json_obj)

        c                     g | ]
}|d          S ri  r^   rb   r   s     r>   re   z#Table.from_json.<locals>.<listcomp>e  s    AAACc$iAAAr?   r,  c                     g | ]
}|d          S r  r^   rY  s     r>   re   z#Table.from_json.<locals>.<listcomp>f  s    LLLs3z?LLLr?   c                     g | ]
}|d          S rX  r^   rb   r   s     r>   re   z#Table.from_json.<locals>.<listcomp>g  s    ;;;3t9;;;r?   rx  c                     g | ]
}|d          S r[  r^   r]  s     r>   re   z#Table.from_json.<locals>.<listcomp>h  s    FFFCJFFFr?   matrix_element_typematrix_typer  TFr`   Nro   r  datern   r   )rn   rG   r`   r   r   r   )MATRIX_ELEMENT_TYPEr  r   r  r   r  r   )r;   
json_table	data_pumpr   r   r   r  r  rG   r  ro   r   	table_objs                r>   	from_jsonzTable.from_json*  sY   v BA:i+@AAA
LLj6KLLL;;
6(:;;;FF:f3EFFF#J/D$EFJ&&-(G33!%!&6"f%DDD8_-- 	&4Z5GHHz*   $& *7 3 %$&1'1.'A)79 9 9	 s   B9 9CCc                 F   t          |t                    st          d          |&t          j                                                    }n|                                }|r|                    d           |                    dt          | j                  z             |                    dt          d          z             |                    dt                      z             |                    d|z             |                    d	|z             nDdt          | j                  z  }dt          d          z  }dt                      z  }d|z  }d	|z  }	 | j
        \  }}	n#  d
x}}	Y nxY w|d
k    r|	d
k    rdnd}
d
}|
r| d         }t          |t                    rd}n?t          |t                    rd}n't          |t                    rd}nt          d          |r3|                    d|z             |                    d||	fz             nd|z  }d||	fz  }| j        d}n
d| j        z  }|r|                    |           |r+|                    d           |                    d           nd}dg}t          |                     d                    dz
  }t          |                                           dz
  }dg}d}t!          |                     d                    D ]b\  }}||k    rC|                    dt'          |d                    dt'          |d                    d           nB|                    dt'          |d                    dt'          |d                    d            g }t!          |d
                   D ]3\  }}t          |          d!k    r|                    d"|||fz             4|r|r-|r|                    d#           n|                    d#           |r)|                    d#                    |                     n(|                    d#                    |                     d}d|r|                    d$           n|                    d$           d%g}t!          |                                           D ]\  }}||k    rP|                    d&                    t'          |d                   t'          |d                                        [|                    d'                    t'          |d                   t'          |d                                        |d
         dk    rt          |          dk    rd(g}d)g}d*                    |          }d*                    |          }|rA|                    |           |                    |           |                    d+           dS d,d*                    |||||||||d*                    |          ||g          z  S )-a  Returns a JSON string representing the table in BIOM format.

        Parameters
        ----------
        generated_by : str
            a string describing the software used to build the table
        direct_io : file or file-like object, optional
            Defaults to ``None``. Must implementing a ``write`` function. If
            `direct_io` is not ``None``, the final output is written directly
            to `direct_io` during processing.
        creation_date : datetime, optional
            If provided, use this datetime as the creation date on write.

        Returns
        -------
        str
            A JSON-formatted string representing the biom table
        z"Must specify a generated_by stringN{z"id": "%s",z"format": "%s",r&   r   z"format_url": "%s",z"generated_by": "%s",z"date": "%s",r   TFr   r-   r.   rv   zUnsupported matrix data type.z"matrix_element_type": "%s",z"shape": [%d, %d],z"type": null,z"type": "%s",z"matrix_type": "sparse",z	"data": [r   r  r&   z	"rows": [z{"id": z, "metadata": r  z},z}],r  z
[%d,%d,%r],z],z"columns": [z{{"id": {}, "metadata": {}}},z{{"id": {}, "metadata": {}}}]z"rows": [],z"columns": []rq   }z{%s})r4   rv   r   r   rF  rG  r  r   r   r   rn   r-   r.   r`   rr   r  rF   r  rX   r   r  format)r;   r   r  rL  r  format_
format_urlra  num_rowsnum_colshas_datatest_elementr_  rn   r  r`  ro   max_row_idxmax_col_idxrx  have_written	obs_indexr"  	built_row	col_indexr   r,  
samp_indexr!  s                                r>   to_jsonzTable.to_json  s   & ,,, 	G !EFFF $LNN4466MM)3355M  	3OOC   OOMC,>,>>???OO!.v6678 8 8 OO%*,,-. . . OO3lBCCCOOOm;<<<<#dm"4"44C'*H+ + G.1K1M1MMJ2\AL"]2D	$!%Hhh	$"##Hxxx#a<<HqLL44e  	&:L lC(( 	B"'e,, 	B")c** 	B"' !@AAA  	@OO.#$% % % OO0Hh3GGHHHH"@##$(Hh+??E 9#EE#di/E 	#OOE"""  	!OO6777OOK((((4K=D$(((6677!;$((**oo)}'		}	(E(EFF !	$ !	$NIsK''NuSV}}NNE#a&MMNNN    OuSV}}OOE#a&MMOOO   I"+CF"3"3  	3::$$$$$	9c'BB    $ )  )!,,,,C((( 5OOCHHY$7$78888KK 3 3444#  	OOD!!!!KK "" )$))++ 6 6 	5 	5J[((>EE$q'NNE$q'NN 4  4 5 5 5 5 >EE$q'NNE$q'NN 4  4 5 5 5 5 7k!!c$ii1nn!?D&'Gwwt}}'''"" 	OOD!!!OOG$$$OOC     BGG#%    s   !
E, ,E4c                    t          | t          t          f          sYt          | d          r|                                 } n4t          | d          r|                                 } nt          d          d }| d                                                             d          }t          |          dk    rt          d          |g d	k    rd
}n# ||d                   rd}nt          d          |s
| dd         } g }g }g }| D ]}|                    d          }t          |          dk    sJ |
                    |d                    |
                    |d                    |
                    t          |d                              t          t          |                    }	t          t          |                    }
d t          |	          D             d t          |
          D             t          j        fd|D             t"                    }t          j        fd|D             t"                    }t          j        |          }t'          |||ff          }t)          ||	|
          S )a  Parse an adjacency format into BIOM

        Parameters
        ----------
        lines : list, str, or file-like object
            The tab delimited data to parse

        Returns
        -------
        biom.Table
            A BIOM ``Table`` object

        Notes
        -----
        The input is expected to be of the form: observation, sample, value. A
        header is not required, but if present, it must be of the form:

        #OTU ID<tab>SampleID<tab>value

        Raises
        ------
        ValueError
            If the input is not an iterable or file-like object.
        ValueError
            If the data is incorrectly formatted.

        Examples
        --------
        Parse tab separated adjacency data into a table:

        >>> from biom.table import Table
        >>> from io import StringIO
        >>> data = 'a\tb\t1\na\tc\t2\nd\tc\t3'
        >>> data_fh = StringIO(data)
        >>> test_table = Table.from_adjacency(data_fh)
        	readlines
splitlinesz!Not sure how to handle this inputc                     t          j        d          }|                    |           }|                                \  }}||z
  t	          |           k    rdS dS )Nz0(?=.)([+-]?([0-9]*)(\.([0-9]+))?)([eE][+-]?\d+)?TF)recompilematchspanrr   )r   numericr  r  r  s        r>   is_numz$Table.from_adjacency.<locals>.is_numj  sS    j!TUUGMM$''E**,,KE4uT**tur?   r   r|     z)Does not appear to be an adjacency format)r}  SampleIDrY   Fr  Tr&   Nc                     i | ]\  }}||	S r^   r^   )rb   r   os      r>   r  z(Table.from_adjacency.<locals>.<dictcomp>  s    ;;;daQ;;;r?   c                     i | ]\  }}||	S r^   r^   )rb   r   r  s      r>   r  z(Table.from_adjacency.<locals>.<dictcomp>  s    ===tq!a===r?   c                      g | ]
}|         S r^   r^   )rb   r"  rv  s     r>   re   z(Table.from_adjacency.<locals>.<listcomp>  s    ???3	#???r?   r   c                      g | ]
}|         S r^   r^   )rb   r!  ry  s     r>   re   z(Table.from_adjacency.<locals>.<listcomp>  s    ===T
4(===r?   )r4   rx   ry   r  r|  r}  r  r   r   rr   rX   r.   r   rt   rF   r5   r   r-   r   r   r   )linesr  lhinclude_line_zeroobservationssamplesr   liner   	obs_order
samp_orderr   r   ro   r   rv  ry  s                  @@r>   from_adjacencyzTable.from_adjacency<  s   L %$// 	Fuk** F))-- F((** !DEEE	 	 	 1X^^##D))r77a<<HIII33333 %VBqE]] 	J !%HIII  	!""IE  	+ 	+DJJt$$Eu::????a)))NN58$$$MM%a//**** 3|,,--	CLL))
;;i	&:&:;;;	==y'<'<===
 h????,???sKKKh====W===SIIIz&!!$c
+,,S)Z000r?   c                     t          j        | fi |\  }}}}|d}	nfd|D             }	d}
nfd|D             }
fd|D             }	t          ||||	|
          S )ay  Parse a tab separated (observation x sample) formatted BIOM table

        Parameters
        ----------
        lines : list, or file-like object
            The tab delimited data to parse
        obs_mapping : dict or None
            The corresponding observation metadata
        sample_mapping : dict or None
            The corresponding sample metadata
        process_func : function
            A function to transform the observation metadata

        Returns
        -------
        biom.Table
            A BIOM ``Table`` object

        Examples
        --------
        Parse tab separated data into a table:

        >>> from biom.table import Table
        >>> from io import StringIO
        >>> tsv = 'a\tb\tc\n1\t2\t3\n4\t5\t6'
        >>> tsv_fh = StringIO(tsv)
        >>> func = lambda x : x
        >>> test_table = Table.from_tsv(tsv_fh, None, None, func)
        Nc                 *    g | ]} |          iS r^   r^   )rb   rL   process_func	t_md_names     r>   re   z"Table.from_tsv.<locals>.<listcomp>  s&    GGGQYQ8GGGr?   c                      g | ]
}|         S r^   r^   )rb   	sample_idsample_mappings     r>   re   z"Table.from_tsv.<locals>.<listcomp>  s.     < < <#,  .i8 < < <r?   c                      g | ]
}|         S r^   r^   )rb   rn  obs_mappings     r>   re   z"Table.from_tsv.<locals>.<listcomp>  s    FFFFK/FFFr?   )r   _extract_data_from_tsv)r  r  r  r  r   r   r  ro   t_mdr  r   r  s    ```       @r>   from_tsvzTable.from_tsv  s    B 25CCFCC	WdD	 <LLGGGGG$GGGL!"OO< < < <0:< < <O "FFFFgFFFLT7JoNNNr?   c           	         d t          | t                    s/	 t          | d           n# t          $ r t	          d          w xY wd}d}d}| D ]}|                                s|                    d          s;|s5|                                                              dd         }|dz   }n|} n5|dz  }|                                                              dd         }t          | t                    r| |d         }nH| 	                    d           t          d|          D ]}	|                                  d	 | D             }fd
|D             }
t          fd|
D                       }|s|dk    rd}d}|dd         }n|d         }g }|dd         }g }g }d}t          | d          r=| 	                    d           t          d|          D ]}	|                                 }n
| |d         } t          | |          D ]\  }}|                                s|                    d          r1|                              }|d                                         |d<   |                    |d                    |rj	 t          t          ||dd                             }n# t           $ r6 t#          ||dd                   \  }}d}t%          |||dz   |fz            w xY w	 t          t          ||dd                             }nC# t           $ r6 t#          ||dd                   \  }}d}t%          |||dz   |fz            w xY w|%|                     ||d                              n|                    |d                    t          dt'          |                    D ]5}||          |d          k    r|                    ||||         g           6|dz  }|||||fS )ab  Parse a classic table into (sample_ids, obs_ids, data, metadata,
        name)

        Parameters
        ----------
        lines: list or file-like object
            delimted data to parse
        delim: string
            delimeter in file lines
        dtype: type
            The expected type
        md_parse:  function or None
            funtion used to parse metdata

        Returns
        -------
        list
            sample_ids
        list
            observation_ids
        array
            data
        list
            metadata
        string
            column name if last column is non-numeric

        Notes
        ------
        This is intended to be close to how QIIME classic OTU tables are parsed
        with the exception of the additional md_name field

        This function is ported from QIIME (http://www.qiime.org), previously
        named parse_classic_otu_table. QIIME is a GPL project, but we obtained
        permission from the authors of this function to port it to the BIOM
        Format project (and keep it under BIOM's BSD license).

        .. shownumpydoc
        c                 H    	 t          |            dS # t          $ r Y dS w xY w)NTF)r.   r  )rY   s    r>   isfloatz-Table._extract_data_from_tsv.<locals>.isfloat  s9    et   uus    
!!seekz+Input needs to support seek or be indexableFr   #r&   Nc                     g | ]}|S r^   r^   )rb   r  s     r>   re   z0Table._extract_data_from_tsv.<locals>.<listcomp>&  s    333TD333r?   c                 l    g | ]0}|                     d           d                                         1S )r&   )rsplitr   )rb   r  r  s     r>   re   z0Table._extract_data_from_tsv.<locals>.<listcomp>)  sH     1 1 1 {{5!,,R06688 1 1 1r?   c                 &    g | ]} |          S r^   r^   )rb   r   r  s     r>   re   z0Table._extract_data_from_tsv.<locals>.<listcomp>+  s!    %F%F%FQggajj%F%F%Fr?   r  z-Invalid value on line %d, column %d, value %s)r4   rx   r  r  RuntimeErrorr   
startswithrstripr   r  r  readliner   rF   rX   r  r  rM   r   rr   )r  r  rG   md_parserd   
list_index
data_startr  value_checksr  last_valueslast_column_is_numericmd_namer  r  ro   r  
row_numberlinenorH   r   rI   rJ   msgcolumn_numberr  s    `                       @r>   r  zTable._extract_data_from_tsv  s   R	 	 	 %&& 	CCv&&&&! C C C"AC C CC 

 	3 	3D::<< ??3''   ,![[]]0077;F!+aJJ!+J!OJZZ\\''..qrr2FF eT"" 	4 -LLJJqMMMq*-- ! !    33U333L1 1 1 1#/1 1 1!$%F%F%F%F+%F%F%F!G!G " 	#Z1__GHaaayHHRjGHcrc{H
 5&!! 	'JJqMMMq*-- ( (~~''( *++&E%eZ88 !	 !	LFD::<< s## ZZ&&F))++F2JNN6!9%%%% 0F!#eVABBZ"8"899FF! F F F%8qrr
%K%KNFFIC#C66!8V*D$DEEEF
F!#eVAbD\":":;;FF! F F F%8qrr
%K%KNFFIC#C66!8V*D$DEEEF
 'OOHHVBZ$8$89999OOF2J///!&q#f++!6!6 9 9-(EE!HH44KK]!'!6!8 9 9 9!OJJ$'99s#   - A%J>>A K>%L((A M(c                 8    |                      d|||||          S )a  Return self as a string in tab delimited form

        Default ``str`` output for the ``Table`` is just row/col ids and table
        data without any metadata

        Parameters
        ----------
        header_key : str or ``None``, optional
            Defaults to ``None``
        header_value : str or ``None``, optional
            Defaults to ``None``
        metadata_formatter : function, optional
            Defaults to ``str``.  a function which takes a metadata entry and
            returns a formatted version that should be written to file
        observation_column_name : str, optional
            Defaults to "#OTU ID". The name of the first column in the output
            table, corresponding to the observation IDs.
        direct_io : file or file-like object, optional
            Defaults to ``None``. Must implement a ``write`` function. If
            `direct_io` is not ``None``, the final output is written directly
            to `direct_io` during processing.

        Returns
        -------
        str
            tab delimited representation of the Table

        Examples
        --------

        >>> import numpy as np
        >>> from biom.table import Table

        Create a 2x3 BIOM table, with observation metadata and no sample
        metadata:

        >>> data = np.asarray([[0, 0, 1], [1, 3, 42]])
        >>> table = Table(data, ['O1', 'O2'], ['S1', 'S2', 'S3'],
        ...               [{'foo': 'bar'}, {'x': 'y'}], None)
        >>> print(table.to_tsv()) # doctest: +NORMALIZE_WHITESPACE
        # Constructed from biom file
        #OTU ID	S1	S2	S3
        O1	0.0	0.0	1.0
        O2	1.0	3.0	42.0
        >>> with open("result.tsv", "w") as f:
                table.to_tsv(direct_io=f)
        r|  )r  rr  )r;   r  r  r  r  r  s         r>   to_tsvzTable.to_tsvg  s1    f ""4\#5#:-6 # 8 8 	8r?   )NNNNNNNNTNN)r   )FN)Nr  )r   )r  )rQ  rQ  )r   TT)Nr   )r   T)Tr   )Tr   TF)r   FT)	NTr&   TFr  r  Fr   )r   FFN)T)r   Tr;  )r  T)r[  )Nr   NT)F)Fr2  T)TNN)NF)NN)^r@   rA   rB   __doc__r   r   r   r   staticmethodr   r.   r   r   propertyrn   rG   r   r   r   r  r  r  r$  r!  r   r3  rC  r   rI  r   rV  rX  r  re  rg  r  r  rp  rt  rz  r  rv   rs  r  r  r  r  rw  r  r  r  r  ro   rN  rG  r  r  r=  r    r  r/  r  r  r  r  r   r   r3  r9  r8  rA  r  rI  rO  rV  rY  r0  re  r  r  r!   r  classmethodr)  r1  r:  r7  rU  rf  rz  r  r  r  r  r^   r?   r>   r   r     s       N Nb =AJNHLEI	99 99 99 99v0 0 0) ) ) )2< < < < ) ) \)  %*%<7 <7 <7 \<7|$; $; $;L     X      X    X
   XD D D D4) ) ) )8A6 A6 A6 A6F   8@( @( @(D* * *"* * *$6 6 6 6p6 6 6 6p5J 5J 5Jn8 8 8 8tJ J J094 94 94 94v.) .) .) .)`+) +) +) +)ZV V V Vp) ) ) )*:3 :3 :3 :3x/ / /b#3 #3 #3J% % %
 
 
', ', ', ',R $(Dt*-/8DU! U! U! U!n    . . .# # #
   % % %*  *  ># # #+ + + +Z. . .3) 3) 3) 3)j6) 6) 6) 6)pQC QC QC QCfG) G) G) G)R " >G >G >G >G@r r r rhX) X) X) X)t FG>C<B$,x4 x4 x4 x4t	* * *) ) )& & & &P& & & &P INb b b bH#< #< #< #<Je e e eN3= 3= 3= 3=jF= F= F= F=P2 2 2,% % % %N
 
 
	 	 	& & & &PL L L L\` ` ` `D;F ;F ;Fz #*w +%0w@ w@ w@ w@r @D'+@ @ @ [@D
%> %> %> %>N1 1 1 1fGL GL GLR EI"_< _< _< _<B .2!&X X X [Xtu u u un ^1 ^1 \^1@ 2O 2O \2Oh ,0 R: R: R: \R:h !%4"%'068 68 68 68 68 68r?   r   c                     |-| \  }\  }}t          |          dz   }t          |          dz   }n|\  }}t          | ||f|          }|                                }|                                 |S )af  Map directly on to the coo_matrix constructor

    Parameters
    ----------
    data : tuple
        data must be (values, (rows, cols))
    dtype : type, optional
        Defaults to ``np.float64``
    shape : tuple or ``None``, optional
        Defaults to ``None``. If `shape` is ``None``, shape will be determined
        automatically from `data`.
    Nr&   rn   rG   )r   r   r   r   )	ro   rG   rn   r   rx  ry  n_rowsn_colsr  s	            r>   r   r     s     }#tTQTQ VV$4EBBBF\\^^F
Mr?   c                     t          |  \  }}}|%t          |          dz   }t          |          dz   }n|\  }}t          |||ff||f|          }|                                }|                                 |S )a  Convert a list of lists into a scipy.sparse matrix.

    Parameters
    ----------
    data : iterable of iterables
        `data` should be in the format [[row, col, value], ...]
    dtype : type, optional
        defaults to ``float``
    shape : tuple or ``None``, optional
        Defaults to ``None``. If `shape` is ``None``, shape will be determined
        automatically from `data`.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    Nr&   r  )r{   r   r   r   r   )	ro   rG   rn   rx  ry  r   r  r  r  s	            r>   r   r     s    $ dD$}TQTQ$.vv6F#% % %F\\^^F
Mr?   c                 Z   | j         dk    rt          d|          S | j         dv r| j        dk    rt          d|          S t          | j                   dk    rd| j         d         f}n| j         }t	          | ||          }|                                }|                                 |S )aJ  Convert a numpy array to a scipy.sparse matrix.

    Parameters
    ----------
    data : numpy.array
        The data to convert into a sparse matrix
    dtype : type, optional
        Defaults to ``float``. The type of data to be represented.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    r  r   r   )ri  )r   r&   r   r&   r  )rn   r   r^  rr   r   r   r   )ro   rG   rn   r  s       r>   r   r     s     zT &....	'	'	'DINN
 &....	TZA		DJqM"
E777F\\^^F
Mr?   c                     t          | t          |           t          | d                   f|          }|                                }|                                 |S )af  Takes a list of numpy arrays and creates a scipy.sparse matrix.

    Parameters
    ----------
    data : iterable of numpy.array
        The data to convert into a sparse matrix
    dtype : type, optional
        Defaults to ``float``. The type of data to be represented.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    r   r  )r   rr   r   r   )ro   rG   r  s      r>   r   r     sQ     SYYDG$=UKKKF\\^^F
Mr?   c                    t          | d                   rn| d         j        d         | d         j        d         k    r#t          |           }| d         j        d         }nt          |           }| d         j        d         }nt          d | D                       }t	          |t          d                    d         dz   }t	          |t          d                    d         dz   }||k    rt          |           }nt          |           }t          |           } t          | ||f|          }|                                }|	                                 |S )av  Takes a list of scipy.sparse matrices and creates a scipy.sparse mat.

    Parameters
    ----------
    data : iterable of scipy.sparse matrices
        The data to convert into a sparse matrix
    dtype : type, optional
        Defaults to ``float``. The type of data to be represented.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    r   r&   c                 6    g | ]}|                                 S r^   r  rb   r   s     r>   re   z)list_sparse_to_sparse.<locals>.<listcomp>4       333AFFHH333r?   r  r  )
r   rn   rr   r   r   r	   r   r   r   r   )ro   rG   r  r  all_keysr  s         r>   r   r     sC    $q' 7=d1gmA...YYF!W]1%FFYYF!W]1%FF33d33344X:a==111!4q8X:a==111!4q8F??YYFFYYF$<<DVV$4#% % %F\\^^F
Mr?   c                 &   t          | d                   rr| d         j        d         | d         j        d         k    r%d}t          |           }| d         j        d         }nd}t          |           }| d         j        d         }nt          d | D                       }t	          |t          d                    d         dz   }t	          |t          d                    d         dz   }||k    rd}t          |           }nd}t          |           }g }g }g }t          |           D ]\  }	}
|
                                D ]\  \  }}}|r@|                    |           |                    |	           |                    |           J|                    |	           |                    |           |                    |           t          |||ff||f|          }|
                                }|                                 |S )ae  Takes a list of dict {(row,col):val} and creates a scipy.sparse mat.

    Parameters
    ----------
    data : iterable of dicts
        The data to convert into a sparse matrix
    dtype : type, optional
        Defaults to ``float``. The type of data to be represented.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    r   r&   TFc                 6    g | ]}|                                 S r^   r  r  s     r>   re   z'list_dict_to_sparse.<locals>.<listcomp>]  r  r?   r  r  )r   rn   rr   r   r   r	   rF   r  rX   r   r   r   )ro   rG   is_colr  r  r  rx  ry  r   r'  r   row_valr+  r   r  s                  r>   r   r   D  s    $q' 7=d1gmA...FYYF!W]1%FFFYYF!W]1%FF33d33344X:a==111!4q8X:a==111!4q8F??FYYFFFYYFDDD!$ 
! 
!'*yy{{ 		! 		!#Wg !G$$$G$$$C    G$$$G$$$C    		! d|,VV4D#% % %F\\^^F
Mr?   c                    |st          |                                 t          d                    d         dz   }t          |                                 t          d                    d         dz   }n|\  }}g }g }g }|                                 D ]G\  \  }}	}
|                    |           |                    |	           |                    |
           Ht          |||ff||f|          S )aS  Takes a dict {(row,col):val} and creates a scipy.sparse matrix.

    Parameters
    ----------
    data : dict
        The data to convert into a sparse matrix
    dtype : type, optional
        Defaults to ``float``. The type of data to be represented.

    Returns
    -------
    scipy.csr_matrix
        The newly generated matrix
    Nr   r  r&   r  )r   r  r	   r  rX   r   )ro   rG   rn   r  r  rx  ry  r   r  r  rL   s              r>   r   r   }  s     }TYY[[jmm444Q7!;TYY[[jmm444Q7!;DDDZZ\\  	AAAAd| 4'-v&6eE E E Er?   )Tr  numpyr5   scipy.statsr?  r  rN  r   r   jsonr   _json_dumpsr   	functoolsr   r   operatorr	   r
   collectionsr   collections.abcr   r   r   r   r   r   scipy.sparser   r   r   r   r   r   r   pandasr  r  biom.exceptionr   r   r   r   	biom.utilr   r   r   r    r!   r"   r#   r$   biom.errr%   r'   r(   r)   
__author____copyright____credits____license____url____maintainer__	__email__r-   r.   rv   rb  r1   rM   rV   r[   r   rz   r   r  r   r   r   r   r   r   r   r^   r?   r>   <module>r     s-  b bX                      2 2 2 2 2 2 2 2 % % % % % % % % $ $ $ $ $ $ $ $ # # # # # # . . . . . . . . 2 2 2 2 2 2 2 2 2 2 2 26 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6     				- - - - - - - - - - - -+ + + + + + + + + + + + + + + + + + + +             " " " " " " " " " " " " 
G: : : 
""*	 "Ec!EcC C 3 3 3 3 3 3 3 3 	+++  :  	, 	, 	,+% +% +%\>! >! >!BVL8 VL8 VL8 VL8 VL8 VL8 VL8 VL8rX &(Zt    B %*    B #( # # # #L (-    * ', $ $ $ $N %* 6 6 6 6r  %D E E E E E Er?   