U
    \d                     @   s   d dl mZ d dlmZ d dlZd dlZd dlZd dlZd dl	m
Z
 dd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    )division)statsN)SeqIOc                 C   s:  t t}i }z"t| jdks$tt| d}W nv tt	fk
rn } z|
d|  td W 5 d}~X Y n tk
r } z|
d|   td W 5 d}~X Y nX |v |D ]j}z | d }t| d }W n< tk
r } z|
d|  td W 5 d}~X Y qX |||< qW 5 Q R X |d|   |S )	a6  Parse an abundance or coverage file

    The abundance/coverage file is a flat file of the format
    "genome_id<TAB>abundance"
    or
    "genome_id<TAB>coverage"

    Args:
        abundance_file (string): the path to the abundance file

    Returns:
        dict: genome_id as keys, abundance as values
    r   rz Failed to open abundance file:%s   NzAbundance file seems empty: %sz!Failed to read abundance file: %szLoaded abundance file: %s)logging	getLogger__name__osstatst_sizeAssertionErroropenIOErrorOSErrorerrorsysexitsplitfloat
IndexErrorinfo)Zabundance_fileloggerabundance_dicfelineZ	genome_id	abundance r   ,lib/python3.8/site-packages/iss/abundance.pyparse_abundance_file   s,    
r    c                 C   s&   i }t | }| D ]}d| ||< q|S )zGenerate uniform abundance distribution from a number of records

    Args:
        record_list (list): a list of record.id

    Returns:
        dict: a dictionary with records as keys, abundance as values
    r   len)record_listr   	n_recordsrecordr   r   r   uniform8   s
    	r&   c                 C   sJ   i }t | }tjjdd|d}|t| }t| |D ]\}}|||< q4|S )zGenerate scaled halfnormal abundance distribution from a number of
        records

    Args:
        record_list (list): a list of record.id

    Returns:
        dict: a dictionary with records as keys, abundance as values
    g        g      ?)locZscalesize)r"   r   Zhalfnormrvssumzipr#   r   r$   distdist_scaledr%   r   r   r   r   
halfnormalI   s    

r/   c                 C   sF   i }t | }tjj|d}|t| }t| |D ]\}}|||< q0|S )zGenerate scaled exponential abundance distribution from a number of
        records

    Args:
        record_list (list): a list of record.id

    Returns:
        dict: a dictionary with records as keys, abundance as values
    r(   )r"   nprandomexponentialr*   r+   r,   r   r   r   r3   ]   s    

r3   c                 C   sF   i }t | }tjj|d}|t| }t| |D ]\}}|||< q0|S )zGenerate scaled lognormal abundance distribution from a number of
        records

    Args:
        record_list (list): a list of record.id

    Returns:
        dict: a dictionary with records as keys, abundance as values
    r0   )r"   r1   r2   	lognormalr*   r+   r,   r   r   r   r4   q   s    

r4   c                 C   s^   i }t | }tjjd|d}d| tjj|d }|t| }t| |D ]\}}|||< qH|S )zGenerate scaled zero-inflated lognormal abundance distribution from a
        number of records

    Args:
        record_list (list): a list of record.id

    Returns:
        dict: a dictionary with records as keys, abundance as values
    g?)pr(   r   r0   )	r"   r   Z	bernoullir)   r1   r2   r4   r*   r+   )r#   r   r$   Zzero_inflatedr-   r.   r%   r   r   r   r   zero_inflated_lognormal   s    

r6   c                 C   s   | | }|| | }|S )a  Calculate the coverage of a genome in a metagenome given its size and
    abundance

    Args:
        total_n_reads (int): total amount of reads in the dataset
        species_abundance (float): abundance of the species, between 0 and 1
        read_length (int): length of the reads in the dataset
        genome_size (int): size of the genome

    Returns:
        float: coverage of the genome
    r   )Ztotal_n_readsZspecies_abundanceZread_lengthZgenome_sizeZn_readsZcoverager   r   r   to_coverage   s    r7   c              
   C   s   t t}|d }zt|d}W n: tk
rZ } z|d|  td W 5 d}~X Y n6X |* |  D ]\}}|	d||f  qjW 5 Q R X dS )zWrite the abundance dictionary to a file

    Args:
        abundance_dic (dict): the abundance dictionary
        output (str): the output file name
    z_abundance.txtwzFailed to open output file: %sr   Nz%s	%s
)
r   r   r	   r   PermissionErrorr   r   r   itemswrite)r   outputr   Zoutput_abundancer   r   r%   r   r   r   r   to_file   s    
r=   c                    sJ  g } D ] }| dd t|dD  qtt| t| } ||   } fdd| D }t|| | D ]\}}	i }
| krrzJt|d}|2 t|d}dd |D }tdd	 |D }W 5 Q R X W n$ t	k
r } z W 5 d
}~X Y qrX tdd	 |D }|D ]$}t
|}|	||  }||
|j< qqr| }||
 |S )aM  Computes the abundance dictionary for a mix of complete and
    draft genomes

    Args:
        genomes (list): list of all input records
        draft (list): draft genome files
        distribution (function): distribution function to use
        output (str): output file

    Returns:
        dict: the abundance dictionary
    c                 S   s   g | ]
}|j qS r   )name.0r%   r   r   r   
<listcomp>   s     zdraft.<locals>.<listcomp>Zfastac                    s   i | ]\}}| kr||qS r   r   )r@   kvdraftr   r   
<dictcomp>   s    zdraft.<locals>.<dictcomp>r   c                 S   s   g | ]}|qS r   r   r?   r   r   r   rA      s     c                 s   s   | ]}t |V  qd S Nr!   r?   r   r   r   	<genexpr>   s     zdraft.<locals>.<genexpr>Nc                 s   s   | ]}t |V  qd S rG   r!   r?   r   r   r   rH      s     )extendr   parselistsetr:   r=   r   r*   	Exceptionr"   idcopyupdate)ZgenomesrE   Zdistributionr<   Zdraft_recordsdr   Zcomplete_genomes_abundancekeyr   Z	draft_dicr   Z
fasta_fileZtotal_lengthr   r%   lengthZcontig_abundanceZfull_abundance_dicr   rD   r   rE      s<    


 
rE   )Z
__future__r   Zscipyr   r
   r   r   Znumpyr1   ZBior   r    r&   r/   r3   r4   r6   r7   r=   rE   r   r   r   r   <module>   s   (