a
    h*                     @   sz  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ G dd	 d	eZG d
d deZd*ee ee ee eedddZeedddZ d+eeef eddddZ!d,eeef eddddZ"ee#e dddZ$dd Z%dd Z&d-ee ee ed d!d"Z'd#d$ Z(d%d& Z)ed'd(d)Z*dS ).    N)partial)islice)Path)IterableIteratorOptionalUnion)logger)	betabinomc                   @   s   e Zd ZdZdS )ConfigzBConfig contains the parameters related to specific alignment file.N)__name__
__module____qualname____doc__ r   r   Z/mounts/lovelace/software/anaconda3/envs/py39/lib/python3.9/site-packages/metaDMG/utils.pyr      s   r   c                   @   s\   e Zd ZdZee dddZeedddZeddd	Z	edd
dZ
ddddZdS )ConfigszkConfigs contains the parameters related to config file.
    Inherits from dict. Implements iterations.
    )returnc                 c   s   | d d }| d d }| d   }|D ]}t| }||d< |d | |d< || d |d< |d	 dkr~|| d
 |d< n|| d |d< || d |d< || d |d< |d d | |d< || d |d< |V  q(dS )zjIteration

        Yields
        ------
        Iterator[Config]
            Allow for iteration
        
output_dirlcaZpmdsamplessamplebamz.mismatches.txt.gzZpath_mismatches_txtdamage_modez.mismatches.stat.txt.gzZpath_mismatches_statz	.stat.txtz.lca.txt.gzZpath_lcaz.log.txtZpath_lca_logtmppath_tmpz.pmd.txt.gzpath_pmdN)keysr   )selfZdir_lcaZdir_pmdr   r   configr   r   r   __iter__    s"    zConfigs.__iter__)nr   c                 C   s   t t| |dS )zGets the n'th config

        Parameters
        ----------
        n
            The index

        Returns
        -------
        Config
            A single configuration
        N)nextr   )r   r!   r   r   r   get_nthA   s    zConfigs.get_nthc                 C   s   | j ddS )zpGet the first config

        Returns
        -------
        Config
            A single configuration
        r   )r!   )r#   r   r   r   r   	get_firstP   s    zConfigs.get_firstc                 C   s   t | d  S )zmThe number of configs

        Returns
        -------
        int
            The number of configs
        r   )lenr   r$   r   r   r   __len__Z   s    zConfigs.__len__Nc                 C   s   t | d t| d }| d }|| }tjdd}tjdd}||kr`td| d| d	 n ||krtd| d
| d dS )zECompare the number of configs to the number of parallel_samples used.parallel_samplesr   cores_per_sampleT)ZlogicalFzThe total number of jobs z0 are higher than the number of parallel_samples z. Do not do this unless you know what you are doing. Try decreasing either 'parallel_samples' or 'parallel_samples-per-sample'.z5 are higher than the real number of parallel_samples z1 (non-logical). This might decrease performance. N)minr&   psutil	cpu_countr	   warninginfo)r   r(   r)   ZN_jobsZ	max_coresZmax_cores_realr   r   r   check_number_of_jobsd   s"    zConfigs.check_number_of_jobs)r   r   r   r   r   r   r    intr#   r%   r'   r/   r   r   r   r   r      s   !

r   F)config_filelog_portlog_pathforcer   c           
      C   sd  | du rt d} |  s*td t td|  d t| d}t	|}W d   n1 sf0    Y  t
|}||d< ||d< |d	d
 |dd |dd |dd ||d< g d}|D ]}|| rt || ||< q|d  D ]\}}	t |	|d |< q| D ],\}}	t|	tr|	 rt|||< q|d rTdnd|d< t|S )a  Create an instance of Configs from a config file

    Parameters
    ----------
    config_file
        The config file to load
    log_port
        Optional log port, by default None
    log_path
        Optional log path, by default None
    force
        Whether or not the computations are force, by default False

    Returns
    -------
        An instance of Configs

    Raises
    ------
    typer.Abort
        If not a proper config file
    Nconfig.yamlz*Error! Please select a proper config file!zUsing z as config file.rr2   r3   forward_onlyFr)      r   r   Z	min_readsr   r4   )namesnodesZacc2taxr   r1   r   custom_database)r   existsr	   errortyperAbortr.   openyamlZ	safe_loadupdate_old_config
setdefaultitems
isinstancestrisdigitr0   r   )
r1   r2   r3   r4   filedpathspathkeyvalr   r   r   make_configs|   s8    
(
rN   )rI   r   c                 C   sr   d| v r| S t d ddddddd	d
d
ddddd}i }|  D ] \}}||v rZ|| }|||< qB|d |S )NversionzGUsing an old version of the config file. Please remake the config file.metaDMG_cppZmin_mapping_qualityZmin_edit_distZmax_edit_distZmin_similarity_scoreZmax_similarity_scoreZweight_typer   r;   r(   r)   r1   )zmetaDMG-lcaZminmapqZeditdistminZeditdistmaxZsimscorelowZsimscorehighZ
weighttypeZstorage_dirdirZfix_ncbiZcoresr)   Zconfig_pathZforced)r	   r-   rD   pop)rI   Z	d_old2newZd_newrL   valuer   r   r   rB      s4    

rB   )rH   
missing_okr   c                 C   s   t | j|d d S )N)rT   )r   unlink)rH   rT   r   r   r   remove_file   s    rV   )rK   rT   r   c                 C   sZ   z<t | } |  D ]}| r(t| qt| q|   W n tyT   |sP Y n0 dS )zmRemove everything in a directory

    Parameters
    ----------
    path
        Directory to be deleted
    N)r   iterdiris_filerV   remove_directoryrmdirFileNotFoundError)rK   rT   childr   r   r   rY      s    	

rY   )sr   c                 C   s   t d| S )zSplit a string by comma, space, or both.

    Parameters
    ----------
    s
        Input string

    Returns
    -------
        List of strings
    z[^,\s]+)refindall)r]   r   r   r   split_string	  s    r`   c                  C   s4   zdd l } W n" ty.   td t Y n0 d S )Nr   zAThe 'fit' extras has to be installed: pip install "metaDMG[fit]" )ZmetaDMG.fitModuleNotFoundErrorprintr>   r?   ZmetaDMGr   r   r   check_metaDMG_fit!  s
    rd   c                  C   s4   zdd l } W n" ty.   td t Y n0 d S )Nr   zAThe 'viz' extras has to be installed: pip install "metaDMG[viz]" )ZmetaDMG.vizra   rb   r>   r?   rc   r   r   r   check_metaDMG_viz*  s
    re   )r1   results_dirr   c                 C   sD   | dur|durt d|r |S | du r0td} t| }|d d S )a  Helper function that gets the results directory from either the
    config file or the results directory directly.

    Parameters
    ----------
    config_file
        Config file, by default None
    results_dir
        Results directory, by default None

    Returns
    -------
        Path to the results directory

    Raises
    ------
    AssertionError
        If both config file and results directory are set, raise error
    Nz2'config_file' and 'results_dir' cannot both be setr5   r   results)AssertionErrorr   rN   )r1   rf   Zconfigsr   r   r   get_results_dir6  s    ri   c                 C   s  t dd | jD }|rd}nd}d| jv r2d}nd}| | d j}| | d	 j}| | d
 j}| | d j}tdd | jD }|rt|d }	n(tt|d td| d dg}	|	d}	dd | jD }
| jd d |
f j}|d| t	|	d   | }|| }d| | }t
||j|jd}t & td | | }W d    n1 sn0    Y  d|t|< tjtj|jdd |	 D dtj|dd |	 D dfdd}|S )Nc                 S   s   g | ]}|d kqS )Zdamager   ).0columnr   r   r   
<listcomp>a      z-get_single_fit_prediction.<locals>.<listcomp> ZMAP_zk-1FTAqcphic                 s   s*   | ]"}| d rt|dd V  qdS )zk++r8   N)
startswithr0   splitrj   namer   r   r   	<genexpr>r  s   z,get_single_fit_prediction.<locals>.<genexpr>r8   )ry   r8   c                 S   s    g | ]}| d p| dqS )zN+zN-)rt   rv   r   r   r   rl   ~  s   )r!   abignorer   c                 S   s   g | ]}d |dqS )Dxrs   r   rj   xir   r   r   rl     rm   )columnsc                 S   s   g | ]}d |dqS )ZDx_stdrs   r   r~   r   r   r   rl     rm   Zaxis)anyr   valuesmaxnpZarangeZhstackZreshapeZilocabssp_betabinomTwarningscatch_warningsfilterwarningsstdisnanpdconcatZ	DataFrameflatten)
df_resultsZBayesianprefixr7   ro   rp   rq   rr   Zmax_positionxZmask_NNr}   alphabetadistr   df_Dxr   r   r   get_single_fit_prediction_  sN    
 


,	r   c                 C   s"   t | }tj| jdd|fddS )NT)Zdropr8   r   )r   r   r   Zreset_index)r   r   r   r   r   append_fit_predictions  s    r   )r   c                 C   s   ddl }ddl}ddl}| d }|jjddd || dT}|j|| d  d| d	  |jd
}|jdg|j	|d}|
  W d   n1 s0    Y  dS )zRun the PMD command from metaDMG-cpp and output the result to the gzipped txt_out

    Parameters
    ----------
    alignment_file
        Alignment file to compute the PMD scores on
    txt_out
        The (gzipped) output txt file
    r   Nr   T)parentsexist_okZwtrP   z pmd r   )stdoutgzip)stdinr   )r   shlex
subprocessparentmkdirr@   Popenru   PIPEr   communicate)r   r   r   r   Ztxt_outZzip_outcppzipr   r   r   run_PMD  s     r   )NNF)F)F)NN)+r^   r   	functoolsr   	itertoolsr   pathlibr   typingr   r   r   r   numpyr   Zpandasr   r+   r>   rA   Z	logger_ttr	   Zscipy.statsr
   r   dictr   r   r0   rF   boolrN   rB   rV   rY   listr`   rd   re   ri   r   r   r   r   r   r   r   <module>   sP   c   G(	  )=