package gridss.kraken;

import au.edu.wehi.idsv.kraken.KrakenReportLine;
import au.edu.wehi.idsv.kraken.SeqIdToTaxIdMap;
import au.edu.wehi.idsv.ncbi.MinimalTaxonomyNode;
import au.edu.wehi.idsv.ncbi.TaxonomyHelper;
import au.edu.wehi.idsv.ncbi.TaxonomyLevel;
import com.google.common.collect.Lists;
import gridss.cmdline.ReferenceCommandLineProgram;
import gridss.cmdline.programgroups.DataConversion;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.variant.vcf.VCFConstants;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;

/**
 * Command line tool that reads a Kraken2 report, restricts it to the taxa of interest
 * (by default everything under the NCBI virus node), keeps only taxa that have a sequence
 * in the supplied Kraken2 reference fasta files, de-duplicates them at a configurable
 * taxonomic level and writes a tab-separated summary of the best supported taxa.
 */
@CommandLineProgramProperties(summary = "Processes a Kraken2 report and extracts the sequences with the most hits", oneLineSummary = "Processes a Kraken2 report and extracts the sequences with the most hits.", programGroup = DataConversion.class)
public class IdentifyViralTaxa extends CommandLineProgram {
    /** NCBI taxonomy ID used as the default taxon of interest. */
    private static final int NCBI_VIRUS_TAXID = 10239;
    /** Rank code that marks a genus line in the Kraken2 report. */
    private static final String KRAKEN_RANK_GENUS = "G";
    /** Rank code that marks a species line in the Kraken2 report. */
    private static final String KRAKEN_RANK_SPECIES = "S";
    private static final Log log = Log.getInstance(IdentifyViralTaxa.class);
    /** Most directly assigned reads first; ties broken by most reads assigned to the subtree. */
    private static final Comparator<KrakenReportLine> SORT_ORDER = KrakenReportLine.ByCountAssignedDirectly.reversed().thenComparing(KrakenReportLine.ByCountAssignedToTree.reversed());

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Kraken2 report file.")
    public File INPUT_KRAKEN2_REPORT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Summary csv")
    public File OUTPUT;

    @Argument(shortName = "RO", doc = "Kraken2 report filtered to only taxa of interest.", optional = true)
    public File REPORT_OUTPUT;

    @Argument(shortName = "SRO", doc = "Kraken2 report filtered to only taxa included in the output fasta file.", optional = true)
    public File SUMMARY_REPORT_OUTPUT;

    @Argument(doc = "File containing NCBI Taxonomy IDs to extract. One taxonomy ID per line.", optional = true)
    public File TAXONOMY_ID_LIST;

    @Argument(doc = "NCBI taxonomy nodes.dmp. Download and extract from https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip")
    public File NCBI_NODES_DMP;

    @Argument(doc = "Kraken2 seqid2taxid.map mapping file")
    public File SEQID2TAXID_MAP;

    @Argument(doc = "Kraken2 library.fna files. Downloaded by kraken2-build. Must be indexed. Do not run kraken2-build --clean as these files will be removed. Files are checked in order and all the contigs for the given taxid from the first matching file are extracted.", optional = true)
    public List<File> KRAKEN_REFERENCES;

    @Argument(doc = "NCBI Taxonomy IDs to extract. All taxonomic entries under these IDs are also extracted. Defaults to all viruses. Specifying TAXONOMY_ID_LIST will override this value.")
    public List<Integer> TAXONOMY_IDS = Lists.newArrayList(Integer.valueOf(NCBI_VIRUS_TAXID));

    @Argument(doc = "Maximum number of NCBI taxonomic identifiers to extract sequences for.", optional = true)
    public Integer TAXA_TO_RETURN = null;

    @Argument(doc = "Minimum number of supporting reads", optional = true)
    public int MIN_SUPPORTING_READS = 1;

    @Argument(doc = "Taxonomic level for which only one sequence will be output. Useful to prevent multiple strains of the same/similar viruses being output.", optional = true)
    public TaxonomyLevel TAXONOMIC_DEDUPLICATION_LEVEL = TaxonomyLevel.Genus;

    /**
     * Number of report lines retained for each de-duplication group.
     * Carries no {@code @Argument} annotation, so it is not exposed as a command line option.
     */
    public int TAXA_PER_DEDUPLICATION_LEVEL = 1;

    /**
     * Validates argument combinations that the argument parser cannot express.
     *
     * @return the validation error messages, or the result of the superclass validation
     *         when no problem is found here
     */
    @Override
    public String[] customCommandLineValidation() {
        if (KRAKEN_REFERENCES == null || KRAKEN_REFERENCES.isEmpty()) {
            return new String[] {"KRAKEN_REFERENCES required. This file is located under library/viral/library.fna in the kraken2 database directory."};
        }
        // NOTE(review): only a single explicit, non-default taxonomy ID is treated as a
        // conflict with TAXONOMY_ID_LIST; several explicit IDs are silently overridden by
        // the list. Kept as-is to preserve existing command line behaviour.
        boolean singleNonDefaultTaxId = TAXONOMY_IDS != null
                && TAXONOMY_IDS.size() == 1
                && TAXONOMY_IDS.get(0).intValue() != NCBI_VIRUS_TAXID;
        if (TAXONOMY_ID_LIST != null && singleNonDefaultTaxId) {
            return new String[] {"TAXONOMY_ID_LIST and TAXONOMY_IDS are mutually exclusive. Specify one or the other."};
        }
        return super.customCommandLineValidation();
    }

    /**
     * Runs the tool: loads the inputs, selects the report lines to keep and writes the
     * requested output files.
     *
     * @return 0 on success
     * @throws RuntimeIOException if any input cannot be read or any output cannot be written
     * @throws IllegalArgumentException if TAXONOMY_ID_LIST contains no taxonomy ID, or a
     *         reference sequence has no entry in SEQID2TAXID_MAP
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(INPUT_KRAKEN2_REPORT);
        IOUtil.assertFileIsReadable(NCBI_NODES_DMP);
        IOUtil.assertFileIsReadable(SEQID2TAXID_MAP);
        IOUtil.assertFileIsWritable(OUTPUT);
        if (TAXONOMY_ID_LIST != null) {
            IOUtil.assertFileIsReadable(TAXONOMY_ID_LIST);
        }
        if (SUMMARY_REPORT_OUTPUT != null) {
            IOUtil.assertFileIsWritable(SUMMARY_REPORT_OUTPUT);
        }
        if (REPORT_OUTPUT != null) {
            IOUtil.assertFileIsWritable(REPORT_OUTPUT);
        }
        if (TAXA_TO_RETURN == null) {
            // No explicit cap requested: effectively unlimited.
            TAXA_TO_RETURN = Integer.MAX_VALUE;
        }
        try {
            List<String> referenceSequenceNames = loadReferenceSequenceNames();
            if (TAXONOMY_ID_LIST != null) {
                log.info("Loading taxonomy IDs of interest from ", TAXONOMY_ID_LIST);
                // Tolerate surrounding whitespace and blank lines (e.g. a trailing newline).
                TAXONOMY_IDS = Files.readAllLines(TAXONOMY_ID_LIST.toPath()).stream()
                        .map(String::trim)
                        .filter(entry -> !entry.isEmpty())
                        .map(Integer::parseInt)
                        .collect(Collectors.toList());
                log.info("Loaded " + TAXONOMY_IDS.size() + " taxonomy IDs");
                if (TAXONOMY_IDS.isEmpty()) {
                    throw new IllegalArgumentException("TAXONOMY_ID_LIST must contain at least one taxonomy ID");
                }
            }
            log.info("Loading seqid2taxid.map from ", SEQID2TAXID_MAP);
            Map<String, Integer> seqIdToTaxId = SeqIdToTaxIdMap.createLookup(SEQID2TAXID_MAP);
            log.info("Loading NCBI taxonomy from ", NCBI_NODES_DMP);
            Map<Integer, MinimalTaxonomyNode> taxonomy = TaxonomyHelper.parseMinimal(NCBI_NODES_DMP);
            boolean[] taxaOfInterest = TaxonomyHelper.createInclusionLookup(TAXONOMY_IDS, taxonomy);
            boolean[] taxaOfInterestWithAncestors = TaxonomyHelper.addAncestors(taxaOfInterest, taxonomy);

            log.info("Parsing Kraken2 report from ", INPUT_KRAKEN2_REPORT);
            // readAllLines closes the file once it has been read.
            List<KrakenReportLine> fullReport = Files.readAllLines(INPUT_KRAKEN2_REPORT.toPath()).stream()
                    .map(reportLine -> new KrakenReportLine(reportLine))
                    .collect(Collectors.toList());
            Int2IntMap taxaGroup = createTaxaGroupLookup(taxonomy, fullReport, TAXONOMIC_DEDUPLICATION_LEVEL);
            List<KrakenReportLine> linesOfInterest = fullReport.stream()
                    .filter(reportLine -> taxaOfInterestWithAncestors[reportLine.taxonomyId])
                    .collect(Collectors.toList());
            if (REPORT_OUTPUT != null) {
                log.info("Writing abridged report to ", REPORT_OUTPUT);
                Files.write(REPORT_OUTPUT.toPath(), rawLines(linesOfInterest));
            }

            // Flag every taxon that has at least one sequence in the reference fasta files.
            boolean[] taxIdHasSequence = new boolean[taxaOfInterest.length];
            for (String sequenceName : referenceSequenceNames) {
                Integer taxId = seqIdToTaxId.get(sequenceName);
                if (taxId == null) {
                    throw new IllegalArgumentException("Reference sequence " + sequenceName + " has no entry in SEQID2TAXID_MAP " + SEQID2TAXID_MAP);
                }
                taxIdHasSequence[taxId.intValue()] = true;
            }

            Map<Integer, List<KrakenReportLine>> candidatesByGroup = linesOfInterest.stream()
                    .filter(reportLine -> taxIdHasSequence[reportLine.taxonomyId])
                    .filter(reportLine -> reportLine.countAssignedToTree >= MIN_SUPPORTING_READS)
                    .collect(Collectors.groupingBy(reportLine ->
                            Integer.valueOf(taxaGroup.getOrDefault(reportLine.taxonomyId, reportLine.taxonomyId))));
            // Keep the best line(s) of each group, then the best groups overall.
            List<KrakenReportLine> selected = candidatesByGroup.values().stream()
                    .flatMap(group -> group.stream().sorted(SORT_ORDER).limit(TAXA_PER_DEDUPLICATION_LEVEL))
                    .sorted(SORT_ORDER)
                    .limit(TAXA_TO_RETURN.intValue())
                    .collect(Collectors.toList());
            if (SUMMARY_REPORT_OUTPUT != null) {
                log.info("Writing summary kraken report to ", SUMMARY_REPORT_OUTPUT);
                Files.write(SUMMARY_REPORT_OUTPUT.toPath(), rawLines(selected));
            }

            // Built once and shared by every summary row.
            Map<Integer, KrakenReportLine> reportByTaxId = indexByTaxonomyId(fullReport);
            List<String> summary = new ArrayList<>(selected.size() + 1);
            summary.add(createSummaryHeader());
            for (KrakenReportLine reportLine : selected) {
                summary.add(createSummaryLine(reportByTaxId, taxonomy, reportLine));
            }
            log.info("Found viral presence for " + selected.size() + " genera. Writing summary to  ", OUTPUT);
            Files.write(OUTPUT.toPath(), summary);
            return 0;
        } catch (IOException e) {
            log.error(e);
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Collects the sequence names of every Kraken2 reference fasta, closing each file as
     * soon as its sequence dictionary has been read.
     *
     * @return the sequence names of all references, in file order
     * @throws IOException if a reference cannot be opened or closed
     */
    private List<String> loadReferenceSequenceNames() throws IOException {
        List<String> sequenceNames = new ArrayList<>();
        for (File reference : KRAKEN_REFERENCES) {
            IOUtil.assertFileIsReadable(reference);
            ReferenceCommandLineProgram.ensureSequenceDictionary(reference);
            try (IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(reference)) {
                fasta.getSequenceDictionary().getSequences()
                        .forEach(sequence -> sequenceNames.add(sequence.getSequenceName()));
            }
        }
        return sequenceNames;
    }

    /** Returns the unmodified report text of each of the given report lines. */
    private static List<String> rawLines(List<KrakenReportLine> reportLines) {
        return reportLines.stream().map(reportLine -> reportLine.line).collect(Collectors.toList());
    }

    /**
     * Indexes report lines by taxonomy ID.
     *
     * @throws IllegalStateException if two report lines share a taxonomy ID
     */
    private static Map<Integer, KrakenReportLine> indexByTaxonomyId(List<KrakenReportLine> report) {
        return report.stream().collect(Collectors.toMap(
                reportLine -> Integer.valueOf(reportLine.taxonomyId),
                reportLine -> reportLine));
    }

    /** Returns the tab-separated column header of the summary output. */
    private String createSummaryHeader() {
        return "taxid_genus\tname_genus\treads_genus_tree\ttaxid_species\tname_species\treads_species_tree\ttaxid_assigned\tname_assigned\treads_assigned_tree\treads_assigned_direct";
    }

    /**
     * Formats one summary row for the given report line.
     *
     * Walks from the line towards the taxonomy root and records the species ("S") and
     * genus ("G") report lines found on the way. When no such ancestor is found, the
     * line itself is reported in that column.
     *
     * @param reportByTaxId full Kraken2 report indexed by taxonomy ID
     * @param taxonomy NCBI taxonomy nodes keyed by taxonomy ID
     * @param assigned report line to summarise
     * @return tab-separated row matching {@link #createSummaryHeader()}
     */
    private String createSummaryLine(Map<Integer, KrakenReportLine> reportByTaxId, Map<Integer, MinimalTaxonomyNode> taxonomy, KrakenReportLine assigned) {
        KrakenReportLine genus = assigned;
        KrakenReportLine species = assigned;
        KrakenReportLine current = assigned;
        while (current != null && current.taxonomyId > 1) {
            if (KRAKEN_RANK_SPECIES.equals(current.rank)) {
                species = current;
            } else if (KRAKEN_RANK_GENUS.equals(current.rank)) {
                genus = current;
            }
            MinimalTaxonomyNode node = taxonomy.get(Integer.valueOf(current.taxonomyId));
            // Stop at the root, at a taxon missing from the taxonomy, or at a self-parented node.
            boolean hasParent = node != null && node.parentTaxId > 1 && node.parentTaxId != current.taxonomyId;
            current = hasParent ? reportByTaxId.get(Integer.valueOf(node.parentTaxId)) : null;
        }
        return String.format("%d\t%s\t%d\t%d\t%s\t%d\t%d\t%s\t%d\t%d",
                genus.taxonomyId, genus.scientificName.trim(), genus.countAssignedToTree,
                species.taxonomyId, species.scientificName.trim(), species.countAssignedToTree,
                assigned.taxonomyId, assigned.scientificName.trim(), assigned.countAssignedToTree, assigned.countAssignedDirectly);
    }

    /**
     * Maps every taxonomy ID in the report to the ID of the group it is de-duplicated under.
     *
     * The group of a report line is its ancestor (or itself) whose Kraken rank matches the
     * given level. If several ancestors match, the one closest to the root wins. A line
     * with no matching ancestor, or any line when {@code level} is null, forms its own group.
     *
     * @param taxa NCBI taxonomy nodes keyed by taxonomy ID
     * @param fullReport all lines of the Kraken2 report
     * @param level taxonomic level to group at, or null to disable grouping
     * @return lookup from report taxonomy ID to group taxonomy ID
     * @throws IllegalStateException if two report lines share a taxonomy ID
     */
    public static Int2IntMap createTaxaGroupLookup(Map<Integer, ? extends MinimalTaxonomyNode> taxa, List<KrakenReportLine> fullReport, TaxonomyLevel level) {
        Map<Integer, KrakenReportLine> reportByTaxId = indexByTaxonomyId(fullReport);
        Int2IntOpenHashMap groupLookup = new Int2IntOpenHashMap();
        for (KrakenReportLine reportLine : fullReport) {
            int groupTaxId = reportLine.taxonomyId;
            if (level != null) {
                KrakenReportLine current = reportLine;
                while (current != null) {
                    if (level.krakenAbbreviation().equals(current.rank)) {
                        groupTaxId = current.taxonomyId;
                    }
                    MinimalTaxonomyNode node = taxa.get(Integer.valueOf(current.taxonomyId));
                    if (node == null || node.parentTaxId == current.taxonomyId) {
                        // Unknown taxon or self-parented root: nothing further to climb.
                        break;
                    }
                    current = reportByTaxId.get(Integer.valueOf(node.parentTaxId));
                }
            }
            groupLookup.put(reportLine.taxonomyId, groupTaxId);
        }
        return groupLookup;
    }

    /** Command line entry point; exits the JVM with the tool's return code. */
    public static void main(String[] args) {
        System.exit(new IdentifyViralTaxa().instanceMain(args));
    }
}
