package gridss;

import au.edu.wehi.idsv.sam.ChimericAlignment;
import au.edu.wehi.idsv.vcf.SvType;
import au.edu.wehi.idsv.vcf.VcfFilter;
import au.edu.wehi.idsv.vcf.VcfInfoAttributes;
import au.edu.wehi.idsv.vcf.VcfSvConstants;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeRangeSet;
import freemarker.template.Template;
import gridss.cmdline.ReferenceCommandLineProgram;
import gridss.cmdline.programgroups.VariantCalling;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.annotation.Strand;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import java.io.File;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import joptsimple.internal.Strings;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import picard.cmdline.StandardOptionDefinitions;

/**
 * Command line program that reads an annotated VIRUSBreakend VCF, keeps only the single
 * breakend records accepted by {@link #shouldKeep(VariantContext)}, rewrites each kept record
 * as a pair of mated breakpoint records (see
 * {@link #transformToBreakpointNotation(SAMSequenceDictionary, VariantContext, int)}) and
 * writes the sorted result to {@link #OUTPUT}.
 */
@CommandLineProgramProperties(summary = "Filters an annotated VIRUSBreakend VCF to only likely integration sites", oneLineSummary = "Filters an annotated VIRUSBreakend VCF to only likely integration sites", programGroup = VariantCalling.class)
public class VirusBreakendFilter extends ReferenceCommandLineProgram {
    private static final Log log = Log.getInstance(VirusBreakendFilter.class);

    /** Placeholder reference base used for the host-side record. */
    private static final String UNKNOWN_REFERENCE_BASE = "N";

    /** Suffix appended to the source record ID to form the host-side record ID. */
    private static final String HOST_ID_SUFFIX = "_host";

    /** Suffix appended to the source record ID to form the virus-side record ID. */
    private static final String VIRUS_ID_SUFFIX = "_virus";

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "VIRUSBreakend VCF file to filter")
    public File INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Filtered VCF")
    public File OUTPUT;

    // NOTE: shouldKeep() applies this value as an upper bound on the fraction of the host
    // alignment covered by simple/low complexity repeats (fraction <= value keeps the record).
    @Argument(doc = "Minimum portion of host alignment that does not match a simple or low complexity repeat")
    public double MINIMUM_REPEAT_OVERLAP = 1.0d;

    @Argument(doc = "Minimum portion of breakend sequence that aligns to host genome")
    public double MINIMUM_HOST_OVERLAP = 0.5d;

    @Argument(doc = "Minimum assembly mapping quality for integration site to be considered unambiguous")
    public int MINIMUM_MAPQ = 10;

    // null or empty disables the taxonomy check in shouldKeep().
    @Argument(doc = "Kraken taxonomic identifiers associated with host genome")
    public List<Integer> TAXONOMY_IDS = null;

    /**
     * Program entry point: runs the tool and exits the JVM with its return code.
     *
     * @param strArr command line arguments
     */
    public static void main(String[] strArr) {
        System.exit(new VirusBreakendFilter().instanceMain(strArr));
    }

    /**
     * Filters {@link #INPUT}, converts the retained records to breakpoint notation and writes
     * them, sorted by the combined sequence dictionary, to {@link #OUTPUT}.
     *
     * <p>The contigs declared in the input VCF header are appended to the reference sequence
     * dictionary, and the combined dictionary is used for both sorting and the output header.
     *
     * @return 0 on success
     * @throws IllegalArgumentException if the reference or its sequence dictionary is missing
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        if (getReference() == null) {
            throw new IllegalArgumentException("Missing host REFERENCE_SEQUENCE");
        }
        SAMSequenceDictionary dictionary = getReference().getSequenceDictionary();
        if (dictionary == null) {
            throw new IllegalArgumentException("Missing .dict file for host genome");
        }
        try (VCFFileReader reader = new VCFFileReader(this.INPUT, false)) {
            VCFHeader header = reader.getFileHeader();
            // Compare filter IDs by value: identity comparison of strings would miss an
            // already-declared filter and add a duplicate header line.
            boolean hasLowMapqFilter = header.getFilterLines().stream()
                    .anyMatch(line -> VcfFilter.LOW_MAPQ.filter().equals(line.getID()));
            if (!hasLowMapqFilter) {
                header.addMetaDataLine(VcfFilter.LOW_MAPQ.header());
            }
            // Append the contigs declared by the VCF to the host dictionary.
            SAMSequenceDictionary vcfDictionary = header.getSequenceDictionary();
            if (vcfDictionary != null) {
                for (SAMSequenceRecord contig : vcfDictionary.getSequences()) {
                    dictionary.addSequence(contig);
                }
            }
            VariantContextWriterBuilder writerBuilder = new VariantContextWriterBuilder()
                    .setReferenceDictionary(dictionary)
                    .setOutputFile(this.OUTPUT);
            // All records are materialised before writing because the converted records
            // have to be re-sorted.
            List<VariantContext> converted = reader.iterator().stream()
                    .filter(this::shouldKeep)
                    .flatMap(vc -> transformToBreakpointNotation(dictionary, vc, this.MINIMUM_MAPQ).stream())
                    .sorted(new VariantContextComparator(dictionary))
                    .collect(Collectors.toList());
            try (VariantContextWriter writer = writerBuilder.build()) {
                header.setSequenceDictionary(dictionary);
                writer.writeHeader(header);
                for (VariantContext vc : converted) {
                    writer.add(vc);
                }
            }
        }
        return 0;
    }

    /**
     * Converts a single breakend record into two mated breakpoint records.
     *
     * <p>The host alignment that sorts first under {@code ChimericAlignment.ByMapqAlignedLength}
     * (presumably the best one - confirm against that comparator) is used as the host locus.
     * Two records are produced:
     * <ul>
     *   <li>a "virus" record copied from {@code variantContext}, with ID {@code <id>_virus};</li>
     *   <li>a "host" record on the alignment's contig, with ID {@code <id>_host} and reference
     *       allele {@code N}, which also receives the alignment, taxonomy and RepeatMasker
     *       INFO attributes of the source record.</li>
     * </ul>
     * Both records get the event ID, mate ID and {@code SVTYPE=BND} attributes, and both are
     * given the LOW_MAPQ filter when the chosen alignment's MAPQ is below {@code i}.
     *
     * @param sAMSequenceDictionary sequence dictionary; not consulted by this method
     * @param variantContext single breakend record carrying at least one host alignment in
     *        its breakend alignments INFO attribute
     * @param i minimum mapping quality below which the LOW_MAPQ filter is applied
     * @return the virus record followed by the host record
     * @throws IndexOutOfBoundsException if the record has no host alignment
     */
    public static List<VariantContext> transformToBreakpointNotation(SAMSequenceDictionary sAMSequenceDictionary, VariantContext variantContext, int i) {
        List<ChimericAlignment> hostAlignments = infoToChimeric(variantContext, VcfInfoAttributes.BREAKEND_ALIGNMENTS.attribute());
        hostAlignments.sort(ChimericAlignment.ByMapqAlignedLength);
        ChimericAlignment bestHost = hostAlignments.get(0);

        String alt = variantContext.getAlternateAllele(0).getDisplayString();
        // Single breakend ALT is either ".<sequence><anchor>" or "<anchor><sequence>.".
        boolean altStartsWithDot = alt.startsWith(".");
        boolean hostPositive = altStartsWithDot != bestHost.isNegativeStrand;
        // The host breakpoint is the last aligned reference base for positive orientation,
        // the first one otherwise.
        int hostPosition = bestHost.pos + (hostPositive ? bestHost.cigar.getReferenceLength() - 1 : 0);
        String eventId = variantContext.getID();
        String hostId = eventId + HOST_ID_SUFFIX;
        String virusId = eventId + VIRUS_ID_SUFFIX;

        String breakendSequence = alt.substring(1, alt.length() - 1);
        char anchorBase = altStartsWithDot ? alt.charAt(alt.length() - 1) : alt.charAt(0);
        // Bases lying between the anchor and the host alignment: these remain as inserted
        // sequence in the breakpoint ALT alleles.
        String insertedSequence = altStartsWithDot
                ? breakendSequence.substring(bestHost.getLastAlignedBaseReadOffset() + 1)
                : breakendSequence.substring(0, bestHost.getFirstAlignedBaseReadOffset());

        Strand virusStrand = altStartsWithDot ? Strand.NEGATIVE : Strand.POSITIVE;
        Strand hostStrand = hostPositive ? Strand.POSITIVE : Strand.NEGATIVE;
        String hostInsertedSequence = virusStrand == hostStrand
                ? SequenceUtil.reverseComplement(insertedSequence)
                : insertedSequence;

        // Each record's ALT names the locus of its mate, bracketed according to the
        // mate's strand.
        String virusBracket = bracketFor(virusStrand);
        String hostBracket = bracketFor(hostStrand);
        String virusLocus = virusBracket + variantContext.getContig() + ":" + variantContext.getStart() + virusBracket;
        String hostLocus = hostBracket + bestHost.rname + ":" + hostPosition + hostBracket;

        String hostAlt = hostStrand == Strand.FORWARD
                ? UNKNOWN_REFERENCE_BASE + hostInsertedSequence + virusLocus
                : virusLocus + hostInsertedSequence + UNKNOWN_REFERENCE_BASE;
        String virusAlt = virusStrand == Strand.FORWARD
                ? anchorBase + insertedSequence + hostLocus
                : hostLocus + insertedSequence + anchorBase;

        VariantContextBuilder hostBuilder = new VariantContextBuilder()
                .chr(bestHost.rname)
                .start(hostPosition)
                .stop(hostPosition)
                .id(hostId)
                .log10PError(variantContext.getLog10PError())
                .filters(getFilter(bestHost, variantContext.getFilters(), i))
                .alleles(UNKNOWN_REFERENCE_BASE, hostAlt);
        VariantContextBuilder virusBuilder = new VariantContextBuilder(variantContext)
                .id(virusId)
                .filters(getFilter(bestHost, variantContext.getFilters(), i))
                .alleles(String.valueOf(anchorBase), virusAlt);

        // The virus record inherits every attribute from the source record via the copy
        // constructor; the host record is built from scratch so the annotations are copied over.
        VcfInfoAttributes[] copiedToHost = {
                VcfInfoAttributes.BREAKEND_ALIGNMENTS,
                VcfInfoAttributes.INSERTED_SEQUENCE_NCBI_TAXONOMY_ID,
                VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_SA_TAG,
                VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_OVERLAP,
                VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_REPEAT_TYPE,
                VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_REPEAT_CLASS,
                VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_ORIENTATION
        };
        for (VcfInfoAttributes info : copiedToHost) {
            String key = info.attribute();
            if (variantContext.hasAttribute(key)) {
                hostBuilder.attribute(key, variantContext.getAttribute(key));
            }
        }
        hostBuilder.attribute(VcfSvConstants.BREAKEND_EVENT_ID_KEY, eventId);
        virusBuilder.attribute(VcfSvConstants.BREAKEND_EVENT_ID_KEY, eventId);
        hostBuilder.attribute(VcfSvConstants.MATE_BREAKEND_ID_KEY, virusId);
        virusBuilder.attribute(VcfSvConstants.MATE_BREAKEND_ID_KEY, hostId);
        hostBuilder.attribute("SVTYPE", SvType.BND.name());
        virusBuilder.attribute("SVTYPE", SvType.BND.name());
        return ImmutableList.of(virusBuilder.make(), hostBuilder.make());
    }

    /** Returns the breakpoint ALT bracket used when the mate has the given strand. */
    private static String bracketFor(Strand mateStrand) {
        return mateStrand == Strand.FORWARD ? "]" : "[";
    }

    /**
     * Copies the existing filters and adds LOW_MAPQ when the alignment is missing or its
     * mapping quality is below the threshold.
     *
     * @param alignment host alignment, may be null
     * @param existingFilters filters already applied to the source record
     * @param minimumMapq mapping quality threshold
     * @return a new mutable filter set
     */
    private static Set<String> getFilter(ChimericAlignment alignment, Set<String> existingFilters, int minimumMapq) {
        Set<String> filters = Sets.newHashSet(existingFilters);
        if (alignment == null || alignment.mapq < minimumMapq) {
            filters.add(VcfFilter.LOW_MAPQ.filter());
        }
        return filters;
    }

    /**
     * Decides whether a record is retained.
     *
     * <p>A record is kept when all of the following hold:
     * <ul>
     *   <li>{@link #TAXONOMY_IDS} is null/empty or contains the record's taxonomy ID;</li>
     *   <li>the ALT allele is a single breakend (starts or ends with ".") with a non-empty
     *       breakend sequence;</li>
     *   <li>at least one host alignment covers at least {@link #MINIMUM_HOST_OVERLAP} of the
     *       breakend sequence while at most {@link #MINIMUM_REPEAT_OVERLAP} of that alignment
     *       overlaps simple/low complexity repeats.</li>
     * </ul>
     *
     * @param vc record to test
     * @return true if the record should be written
     */
    private boolean shouldKeep(VariantContext vc) {
        String alt = vc.getAlternateAllele(0).getDisplayString();
        int taxonomyId = vc.getAttributeAsInt(VcfInfoAttributes.INSERTED_SEQUENCE_NCBI_TAXONOMY_ID.attribute(), -1);
        if (this.TAXONOMY_IDS != null && !this.TAXONOMY_IDS.isEmpty() && !this.TAXONOMY_IDS.contains(Integer.valueOf(taxonomyId))) {
            return false;
        }
        boolean singleBreakend = alt.startsWith(".") || alt.endsWith(".");
        if (!singleBreakend || alt.length() < 2) {
            return false;
        }
        // ALT minus the "." and the anchor base.
        int breakendLength = alt.length() - 2;
        if (breakendLength <= 0) {
            // No breakend sequence, so nothing could have been aligned to the host.
            return false;
        }
        List<ChimericAlignment> hostAlignments = infoToChimeric(vc, VcfInfoAttributes.BREAKEND_ALIGNMENTS.attribute());
        RangeSet<Integer> repeats = repeatRanges(vc);
        for (ChimericAlignment host : hostAlignments) {
            int firstOffset = host.getFirstAlignedBaseReadOffset();
            int lastOffset = host.getLastAlignedBaseReadOffset();
            int alignedLength = lastOffset - firstOffset + 1;
            // Floating point division: the thresholds are fractions, so integer division
            // would truncate every partial overlap to zero.
            double hostOverlap = alignedLength / (double) breakendLength;
            if (hostOverlap < this.MINIMUM_HOST_OVERLAP) {
                continue;
            }
            // Half-open [first, last + 1) matches the ranges stored by repeatRanges().
            int repeatBases = repeats.subRangeSet(Range.closedOpen(firstOffset, lastOffset + 1))
                    .asRanges()
                    .stream()
                    .mapToInt(range -> range.upperEndpoint() - range.lowerEndpoint())
                    .sum();
            double repeatOverlap = repeatBases / (double) alignedLength;
            if (repeatOverlap <= this.MINIMUM_REPEAT_OVERLAP) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses the alignments stored in an INFO attribute of a record.
     *
     * @param vc record to read
     * @param attribute INFO key holding the encoded alignments
     * @return one alignment per attribute value; an empty list when the key is null/empty,
     *         the attribute is absent, or its first value is empty
     */
    private static List<ChimericAlignment> infoToChimeric(VariantContext vc, String attribute) {
        if (Strings.isNullOrEmpty(attribute)) {
            return Collections.emptyList();
        }
        List<String> encoded = vc.getAttributeAsStringList(attribute, "");
        if (encoded.isEmpty() || Strings.isNullOrEmpty(encoded.get(0))) {
            return Collections.emptyList();
        }
        return encoded.stream()
                .map(value -> new ChimericAlignment(value, "[|:]"))
                .collect(Collectors.toList());
    }

    /**
     * Collects the read-offset intervals annotated as simple or low complexity repeats.
     *
     * @param vc record to read
     * @return half-open {@code [first, last + 1)} read offset ranges of every RepeatMasker
     *         alignment whose name contains "Simple_repeat" or "Low_complexity"
     */
    private static RangeSet<Integer> repeatRanges(VariantContext vc) {
        RangeSet<Integer> repeats = TreeRangeSet.create();
        for (ChimericAlignment repeat : infoToChimeric(vc, VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_SA_TAG.attribute())) {
            boolean lowComplexity = repeat.rname.contains("Simple_repeat") || repeat.rname.contains("Low_complexity");
            if (lowComplexity) {
                repeats.add(Range.closedOpen(repeat.getFirstAlignedBaseReadOffset(), repeat.getLastAlignedBaseReadOffset() + 1));
            }
        }
        return repeats;
    }
}
