package gridss;

import au.edu.wehi.idsv.FileSystemContext;
import au.edu.wehi.idsv.VariantContextRepeatMaskerAnnotator;
import au.edu.wehi.idsv.alignment.BwaStreamingAligner;
import au.edu.wehi.idsv.alignment.ExternalProcessStreamingAligner;
import au.edu.wehi.idsv.alignment.StreamingAligner;
import au.edu.wehi.idsv.util.FileHelper;
import au.edu.wehi.idsv.vcf.InsertedSequenceAnnotator;
import au.edu.wehi.idsv.vcf.VcfInfoAttributes;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import freemarker.core.FMParserConstants;
import gridss.cmdline.ReferenceCommandLineProgram;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import picard.cmdline.StandardOptionDefinitions;

/* loaded from: input_file:gridss/AnnotateInsertedSequence.class */
/**
 * Command-line program that reads the variants of {@link #INPUT}, optionally aligns their
 * inserted sequences against the reference genome and/or annotates them with RepeatMasker
 * information, and writes the resulting records to {@link #OUTPUT}.
 */
public class AnnotateInsertedSequence extends ReferenceCommandLineProgram {
    private static final Log log = Log.getInstance(AnnotateInsertedSequence.class);

    /**
     * Factor applied to {@link #ALIGNER_BATCH_SIZE} when sizing the in-process bwa aligner.
     *
     * NOTE(review): referencing a FreeMarker parser token id here is almost certainly a
     * decompiler artifact (a numeric literal matched to an unrelated constant). The value is
     * kept as-is to preserve behaviour; replace it with the intended literal once confirmed
     * against the upstream source.
     */
    private static final int IN_PROCESS_BUFFER_SCALE = FMParserConstants.NATURAL_GT;

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "VCF file to annotate")
    public File INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Annotated VCF file")
    public File OUTPUT;

    @Argument(doc = "Number of worker threads to spawn. Defaults to number of cores available. Note that I/O threads are not included in this worker thread count so CPU usage can be higher than the number of worker thread.", shortName = "THREADS")
    public int WORKER_THREADS = Runtime.getRuntime().availableProcessors();

    @Argument(doc = "Minimum inserted sequence length for realignment. Generally, short read aligners are not able to uniquely align sequences shorter than 18-20 bases.", optional = true)
    public int MIN_SEQUENCE_LENGTH = 20;

    @Argument(doc = "Command line arguments to run external aligner. In-process bwa alignment is used if this value is null. Aligner output must be written to stdout and the records MUST match the input fastq order. The aligner must support using \"-\" as the input filename when reading from stdin.Java argument formatting is used with %1$s being the fastq file to align, %2$s the reference genome, and %3$d the number of threads to use.", optional = true)
    public List<String> ALIGNER_COMMAND_LINE = Lists.newArrayList(BWA_COMMAND_LINE);

    @Argument(doc = "Number of records to buffer when performing in-process or streaming alignment. Not applicable when performing external alignment.", optional = true)
    public int ALIGNER_BATCH_SIZE = this.MAX_RECORDS_IN_RAM.intValue();

    @Argument(doc = "Whether to align inserted sequences to REFERENCE_GENOME. Valid values are:APPEND (Append alignments to REFERENCE_GENOME to the BEALN field), REPLACE (Replace all BEALN fields)  (default),ADD_MISSING (Add alignments to records missing a BEALN field, andSKIP (do not align).", optional = true)
    public AlignmentStatus ALIGNMENT = AlignmentStatus.REPLACE;

    @Argument(doc = "Annotate inserted sequences with RepeatMasker annotations. Use bedops rmsk2bed to generate the bed file from the RepeatMasker .fa.out file.", optional = true)
    public File REPEAT_MASKER_BED = null;

    /** Alignment modes selectable through the {@code ALIGNMENT} argument. */
    public enum AlignmentStatus {
        APPEND,
        REPLACE,
        ADD_MISSING,
        SKIP
    }

    /**
     * Program entry point: runs the tool and exits the JVM with its return code.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String[] strArr) {
        System.exit(new AnnotateInsertedSequence().instanceMain(strArr));
    }

    /**
     * Rejects a {@code REPEAT_MASKER_BED} that was supplied but is not an existing regular file;
     * otherwise defers to the parent validation.
     *
     * NOTE(review): the decompiler reported that this method's access modifier was changed from
     * {@code protected}; it is left {@code public} so it stays compatible with the parent class.
     *
     * @return the validation error messages, or whatever the parent validation returns
     */
    @Override // gridss.cmdline.ReferenceCommandLineProgram, picard.cmdline.CommandLineProgram
    public String[] customCommandLineValidation() {
        if (this.REPEAT_MASKER_BED != null && !this.REPEAT_MASKER_BED.isFile()) {
            return new String[]{"REPEAT_MASKER_BED: file not found"};
        }
        return super.customCommandLineValidation();
    }

    /**
     * Annotates the input VCF and writes the result to {@link #OUTPUT}.
     *
     * @return 0 on success
     * @throws RuntimeException wrapping any {@link IOException} raised while processing
     */
    @Override // picard.cmdline.CommandLineProgram
    public int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
        log.info("Annotating inserted sequences in " + this.INPUT);
        // The reference is only opened to obtain its sequence dictionary; try-with-resources
        // guarantees the handle is released however this method exits.
        try (IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(this.REFERENCE_SEQUENCE)) {
            SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
            Iterator<VariantContext> source = openVariantSource(dictionary);
            try {
                Iterator<VariantContext> annotated = source;
                if (this.REPEAT_MASKER_BED != null) {
                    annotated = annotateWithRepeatMasker(source, dictionary);
                }
                saveVcf(this.INPUT, this.OUTPUT, annotated);
            } finally {
                // Close the underlying source rather than the transforming wrapper, which
                // carries no close() of its own.
                closeIfCloseable(source);
            }
            log.info("Annotated variants written to " + this.OUTPUT);
            return 0;
        } catch (IOException e) {
            log.error(e, new Object[0]);
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the iterator that supplies the records to write: an {@link InsertedSequenceAnnotator}
     * backed by an aligner, or a plain reader over {@link #INPUT} when alignment is skipped.
     */
    private Iterator<VariantContext> openVariantSource(SAMSequenceDictionary dictionary) throws IOException {
        if (this.ALIGNMENT == AlignmentStatus.SKIP) {
            return new VCFFileReader(this.INPUT, false).iterator();
        }
        StreamingAligner aligner = createAligner(dictionary);
        boolean replaceExisting = this.ALIGNMENT == AlignmentStatus.REPLACE;
        boolean addMissingOnly = this.ALIGNMENT == AlignmentStatus.ADD_MISSING;
        return new InsertedSequenceAnnotator(this.INPUT, aligner, this.MIN_SEQUENCE_LENGTH, replaceExisting, addMissingOnly);
    }

    /** Creates the in-process bwa aligner when no external command line is configured, otherwise the external-process aligner. */
    private StreamingAligner createAligner(SAMSequenceDictionary dictionary) throws IOException {
        if (this.ALIGNER_COMMAND_LINE == null || this.ALIGNER_COMMAND_LINE.isEmpty()) {
            log.info("Using in-process bwa alignment");
            return new BwaStreamingAligner(this.REFERENCE_SEQUENCE, dictionary, this.WORKER_THREADS, this.ALIGNER_BATCH_SIZE * IN_PROCESS_BUFFER_SCALE);
        }
        log.info("Using external process alignment");
        return new ExternalProcessStreamingAligner(SamReaderFactory.make(), this.ALIGNER_COMMAND_LINE, this.REFERENCE_SEQUENCE, this.WORKER_THREADS, dictionary);
    }

    /**
     * Loads {@link #REPEAT_MASKER_BED} and wraps the given records so each one is passed through
     * the RepeatMasker annotator as it is consumed.
     */
    private Iterator<VariantContext> annotateWithRepeatMasker(Iterator<VariantContext> variants, SAMSequenceDictionary dictionary) throws IOException {
        log.info("Loading RepeatMasker bed file from " + this.REPEAT_MASKER_BED);
        VariantContextRepeatMaskerAnnotator annotator = new VariantContextRepeatMaskerAnnotator(this.REPEAT_MASKER_BED);
        log.info("RepeatMasker bed file loaded from " + this.REPEAT_MASKER_BED);
        Set<String> referenceContigs = dictionary.getSequences().stream()
                .map(sequence -> sequence.getContig())
                .collect(Collectors.toSet());
        int shared = Sets.intersection(referenceContigs, Sets.newHashSet(annotator.getRepeatMaskerContigs())).size();
        // Warn when fewer than half of the contigs of the smaller set are shared: the usual
        // symptom of mismatched chromosome naming between the two files.
        int smallerContigCount = Math.min(annotator.getRepeatMaskerContigs().size(), dictionary.size());
        if (shared < smallerContigCount * 0.5d) {
            log.warn(String.format("Only %d chromosomes in common between REFERENCE_SEQUENCE and REPEAT_MASKER_BED. Are you sure your chromosome names match?", shared));
        }
        return Iterators.transform(variants, annotator::apply);
    }

    /**
     * Closes {@code resource} when it implements {@link AutoCloseable}. A failure to close is
     * logged rather than propagated, since it happens after the records have been processed.
     */
    private static void closeIfCloseable(Object resource) {
        if (!(resource instanceof AutoCloseable)) {
            return;
        }
        try {
            ((AutoCloseable) resource).close();
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt for whoever owns this thread.
                Thread.currentThread().interrupt();
            }
            log.warn(e, "Unable to close ", resource.getClass().getName());
        }
    }

    /**
     * Writes the given records to a VCF whose header is the input header plus the INFO lines for
     * the annotations this program adds.
     *
     * @param file  VCF file the header is read from
     * @param file2 destination VCF file
     * @param it2   records to write, in output order
     * @throws IOException declared for I/O failures while writing or moving the output
     */
    protected void saveVcf(File file, File file2, Iterator<VariantContext> it2) throws IOException {
        VCFHeader header;
        try (VCFFileReader headerReader = new VCFFileReader(file, false)) {
            header = headerReader.getFileHeader();
        }
        header.addMetaDataLine(VcfInfoAttributes.BREAKEND_ALIGNMENTS.infoHeader());
        if (this.REPEAT_MASKER_BED != null) {
            header.addMetaDataLine(VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_OVERLAP.infoHeader());
            header.addMetaDataLine(VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_REPEAT_TYPE.infoHeader());
            header.addMetaDataLine(VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_REPEAT_CLASS.infoHeader());
            header.addMetaDataLine(VcfInfoAttributes.INSERTED_SEQUENCE_REPEATMASKER_ORIENTATION.infoHeader());
        }
        // Optionally write to a working file first and move it over the destination afterwards.
        File target = Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(file2) : file2;
        VariantContextWriterBuilder builder = new VariantContextWriterBuilder().setOutputFile(target);
        if (header.getSequenceDictionary() != null) {
            builder = builder.setReferenceDictionary(header.getSequenceDictionary());
        }
        ProgressLogger progress = new ProgressLogger(log);
        try (VariantContextWriter writer = builder.build()) {
            writer.writeHeader(header);
            while (it2.hasNext()) {
                VariantContext variant = it2.next();
                writer.add(variant);
                progress.record(variant.getContig(), variant.getStart());
            }
        }
        // The writer is closed at this point, so the working file is complete before the move.
        if (target != file2) {
            FileHelper.move(target, file2, true);
        }
    }
}
