package picard.analysis.artifacts;

import freemarker.template.Template;
import htsjdk.samtools.AlignmentBlock;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.filter.AggregateFilter;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.DuplicateReadFilter;
import htsjdk.samtools.filter.FailsVendorReadQualityFilter;
import htsjdk.samtools.filter.InsertSizeFilter;
import htsjdk.samtools.filter.MappingQualityFilter;
import htsjdk.samtools.filter.NotPrimaryAlignmentFilter;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CodeUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.IntervalListReferenceSequenceMask;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import htsjdk.tribble.index.interval.IntervalIndexCreator;
import htsjdk.variant.vcf.VCFConstants;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.SinglePassSamProgram;
import picard.analysis.artifacts.SequencingArtifactMetrics;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.DbSnpBitSetUtil;
import picard.util.VariantType;

/**
 * Command-line tool that collects metrics quantifying single-base sequencing artifacts
 * (pre-adapter and bait-bias) and writes them to summary and detail metrics files.
 */
// The summary is assembled from the class constants instead of repeating the same help text
// as a second literal, so the two copies cannot drift apart.
@CommandLineProgramProperties(
        summary = CollectSequencingArtifactMetrics.USAGE_SUMMARY + CollectSequencingArtifactMetrics.USAGE_DETAILS,
        oneLineSummary = CollectSequencingArtifactMetrics.USAGE_SUMMARY,
        programGroup = DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
/* loaded from: input_file:picard/analysis/artifacts/CollectSequencingArtifactMetrics.class */
public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
    static final String USAGE_SUMMARY = "Collect metrics to quantify single-base sequencing artifacts.  ";
    static final String USAGE_DETAILS = "<p>This tool examines two sources of sequencing errors associated with hybrid selection protocols.  These errors are divided into two broad categories, pre-adapter and bait-bias.  Pre-adapter errors can arise from laboratory manipulations of a nucleic acid sample e.g. shearing and occur prior to the ligation of adapters for PCR amplification (hence the name pre-adapter).  </p><p>Bait-bias artifacts occur during or after the target selection step, and correlate with substitution rates that are 'biased', or higher for sites having one base on the reference/positive strand relative to sites having the complementary base on that strand.  For example, during the target selection step, a (G>T) artifact might result in a higher substitution rate at sites with a G on the positive strand (and C on the negative), relative to sites with the flip (C positive)/(G negative).  This is known as the 'G-Ref' artifact. </p><p>For additional information on these types of artifacts, please see the corresponding GATK dictionary entries on <a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and <a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>pre-adapter artifacts</a>.</p><p>This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed metrics show the error rates for each type of base substitution within every possible triplet base configuration.  Error rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the 'worst-case' errors. 
</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectSequencingArtifactMetrics \\<br />     I=input.bam \\<br />     O=artifact_metrics.txt \\<br />     R=reference_sequence.fasta</pre>Please see the metrics at the following links <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>PreAdapterDetailMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>PreAdapterSummaryMetrics</a>, <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>BaitBiasDetailMetrics</a>, and <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>BaitBiasSummaryMetrics</a> for complete descriptions of the output metrics produced by this tool. <hr />";

    /** Optional interval list; when set, only reference positions inside it are counted. */
    @Argument(doc = "An optional list of intervals to restrict analysis to.", optional = true)
    public File INTERVALS;

    /** Optional dbSNP VCF; positions it flags as dbSNP sites are skipped. */
    @Argument(doc = "VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.", optional = true)
    public File DB_SNP;

    /** Library name used when a read group (or a read) carries no library. */
    private static final String UNKNOWN_LIBRARY = "UnknownLibrary";
    /** Sample name used when a read group carries no sample. */
    private static final String UNKNOWN_SAMPLE = "UnknownSample";

    // Output files, derived from OUTPUT plus a per-metric extension in setup().
    private File preAdapterSummaryOut;
    private File preAdapterDetailsOut;
    private File baitBiasSummaryOut;
    private File baitBiasDetailsOut;
    private File errorSummaryFile;

    /** Mask built from INTERVALS; stays null when no intervals were given. */
    private IntervalListReferenceSequenceMask intervalMask;
    /** Lookup built from DB_SNP; stays null when no dbSNP file was given. */
    private DbSnpBitSetUtil dbSnpMask;
    /** Aggregate read-level filter assembled in setup(). */
    private SamRecordFilter recordFilter;
    private static final Log log = Log.getInstance(CollectSequencingArtifactMetrics.class);

    @Argument(shortName = "Q", doc = "The minimum base quality score for a base to be included in analysis.")
    public int MINIMUM_QUALITY_SCORE = 20;

    @Argument(shortName = "MQ", doc = "The minimum mapping quality score for a base to be included in analysis.")
    public int MINIMUM_MAPPING_QUALITY = 30;

    @Argument(shortName = "MIN_INS", doc = "The minimum insert size for a read to be included in analysis.")
    public int MINIMUM_INSERT_SIZE = 60;

    // Plain literal: this default is unrelated to the tribble index constant it was previously read from.
    @Argument(shortName = "MAX_INS", doc = "The maximum insert size for a read to be included in analysis. Set to 0 to have no maximum.")
    public int MAXIMUM_INSERT_SIZE = 600;

    @Argument(shortName = "UNPAIRED", doc = "Include unpaired reads. If set to true then all paired reads will be included as well - MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored.")
    public boolean INCLUDE_UNPAIRED = false;

    @Argument(shortName = "DUPES", doc = "Include duplicate reads. If set to true then all reads flagged as duplicates will be included as well.")
    public boolean INCLUDE_DUPLICATES = false;

    @Argument(shortName = "NON_PF", doc = "Whether or not to include non-PF reads.")
    public boolean INCLUDE_NON_PF_READS = false;

    @Argument(shortName = "TANDEM", doc = "Set to true if mate pairs are being sequenced from the same strand, i.e. they're expected to face the same direction.")
    public boolean TANDEM_READS = false;

    @Argument(doc = "When available, use original quality scores for filtering.")
    public boolean USE_OQ = true;

    @Argument(doc = "The number of context bases to include on each side of the assayed base.")
    public int CONTEXT_SIZE = 1;

    @Argument(doc = "If specified, only print results for these contexts in the detail metrics output. However, the summary metrics output will still take all contexts into consideration.", optional = true)
    public Set<String> CONTEXTS_TO_PRINT = new HashSet<>();

    @Argument(shortName = "EXT", doc = "Append the given file extension to all metric file names (ex. OUTPUT.pre_adapter_summary_metrics.EXT). None if null", optional = true)
    public String FILE_EXTENSION = null;

    /** Upper-cased bases of the most recently seen contig, cached by getRefContext(). */
    private String currentRefString = null;
    /** Contig index that currentRefString belongs to; -1 until the first contig is cached. */
    private int currentRefIndex = -1;

    /** Sample names found in the header's read groups. */
    private final Set<String> samples = new HashSet<>();
    /** Library names found in the header's read groups. */
    private final Set<String> libraries = new HashSet<>();
    /** One artifact counter per library, keyed by library name. */
    private final Map<String, ArtifactCounter> artifactCounters = new HashMap<>();

    /**
     * Always returns {@code true}.
     * NOTE(review): presumably tells the command-line framework that a reference sequence
     * must be supplied -- confirm against CommandLineProgram.
     */
    @Override // picard.cmdline.CommandLineProgram
    protected boolean requiresReference() {
        return true;
    }

    /**
     * Checks the context and insert-size arguments for internal consistency.
     *
     * <p>Every violated rule contributes one message, so the caller sees all problems at once.
     *
     * @return the error messages, or {@code null} when no rule is violated
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // picard.cmdline.CommandLineProgram
    public String[] customCommandLineValidation() {
        final List<String> messages = new ArrayList<>();

        // A context is CONTEXT_SIZE bases on each side plus the assayed base itself.
        final int contextFullLength = (2 * this.CONTEXT_SIZE) + 1;
        if (this.CONTEXT_SIZE < 0) {
            messages.add("CONTEXT_SIZE cannot be negative");
        }
        for (final String context : this.CONTEXTS_TO_PRINT) {
            if (context.length() != contextFullLength) {
                messages.add("Context " + context + " is not the length implied by CONTEXT_SIZE: " + contextFullLength);
            }
        }

        if (this.MINIMUM_INSERT_SIZE < 0) {
            messages.add("MINIMUM_INSERT_SIZE cannot be negative");
        }
        if (this.MAXIMUM_INSERT_SIZE < 0) {
            messages.add("MAXIMUM_INSERT_SIZE cannot be negative");
        }
        // A maximum of 0 means "no maximum", so it is exempt from the ordering check.
        if (this.MAXIMUM_INSERT_SIZE > 0 && this.MAXIMUM_INSERT_SIZE < this.MINIMUM_INSERT_SIZE) {
            messages.add("MAXIMUM_INSERT_SIZE cannot be less than MINIMUM_INSERT_SIZE unless set to 0");
        }

        return messages.isEmpty() ? null : messages.toArray(new String[0]);
    }

    /**
     * Prepares everything needed before reads are processed: output file paths, the sample and
     * library sets from the header, the optional interval and dbSNP masks, the read-level filter,
     * and one {@link ArtifactCounter} per library.
     *
     * @param sAMFileHeader header of the input; supplies the read groups and the sequence dictionary
     * @param file          not used by this method
     */
    @Override // picard.analysis.SinglePassSamProgram
    protected void setup(SAMFileHeader sAMFileHeader, File file) {
        final String extension = this.FILE_EXTENSION != null ? this.FILE_EXTENSION : "";
        this.preAdapterSummaryOut = new File(this.OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT + extension);
        this.preAdapterDetailsOut = new File(this.OUTPUT + SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT + extension);
        this.baitBiasSummaryOut = new File(this.OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT + extension);
        this.baitBiasDetailsOut = new File(this.OUTPUT + SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT + extension);
        this.errorSummaryFile = new File(this.OUTPUT + SequencingArtifactMetrics.ERROR_SUMMARY_EXT + extension);
        IOUtil.assertFilesAreWritable(Arrays.asList(
                this.preAdapterSummaryOut,
                this.preAdapterDetailsOut,
                this.baitBiasSummaryOut,
                this.baitBiasDetailsOut,
                this.errorSummaryFile));

        // Read groups without a sample or library fall back to the "unknown" placeholders.
        for (final SAMReadGroupRecord readGroup : sAMFileHeader.getReadGroups()) {
            this.samples.add(CodeUtil.getOrElse(readGroup.getSample(), UNKNOWN_SAMPLE));
            this.libraries.add(CodeUtil.getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY));
        }

        // The uniqued interval list (or null when absent) feeds both the mask and the dbSNP lookup.
        IntervalList intervals = null;
        if (this.INTERVALS != null) {
            IOUtil.assertFileIsReadable(this.INTERVALS);
            intervals = IntervalList.fromFile(this.INTERVALS).uniqued();
            this.intervalMask = new IntervalListReferenceSequenceMask(intervals);
        }
        if (this.DB_SNP != null) {
            IOUtil.assertFileIsReadable(this.DB_SNP);
            this.dbSnpMask = new DbSnpBitSetUtil(
                    this.DB_SNP,
                    sAMFileHeader.getSequenceDictionary(),
                    EnumSet.noneOf(VariantType.class),
                    intervals,
                    Optional.of(log));
        }

        final List<SamRecordFilter> filters = new ArrayList<>();
        if (!this.INCLUDE_NON_PF_READS) {
            filters.add(new FailsVendorReadQualityFilter());
        }
        filters.add(new NotPrimaryAlignmentFilter());
        if (!this.INCLUDE_DUPLICATES) {
            filters.add(new DuplicateReadFilter());
        }
        filters.add(new AlignedFilter(true));
        filters.add(new MappingQualityFilter(this.MINIMUM_MAPPING_QUALITY));
        if (!this.INCLUDE_UNPAIRED) {
            // A configured maximum of 0 means "no maximum".
            final int maxInsertSize = this.MAXIMUM_INSERT_SIZE == 0 ? Integer.MAX_VALUE : this.MAXIMUM_INSERT_SIZE;
            filters.add(new InsertSizeFilter(this.MINIMUM_INSERT_SIZE, maxInsertSize));
        }
        this.recordFilter = new AggregateFilter(filters);

        // Every counter is labelled with the comma-joined names of all samples in the header.
        final String sampleAlias = StringUtil.join(",", new ArrayList<>(this.samples));
        for (final String library : this.libraries) {
            this.artifactCounters.put(library, new ArtifactCounter(sampleAlias, library, this.CONTEXT_SIZE, this.TANDEM_READS));
        }
    }

    /**
     * Counts every usable aligned base of one read into the artifact counter of the read's library.
     *
     * <p>A base is skipped when its quality is below MINIMUM_QUALITY_SCORE, it is an N, it lies
     * outside the interval mask, it is a dbSNP site, its reference context would run off either end
     * of the contig, the context contains an N, or the base fails
     * {@code SequenceUtil.isUpperACGTN}.
     *
     * @param sAMRecord         the read to examine
     * @param referenceSequence the contig the read is aligned to
     * @throws PicardException if the read's library is not among the libraries declared in the header
     */
    @Override // picard.analysis.SinglePassSamProgram
    protected void acceptRead(SAMRecord sAMRecord, ReferenceSequence referenceSequence) {
        if (this.recordFilter.filterOut(sAMRecord)) {
            return;
        }

        final SAMReadGroupRecord readGroup = sAMRecord.getReadGroup();
        final String library = readGroup == null
                ? UNKNOWN_LIBRARY
                : CodeUtil.getOrElse(readGroup.getLibrary(), UNKNOWN_LIBRARY);
        if (!this.libraries.contains(library)) {
            throw new PicardException("Record contains library that is missing from header: " + library);
        }

        final int contextFullLength = (2 * this.CONTEXT_SIZE) + 1;
        final ArtifactCounter counter = this.artifactCounters.get(library);
        final byte[] readBases = sAMRecord.getReadBases();

        // Prefer original qualities when requested, falling back to the current ones if absent.
        final byte[] originalQuals = this.USE_OQ ? sAMRecord.getOriginalBaseQualities() : null;
        final byte[] readQuals = originalQuals != null ? originalQuals : sAMRecord.getBaseQualities();

        for (final AlignmentBlock block : sAMRecord.getAlignmentBlocks()) {
            for (int offset = 0; offset < block.getLength(); offset++) {
                // Block starts are 1-based, hence the "- 1" when indexing the arrays below.
                final int readPos = block.getReadStart() + offset;
                final int refPos = block.getReferenceStart() + offset;

                if (readQuals[readPos - 1] < this.MINIMUM_QUALITY_SCORE) {
                    continue;
                }

                final char readBase = Character.toUpperCase((char) readBases[readPos - 1]);
                if (readBase == 'N') {
                    continue;
                }

                // NOTE(review): the mask lookups are kept in their original order in case they are stateful.
                if (this.intervalMask != null && !this.intervalMask.get(referenceSequence.getContigIndex(), refPos)) {
                    continue;
                }
                if (this.dbSnpMask != null && this.dbSnpMask.isDbSnpSite(referenceSequence.getName(), refPos)) {
                    continue;
                }

                // Zero-based start of the context window; skip windows that fall off the contig.
                final int contextStart = (refPos - this.CONTEXT_SIZE) - 1;
                if (contextStart < 0 || contextStart + contextFullLength > referenceSequence.length()) {
                    continue;
                }

                final String context = getRefContext(referenceSequence, contextStart, contextFullLength);
                if (context.contains("N")) {
                    continue;
                }
                if (!SequenceUtil.isUpperACGTN((byte) readBase)) {
                    continue;
                }

                counter.countRecord(context, readBase, sAMRecord);
            }
        }
    }

    /**
     * Returns the upper-cased reference bases in the window {@code [i, i + i2)} of the given contig.
     *
     * <p>The upper-cased contig string is cached and rebuilt only when the contig index changes.
     * Decoding uses a fixed charset and upper-casing a fixed locale, so the result does not depend
     * on the platform's default charset or locale.
     *
     * @param referenceSequence the contig to read from
     * @param i                 zero-based start of the window
     * @param i2                length of the window
     * @return the window's bases as an upper-case string
     */
    private String getRefContext(ReferenceSequence referenceSequence, int i, int i2) {
        final int contigIndex = referenceSequence.getContigIndex();
        if (this.currentRefIndex != contigIndex) {
            this.currentRefString = new String(referenceSequence.getBases(), StandardCharsets.US_ASCII)
                    .toUpperCase(Locale.ROOT);
            this.currentRefIndex = contigIndex;
        }
        return this.currentRefString.substring(i, i + i2);
    }

    /**
     * Finalises every per-library counter, writes the four pre-adapter / bait-bias metrics files,
     * then derives and writes the error-summary file.
     *
     * <p>Detail rows are restricted to CONTEXTS_TO_PRINT when that set is non-empty; summary rows
     * are always written in full.
     */
    @Override // picard.analysis.SinglePassSamProgram
    protected void finish() {
        final MetricsFile<SequencingArtifactMetrics.PreAdapterSummaryMetrics, Integer> preAdapterSummary = getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.PreAdapterDetailMetrics, Integer> preAdapterDetails = getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.BaitBiasSummaryMetrics, Integer> baitBiasSummary = getMetricsFile();
        final MetricsFile<SequencingArtifactMetrics.BaitBiasDetailMetrics, Integer> baitBiasDetails = getMetricsFile();
        final MetricsFile<ErrorSummaryMetrics, Integer> errorSummary = getMetricsFile();

        final boolean printAllContexts = this.CONTEXTS_TO_PRINT.isEmpty();
        for (final ArtifactCounter counter : this.artifactCounters.values()) {
            counter.finish();
            preAdapterSummary.addAllMetrics(counter.getPreAdapterSummaryMetrics());
            baitBiasSummary.addAllMetrics(counter.getBaitBiasSummaryMetrics());
            for (final SequencingArtifactMetrics.PreAdapterDetailMetrics detail : counter.getPreAdapterDetailMetrics()) {
                if (printAllContexts || this.CONTEXTS_TO_PRINT.contains(detail.CONTEXT)) {
                    preAdapterDetails.addMetric(detail);
                }
            }
            for (final SequencingArtifactMetrics.BaitBiasDetailMetrics detail : counter.getBaitBiasDetailMetrics()) {
                if (printAllContexts || this.CONTEXTS_TO_PRINT.contains(detail.CONTEXT)) {
                    baitBiasDetails.addMetric(detail);
                }
            }
        }

        // These four files must be written BEFORE the error summary is computed:
        // the code below rewrites REF_BASE / ALT_BASE on the pre-adapter detail rows in place.
        preAdapterDetails.write(this.preAdapterDetailsOut);
        preAdapterSummary.write(this.preAdapterSummaryOut);
        baitBiasDetails.write(this.baitBiasDetailsOut);
        baitBiasSummary.write(this.baitBiasSummaryOut);

        final List<SequencingArtifactMetrics.PreAdapterDetailMetrics> details = preAdapterDetails.getMetrics();
        if (!details.isEmpty()) {
            // Fold G- and T-reference rows onto their complements so that each substitution
            // and its reverse complement end up in the same group.
            for (final SequencingArtifactMetrics.PreAdapterDetailMetrics detail : details) {
                if (detail.REF_BASE == 'G' || detail.REF_BASE == 'T') {
                    detail.REF_BASE = (char) SequenceUtil.complement((byte) detail.REF_BASE);
                    detail.ALT_BASE = (char) SequenceUtil.complement((byte) detail.ALT_BASE);
                }
            }

            final Map<String, List<SequencingArtifactMetrics.PreAdapterDetailMetrics>> bySubstitution =
                    details.stream().collect(Collectors.groupingBy(detail -> detail.REF_BASE + ">" + detail.ALT_BASE));

            // TreeSet gives a stable, alphabetical row order in the output file.
            for (final String substitution : new TreeSet<>(bySubstitution.keySet())) {
                errorSummary.addMetric(summarizeSubstitution(substitution, bySubstitution.get(substitution)));
            }
        }
        errorSummary.write(this.errorSummaryFile);
    }

    /** Collapses all pre-adapter detail rows that share one substitution into a single error-summary row. */
    private static ErrorSummaryMetrics summarizeSubstitution(
            final String substitution,
            final List<SequencingArtifactMetrics.PreAdapterDetailMetrics> rows) {
        final SequencingArtifactMetrics.PreAdapterDetailMetrics first = rows.get(0);
        final ErrorSummaryMetrics summary = new ErrorSummaryMetrics();
        summary.REF_BASE = first.REF_BASE;
        summary.ALT_BASE = first.ALT_BASE;
        summary.SUBSTITUTION = substitution;
        summary.REF_COUNT = rows.stream().mapToLong(row -> row.PRO_REF_BASES + row.CON_REF_BASES).sum();
        summary.ALT_COUNT = rows.stream().mapToLong(row -> row.PRO_ALT_BASES + row.CON_ALT_BASES).sum();
        summary.calculateDerivedFields();
        return summary;
    }

    /**
     * Always returns {@code false}.
     * NOTE(review): presumably tells SinglePassSamProgram not to pass reads without a
     * reference position to this collector -- confirm against the base class.
     */
    @Override // picard.analysis.SinglePassSamProgram
    protected boolean usesNoRefReads() {
        return false;
    }
}
