From 955b7594b48d4afc9ef54201c930168112998280 Mon Sep 17 00:00:00 2001
From: znorgaard
Date: Tue, 23 Jul 2024 11:56:00 -0700
Subject: [PATCH] refactor: move isConsensus check

---
Review notes (ignored when the patch is applied; not part of the commit message):
 * Iterator.tapEach discards the value returned by its function, so the dead
   `else r` branch in the consensus check was dropped. Behaviour is unchanged.
 * The new test took AbRawReadCount from ConsensusTags.PerBase but
   BaRawReadCount from ConsensusTags.PerRead while giving both the scalar
   value 10. Both are now imported from PerRead.
   NOTE(review): assumes PerRead declares AbRawReadCount - confirm.
 * Hunk headers and the diffstat were recomputed for the changes above. The
   post-image blob ids on the `index` lines still describe the original commit.
 * Leading whitespace and blank context lines were reconstructed from a
   whitespace-collapsed copy of this patch; check the hunks against the target
   tree before applying.

 .../umi/CollectDuplexSeqMetrics.scala         |  4 ++++
 .../umi/CollectDuplexSeqMetricsTest.scala     | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetrics.scala b/src/main/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetrics.scala
index 5d18b6d98..4d32a53bf 100644
--- a/src/main/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetrics.scala
+++ b/src/main/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetrics.scala
@@ -307,6 +307,10 @@ class CollectDuplexSeqMetrics
     // Build the iterator we'll use based on whether or not we're restricting to a set of intervals
     val in = SamSource(input)
     val _filteredIterator = in.iterator.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
+      .tapEach { r =>
+        // Fail on the first consensus record: the input is expected to be a UMI-grouped BAM.
+        if (Umis.isConsensusRead(r)) throw new IllegalArgumentException("Found consensus record. Expected UMI-grouped BAM")
+      }
     val iterator = intervals match {
       case None => _filteredIterator
       case Some(path) =>
diff --git a/src/test/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetricsTest.scala b/src/test/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetricsTest.scala
index f1939c68d..8c646a91a 100644
--- a/src/test/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetricsTest.scala
+++ b/src/test/scala/com/fulcrumgenomics/umi/CollectDuplexSeqMetricsTest.scala
@@ -26,13 +26,12 @@ package com.fulcrumgenomics.umi
 
 import java.nio.file.{Path, Paths}
 import java.util.Random
-
 import com.fulcrumgenomics.FgBioDef._
 import com.fulcrumgenomics.bam.api.SamOrder
 import com.fulcrumgenomics.commons.util.SimpleCounter
 import com.fulcrumgenomics.testing.SamBuilder.{Minus, Plus}
 import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
-import com.fulcrumgenomics.umi.ConsensusTags.PerRead.AllPerReadTags
+import com.fulcrumgenomics.umi.ConsensusTags.PerRead.{AbRawReadCount, AllPerReadTags, BaRawReadCount}
 import com.fulcrumgenomics.util.{Io, Metric, Rscript}
 import htsjdk.samtools.util.{Interval, IntervalList}
 import org.apache.commons.math3.distribution.NormalDistribution
@@ -314,6 +313,17 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
     duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
   }
 
+  it should "raise an exception if duplex consensus records are provided" in {
+    val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
+    builder.addPair(
+      contig=1,
+      start1=1000,
+      start2=1100,
+      attrs=Map(RX -> "AAA-GGG", MI -> "1/A", AbRawReadCount -> 10, BaRawReadCount -> 10)
+    )
+    an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
+  }
+
   "CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
     val builder = new SamBuilder(readLength=10)
     builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))