WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit 343512b

Browse files
committed
fix: preserve input order in filterToMostCommonAlignment
1 parent 4a8644b commit 343512b

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

src/main/scala/com/fulcrumgenomics/umi/UmiConsensusCaller.scala

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -399,17 +399,19 @@ trait UmiConsensusCaller[ConsensusRead <: SimpleRead] {
399399
* NOTE: filtered out reads are sent to the `rejectsMethod` method and do not need further handling
400400
*/
401401
protected[umi] def filterToMostCommonAlignment(recs: Seq[SourceRead]): Seq[SourceRead] = if (recs.size < 2) recs else {
402-
val groups = new ArrayBuffer[AlignmentGroup]
403-
val sorted = recs.sortBy(r => -r.length).toIndexedSeq
402+
val recsIndexed = recs.toIndexedSeq
403+
// Sort indices by record length (descending) - avoids allocating a sorted copy of records
404+
val sortedIndices: Array[Int] = recsIndexed.indices.sortBy(i => -recsIndexed(i).length).toArray
404405

405-
forloop (from=0, until=sorted.length) { i =>
406-
val simpleCigar = simplifyCigar(sorted(i).cigar)
406+
val groups = new ArrayBuffer[AlignmentGroup]
407+
forloop (from=0, until=sortedIndices.length) { si =>
408+
val simpleCigar = simplifyCigar(recsIndexed(sortedIndices(si)).cigar)
407409
var found = false
408-
groups.foreach { g => if (simpleCigar.isPrefixOf(g.cigar)) { g.add(i); found = true } }
410+
groups.foreach { g => if (simpleCigar.isPrefixOf(g.cigar)) { g.add(si); found = true } }
409411

410412
if (!found) {
411-
val newGroup = new AlignmentGroup(simpleCigar, new mutable.BitSet(sorted.size))
412-
newGroup.add(i)
413+
val newGroup = new AlignmentGroup(simpleCigar, new mutable.BitSet(sortedIndices.length))
414+
newGroup.add(si)
413415
groups += newGroup
414416
}
415417
}
@@ -419,12 +421,19 @@ trait UmiConsensusCaller[ConsensusRead <: SimpleRead] {
419421
}
420422
else {
421423
val bestGroup = groups.maxBy(_.size)
422-
val keepers = new ArrayBuffer[SourceRead](bestGroup.size)
423-
forloop (from=0, until=sorted.length) { i =>
424-
if (bestGroup.contains(i)) keepers += sorted(i)
425-
else sorted(i).sam.foreach(rejectRecords(RejectionReason.MinorityAlignment, _))
424+
// BitSet tracking which original indices are kept - allows O(n) reconstruction in original order
425+
val keptIndices = new mutable.BitSet(recsIndexed.length)
426+
forloop (from=0, until=sortedIndices.length) { si =>
427+
val origIdx = sortedIndices(si)
428+
if (bestGroup.contains(si)) keptIndices += origIdx
429+
else recsIndexed(origIdx).sam.foreach(rejectRecords(RejectionReason.MinorityAlignment, _))
426430
}
427431

432+
// Reconstruct in original order with a linear scan
433+
val keepers = new ArrayBuffer[SourceRead](keptIndices.size)
434+
forloop (from=0, until=recsIndexed.length) { i =>
435+
if (keptIndices.contains(i)) keepers += recsIndexed(i)
436+
}
428437
keepers.toIndexedSeq
429438
}
430439
}

0 commit comments

Comments
 (0)