-
Notifications
You must be signed in to change notification settings - Fork 3
feat: minor speedups #56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,3 @@ | ||
| [toolchain] | ||
| channel = "1.85" | ||
| channel = "1.85.0" | ||
| components = ["rustfmt", "clippy"] |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -93,6 +93,7 @@ impl ReadSet { | |||||
| const SPACE: u8 = b' '; | ||||||
| const COLON: u8 = b':'; | ||||||
| const PLUS: u8 = b'+'; | ||||||
| const READ_NUMBERS: &[u8] = b"12345678"; | ||||||
|
|
||||||
| /// Produces an iterator over references to the template segments stored in this ``ReadSet``. | ||||||
| fn template_segments(&self) -> SegmentIter { | ||||||
|
|
@@ -213,7 +214,12 @@ impl ReadSet { | |||||
| None => { | ||||||
| // If no pre-existing comment, assume the read is a passing filter, non-control | ||||||
| // read and generate a comment for it (sample barcode is added below). | ||||||
| write!(writer, "{}:N:0:", read_num)?; | ||||||
| if read_num < Self::READ_NUMBERS.len() { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||||||
| writer.write_all(&[Self::READ_NUMBERS[read_num - 1]])?; | ||||||
| write!(writer, ":N:0:")?; | ||||||
| } else { | ||||||
| write!(writer, "{}:N:0:", read_num)?; | ||||||
| } | ||||||
| } | ||||||
| Some(chars) => { | ||||||
| // Else check it's a 4-part name... fix the read number at the front and | ||||||
|
|
@@ -239,7 +245,11 @@ impl ReadSet { | |||||
| &chars[first_colon_idx + 1..chars.len()] | ||||||
| }; | ||||||
|
|
||||||
| write!(writer, "{}:", read_num)?; | ||||||
| if read_num < Self::READ_NUMBERS.len() { | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you don't take the suggestion above... what if
Suggested change
|
||||||
| writer.write_all(&[Self::READ_NUMBERS[read_num - 1], b':'])?; | ||||||
| } else { | ||||||
| write!(writer, "{}:", read_num)?; | ||||||
| } | ||||||
| writer.write_all(remainder)?; | ||||||
|
|
||||||
| if *remainder.last().unwrap() != Self::COLON { | ||||||
|
|
@@ -619,7 +629,7 @@ pub(crate) struct Demux { | |||||
| #[clap(long, short = 'd', default_value = "2")] | ||||||
| min_mismatch_delta: usize, | ||||||
|
|
||||||
| /// The number of threads to use. Cannot be less than 3. | ||||||
| /// The number of threads to use. Cannot be less than 5. | ||||||
| #[clap(long, short = 't', default_value = "8")] | ||||||
| threads: usize, | ||||||
|
|
||||||
|
|
@@ -666,9 +676,12 @@ impl Demux { | |||||
| read_structures.iter().map(|s| s.segments_by_type(*output_type).count()).sum(); | ||||||
|
|
||||||
| for idx in 1..=segment_count { | ||||||
| output_type_writers.push(BufWriter::new(File::create( | ||||||
| output_dir.join(format!("{}.{}{}.fq.gz", prefix, file_type_code, idx)), | ||||||
| )?)); | ||||||
| output_type_writers.push(BufWriter::with_capacity( | ||||||
| 65_536usize, | ||||||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was seeing |
||||||
| File::create( | ||||||
| output_dir.join(format!("{}.{}{}.fq.gz", prefix, file_type_code, idx)), | ||||||
| )?, | ||||||
| )); | ||||||
| } | ||||||
|
|
||||||
| match output_type { | ||||||
|
|
@@ -1189,6 +1202,7 @@ mod tests { | |||||
| skip_reasons: vec![], | ||||||
| }; | ||||||
| let demux_result = demux_inputs.execute(); | ||||||
| #[allow(clippy::permissions_set_readonly_false)] | ||||||
| permissions.set_readonly(false); | ||||||
| fs::set_permissions(tmp.path(), permissions).unwrap(); | ||||||
| demux_result.unwrap(); | ||||||
|
|
@@ -1963,7 +1977,7 @@ mod tests { | |||||
| vec!["AAAAAAA", &SAMPLE1_BARCODE[0..7]], // barcode too short | ||||||
| vec!["CCCCCCC", SAMPLE1_BARCODE], // barcode the correct length | ||||||
| vec!["", SAMPLE1_BARCODE], // template bases too short | ||||||
| vec!["G", SAMPLE1_BARCODE], // barcode the correct length | ||||||
| vec!["G", SAMPLE1_BARCODE], | ||||||
| ]; | ||||||
|
|
||||||
| let input_files = vec![ | ||||||
|
|
@@ -1999,7 +2013,7 @@ mod tests { | |||||
| vec!["AAAAAAA", &SAMPLE1_BARCODE[0..7]], // barcode too short | ||||||
| vec!["CCCCCCC", SAMPLE1_BARCODE], // barcode the correct length | ||||||
| vec!["", SAMPLE1_BARCODE], // template bases too short | ||||||
| vec!["G", SAMPLE1_BARCODE], // barcode the correct length | ||||||
| vec!["G", SAMPLE1_BARCODE], | ||||||
| ]; | ||||||
|
|
||||||
| let input_files = vec![ | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,9 @@ pub struct Sample { | |
| pub sample_id: String, | ||
| /// DNA barcode associated with the sample | ||
| pub barcode: String, | ||
| /// DNA barcode as bytes | ||
| #[serde(skip_deserializing)] | ||
| pub barcode_bytes: Vec<u8>, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Question: Why add a
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm getting confused about parts of a previous implementation that has since been removed. |
||
| /// index of the sample in the [`SampleGroup`] object, used for syncing indices across | ||
| /// different structs | ||
| #[serde(skip_deserializing)] | ||
|
|
@@ -53,7 +56,8 @@ impl Sample { | |
| barcode.as_bytes().iter().all(|&b| is_valid_iupac(b)), | ||
| "All sample barcode bases must be one of A, C, G, T, U, R, Y, S, W, K, M, D, V, H, B, N" | ||
| ); | ||
| Self { sample_id: name, barcode, ordinal } | ||
| let barcode_bytes = barcode.as_bytes().to_vec(); | ||
| Self { sample_id: name, barcode, barcode_bytes, ordinal } | ||
| } | ||
|
|
||
| /// Returns the header line expected by serde when deserializing | ||
|
|
@@ -294,11 +298,7 @@ mod tests { | |
| let barcode = "GATTACA".to_owned(); | ||
| let ordinal = 0; | ||
| let sample = Sample::new(ordinal, name.clone(), barcode.clone()); | ||
| assert_eq!( | ||
| Sample { sample_id: name, barcode, ordinal }, | ||
| sample, | ||
| "Sample differed from expectation" | ||
| ); | ||
| assert_eq!(Sample::new(ordinal, name, barcode), sample, "Sample differed from expectation"); | ||
| } | ||
|
|
||
| // ############################################################################################ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
minor (non-blocking) suggestion:
Start at `0` and add `9`. Then, below, you can just write: