Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion noodles
26 changes: 26 additions & 0 deletions rust/bioscript-cli/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,32 @@ fn batch_lookup_query_plan_runs_and_preserves_requested_result_order() {
assert!(stdout.contains("II"));
}

#[test]
fn lookup_variant_details_returns_counts_and_decision_fields() {
let root = repo_root();
let script = root.join("rust/bioscript-cli/tests/fixtures/lookup_details.py");

let output = Command::new(env!("CARGO_BIN_EXE_bioscript"))
.current_dir(&root)
.arg("--input-file")
.arg("old/examples/apol1/test_snps.txt")
.arg(script)
.output()
.unwrap();

assert!(
output.status.success(),
"stderr: {}",
String::from_utf8_lossy(&output.stderr)
);
let stdout = String::from_utf8_lossy(&output.stdout);
assert!(stdout.contains("VariantObservation"));
assert!(stdout.contains("genotype='AG'"));
assert!(stdout.contains("raw_counts={"));
assert!(stdout.contains("decision="));
assert!(stdout.contains("evidence=["));
}

#[test]
fn inspect_subcommand_reports_detected_vendor_and_platform() {
let root = repo_root();
Expand Down
17 changes: 17 additions & 0 deletions rust/bioscript-cli/tests/fixtures/lookup_details.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
VARIANT = bioscript.variant(
rsid="rs73885319",
grch38="22:36265860-36265860",
ref="A",
alt="G",
kind="snp",
)


def main():
genotypes = bioscript.load_genotypes(input_file)
details = genotypes.lookup_variant_details(VARIANT)
print(details)


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions rust/bioscript-core/src/variant.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::BTreeMap;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GenomicLocus {
pub chrom: String,
Expand Down Expand Up @@ -41,6 +43,8 @@ pub struct VariantObservation {
pub ref_count: Option<u32>,
pub alt_count: Option<u32>,
pub depth: Option<u32>,
pub raw_counts: BTreeMap<String, u32>,
pub decision: Option<String>,
pub evidence: Vec<String>,
}

Expand Down
103 changes: 92 additions & 11 deletions rust/bioscript-formats/src/alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ use noodles::{
fasta::{self, repository::adapters::IndexedReader as FastaIndexedReader},
sam::{
self,
alignment::{
Record as _,
record::{Cigar as _, Sequence as _},
},
alignment::{Record as _, record::Cigar as _},
},
};

Expand Down Expand Up @@ -45,7 +42,6 @@ pub(crate) struct AlignmentRecord {
pub start: i64,
pub end: i64,
pub is_unmapped: bool,
pub sequence: Vec<u8>,
pub cigar: Vec<AlignmentOp>,
}

Expand Down Expand Up @@ -103,7 +99,68 @@ where

let selected_containers = select_query_containers(reader.index(), &header, &region)?;

stream_selected_containers(
stream_selected_alignment_records(
path,
&mut reader,
&header,
&region,
locus.end,
&selected_containers,
&mut on_record,
)?;

Ok(())
}

pub(crate) fn for_each_raw_cram_record<F>(
path: &Path,
options: &GenotypeLoadOptions,
reference_file: &Path,
locus: &GenomicLocus,
mut on_record: F,
) -> Result<(), RuntimeError>
where
F: FnMut(cram::Record<'_>) -> Result<bool, RuntimeError>,
{
let repository = build_reference_repository(reference_file)?;
let mut builder =
cram::io::indexed_reader::Builder::default().set_reference_sequence_repository(repository);

if let Some(index_path) = options.input_index.as_ref() {
let index = crai::fs::read(index_path).map_err(|err| {
RuntimeError::Io(format!(
"failed to read CRAM index {} for {}: {err}",
index_path.display(),
path.display()
))
})?;
builder = builder.set_index(index);
}

let mut reader = builder.build_from_path(path).map_err(|err| {
RuntimeError::Io(format!(
"failed to open indexed CRAM {}: {err}",
path.display()
))
})?;

let header = reader.read_header().map_err(|err| {
RuntimeError::Io(format!(
"failed to read CRAM header {}: {err}",
path.display()
))
})?;

let region = build_region(&header, locus).ok_or_else(|| {
RuntimeError::Unsupported(format!(
"indexed CRAM does not contain contig {} for {}:{}-{}",
locus.chrom, locus.chrom, locus.start, locus.end
))
})?;

let selected_containers = select_query_containers(reader.index(), &header, &region)?;

stream_selected_cram_records(
path,
&mut reader,
&header,
Expand Down Expand Up @@ -188,7 +245,7 @@ fn select_query_containers(
.collect())
}

fn stream_selected_containers<F>(
fn stream_selected_alignment_records<F>(
path: &Path,
reader: &mut cram::io::indexed_reader::IndexedReader<std::fs::File>,
header: &sam::Header,
Expand All @@ -199,6 +256,32 @@ fn stream_selected_containers<F>(
) -> Result<(), RuntimeError>
where
F: FnMut(AlignmentRecord) -> Result<bool, RuntimeError>,
{
stream_selected_cram_records(
path,
reader,
header,
region,
locus_end,
selected_containers,
&mut |record| {
let alignment_record = build_alignment_record_from_cram(path, &record)?;
on_record(alignment_record)
},
)
}

fn stream_selected_cram_records<F>(
path: &Path,
reader: &mut cram::io::indexed_reader::IndexedReader<std::fs::File>,
header: &sam::Header,
region: &Region,
locus_end: i64,
selected_containers: &[SelectedContainer],
on_record: &mut F,
) -> Result<(), RuntimeError>
where
F: FnMut(cram::Record<'_>) -> Result<bool, RuntimeError>,
{
let interval = region.interval();

Expand Down Expand Up @@ -293,7 +376,7 @@ where
return Ok(true);
}

match on_record(alignment_record) {
match on_record(record.clone()) {
Ok(true) => Ok(true),
Ok(false) => {
stop = true;
Expand Down Expand Up @@ -351,7 +434,7 @@ where
return Ok(true);
}

match on_record(alignment_record) {
match on_record(record.clone()) {
Ok(true) => Ok(true),
Ok(false) => {
stop = true;
Expand Down Expand Up @@ -445,7 +528,6 @@ fn build_alignment_record_from_cram(
None => start,
};

let sequence: Vec<u8> = record.sequence().iter().collect();
let cigar = record
.cigar()
.iter()
Expand All @@ -463,7 +545,6 @@ fn build_alignment_record_from_cram(
start,
end,
is_unmapped,
sequence,
cigar,
})
}
Expand Down
Loading
Loading