Michael B. Hall $\in$ Coin Group @ Doherty Institute
@mbhall88 | michael.hall2 [at] unimelb.edu.au
Slides: mbh.sh/csi-rust
[dependencies]
clap = { version = "3.1", features = ["derive"] }
Examples from mbhall88/rasusa
/// Randomly subsample reads to a specified coverage.
#[derive(Parser, Debug)]
#[clap(author, version, about)]
struct Args {
    /// The fast{a,q} file(s) to subsample.
    // NOTE: the `///` lines in this struct are turned into help text by clap,
    // so editing them changes what `--help` prints.
    // `multiple_values` lets a single `-i`/`--input` take several paths, and
    // `required` makes the option mandatory.
    #[clap(short, long, multiple_values = true, required = true)]
    pub input: Vec<PathBuf>,
}
/// Entry point: parses the command line and echoes every input path it was given.
fn main() {
    let cli = Args::parse();
    // Paths are printed with `{:?}`, so each one appears quoted in the output.
    cli.input
        .into_iter()
        .for_each(|path| println!("Got input file {:?}", path));
}
$ rasusa --help
rasusa 0.1.0
Randomly subsample reads to a specified coverage
USAGE:
rasusa --input <INPUT>...
OPTIONS:
-h, --help Print help information
-i, --input <INPUT>... The fast{a,q} file(s) to subsample
-V, --version Print version information
Usage
$ rasusa -i someFile otherFile
Got input file "someFile"
Got input file "otherFile"
/// Randomly subsample reads to a specified coverage.
#[derive(Parser, Debug)]
#[clap(author, version, about)]
struct Args {
    /// The fast{a,q} file(s) to subsample.
    // NOTE: the `///` lines in this struct are turned into help text by clap,
    // so editing them changes what `--help` prints.
    // `validator` hands each raw value to `check_path_exists`; an `Err` from it
    // is reported to the user as an invalid value for `--input`.
    #[clap(short, long, multiple_values = true, required = true, validator = check_path_exists)]
    pub input: Vec<PathBuf>,
}
/// A utility function that allows the CLI to error if a path doesn't exist.
///
/// Returns `Ok(())` when `s` names something that exists on the filesystem.
///
/// # Errors
///
/// Returns `Err` with the message `"<s> does not exist"` when the path cannot
/// be found; clap appends this message to its "Invalid value" error.
fn check_path_exists(s: &str) -> Result<(), String> {
    // Borrow `s` as a `Path` instead of allocating a `PathBuf`: only an
    // existence check is needed, not an owned path.
    if std::path::Path::new(s).exists() {
        Ok(())
    } else {
        Err(format!("{} does not exist", s))
    }
}
$ rasusa -i existingFile fakeFile
error: Invalid value "fakeFile" for '--input <INPUT>...': fakeFile does not exist
For more information try --help
/// Randomly subsample reads to a specified coverage.
#[derive(Parser, Debug)]
#[clap(author, version, about)]
struct Args {
    /// u: uncompressed; b: Bzip2; g: Gzip; l: Lzma
    ///
    /// Rasusa will attempt to infer the output
    /// compression format automatically from the
    /// filename extension. This option is used to
    /// override that. If writing to stdout, the
    /// default is uncompressed
    // NOTE: the `///` lines above are turned into help text by clap. The first
    // line is the summary shown by `-h`; the paragraph after the blank line is
    // added in the long `--help` output.
    // The raw string is converted by `parse_compression_format`, so an
    // unrecognised letter is rejected while the arguments are being parsed.
    #[clap(
        short = 'O',
        long,
        value_name = "u|b|g|l",
        parse(try_from_str = parse_compression_format),
        default_value = "u"
    )]
    pub output_type: niffler::compression::Format,
}
/// Converts the single-letter output-type code into a niffler compression format.
///
/// The letters `b`, `g`, `l` and `u` are accepted in either case and map to
/// Bzip, Gzip, Lzma and no compression respectively.
///
/// # Errors
///
/// Any other input returns `Err` with the message
/// `"<s> is not a known output format"`.
fn parse_compression_format(s: &str) -> Result<niffler::compression::Format, String> {
    match s {
        "b" | "B" => Ok(niffler::Format::Bzip),
        "g" | "G" => Ok(niffler::Format::Gzip),
        "l" | "L" => Ok(niffler::Format::Lzma),
        "u" | "U" => Ok(niffler::Format::No),
        // `format!` only needs `Display`, so `s` is passed as is; converting it
        // to a `String` first would be a wasted allocation.
        _ => Err(format!("{} is not a known output format", s)),
    }
}
$ rasusa -i inFile -O t
error: Invalid value "t" for '--output-type <u|b|g|l>': t is not a known output format
For more information try --help
$ rasusa -h
rasusa 0.1.0
Randomly subsample reads to a specified coverage
USAGE:
rasusa [OPTIONS] --input <INPUT>...
OPTIONS:
-h, --help Print help information
-i, --input <INPUT>... The fast{a,q} file(s) to subsample
-O, --output-type <u|b|g|l> u: uncompressed; b: Bzip2; g: Gzip; l: Lzma [default: u]
-V, --version Print version information
$ rasusa --help
rasusa 0.1.0
Randomly subsample reads to a specified coverage
USAGE:
rasusa [OPTIONS] --input <INPUT>...
OPTIONS:
-h, --help
Print help information
-i, --input <INPUT>...
The fast{a,q} file(s) to subsample
-O, --output-type <u|b|g|l>
u: uncompressed; b: Bzip2; g: Gzip; l: Lzma
Rasusa will attempt to infer the output compression format automatically from the
filename extension. This option is used to override that. If writing to stdout, the
default is uncompressed
[default: u]
-V, --version
Print version information
One of the best features of Rust is enforced acknowledgement of the possibility of an error
Rust uses Result<T, E> to encapsulate this
/// The outcome of an operation that may fail: either a success value or an error.
pub enum Result<T, E> {
/// The success case, carrying a value of type `T`.
Ok(T),
/// The failure case, carrying an error of type `E`.
Err(E),
}
std::fs::create_dir(args.output);
warning: unused `Result` that must be used
--> src/main.rs:67:5
|
67 | std::fs::create_dir(args.output);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: `#[warn(unused_must_use)]` on by default
= note: this `Result` may be an `Err` variant, which should be handled
std::fs::create_dir(args.output)?;
$ rasusa -i in.fq -o fakeDir/subdir
Error: No such file or directory (os error 2)
[dependencies]
anyhow = "1.0"
std::fs::create_dir(args.output)
.context("Failed to create the output directory")?;
$ rasusa -i in.fq -o fakeDir/subdir
Error: Failed to create the output directory
Caused by:
No such file or directory (os error 2)
In Rust, most concurrency problems are caught at compile time rather than at run time
This avoids losing time trying to reproduce concurrency bugs that only appear at run time
Example from mbhall88/psdm
// Serial version: computes one distance per index pair, in order.
let seqs: Vec<Vec<u8>> = ...;
let pairwise_indices: Vec<Vec<usize>> = ...;
let dists: Vec<u64> = pairwise_indices
    .as_slice()
    .iter()
    .map(|ix| {
        // Each entry holds the two sequence indices to compare.
        let i = ix[0];
        let j = ix[1];
        if i == j {
            // Same index on both sides: the distance is zero, so skip the comparison.
            0
        } else {
            hamming_distance(&seqs[i], &seqs[j])
        }
    })
    .collect();
[dependencies]
rayon = "1.5"
use rayon::prelude::*;
// Parallel version: rayon's `into_par_iter` spreads the index pairs across
// worker threads, while the closure body is the same as in serial code.
let seqs: Vec<Vec<u8>> = ...;
let pairwise_indices: Vec<Vec<usize>> = ...;
let dists: Vec<u64> = pairwise_indices
    .as_slice()
    .into_par_iter()
    .map(|ix| {
        // Each entry holds the two sequence indices to compare.
        let i = ix[0];
        let j = ix[1];
        if i == j {
            // Same index on both sides: the distance is zero, so skip the comparison.
            0
        } else {
            hamming_distance(&seqs[i], &seqs[j])
        }
    })
    .collect();
(Okay, maybe three lines 🤫)