diff --git a/Cargo.lock b/Cargo.lock index 7026b4a62..af2464345 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anes" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc43e46599f3d77fcf2f2ca89e4d962910b0c19c44e7b58679cbbdfd1820a662" + [[package]] name = "anstream" version = "0.6.21" @@ -189,6 +195,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bpaf" +version = "0.9.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "473976d7a8620bb1e06dcdd184407c2363fe4fec8e983ee03ed9197222634a31" + [[package]] name = "bstr" version = "1.12.1" @@ -218,6 +230,12 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "castaway" version = "0.2.4" @@ -254,6 +272,33 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.53" @@ -352,6 +397,24 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion2" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77cd1059d67baa066c334993d8d6e757ad257d21030db6a9a945dddbb559d4fe" +dependencies = [ + "anes", + "bpaf", + "cast", + "ciborium", + "num-traits", + "oorandom", + "rayon", + "serde", + "serde_json", + "walkdir", +] + [[package]] name = "crossbeam" version = "0.8.4" @@ -408,6 +471,12 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -1505,6 +1574,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "handlebars" version = "6.3.2" @@ -2005,6 +2085,7 @@ version = "22.4.15" dependencies = [ "backtrace", "bitvec", + "criterion2", "git-version", "git2", "gix-config", @@ -2019,6 +2100,7 @@ dependencies = [ "percent-encoding", "pest", "pest_derive", + "rand 0.9.2", "rayon", "regex", "rs_tracing", @@ -2457,6 +2539,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "openssl" version = "0.10.75" diff --git a/josh-core/Cargo.toml b/josh-core/Cargo.toml index 963e084c6..d97401463 100644 --- a/josh-core/Cargo.toml +++ b/josh-core/Cargo.toml @@ -9,6 +9,10 @@ keywords = ["git", "monorepo", "workflow", "scm"] readme = "../README.md" edition = "2024" +[[bench]] +name = "ultrawide" +harness = false + [dependencies] backtrace = "0.3.76" bitvec = "1.0.1" @@ -37,5 +41,9 @@ sled = "0.34.7" tracing = { workspace = true } toml = { workspace = true } +[dev-dependencies] +rand = "0.9.2" +criterion2 = { version = "3.0.2" } + [features] incubating = [] diff --git a/josh-core/benches/ultrawide.rs b/josh-core/benches/ultrawide.rs new file mode 100644 index 000000000..df4ca5c08 --- /dev/null +++ b/josh-core/benches/ultrawide.rs @@ -0,0 +1,65 @@ +use criterion::{Criterion, criterion_group, criterion_main}; +use itertools::Itertools; +use rand::Rng; +use rand::distr::{Alphabetic, Distribution}; +use rand::rngs::ThreadRng; +use std::path::PathBuf; + +const N_PATHS: usize = 30; + +fn generate_paths() -> Vec { + const PATH_COMPONENTS_MAX: usize = 10; + const PATH_COMPONENT_LEN: usize = 2; + + // Create a single path component -- random lowercase characters, + // length of PATH_COMPONENT_LEN + fn make_path_component(rng: &mut ThreadRng) -> String { + (0..PATH_COMPONENT_LEN) + .map(|_| { + let ch = Alphabetic.sample(rng) as char; + ch.to_ascii_lowercase() + }) + .collect() + } + + // Create a single path -- anywhere from 1 to PATH_COMPONENTS_MAX components + fn make_path(rng: &mut ThreadRng) -> PathBuf { + let num_components = rng.random_range(1..=PATH_COMPONENTS_MAX); + let mut path = PathBuf::new(); + + for _ in 0..num_components { + path.push(make_path_component(rng)) + } + + path + } + + let mut rng = rand::rng(); + + // Finally, create N_PATHS of random paths + (0..N_PATHS).map(|_| make_path(&mut rng)).collect() +} + +fn ultrawide(c: &mut Criterion) { + c.bench_function("ultrawide_filter_parse", |b| { + b.iter_with_setup_wrapper(|runner| { + let filter = generate_paths() + .into_iter() + .map(|p| { + let p = p.display().to_string(); + format!("::{p}/") + }) + .join(","); + + let filter = format!(":[{}]", filter); + + runner.run(move || { + let filter = josh_core::filter::parse(&filter).expect("failed to parse"); + std::hint::black_box(filter); + }) + }); + }); +} + +criterion_group!(benches, ultrawide); +criterion_main!(benches); diff --git a/josh-core/src/filter/opt.rs b/josh-core/src/filter/opt.rs index 64fe12455..d83edf627 100644 --- a/josh-core/src/filter/opt.rs +++ b/josh-core/src/filter/opt.rs @@ -6,7 +6,6 @@ use super::*; use std::cmp::Ordering; use std::collections::HashMap; -use std::collections::VecDeque; use std::sync::LazyLock; static OPTIMIZED: LazyLock>> = @@ -201,78 +200,140 @@ fn last_chain(rest: Filter, filter: Filter) -> (Filter, Filter) { } } +#[derive(Default)] +struct PathTrie { + children: HashMap, + indices: Vec, +} + +impl PathTrie { + fn insert(&mut self, path: &Path, index: usize) { + let mut node = self; + for comp in path.components() { + let key = comp.as_os_str().to_owned(); + node = node.children.entry(key).or_default(); + } + node.indices.push(index); + } + + fn find_overlapping(&self, path: &Path) -> Vec { + let mut result = Vec::new(); + let mut node = self; + + result.extend(&node.indices); + for comp in path.components() { + match node.children.get(comp.as_os_str()) { + Some(child) => { + node = child; + result.extend(&node.indices); + } + None => return result, + } + } + + node.collect_descendants(&mut result); + result + } + + fn collect_descendants(&self, result: &mut Vec) { + for child in self.children.values() { + result.extend(&child.indices); + child.collect_descendants(result); + } + } +} + pub fn prefix_sort(filters: &[Filter]) -> Vec { + if filters.len() <= 1 { + return filters.to_vec(); + } + let n = filters.len(); + let mut outgoing: Vec> = vec![Default::default(); n]; - // Step 1: Build graph of ordering constraints - let mut graph: HashMap> = HashMap::new(); - let mut indegree = vec![0; n]; - - for i in 0..n { - for j in i + 1..n { - let src_i = src_path(filters[i].clone()); - let dst_i = dst_path(filters[i].clone()); - let src_j = src_path(filters[j].clone()); - let dst_j = dst_path(filters[j].clone()); - - let constraint = if src_j.starts_with(&src_i) || src_i.starts_with(&src_j) { - Some((i, j)) - } else if dst_j.starts_with(&dst_i) || dst_i.starts_with(&dst_j) { - Some((i, j)) - } else { - None - }; + let mut src_trie = PathTrie::default(); + let mut dst_trie = PathTrie::default(); - if let Some((a, b)) = constraint { - graph.entry(a).or_default().push(b); - indegree[b] += 1; - } + for (i, filter) in filters.iter().enumerate() { + let src = src_path(filter.clone()); + let dst = dst_path(filter.clone()); + + for j in src_trie.find_overlapping(&src) { + outgoing[j].insert(i); } + + for j in dst_trie.find_overlapping(&dst) { + outgoing[j].insert(i); + } + + src_trie.insert(&src, i); + dst_trie.insert(&dst, i); } - // Step 2: Sort indices alphabetically by (src, dst) - let mut indices: Vec = (0..n).collect(); - indices.sort_by(|&i, &j| { - let key_i = (src_path(filters[i].clone()), dst_path(filters[i].clone())); - let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone())); + topo_sort_with_tiebreak(&outgoing, filters) +} + +fn topo_sort_with_tiebreak( + outgoing: &[std::collections::HashSet], + filters: &[Filter], +) -> Vec { + use std::collections::BinaryHeap; - match key_i.0.cmp(&key_j.0) { - Ordering::Equal => key_i.1.cmp(&key_j.1), - other => other, + let mut indegree: Vec = vec![0; filters.len()]; + for neighbors in outgoing { + for &j in neighbors { + indegree[j] += 1; } - }); + } + + // Use a BinaryHeap with a wrapper for custom ordering + #[derive(Eq, PartialEq)] + struct SortKey(usize, std::path::PathBuf, std::path::PathBuf); // (index, src, dst) - // Step 3: Topological sort with alphabetical tie-break - let mut result = Vec::new(); - let mut available: VecDeque = indices + impl Ord for SortKey { + fn cmp(&self, other: &Self) -> Ordering { + match other.1.cmp(&self.1) { + Ordering::Equal => other.2.cmp(&self.2), + ord => ord, + } + } + } + + impl PartialOrd for SortKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + let make_key = |i: usize| -> SortKey { + SortKey( + i, + src_path(filters[i].clone()), + dst_path(filters[i].clone()), + ) + }; + + let mut heap: BinaryHeap = indegree .iter() - .copied() - .filter(|&i| indegree[i] == 0) + .enumerate() + .filter(|(_, deg)| **deg == 0) + .map(|(i, _)| make_key(i)) .collect(); - while let Some(i) = available.pop_front() { - result.push(i); - if let Some(neighbors) = graph.get(&i) { - for &j in neighbors { - indegree[j] -= 1; - if indegree[j] == 0 { - // Insert j into available, keeping alphabetical order - let pos = available.iter().position(|&x| { - let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone())); - let key_x = (src_path(filters[x].clone()), dst_path(filters[x].clone())); - key_j < key_x - }); - if let Some(p) = pos { - available.insert(p, j); - } else { - available.push_back(j); - } - } + let mut result = Vec::with_capacity(filters.len()); + + while let Some(SortKey(i, _, _)) = heap.pop() { + result.push(filters[i].clone()); + + for &j in outgoing[i].iter() { + indegree[j] -= 1; + if indegree[j] == 0 { + heap.push(make_key(j)); } } } - result.into_iter().map(|i| filters[i].clone()).collect() + result } fn common_pre(filters: &Vec) -> Option<(Filter, Vec)> {