WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit d6ed9e8

Browse files
Fix prefix sort complexity
Current implementation doesn't scale well and can lead to high CPU use
1 parent 4fabd6b commit d6ed9e8

File tree

1 file changed

+117
-56
lines changed

1 file changed

+117
-56
lines changed

josh-core/src/filter/opt.rs

Lines changed: 117 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
use super::*;
77
use std::cmp::Ordering;
88
use std::collections::HashMap;
9-
use std::collections::VecDeque;
109
use std::sync::LazyLock;
1110

1211
static OPTIMIZED: LazyLock<std::sync::Mutex<std::collections::HashMap<Filter, Filter>>> =
@@ -201,78 +200,140 @@ fn last_chain(rest: Filter, filter: Filter) -> (Filter, Filter) {
201200
}
202201
}
203202

203+
#[derive(Default)]
204+
struct PathTrie {
205+
children: HashMap<std::ffi::OsString, PathTrie>,
206+
indices: Vec<usize>,
207+
}
208+
209+
impl PathTrie {
210+
fn insert(&mut self, path: &Path, index: usize) {
211+
let mut node = self;
212+
for comp in path.components() {
213+
let key = comp.as_os_str().to_owned();
214+
node = node.children.entry(key).or_default();
215+
}
216+
node.indices.push(index);
217+
}
218+
219+
fn find_overlapping(&self, path: &Path) -> Vec<usize> {
220+
let mut result = Vec::new();
221+
let mut node = self;
222+
223+
result.extend(&node.indices);
224+
for comp in path.components() {
225+
match node.children.get(comp.as_os_str()) {
226+
Some(child) => {
227+
node = child;
228+
result.extend(&node.indices);
229+
}
230+
None => return result,
231+
}
232+
}
233+
234+
node.collect_descendants(&mut result);
235+
result
236+
}
237+
238+
fn collect_descendants(&self, result: &mut Vec<usize>) {
239+
for child in self.children.values() {
240+
result.extend(&child.indices);
241+
child.collect_descendants(result);
242+
}
243+
}
244+
}
245+
204246
pub fn prefix_sort(filters: &[Filter]) -> Vec<Filter> {
247+
if filters.len() <= 1 {
248+
return filters.to_vec();
249+
}
250+
205251
let n = filters.len();
252+
let mut outgoing: Vec<std::collections::HashSet<usize>> = vec![Default::default(); n];
206253

207-
// Step 1: Build graph of ordering constraints
208-
let mut graph: HashMap<usize, Vec<usize>> = HashMap::new();
209-
let mut indegree = vec![0; n];
210-
211-
for i in 0..n {
212-
for j in i + 1..n {
213-
let src_i = src_path(filters[i].clone());
214-
let dst_i = dst_path(filters[i].clone());
215-
let src_j = src_path(filters[j].clone());
216-
let dst_j = dst_path(filters[j].clone());
217-
218-
let constraint = if src_j.starts_with(&src_i) || src_i.starts_with(&src_j) {
219-
Some((i, j))
220-
} else if dst_j.starts_with(&dst_i) || dst_i.starts_with(&dst_j) {
221-
Some((i, j))
222-
} else {
223-
None
224-
};
254+
let mut src_trie = PathTrie::default();
255+
let mut dst_trie = PathTrie::default();
225256

226-
if let Some((a, b)) = constraint {
227-
graph.entry(a).or_default().push(b);
228-
indegree[b] += 1;
229-
}
257+
for (i, filter) in filters.iter().enumerate() {
258+
let src = src_path(filter.clone());
259+
let dst = dst_path(filter.clone());
260+
261+
for j in src_trie.find_overlapping(&src) {
262+
outgoing[j].insert(i);
230263
}
264+
265+
for j in dst_trie.find_overlapping(&dst) {
266+
outgoing[j].insert(i);
267+
}
268+
269+
src_trie.insert(&src, i);
270+
dst_trie.insert(&dst, i);
231271
}
232272

233-
// Step 2: Sort indices alphabetically by (src, dst)
234-
let mut indices: Vec<usize> = (0..n).collect();
235-
indices.sort_by(|&i, &j| {
236-
let key_i = (src_path(filters[i].clone()), dst_path(filters[i].clone()));
237-
let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone()));
273+
topo_sort_with_tiebreak(&outgoing, filters)
274+
}
275+
276+
fn topo_sort_with_tiebreak(
277+
outgoing: &[std::collections::HashSet<usize>],
278+
filters: &[Filter],
279+
) -> Vec<Filter> {
280+
use std::collections::BinaryHeap;
238281

239-
match key_i.0.cmp(&key_j.0) {
240-
Ordering::Equal => key_i.1.cmp(&key_j.1),
241-
other => other,
282+
let mut indegree: Vec<usize> = vec![0; filters.len()];
283+
for neighbors in outgoing {
284+
for &j in neighbors {
285+
indegree[j] += 1;
242286
}
243-
});
287+
}
288+
289+
// Use a BinaryHeap with a wrapper for custom ordering
290+
#[derive(Eq, PartialEq)]
291+
struct SortKey(usize, std::path::PathBuf, std::path::PathBuf); // (index, src, dst)
244292

245-
// Step 3: Topological sort with alphabetical tie-break
246-
let mut result = Vec::new();
247-
let mut available: VecDeque<usize> = indices
293+
impl Ord for SortKey {
294+
fn cmp(&self, other: &Self) -> Ordering {
295+
match other.1.cmp(&self.1) {
296+
Ordering::Equal => other.2.cmp(&self.2),
297+
ord => ord,
298+
}
299+
}
300+
}
301+
302+
impl PartialOrd for SortKey {
303+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
304+
Some(self.cmp(other))
305+
}
306+
}
307+
308+
let make_key = |i: usize| -> SortKey {
309+
SortKey(
310+
i,
311+
src_path(filters[i].clone()),
312+
dst_path(filters[i].clone()),
313+
)
314+
};
315+
316+
let mut heap: BinaryHeap<SortKey> = indegree
248317
.iter()
249-
.copied()
250-
.filter(|&i| indegree[i] == 0)
318+
.enumerate()
319+
.filter(|(_, deg)| **deg == 0)
320+
.map(|(i, _)| make_key(i))
251321
.collect();
252322

253-
while let Some(i) = available.pop_front() {
254-
result.push(i);
255-
if let Some(neighbors) = graph.get(&i) {
256-
for &j in neighbors {
257-
indegree[j] -= 1;
258-
if indegree[j] == 0 {
259-
// Insert j into available, keeping alphabetical order
260-
let pos = available.iter().position(|&x| {
261-
let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone()));
262-
let key_x = (src_path(filters[x].clone()), dst_path(filters[x].clone()));
263-
key_j < key_x
264-
});
265-
if let Some(p) = pos {
266-
available.insert(p, j);
267-
} else {
268-
available.push_back(j);
269-
}
270-
}
323+
let mut result = Vec::with_capacity(filters.len());
324+
325+
while let Some(SortKey(i, _, _)) = heap.pop() {
326+
result.push(filters[i].clone());
327+
328+
for &j in outgoing[i].iter() {
329+
indegree[j] -= 1;
330+
if indegree[j] == 0 {
331+
heap.push(make_key(j));
271332
}
272333
}
273334
}
274335

275-
result.into_iter().map(|i| filters[i].clone()).collect()
336+
result
276337
}
277338

278339
fn common_pre(filters: &Vec<Filter>) -> Option<(Filter, Vec<Filter>)> {

0 commit comments

Comments
 (0)