|
6 | 6 | use super::*; |
7 | 7 | use std::cmp::Ordering; |
8 | 8 | use std::collections::HashMap; |
9 | | -use std::collections::VecDeque; |
10 | 9 | use std::sync::LazyLock; |
11 | 10 |
|
12 | 11 | static OPTIMIZED: LazyLock<std::sync::Mutex<std::collections::HashMap<Filter, Filter>>> = |
@@ -201,78 +200,140 @@ fn last_chain(rest: Filter, filter: Filter) -> (Filter, Filter) { |
201 | 200 | } |
202 | 201 | } |
203 | 202 |
|
/// Prefix tree over path components.
///
/// Each node corresponds to one path (the sequence of components leading to
/// it from the root) and stores the indices that were inserted for exactly
/// that path.
#[derive(Default)]
struct PathTrie {
    /// Child nodes, keyed by the next path component.
    children: HashMap<std::ffi::OsString, PathTrie>,
    /// Indices inserted for the path that ends at this node.
    indices: Vec<usize>,
}

impl PathTrie {
    /// Registers `index` under `path`, creating intermediate nodes as needed.
    fn insert(&mut self, path: &Path, index: usize) {
        let mut cursor = self;
        for component in path.components() {
            cursor = cursor
                .children
                .entry(component.as_os_str().to_owned())
                .or_default();
        }
        cursor.indices.push(index);
    }

    /// Returns every stored index whose path overlaps `path`, i.e. is a
    /// component-wise prefix of `path`, equal to it, or extends it.
    ///
    /// The order of the returned indices is unspecified for descendants,
    /// because children are visited in hash-map iteration order.
    fn find_overlapping(&self, path: &Path) -> Vec<usize> {
        // The root represents the empty path, which is a prefix of everything.
        let mut hits: Vec<usize> = self.indices.clone();
        let mut cursor = self;

        for component in path.components() {
            // Once the walk falls off the trie nothing stored can extend
            // `path`; only the prefixes gathered so far overlap.
            let Some(next) = cursor.children.get(component.as_os_str()) else {
                return hits;
            };
            hits.extend_from_slice(&next.indices);
            cursor = next;
        }

        // `path` is fully contained in the trie: everything below it overlaps.
        cursor.collect_descendants(&mut hits);
        hits
    }

    /// Appends the indices of all strict descendants of this node to `out`
    /// (the node's own indices are not included).
    fn collect_descendants(&self, out: &mut Vec<usize>) {
        self.children.values().for_each(|child| {
            out.extend_from_slice(&child.indices);
            child.collect_descendants(out);
        });
    }
}
| 245 | + |
204 | 246 | pub fn prefix_sort(filters: &[Filter]) -> Vec<Filter> { |
| 247 | + if filters.len() <= 1 { |
| 248 | + return filters.to_vec(); |
| 249 | + } |
| 250 | + |
205 | 251 | let n = filters.len(); |
| 252 | + let mut outgoing: Vec<std::collections::HashSet<usize>> = vec![Default::default(); n]; |
206 | 253 |
|
207 | | - // Step 1: Build graph of ordering constraints |
208 | | - let mut graph: HashMap<usize, Vec<usize>> = HashMap::new(); |
209 | | - let mut indegree = vec![0; n]; |
210 | | - |
211 | | - for i in 0..n { |
212 | | - for j in i + 1..n { |
213 | | - let src_i = src_path(filters[i].clone()); |
214 | | - let dst_i = dst_path(filters[i].clone()); |
215 | | - let src_j = src_path(filters[j].clone()); |
216 | | - let dst_j = dst_path(filters[j].clone()); |
217 | | - |
218 | | - let constraint = if src_j.starts_with(&src_i) || src_i.starts_with(&src_j) { |
219 | | - Some((i, j)) |
220 | | - } else if dst_j.starts_with(&dst_i) || dst_i.starts_with(&dst_j) { |
221 | | - Some((i, j)) |
222 | | - } else { |
223 | | - None |
224 | | - }; |
| 254 | + let mut src_trie = PathTrie::default(); |
| 255 | + let mut dst_trie = PathTrie::default(); |
225 | 256 |
|
226 | | - if let Some((a, b)) = constraint { |
227 | | - graph.entry(a).or_default().push(b); |
228 | | - indegree[b] += 1; |
229 | | - } |
| 257 | + for (i, filter) in filters.iter().enumerate() { |
| 258 | + let src = src_path(filter.clone()); |
| 259 | + let dst = dst_path(filter.clone()); |
| 260 | + |
| 261 | + for j in src_trie.find_overlapping(&src) { |
| 262 | + outgoing[j].insert(i); |
230 | 263 | } |
| 264 | + |
| 265 | + for j in dst_trie.find_overlapping(&dst) { |
| 266 | + outgoing[j].insert(i); |
| 267 | + } |
| 268 | + |
| 269 | + src_trie.insert(&src, i); |
| 270 | + dst_trie.insert(&dst, i); |
231 | 271 | } |
232 | 272 |
|
233 | | - // Step 2: Sort indices alphabetically by (src, dst) |
234 | | - let mut indices: Vec<usize> = (0..n).collect(); |
235 | | - indices.sort_by(|&i, &j| { |
236 | | - let key_i = (src_path(filters[i].clone()), dst_path(filters[i].clone())); |
237 | | - let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone())); |
| 273 | + topo_sort_with_tiebreak(&outgoing, filters) |
| 274 | +} |
| 275 | + |
| 276 | +fn topo_sort_with_tiebreak( |
| 277 | + outgoing: &[std::collections::HashSet<usize>], |
| 278 | + filters: &[Filter], |
| 279 | +) -> Vec<Filter> { |
| 280 | + use std::collections::BinaryHeap; |
238 | 281 |
|
239 | | - match key_i.0.cmp(&key_j.0) { |
240 | | - Ordering::Equal => key_i.1.cmp(&key_j.1), |
241 | | - other => other, |
| 282 | + let mut indegree: Vec<usize> = vec![0; filters.len()]; |
| 283 | + for neighbors in outgoing { |
| 284 | + for &j in neighbors { |
| 285 | + indegree[j] += 1; |
242 | 286 | } |
243 | | - }); |
| 287 | + } |
| 288 | + |
| 289 | + // Use a BinaryHeap with a wrapper for custom ordering |
| 290 | + #[derive(Eq, PartialEq)] |
| 291 | + struct SortKey(usize, std::path::PathBuf, std::path::PathBuf); // (index, src, dst) |
244 | 292 |
|
245 | | - // Step 3: Topological sort with alphabetical tie-break |
246 | | - let mut result = Vec::new(); |
247 | | - let mut available: VecDeque<usize> = indices |
| 293 | + impl Ord for SortKey { |
| 294 | + fn cmp(&self, other: &Self) -> Ordering { |
| 295 | + match other.1.cmp(&self.1) { |
| 296 | + Ordering::Equal => other.2.cmp(&self.2), |
| 297 | + ord => ord, |
| 298 | + } |
| 299 | + } |
| 300 | + } |
| 301 | + |
| 302 | + impl PartialOrd for SortKey { |
| 303 | + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
| 304 | + Some(self.cmp(other)) |
| 305 | + } |
| 306 | + } |
| 307 | + |
| 308 | + let make_key = |i: usize| -> SortKey { |
| 309 | + SortKey( |
| 310 | + i, |
| 311 | + src_path(filters[i].clone()), |
| 312 | + dst_path(filters[i].clone()), |
| 313 | + ) |
| 314 | + }; |
| 315 | + |
| 316 | + let mut heap: BinaryHeap<SortKey> = indegree |
248 | 317 | .iter() |
249 | | - .copied() |
250 | | - .filter(|&i| indegree[i] == 0) |
| 318 | + .enumerate() |
| 319 | + .filter(|(_, deg)| **deg == 0) |
| 320 | + .map(|(i, _)| make_key(i)) |
251 | 321 | .collect(); |
252 | 322 |
|
253 | | - while let Some(i) = available.pop_front() { |
254 | | - result.push(i); |
255 | | - if let Some(neighbors) = graph.get(&i) { |
256 | | - for &j in neighbors { |
257 | | - indegree[j] -= 1; |
258 | | - if indegree[j] == 0 { |
259 | | - // Insert j into available, keeping alphabetical order |
260 | | - let pos = available.iter().position(|&x| { |
261 | | - let key_j = (src_path(filters[j].clone()), dst_path(filters[j].clone())); |
262 | | - let key_x = (src_path(filters[x].clone()), dst_path(filters[x].clone())); |
263 | | - key_j < key_x |
264 | | - }); |
265 | | - if let Some(p) = pos { |
266 | | - available.insert(p, j); |
267 | | - } else { |
268 | | - available.push_back(j); |
269 | | - } |
270 | | - } |
| 323 | + let mut result = Vec::with_capacity(filters.len()); |
| 324 | + |
| 325 | + while let Some(SortKey(i, _, _)) = heap.pop() { |
| 326 | + result.push(filters[i].clone()); |
| 327 | + |
| 328 | + for &j in outgoing[i].iter() { |
| 329 | + indegree[j] -= 1; |
| 330 | + if indegree[j] == 0 { |
| 331 | + heap.push(make_key(j)); |
271 | 332 | } |
272 | 333 | } |
273 | 334 | } |
274 | 335 |
|
275 | | - result.into_iter().map(|i| filters[i].clone()).collect() |
| 336 | + result |
276 | 337 | } |
277 | 338 |
|
278 | 339 | fn common_pre(filters: &Vec<Filter>) -> Option<(Filter, Vec<Filter>)> { |
|
0 commit comments