//! Filter pipeline with `|` chaining. Splits a query into //! segments, applies the appropriate filter strategy to each, //! and chains results through stages. Supports incremental //! caching: unchanged stages keep their results. use super::filter::{Filter, FuzzyFilter}; use super::strategy::{self, FilterKind}; /// A multi-stage filter pipeline. Each `|` in the query /// creates a new stage that filters the previous stage's /// output. Implements [`Filter`] so it can be used as a /// drop-in replacement for a single filter. pub struct FilterPipeline { /// Master item list: (original index, label). items: Vec<(usize, String)>, /// Pipeline stages, one per `|`-separated segment. stages: Vec, /// The last raw query string, used for diffing. last_raw_query: String, } struct PipelineStage { /// The raw segment text (including prefix chars). raw_segment: String, kind: FilterKind, inverse: bool, /// The query text after prefix stripping. query_text: String, /// The strategy-specific filter (only used for fuzzy stages). fuzzy: Option, /// Items passing this stage (indices into master list). cached_indices: Vec, dirty: bool, } /// Split a raw query on unescaped `|` characters, respecting /// regex delimiters (`/pattern/` and `!/pattern/`). Returns /// the segments with `\|` unescaped to literal `|`. fn split_pipeline(query: &str) -> Vec { let mut segments = Vec::new(); let mut current = String::new(); let chars: Vec = query.chars().collect(); let len = chars.len(); let mut i = 0; let mut in_regex = false; // Position of the opening `/` in current segment (char count into current) let mut regex_open_pos: usize = 0; while i < len { let c = chars[i]; // Escaped pipe: always produce literal `|` if c == '\\' && i + 1 < len && chars[i + 1] == '|' { current.push('|'); i += 2; continue; } // Detect regex opening: `/` or `!/` at start of a segment // (current is empty or whitespace-only after a previous pipe) if !in_regex { let trimmed = current.trim(); // `/pattern/` if c == '/' && (trimmed.is_empty() || trimmed == "!") { in_regex = true; regex_open_pos = current.len(); current.push(c); i += 1; continue; } } // Detect regex closing: `/` that is not the opening slash if in_regex && c == '/' { if current.len() > regex_open_pos { // This is the closing slash in_regex = false; } current.push(c); i += 1; continue; } // Unescaped pipe outside regex: split here if c == '|' && !in_regex { segments.push(current.trim().to_string()); current = String::new(); i += 1; continue; } current.push(c); i += 1; } segments.push(current.trim().to_string()); // Filter out empty segments segments.into_iter().filter(|s| !s.is_empty()).collect() } impl Default for FilterPipeline { fn default() -> Self { Self::new() } } impl FilterPipeline { pub fn new() -> Self { Self { items: Vec::new(), stages: Vec::new(), last_raw_query: String::new(), } } /// Evaluate all dirty stages in order. Each stage filters /// against the previous stage's cached_indices. fn evaluate(&mut self) { for stage_idx in 0..self.stages.len() { if !self.stages[stage_idx].dirty { continue; } let input_indices: Vec = if stage_idx == 0 { self.items.iter().map(|(idx, _)| *idx).collect() } else { self.stages[stage_idx - 1].cached_indices.clone() }; let stage = &mut self.stages[stage_idx]; let result = match stage.kind { FilterKind::Fuzzy => Self::eval_fuzzy(stage, &input_indices, stage_idx), FilterKind::Exact => { Self::eval_simple(stage, &input_indices, &self.items, |label, query| { label.to_lowercase().contains(&query.to_lowercase()) }) } FilterKind::Regex => { let re = fancy_regex::Regex::new(&stage.query_text).ok(); Self::eval_simple(stage, &input_indices, &self.items, |label, _query| { match &re { Some(r) => r.is_match(label).unwrap_or(false), None => true, // invalid regex matches everything } }) } }; self.stages[stage_idx].cached_indices = result; self.stages[stage_idx].dirty = false; } } fn eval_fuzzy( stage: &mut PipelineStage, input_indices: &[usize], stage_idx: usize, ) -> Vec { let Some(fuzzy) = stage.fuzzy.as_mut() else { return Vec::new(); }; fuzzy.set_query(&stage.query_text); let fuzzy_results: Vec = (0..fuzzy.matched_count()) .filter_map(|i| fuzzy.matched_index(i)) .collect(); if stage.inverse { let fuzzy_set: std::collections::HashSet = fuzzy_results.into_iter().collect(); input_indices .iter() .copied() .filter(|idx| !fuzzy_set.contains(idx)) .collect() } else if stage_idx == 0 { fuzzy_results } else { let input_set: std::collections::HashSet = input_indices.iter().copied().collect(); fuzzy_results .into_iter() .filter(|idx| input_set.contains(idx)) .collect() } } fn eval_simple( stage: &PipelineStage, input_indices: &[usize], items: &[(usize, String)], matcher: impl Fn(&str, &str) -> bool, ) -> Vec { if stage.query_text.is_empty() { return input_indices.to_vec(); } if stage.inverse { input_indices .iter() .copied() .filter(|&idx| !matcher(&items[idx].1, &stage.query_text)) .collect() } else { input_indices .iter() .copied() .filter(|&idx| matcher(&items[idx].1, &stage.query_text)) .collect() } } } impl Filter for FilterPipeline { fn push(&mut self, index: usize, label: &str) { self.items.push((index, label.to_string())); // Push to any existing fuzzy filters in stages for stage in &mut self.stages { if let Some(ref mut fuzzy) = stage.fuzzy { fuzzy.push(index, label); } stage.dirty = true; } } fn set_query(&mut self, query: &str) { self.last_raw_query = query.to_string(); let segments = split_pipeline(query); // Reconcile stages with new segments let mut new_len = segments.len(); // If query is empty, clear everything if segments.is_empty() { self.stages.clear(); new_len = 0; } // Compare position-by-position for (i, seg) in segments.iter().enumerate() { if i < self.stages.len() { if self.stages[i].raw_segment == *seg { // Unchanged: keep cache continue; } // Changed: update this stage, mark dirty let parsed = strategy::parse_segment(seg); self.stages[i].raw_segment = seg.clone(); self.stages[i].kind = parsed.kind; self.stages[i].inverse = parsed.inverse; self.stages[i].query_text = parsed.query.to_string(); self.stages[i].dirty = true; // Mark all downstream stages dirty too for j in (i + 1)..self.stages.len() { self.stages[j].dirty = true; } } else { // New stage let parsed = strategy::parse_segment(seg); let fuzzy = if parsed.kind == FilterKind::Fuzzy { let mut f = FuzzyFilter::new(); for (idx, label) in &self.items { f.push(*idx, label); } Some(f) } else { None }; self.stages.push(PipelineStage { raw_segment: seg.clone(), kind: parsed.kind, inverse: parsed.inverse, query_text: parsed.query.to_string(), fuzzy, cached_indices: Vec::new(), dirty: true, }); } } // Truncate extra stages self.stages.truncate(new_len); // Evaluate dirty stages self.evaluate(); } fn matched_count(&self) -> usize { match self.stages.last() { Some(stage) => stage.cached_indices.len(), None => self.items.len(), } } fn matched_index(&self, match_position: usize) -> Option { match self.stages.last() { Some(stage) => stage.cached_indices.get(match_position).copied(), None => self.items.get(match_position).map(|(idx, _)| *idx), } } } #[cfg(test)] mod tests { use super::*; fn push_items(p: &mut FilterPipeline, labels: &[&str]) { for (i, label) in labels.iter().enumerate() { p.push(i, label); } } fn matched_labels<'a>(p: &FilterPipeline, labels: &'a [&str]) -> Vec<&'a str> { (0..p.matched_count()) .filter_map(|i| p.matched_index(i)) .map(|idx| labels[idx]) .collect() } #[test] fn empty_query_returns_all() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query(""); assert_eq!(p.matched_count(), 3); } #[test] fn single_fuzzy_stage() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query("ban"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["banana"]); } #[test] fn single_exact_stage() { let mut p = FilterPipeline::new(); let labels = &["apple", "pineapple", "cherry"]; push_items(&mut p, labels); p.set_query("'apple"); let result = matched_labels(&p, labels); assert!(result.contains(&"apple")); assert!(result.contains(&"pineapple")); assert!(!result.contains(&"cherry")); } #[test] fn two_stage_pipeline() { let mut p = FilterPipeline::new(); let labels = &["error_log", "warning_temp", "info_log", "debug_temp"]; push_items(&mut p, labels); p.set_query("'log | !temp"); let result = matched_labels(&p, labels); assert!(result.contains(&"error_log")); assert!(result.contains(&"info_log")); assert!(!result.contains(&"warning_temp")); assert!(!result.contains(&"debug_temp")); } #[test] fn three_stage_pipeline() { let mut p = FilterPipeline::new(); let labels = &[ "error_log_123", "warning_temp_456", "info_log_789", "debug_temp_012", ]; push_items(&mut p, labels); p.set_query("'log | !temp | /[0-9]+/"); let result = matched_labels(&p, labels); assert!(result.contains(&"error_log_123")); assert!(result.contains(&"info_log_789")); assert_eq!(result.len(), 2); } #[test] fn incremental_stage_1_preserved() { let mut p = FilterPipeline::new(); let labels = &["error_log", "warning_temp", "info_log", "debug_temp"]; push_items(&mut p, labels); // First query p.set_query("'log | !error"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["info_log"]); // Edit stage 2 only: stage 1 cache should be preserved p.set_query("'log | !info"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["error_log"]); } #[test] fn pop_stage_on_backspace() { let mut p = FilterPipeline::new(); let labels = &["error_log", "warning_temp", "info_log"]; push_items(&mut p, labels); p.set_query("'log | !error"); assert_eq!(matched_labels(&p, labels), vec!["info_log"]); // Backspace over the pipe: now just "'log" p.set_query("'log"); let result = matched_labels(&p, labels); assert!(result.contains(&"error_log")); assert!(result.contains(&"info_log")); assert_eq!(result.len(), 2); } #[test] fn empty_segments_skipped() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana"]; push_items(&mut p, labels); p.set_query("apple | | banana"); // Middle empty segment should be ignored // This should be equivalent to "apple | banana" // which is fuzzy "apple" then fuzzy "banana". // "apple" matches apple, "banana" matches banana. // Pipeline: first stage matches apple, second stage filters that for banana. // Neither "apple" nor "banana" matches both, so 0 results. assert_eq!(p.matched_count(), 0); } #[test] fn escaped_pipe() { let mut p = FilterPipeline::new(); let labels = &["foo|bar", "foobar", "baz"]; push_items(&mut p, labels); p.set_query("'foo\\|bar"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["foo|bar"]); } #[test] fn pipe_inside_regex_not_split() { let mut p = FilterPipeline::new(); let labels = &["foo", "bar", "baz"]; push_items(&mut p, labels); p.set_query("/foo|bar/"); let result = matched_labels(&p, labels); assert!(result.contains(&"foo")); assert!(result.contains(&"bar")); assert!(!result.contains(&"baz")); } #[test] fn inverse_exact() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query("!'banana"); let result = matched_labels(&p, labels); assert!(result.contains(&"apple")); assert!(result.contains(&"cherry")); assert!(!result.contains(&"banana")); } #[test] fn inverse_regex() { let mut p = FilterPipeline::new(); let labels = &["item-001", "item-abc", "item-123"]; push_items(&mut p, labels); p.set_query("!/[0-9]+/"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["item-abc"]); } #[test] fn add_items_picked_up() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana"]; push_items(&mut p, labels); p.set_query("'cherry"); assert_eq!(p.matched_count(), 0); // Add new item p.push(2, "cherry"); // Re-evaluate with same query p.set_query("'cherry"); assert_eq!(p.matched_count(), 1); assert_eq!(p.matched_index(0), Some(2)); } #[test] fn split_pipeline_basic() { let segs = split_pipeline("foo | bar"); assert_eq!(segs, vec!["foo", "bar"]); } #[test] fn split_pipeline_escaped() { let segs = split_pipeline("foo\\|bar"); assert_eq!(segs, vec!["foo|bar"]); } #[test] fn split_pipeline_regex() { let segs = split_pipeline("/foo|bar/ | baz"); assert_eq!(segs, vec!["/foo|bar/", "baz"]); } #[test] fn split_pipeline_empty_segments() { let segs = split_pipeline("foo | | bar"); assert_eq!(segs, vec!["foo", "bar"]); } #[test] fn split_pipeline_inverse_regex() { let segs = split_pipeline("!/foo|bar/ | baz"); assert_eq!(segs, vec!["!/foo|bar/", "baz"]); } // -- Pipeline edge case tests -- #[test] fn fuzzy_as_second_stage() { let mut p = FilterPipeline::new(); let labels = &["error_log", "warning_temp", "info_log", "debug_log"]; push_items(&mut p, labels); // Exact first, then fuzzy second p.set_query("'log | debug"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["debug_log"]); } #[test] fn three_stage_edit_stage_one() { let mut p = FilterPipeline::new(); let labels = &[ "error_log_123", "warning_temp_456", "info_log_789", "debug_temp_012", ]; push_items(&mut p, labels); p.set_query("'log | !error | /[0-9]+/"); assert_eq!(matched_labels(&p, labels), vec!["info_log_789"]); // Edit stage 1: now match "temp" instead of "log" p.set_query("'temp | !error | /[0-9]+/"); let result = matched_labels(&p, labels); assert!(result.contains(&"warning_temp_456")); assert!(result.contains(&"debug_temp_012")); assert!(!result.contains(&"error_log_123")); } #[test] fn invalid_regex_in_pipeline() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); // Invalid regex: unclosed bracket. Should match everything (graceful degradation). p.set_query("/[invalid/"); assert_eq!(p.matched_count(), 3); } #[test] fn same_query_twice_stable() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query("ban"); let first = matched_labels(&p, labels); p.set_query("ban"); let second = matched_labels(&p, labels); assert_eq!(first, second); } #[test] fn query_shrink_to_single() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query("'ban | !x"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["banana"]); // Shrink back to single stage p.set_query("'ban"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["banana"]); } #[test] fn all_items_excluded() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana"]; push_items(&mut p, labels); p.set_query("xyz"); assert_eq!(p.matched_count(), 0); } #[test] fn single_regex_stage() { let mut p = FilterPipeline::new(); let labels = &["item-001", "item-abc", "item-123"]; push_items(&mut p, labels); p.set_query("/[0-9]+/"); let result = matched_labels(&p, labels); assert_eq!(result, vec!["item-001", "item-123"]); } #[test] fn inverse_fuzzy_stage() { let mut p = FilterPipeline::new(); let labels = &["apple", "banana", "cherry"]; push_items(&mut p, labels); p.set_query("!ban"); let result = matched_labels(&p, labels); assert!(result.contains(&"apple")); assert!(result.contains(&"cherry")); assert!(!result.contains(&"banana")); } }