618 lines
20 KiB
Rust
618 lines
20 KiB
Rust
//! Filter pipeline with `|` chaining. Splits a query into
|
|
//! segments, applies the appropriate filter strategy to each,
|
|
//! and chains results through stages. Supports incremental
|
|
//! caching: unchanged stages keep their results.
|
|
|
|
use super::filter::{Filter, FuzzyFilter};
|
|
use super::strategy::{self, FilterKind};
|
|
|
|
/// A multi-stage filter pipeline. Each `|` in the query
|
|
/// creates a new stage that filters the previous stage's
|
|
/// output. Implements [`Filter`] so it can be used as a
|
|
/// drop-in replacement for a single filter.
|
|
pub struct FilterPipeline {
|
|
/// Master item list: (original index, label).
|
|
items: Vec<(usize, String)>,
|
|
/// Pipeline stages, one per `|`-separated segment.
|
|
stages: Vec<PipelineStage>,
|
|
/// The last raw query string, used for diffing.
|
|
last_raw_query: String,
|
|
}
|
|
|
|
struct PipelineStage {
|
|
/// The raw segment text (including prefix chars).
|
|
raw_segment: String,
|
|
kind: FilterKind,
|
|
inverse: bool,
|
|
/// The query text after prefix stripping.
|
|
query_text: String,
|
|
/// The strategy-specific filter (only used for fuzzy stages).
|
|
fuzzy: Option<FuzzyFilter>,
|
|
/// Items passing this stage (indices into master list).
|
|
cached_indices: Vec<usize>,
|
|
dirty: bool,
|
|
}
|
|
|
|
/// Split a raw query into pipeline segments on unescaped `|` characters.
///
/// Rules, applied left to right:
/// - `\|` always yields a literal `|` and never splits, inside or outside
///   a regex.
/// - A `/` at the start of a segment (optionally preceded by `!`) opens a
///   regex; the next unescaped `/` closes it. A `|` inside the regex does
///   not split. A regex that is never closed extends to the end of the query.
/// - Inside a regex, a backslash escapes the character that follows it, so
///   `\/` does not terminate the pattern. The escape sequence is kept
///   verbatim in the segment so the regex engine still sees it.
///
/// Every segment is trimmed, and segments that are empty after trimming are
/// dropped from the result.
fn split_pipeline(query: &str) -> Vec<String> {
    /// Move the trimmed contents of `current` into `segments` (if non-empty)
    /// and reset `current` for the next segment.
    fn flush(current: &mut String, segments: &mut Vec<String>) {
        let piece = current.trim();
        if !piece.is_empty() {
            segments.push(piece.to_string());
        }
        current.clear();
    }

    let mut segments: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut in_regex = false;
    let mut chars = query.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.peek().copied() {
                // Escaped pipe: always collapse to a literal `|`.
                Some('|') => {
                    chars.next();
                    current.push('|');
                    continue;
                }
                // Any other escape inside a regex is copied through as a
                // pair, so `\/` cannot be mistaken for the closing slash and
                // `\\` cannot make a following `/` look escaped.
                Some(escaped) if in_regex => {
                    chars.next();
                    current.push(c);
                    current.push(escaped);
                    continue;
                }
                // Lone backslash outside a regex (or at end of input): literal.
                _ => {}
            }
        }

        match c {
            // Closing delimiter of the regex currently being read.
            '/' if in_regex => {
                in_regex = false;
                current.push(c);
            }
            // Opening delimiter: only recognised at the start of a segment,
            // optionally after the `!` inversion prefix.
            '/' if matches!(current.trim(), "" | "!") => {
                in_regex = true;
                current.push(c);
            }
            // Unescaped pipe outside a regex ends the current segment.
            '|' if !in_regex => flush(&mut current, &mut segments),
            _ => current.push(c),
        }
    }

    flush(&mut current, &mut segments);
    segments
}
|
|
|
|
impl Default for FilterPipeline {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl FilterPipeline {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
items: Vec::new(),
|
|
stages: Vec::new(),
|
|
last_raw_query: String::new(),
|
|
}
|
|
}
|
|
|
|
/// Evaluate all dirty stages in order. Each stage filters
|
|
/// against the previous stage's cached_indices.
|
|
fn evaluate(&mut self) {
|
|
for stage_idx in 0..self.stages.len() {
|
|
if !self.stages[stage_idx].dirty {
|
|
continue;
|
|
}
|
|
|
|
let input_indices: Vec<usize> = if stage_idx == 0 {
|
|
self.items.iter().map(|(idx, _)| *idx).collect()
|
|
} else {
|
|
self.stages[stage_idx - 1].cached_indices.clone()
|
|
};
|
|
|
|
let stage = &mut self.stages[stage_idx];
|
|
|
|
let result = match stage.kind {
|
|
FilterKind::Fuzzy => Self::eval_fuzzy(stage, &input_indices, stage_idx),
|
|
FilterKind::Exact => {
|
|
Self::eval_simple(stage, &input_indices, &self.items, |label, query| {
|
|
label.to_lowercase().contains(&query.to_lowercase())
|
|
})
|
|
}
|
|
FilterKind::Regex => {
|
|
let re = fancy_regex::Regex::new(&stage.query_text).ok();
|
|
Self::eval_simple(stage, &input_indices, &self.items, |label, _query| {
|
|
match &re {
|
|
Some(r) => r.is_match(label).unwrap_or(false),
|
|
None => true, // invalid regex matches everything
|
|
}
|
|
})
|
|
}
|
|
};
|
|
|
|
self.stages[stage_idx].cached_indices = result;
|
|
self.stages[stage_idx].dirty = false;
|
|
}
|
|
}
|
|
|
|
fn eval_fuzzy(
|
|
stage: &mut PipelineStage,
|
|
input_indices: &[usize],
|
|
stage_idx: usize,
|
|
) -> Vec<usize> {
|
|
let Some(fuzzy) = stage.fuzzy.as_mut() else {
|
|
return Vec::new();
|
|
};
|
|
fuzzy.set_query(&stage.query_text);
|
|
let fuzzy_results: Vec<usize> = (0..fuzzy.matched_count())
|
|
.filter_map(|i| fuzzy.matched_index(i))
|
|
.collect();
|
|
if stage.inverse {
|
|
let fuzzy_set: std::collections::HashSet<usize> = fuzzy_results.into_iter().collect();
|
|
input_indices
|
|
.iter()
|
|
.copied()
|
|
.filter(|idx| !fuzzy_set.contains(idx))
|
|
.collect()
|
|
} else if stage_idx == 0 {
|
|
fuzzy_results
|
|
} else {
|
|
let input_set: std::collections::HashSet<usize> =
|
|
input_indices.iter().copied().collect();
|
|
fuzzy_results
|
|
.into_iter()
|
|
.filter(|idx| input_set.contains(idx))
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
fn eval_simple(
|
|
stage: &PipelineStage,
|
|
input_indices: &[usize],
|
|
items: &[(usize, String)],
|
|
matcher: impl Fn(&str, &str) -> bool,
|
|
) -> Vec<usize> {
|
|
if stage.query_text.is_empty() {
|
|
return input_indices.to_vec();
|
|
}
|
|
if stage.inverse {
|
|
input_indices
|
|
.iter()
|
|
.copied()
|
|
.filter(|&idx| !matcher(&items[idx].1, &stage.query_text))
|
|
.collect()
|
|
} else {
|
|
input_indices
|
|
.iter()
|
|
.copied()
|
|
.filter(|&idx| matcher(&items[idx].1, &stage.query_text))
|
|
.collect()
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Filter for FilterPipeline {
|
|
fn push(&mut self, index: usize, label: &str) {
|
|
self.items.push((index, label.to_string()));
|
|
// Push to any existing fuzzy filters in stages
|
|
for stage in &mut self.stages {
|
|
if let Some(ref mut fuzzy) = stage.fuzzy {
|
|
fuzzy.push(index, label);
|
|
}
|
|
stage.dirty = true;
|
|
}
|
|
}
|
|
|
|
fn set_query(&mut self, query: &str) {
|
|
self.last_raw_query = query.to_string();
|
|
let segments = split_pipeline(query);
|
|
|
|
// Reconcile stages with new segments
|
|
let mut new_len = segments.len();
|
|
|
|
// If query is empty, clear everything
|
|
if segments.is_empty() {
|
|
self.stages.clear();
|
|
new_len = 0;
|
|
}
|
|
|
|
// Compare position-by-position
|
|
for (i, seg) in segments.iter().enumerate() {
|
|
if i < self.stages.len() {
|
|
if self.stages[i].raw_segment == *seg {
|
|
// Unchanged: keep cache
|
|
continue;
|
|
}
|
|
// Changed: update this stage, mark dirty
|
|
let parsed = strategy::parse_segment(seg);
|
|
self.stages[i].raw_segment = seg.clone();
|
|
self.stages[i].kind = parsed.kind;
|
|
self.stages[i].inverse = parsed.inverse;
|
|
self.stages[i].query_text = parsed.query.to_string();
|
|
self.stages[i].dirty = true;
|
|
// Mark all downstream stages dirty too
|
|
for j in (i + 1)..self.stages.len() {
|
|
self.stages[j].dirty = true;
|
|
}
|
|
} else {
|
|
// New stage
|
|
let parsed = strategy::parse_segment(seg);
|
|
let fuzzy = if parsed.kind == FilterKind::Fuzzy {
|
|
let mut f = FuzzyFilter::new();
|
|
for (idx, label) in &self.items {
|
|
f.push(*idx, label);
|
|
}
|
|
Some(f)
|
|
} else {
|
|
None
|
|
};
|
|
self.stages.push(PipelineStage {
|
|
raw_segment: seg.clone(),
|
|
kind: parsed.kind,
|
|
inverse: parsed.inverse,
|
|
query_text: parsed.query.to_string(),
|
|
fuzzy,
|
|
cached_indices: Vec::new(),
|
|
dirty: true,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Truncate extra stages
|
|
self.stages.truncate(new_len);
|
|
|
|
// Evaluate dirty stages
|
|
self.evaluate();
|
|
}
|
|
|
|
fn matched_count(&self) -> usize {
|
|
match self.stages.last() {
|
|
Some(stage) => stage.cached_indices.len(),
|
|
None => self.items.len(),
|
|
}
|
|
}
|
|
|
|
fn matched_index(&self, match_position: usize) -> Option<usize> {
|
|
match self.stages.last() {
|
|
Some(stage) => stage.cached_indices.get(match_position).copied(),
|
|
None => self.items.get(match_position).map(|(idx, _)| *idx),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Feed `labels` into `p`, using each label's position as its index.
    fn push_items(p: &mut FilterPipeline, labels: &[&str]) {
        for (position, label) in labels.iter().enumerate() {
            p.push(position, label);
        }
    }

    /// Labels of the currently matched items, in match order.
    fn matched_labels<'a>(p: &FilterPipeline, labels: &'a [&str]) -> Vec<&'a str> {
        let mut out = Vec::with_capacity(p.matched_count());
        for position in 0..p.matched_count() {
            if let Some(idx) = p.matched_index(position) {
                out.push(labels[idx]);
            }
        }
        out
    }

    /// Fresh pipeline pre-loaded with `labels`.
    fn pipeline_with(labels: &[&str]) -> FilterPipeline {
        let mut p = FilterPipeline::new();
        push_items(&mut p, labels);
        p
    }

    #[test]
    fn empty_query_returns_all() {
        let mut p = pipeline_with(&["apple", "banana", "cherry"]);
        p.set_query("");
        assert_eq!(p.matched_count(), 3);
    }

    #[test]
    fn single_fuzzy_stage() {
        let labels = ["apple", "banana", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("ban");
        assert_eq!(matched_labels(&p, &labels), vec!["banana"]);
    }

    #[test]
    fn single_exact_stage() {
        let labels = ["apple", "pineapple", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("'apple");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"apple"));
        assert!(result.contains(&"pineapple"));
        assert!(!result.contains(&"cherry"));
    }

    #[test]
    fn two_stage_pipeline() {
        let labels = ["error_log", "warning_temp", "info_log", "debug_temp"];
        let mut p = pipeline_with(&labels);
        p.set_query("'log | !temp");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"error_log"));
        assert!(result.contains(&"info_log"));
        assert!(!result.contains(&"warning_temp"));
        assert!(!result.contains(&"debug_temp"));
    }

    #[test]
    fn three_stage_pipeline() {
        let labels = [
            "error_log_123",
            "warning_temp_456",
            "info_log_789",
            "debug_temp_012",
        ];
        let mut p = pipeline_with(&labels);
        p.set_query("'log | !temp | /[0-9]+/");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"error_log_123"));
        assert!(result.contains(&"info_log_789"));
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn incremental_stage_1_preserved() {
        let labels = ["error_log", "warning_temp", "info_log", "debug_temp"];
        let mut p = pipeline_with(&labels);

        // Initial two-stage query.
        p.set_query("'log | !error");
        assert_eq!(matched_labels(&p, &labels), vec!["info_log"]);

        // Only the second stage changes; the first stage's cache is reused.
        p.set_query("'log | !info");
        assert_eq!(matched_labels(&p, &labels), vec!["error_log"]);
    }

    #[test]
    fn pop_stage_on_backspace() {
        let labels = ["error_log", "warning_temp", "info_log"];
        let mut p = pipeline_with(&labels);
        p.set_query("'log | !error");
        assert_eq!(matched_labels(&p, &labels), vec!["info_log"]);

        // Deleting back over the pipe leaves just "'log".
        p.set_query("'log");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"error_log"));
        assert!(result.contains(&"info_log"));
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn empty_segments_skipped() {
        let mut p = pipeline_with(&["apple", "banana"]);
        p.set_query("apple | | banana");
        // The empty middle segment is dropped, so this behaves like
        // "apple | banana": fuzzy "apple" followed by fuzzy "banana".
        // No label satisfies both stages, hence zero results.
        assert_eq!(p.matched_count(), 0);
    }

    #[test]
    fn escaped_pipe() {
        let labels = ["foo|bar", "foobar", "baz"];
        let mut p = pipeline_with(&labels);
        p.set_query("'foo\\|bar");
        assert_eq!(matched_labels(&p, &labels), vec!["foo|bar"]);
    }

    #[test]
    fn pipe_inside_regex_not_split() {
        let labels = ["foo", "bar", "baz"];
        let mut p = pipeline_with(&labels);
        p.set_query("/foo|bar/");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"foo"));
        assert!(result.contains(&"bar"));
        assert!(!result.contains(&"baz"));
    }

    #[test]
    fn inverse_exact() {
        let labels = ["apple", "banana", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("!'banana");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"apple"));
        assert!(result.contains(&"cherry"));
        assert!(!result.contains(&"banana"));
    }

    #[test]
    fn inverse_regex() {
        let labels = ["item-001", "item-abc", "item-123"];
        let mut p = pipeline_with(&labels);
        p.set_query("!/[0-9]+/");
        assert_eq!(matched_labels(&p, &labels), vec!["item-abc"]);
    }

    #[test]
    fn add_items_picked_up() {
        let mut p = pipeline_with(&["apple", "banana"]);
        p.set_query("'cherry");
        assert_eq!(p.matched_count(), 0);

        // A late item shows up once the same query is applied again.
        p.push(2, "cherry");
        p.set_query("'cherry");
        assert_eq!(p.matched_count(), 1);
        assert_eq!(p.matched_index(0), Some(2));
    }

    #[test]
    fn split_pipeline_basic() {
        assert_eq!(split_pipeline("foo | bar"), vec!["foo", "bar"]);
    }

    #[test]
    fn split_pipeline_escaped() {
        assert_eq!(split_pipeline("foo\\|bar"), vec!["foo|bar"]);
    }

    #[test]
    fn split_pipeline_regex() {
        assert_eq!(split_pipeline("/foo|bar/ | baz"), vec!["/foo|bar/", "baz"]);
    }

    #[test]
    fn split_pipeline_empty_segments() {
        assert_eq!(split_pipeline("foo | | bar"), vec!["foo", "bar"]);
    }

    #[test]
    fn split_pipeline_inverse_regex() {
        assert_eq!(split_pipeline("!/foo|bar/ | baz"), vec!["!/foo|bar/", "baz"]);
    }

    // -- Pipeline edge case tests --

    #[test]
    fn fuzzy_as_second_stage() {
        let labels = ["error_log", "warning_temp", "info_log", "debug_log"];
        let mut p = pipeline_with(&labels);
        // Exact stage first, fuzzy stage second.
        p.set_query("'log | debug");
        assert_eq!(matched_labels(&p, &labels), vec!["debug_log"]);
    }

    #[test]
    fn three_stage_edit_stage_one() {
        let labels = [
            "error_log_123",
            "warning_temp_456",
            "info_log_789",
            "debug_temp_012",
        ];
        let mut p = pipeline_with(&labels);
        p.set_query("'log | !error | /[0-9]+/");
        assert_eq!(matched_labels(&p, &labels), vec!["info_log_789"]);

        // Change the first stage from "log" to "temp".
        p.set_query("'temp | !error | /[0-9]+/");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"warning_temp_456"));
        assert!(result.contains(&"debug_temp_012"));
        assert!(!result.contains(&"error_log_123"));
    }

    #[test]
    fn invalid_regex_in_pipeline() {
        let mut p = pipeline_with(&["apple", "banana", "cherry"]);
        // Unclosed bracket: the invalid pattern degrades to "match everything".
        p.set_query("/[invalid/");
        assert_eq!(p.matched_count(), 3);
    }

    #[test]
    fn same_query_twice_stable() {
        let labels = ["apple", "banana", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("ban");
        let first = matched_labels(&p, &labels);
        p.set_query("ban");
        let second = matched_labels(&p, &labels);
        assert_eq!(first, second);
    }

    #[test]
    fn query_shrink_to_single() {
        let labels = ["apple", "banana", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("'ban | !x");
        assert_eq!(matched_labels(&p, &labels), vec!["banana"]);

        // Drop the second stage again.
        p.set_query("'ban");
        assert_eq!(matched_labels(&p, &labels), vec!["banana"]);
    }

    #[test]
    fn all_items_excluded() {
        let mut p = pipeline_with(&["apple", "banana"]);
        p.set_query("xyz");
        assert_eq!(p.matched_count(), 0);
    }

    #[test]
    fn single_regex_stage() {
        let labels = ["item-001", "item-abc", "item-123"];
        let mut p = pipeline_with(&labels);
        p.set_query("/[0-9]+/");
        assert_eq!(matched_labels(&p, &labels), vec!["item-001", "item-123"]);
    }

    #[test]
    fn inverse_fuzzy_stage() {
        let labels = ["apple", "banana", "cherry"];
        let mut p = pipeline_with(&labels);
        p.set_query("!ban");
        let result = matched_labels(&p, &labels);
        assert!(result.contains(&"apple"));
        assert!(result.contains(&"cherry"));
        assert!(!result.contains(&"banana"));
    }
}
|