feat(core): Expand filtering into pipeline supporting multiple text search modes.

Modes include: exact match, smart-case, and regular expressions.
This commit is contained in:
2026-03-13 22:55:47 -04:00
parent 6187b83f26
commit 6a4cc85285
10 changed files with 1352 additions and 10 deletions

View File

@@ -0,0 +1,231 @@
//! Filter segment prefix parsing. Determines which filter
//! strategy to use based on the query prefix.
/// Matching strategy selected for a single query segment.
///
/// [`parse_segment`] picks the variant from the segment's prefix and
/// delimiters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterKind {
    /// Fallback strategy: used when no exact or regex marker is recognised.
    Fuzzy,
    /// Selected by a leading `'` (optionally preceded by `!`).
    Exact,
    /// Selected when the text is wrapped in `/.../` (optionally preceded
    /// by `!`).
    Regex,
}
/// Outcome of parsing one filter segment.
///
/// The struct borrows its query text from the segment it was parsed from,
/// so it cannot outlive that input.
#[derive(Debug, PartialEq, Eq)]
pub struct ParsedSegment<'a> {
    /// Strategy chosen for this segment.
    pub kind: FilterKind,
    /// Inversion flag; set when the segment started with `!`.
    pub inverse: bool,
    /// Query text with the recognised prefix/delimiters stripped.
    pub query: &'a str,
}
/// Classify one filter segment and extract its query text.
///
/// A single leading `!` sets `inverse` and is removed; the remaining text
/// is then classified, checking in this order:
///
/// * `/pattern/` (starts and ends with `/`, at least two characters) ->
///   `Regex`; the query is the text between the two slashes.
/// * `/...` without a closing `/` -> `Fuzzy`; the query keeps its leading
///   `/` (the regex delimiter is presumably still being typed).
/// * `'text` -> `Exact`; the query is everything after the `'`.
/// * anything else -> `Fuzzy`; the query is the text unchanged.
///
/// Only one `!` is consumed, so `!!x` parses as inverse fuzzy with query
/// `!x`. The returned query is a sub-slice of `segment`; nothing is copied.
pub fn parse_segment(segment: &str) -> ParsedSegment<'_> {
    // Peel off the optional inversion marker once, up front, so the
    // classification below is shared by the plain and inverted forms.
    let (inverse, body) = match segment.strip_prefix('!') {
        Some(after_bang) => (true, after_bang),
        None => (false, segment),
    };

    let (kind, query) = if let Some(after_slash) = body.strip_prefix('/') {
        // The closing slash is looked for *after* removing the opening one,
        // so a lone "/" is never mistaken for an empty regex.
        match after_slash.strip_suffix('/') {
            Some(pattern) => (FilterKind::Regex, pattern),
            None => (FilterKind::Fuzzy, body),
        }
    } else if let Some(literal) = body.strip_prefix('\'') {
        (FilterKind::Exact, literal)
    } else {
        (FilterKind::Fuzzy, body)
    };

    ParsedSegment {
        kind,
        inverse,
        query,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` and compare all three fields of the result at once.
    fn check(input: &str, kind: FilterKind, inverse: bool, query: &str) {
        assert_eq!(
            parse_segment(input),
            ParsedSegment {
                kind,
                inverse,
                query,
            },
            "unexpected parse for input {:?}",
            input
        );
    }

    #[test]
    fn plain_text_is_fuzzy() {
        check("hello", FilterKind::Fuzzy, false, "hello");
    }

    #[test]
    fn empty_is_fuzzy() {
        check("", FilterKind::Fuzzy, false, "");
    }

    #[test]
    fn exact_prefix() {
        check("'exact match", FilterKind::Exact, false, "exact match");
    }

    #[test]
    fn regex_delimiters() {
        check("/[0-9]+/", FilterKind::Regex, false, "[0-9]+");
    }

    #[test]
    fn inverse_fuzzy() {
        check("!temp", FilterKind::Fuzzy, true, "temp");
    }

    #[test]
    fn inverse_exact() {
        check("!'temp", FilterKind::Exact, true, "temp");
    }

    #[test]
    fn inverse_regex() {
        check("!/[0-9]+/", FilterKind::Regex, true, "[0-9]+");
    }

    #[test]
    fn unclosed_regex_is_fuzzy() {
        check("/still typing", FilterKind::Fuzzy, false, "/still typing");
    }

    #[test]
    fn unclosed_inverse_regex_is_fuzzy_inverse() {
        check("!/still typing", FilterKind::Fuzzy, true, "/still typing");
    }

    #[test]
    fn just_slash_is_fuzzy() {
        check("/", FilterKind::Fuzzy, false, "/");
    }

    #[test]
    fn empty_regex_pattern() {
        check("//", FilterKind::Regex, false, "");
    }

    #[test]
    fn just_exclamation() {
        check("!", FilterKind::Fuzzy, true, "");
    }

    #[test]
    fn just_quote() {
        check("'", FilterKind::Exact, false, "");
    }

    // -- Double-prefix edge cases --

    #[test]
    fn double_exclamation() {
        // Only the first `!` is consumed as the inversion marker; the
        // second one stays in the query text verbatim.
        check("!!query", FilterKind::Fuzzy, true, "!query");
    }

    #[test]
    fn inverse_exact_regex_like() {
        // Regex-looking text after `!'` is still an exact query, not a regex.
        check("!'[0-9]", FilterKind::Exact, true, "[0-9]");
    }
}