| use crate::utils::SysRegex; |
| use crate::{Offsets, Result}; |
| use regex::Regex; |
|
|
| |
| pub trait Pattern { |
| |
| |
| |
| |
| |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>>; |
| } |
|
|
| impl Pattern for char { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| let is_char = |c: char| -> bool { c == *self }; |
| is_char.find_matches(inside) |
| } |
| } |
|
|
| impl Pattern for &str { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| if self.is_empty() { |
| |
| return Ok(vec![((0, inside.chars().count()), false)]); |
| } |
|
|
| let re = Regex::new(®ex::escape(self))?; |
| (&re).find_matches(inside) |
| } |
| } |
|
|
| impl Pattern for &String { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| let s: &str = self; |
| s.find_matches(inside) |
| } |
| } |
|
|
| impl Pattern for &Regex { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| if inside.is_empty() { |
| return Ok(vec![((0, 0), false)]); |
| } |
|
|
| let mut prev = 0; |
| let mut splits = Vec::with_capacity(inside.len()); |
| for m in self.find_iter(inside) { |
| if prev != m.start() { |
| splits.push(((prev, m.start()), false)); |
| } |
| splits.push(((m.start(), m.end()), true)); |
| prev = m.end(); |
| } |
| if prev != inside.len() { |
| splits.push(((prev, inside.len()), false)) |
| } |
| Ok(splits) |
| } |
| } |
|
|
| impl Pattern for &SysRegex { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| if inside.is_empty() { |
| return Ok(vec![((0, 0), false)]); |
| } |
|
|
| let mut prev = 0; |
| let mut splits = Vec::with_capacity(inside.len()); |
| for (start, end) in self.find_iter(inside) { |
| if prev != start { |
| splits.push(((prev, start), false)); |
| } |
| splits.push(((start, end), true)); |
| prev = end; |
| } |
| if prev != inside.len() { |
| splits.push(((prev, inside.len()), false)) |
| } |
| Ok(splits) |
| } |
| } |
|
|
| impl<F> Pattern for F |
| where |
| F: Fn(char) -> bool, |
| { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| if inside.is_empty() { |
| return Ok(vec![((0, 0), false)]); |
| } |
|
|
| let mut last_offset = 0; |
| let mut last_seen = 0; |
|
|
| let mut matches = inside |
| .char_indices() |
| .flat_map(|(b, c)| { |
| last_seen = b + c.len_utf8(); |
| if self(c) { |
| let mut events = Vec::with_capacity(2); |
| if last_offset < b { |
| |
| events.push(((last_offset, b), false)); |
| } |
| events.push(((b, b + c.len_utf8()), true)); |
| last_offset = b + c.len_utf8(); |
| events |
| } else { |
| vec![] |
| } |
| }) |
| .collect::<Vec<_>>(); |
|
|
| |
| if last_seen > last_offset { |
| matches.push(((last_offset, last_seen), false)); |
| } |
|
|
| Ok(matches) |
| } |
| } |
|
|
| |
| |
| |
| pub struct Invert<P: Pattern>(pub P); |
| impl<P: Pattern> Pattern for Invert<P> { |
| fn find_matches(&self, inside: &str) -> Result<Vec<(Offsets, bool)>> { |
| Ok(self |
| .0 |
| .find_matches(inside)? |
| .into_iter() |
| .map(|(offsets, flag)| (offsets, !flag)) |
| .collect()) |
| } |
| } |
|
|
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use regex::Regex; |
|
|
| macro_rules! do_test { |
| ($inside: expr, $pattern: expr => @ERROR) => { |
| assert!($pattern.find_matches($inside).is_err()); |
| }; |
| ($inside: expr, $pattern: expr => $result: expr) => { |
| assert_eq!($pattern.find_matches($inside).unwrap(), $result); |
| assert_eq!( |
| Invert($pattern).find_matches($inside).unwrap(), |
| $result |
| .into_iter() |
| .map(|v: (Offsets, bool)| (v.0, !v.1)) |
| .collect::<Vec<_>>() |
| ); |
| }; |
| } |
|
|
| #[test] |
| fn char() { |
| do_test!("aba", 'a' => vec![((0, 1), true), ((1, 2), false), ((2, 3), true)]); |
| do_test!("bbbba", 'a' => vec![((0, 4), false), ((4, 5), true)]); |
| do_test!("aabbb", 'a' => vec![((0, 1), true), ((1, 2), true), ((2, 5), false)]); |
| do_test!("", 'a' => vec![((0, 0), false)]); |
| do_test!("aaa", 'b' => vec![((0, 3), false)]); |
| } |
|
|
| #[test] |
| fn str() { |
| do_test!("aba", "a" => vec![((0, 1), true), ((1, 2), false), ((2, 3), true)]); |
| do_test!("bbbba", "a" => vec![((0, 4), false), ((4, 5), true)]); |
| do_test!("aabbb", "a" => vec![((0, 1), true), ((1, 2), true), ((2, 5), false)]); |
| do_test!("aabbb", "ab" => vec![((0, 1), false), ((1, 3), true), ((3, 5), false)]); |
| do_test!("aabbab", "ab" => |
| vec![((0, 1), false), ((1, 3), true), ((3, 4), false), ((4, 6), true)] |
| ); |
| do_test!("", "" => vec![((0, 0), false)]); |
| do_test!("aaa", "" => vec![((0, 3), false)]); |
| do_test!("aaa", "b" => vec![((0, 3), false)]); |
| } |
|
|
| #[test] |
| fn functions() { |
| let is_b = |c| c == 'b'; |
| do_test!("aba", is_b => vec![((0, 1), false), ((1, 2), true), ((2, 3), false)]); |
| do_test!("aaaab", is_b => vec![((0, 4), false), ((4, 5), true)]); |
| do_test!("bbaaa", is_b => vec![((0, 1), true), ((1, 2), true), ((2, 5), false)]); |
| do_test!("", is_b => vec![((0, 0), false)]); |
| do_test!("aaa", is_b => vec![((0, 3), false)]); |
| } |
|
|
| #[test] |
| fn regex() { |
| let is_whitespace = Regex::new(r"\s+").unwrap(); |
| do_test!("a b", &is_whitespace => vec![((0, 1), false), ((1, 4), true), ((4, 5), false)]); |
| do_test!(" a b ", &is_whitespace => |
| vec![((0, 3), true), ((3, 4), false), ((4, 7), true), ((7, 8), false), ((8, 11), true)] |
| ); |
| do_test!("", &is_whitespace => vec![((0, 0), false)]); |
| do_test!("πΎπ π π ππ π£ππππ", &is_whitespace => |
| vec![((0, 16), false), ((16, 17), true), ((17, 45), false)] |
| ); |
| do_test!("aaa", &is_whitespace => vec![((0, 3), false)]); |
| } |
|
|
| #[test] |
| fn sys_regex() { |
| let is_whitespace = SysRegex::new(r"\s+").unwrap(); |
| do_test!("a b", &is_whitespace => vec![((0, 1), false), ((1, 4), true), ((4, 5), false)]); |
| do_test!(" a b ", &is_whitespace => |
| vec![((0, 3), true), ((3, 4), false), ((4, 7), true), ((7, 8), false), ((8, 11), true)] |
| ); |
| do_test!("", &is_whitespace => vec![((0, 0), false)]); |
| do_test!("πΎπ π π ππ π£ππππ", &is_whitespace => |
| vec![((0, 16), false), ((16, 17), true), ((17, 45), false)] |
| ); |
| do_test!("aaa", &is_whitespace => vec![((0, 3), false)]); |
| } |
| } |
|
|