// Source listing: 1,665 bytes, revision 72c0672
use crate::tokenizer::pattern::Pattern;
use crate::Offsets;
use fancy_regex::Regex;
use std::error::Error;
/// Wrapper around a compiled [`fancy_regex::Regex`].
///
/// Built with `SysRegex::new` and searched with `SysRegex::find_iter`.
#[derive(Debug)]
pub struct SysRegex {
/// The compiled pattern; private to this type.
regex: Regex,
}
impl SysRegex {
    /// Compiles `regex_str` and wraps the result in a `SysRegex`.
    ///
    /// # Errors
    ///
    /// Returns the boxed error produced by `Regex::new` when the pattern
    /// cannot be compiled.
    pub fn new(regex_str: &str) -> Result<Self, Box<dyn Error + Send + Sync + 'static>> {
        let regex = Regex::new(regex_str)?;
        Ok(Self { regex })
    }

    /// Returns an iterator over the `(start, end)` offsets of the matches
    /// found in `inside`.
    pub fn find_iter<'r, 't>(&'r self, inside: &'t str) -> Matches<'r, 't> {
        let raw = self.regex.find_iter(inside);
        Matches(raw)
    }
}
/// Iterator adapter over [`fancy_regex::Matches`].
///
/// `'r` is the lifetime of the borrowed regex and `'t` the lifetime of the
/// searched text, as tied together by `SysRegex::find_iter`.
pub struct Matches<'r, 't>(fancy_regex::Matches<'r, 't>);
impl<'r, 't> Iterator for Matches<'r, 't> {
    type Item = (usize, usize);

    /// Yields the `(start, end)` offsets of the next match.
    ///
    /// Returns `None` both when the underlying iterator is exhausted and
    /// when it reports an error; the error itself is discarded.
    fn next(&mut self) -> Option<Self::Item> {
        // First `?`: nothing left to yield. `.ok()?`: an error ends iteration.
        let found = self.0.next()?.ok()?;
        Some((found.start(), found.end()))
    }
}
impl Pattern for &Regex {
    /// Partitions `inside` into consecutive offset ranges, each flagged with
    /// whether it is a regex match (`true`) or the text between matches
    /// (`false`).
    ///
    /// An empty input produces the single entry `((0, 0), false)`.
    ///
    /// # Errors
    ///
    /// Propagates the first error yielded by the regex match iterator.
    fn find_matches(
        &self,
        inside: &str,
    ) -> Result<Vec<(Offsets, bool)>, Box<dyn Error + Send + Sync + 'static>> {
        let total = inside.len();
        if total == 0 {
            return Ok(vec![((0, 0), false)]);
        }

        // Reserve one slot per input byte, as the length is the only bound known up front.
        let mut pieces = Vec::with_capacity(total);
        // End offset of the last range emitted so far.
        let mut cursor = 0;

        for found in self.find_iter(inside) {
            let hit = found?;
            let (from, to) = (hit.start(), hit.end());

            // Emit the unmatched gap preceding this match, if there is one.
            if from != cursor {
                pieces.push(((cursor, from), false));
            }
            pieces.push(((from, to), true));
            cursor = to;
        }

        // Emit whatever trails the final match.
        if cursor != total {
            pieces.push(((cursor, total), false));
        }

        Ok(pieces)
    }
}
|