File size: 1,291 Bytes
ce1c12f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
use fancy_regex::RegexBuilder;
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use serde::Deserialize;

/// Keyword arguments accepted by the `fancy_contains` expression.
///
/// Deserialized via serde; all three fields are required because none
/// declares a default.
#[derive(Deserialize)]
struct FancyRegexKwargs {
    /// Regex pattern source handed to `fancy_regex::RegexBuilder::new`.
    pattern: String,
    /// When `false`, the regex is built with case-insensitive matching enabled.
    case_sensitive: bool,
    /// Value forwarded to `RegexBuilder::backtrack_limit`.
    backtrack_limit: usize,
}

/// Tests every string in the first input column against a `fancy_regex` pattern.
///
/// The regex is compiled once from `kwargs` and then applied to each value.
/// Non-null values map to the result of `Regex::is_match`; null values map to
/// `false` (they are *not* propagated as nulls), so the output column never
/// contains nulls.
///
/// # Errors
///
/// Returns a `ComputeError` when:
/// * `inputs` is empty,
/// * the pattern fails to compile, or
/// * matching a value fails at runtime (the first failing row aborts the
///   whole evaluation).
///
/// An error is also returned when the first input is not a string column.
#[polars_expr(output_type=Boolean)]
fn fancy_contains(inputs: &[Series], kwargs: FancyRegexKwargs) -> PolarsResult<Series> {
    // Checked access: report a missing input as an error instead of panicking.
    let input = inputs.first().ok_or_else(|| {
        PolarsError::ComputeError("fancy_contains expects one input column".into())
    })?;
    let ca = input.str()?;

    // Compile once up front; the same regex is reused for every row.
    let re = RegexBuilder::new(&kwargs.pattern)
        .case_insensitive(!kwargs.case_sensitive)
        .backtrack_limit(kwargs.backtrack_limit)
        .build()
        .map_err(|e| {
            PolarsError::ComputeError(format!("Invalid fancy regex pattern: {e}").into())
        })?;

    // Collect straight into the boolean column; collecting into a `Result`
    // stops at the first match error without staging an intermediate `Vec`.
    let out = ca
        .into_iter()
        .map(|opt_s| match opt_s {
            Some(s) => re.is_match(s).map(Some).map_err(|err| {
                PolarsError::ComputeError(format!("Fancy regex match failed: {err}").into())
            }),
            // Nulls are deliberately reported as "no match" rather than null.
            None => Ok(Some(false)),
        })
        .collect::<PolarsResult<BooleanChunked>>()?;

    Ok(out.into_series())
}