File size: 1,315 Bytes
use serde::{Deserialize, Serialize};
use crate::normalizers::NormalizerWrapper;
use crate::tokenizer::{NormalizedString, Normalizer, Result};
use crate::utils::macro_rules_attribute;
/// A normalizer built from an ordered list of other normalizers.
///
/// Every wrapped normalizer is applied, in the order given, to the same
/// `NormalizedString`. With `#[serde(tag = "type")]`, the serialized form
/// carries a `type` field identifying this struct.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub struct Sequence {
    /// The wrapped normalizers, stored in execution order.
    normalizers: Vec<NormalizerWrapper>,
}
impl Sequence {
pub fn new(normalizers: Vec<NormalizerWrapper>) -> Self {
Self { normalizers }
}
pub fn get_normalizers(&self) -> &[NormalizerWrapper] {
&self.normalizers
}
pub fn get_normalizers_mut(&mut self) -> &mut [NormalizerWrapper] {
&mut self.normalizers
}
}
impl Normalizer for Sequence {
    /// Applies each wrapped normalizer to `normalized`, in order.
    ///
    /// # Errors
    ///
    /// Stops at the first normalizer that fails and returns its error; the
    /// remaining normalizers are not run. Changes made by earlier normalizers
    /// are left in `normalized`.
    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
        self.normalizers
            .iter()
            // Reborrow explicitly so the same string is handed to every step.
            .try_for_each(|step| step.normalize(&mut *normalized))
    }
}
/// Normalizer that lowercases the input.
///
/// This is a stateless unit struct; its serde implementation is generated by
/// the `impl_serde_type!` macro applied below.
#[derive(Clone, Copy, Debug)]
#[macro_rules_attribute(impl_serde_type!)]
pub struct Lowercase;
impl Normalizer for Lowercase {
    /// Lowercases `normalized` in place by calling its `lowercase()` method.
    ///
    /// This implementation never produces an error itself: it always returns
    /// `Ok(())` after the call.
    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
        normalized.lowercase();

        Ok(())
    }
}
|