File size: 1,552 Bytes
72c0672 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | use crate::tokenizer::{NormalizedString, Normalizer, Result};
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub struct Prepend {
pub prepend: String,
}
impl Prepend {
pub fn new(prepend: String) -> Self {
Self { prepend }
}
}
impl Normalizer for Prepend {
/// Strip the normalized string inplace
fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
if !normalized.is_empty() {
normalized.prepend(&self.prepend);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_prepend() {
let original = "Hello";
let normalized = "▁Hello";
assert_ne!(original, normalized);
let mut n = NormalizedString::from(original);
let prepend = Prepend::new("▁".to_string());
prepend.normalize(&mut n).unwrap();
assert_eq!(&n.get(), &normalized);
assert_eq!(
n,
NormalizedString::new(
original.to_string(),
normalized.to_string(),
vec![
(0, 1),
(0, 1),
(0, 1),
(0, 1),
(1, 2),
(2, 3),
(3, 4),
(4, 5)
],
0
)
);
assert_eq!(
n.alignments_original(),
vec![(0, 4), (4, 5), (5, 6), (6, 7), (7, 8)]
);
}
}
|