| use crate::tokenizer::{NormalizedString, Normalizer, Result}; |
| use serde::{Deserialize, Serialize}; |
|
|
| #[derive(Clone, Debug, Deserialize, Serialize)] |
| #[serde(tag = "type")] |
| pub struct Prepend { |
| pub prepend: String, |
| } |
|
|
| impl Prepend { |
| pub fn new(prepend: String) -> Self { |
| Self { prepend } |
| } |
| } |
|
|
| impl Normalizer for Prepend { |
| |
| fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> { |
| if !normalized.is_empty() { |
| normalized.prepend(&self.prepend); |
| } |
| Ok(()) |
| } |
| } |
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Non-empty input gets the prefix, and the alignments reported by
    /// `NormalizedString` match the values pinned below.
    #[test]
    fn test_prepend() {
        let original = "Hello";
        let normalized = "▁Hello";
        assert_ne!(original, normalized);

        let mut n = NormalizedString::from(original);
        let prepend = Prepend::new("▁".to_string());
        prepend.normalize(&mut n).unwrap();
        assert_eq!(n.get(), normalized);

        // `▁` (U+2581) takes 3 bytes in UTF-8, so the normalized text is
        // 8 bytes long and the expected alignment list has 8 entries. The
        // first four all point at (0, 1), i.e. the span of `H` in the original.
        let expected_alignments = vec![
            (0, 1),
            (0, 1),
            (0, 1),
            (0, 1),
            (1, 2),
            (2, 3),
            (3, 4),
            (4, 5),
        ];
        assert_eq!(
            n,
            NormalizedString::new(
                original.to_string(),
                normalized.to_string(),
                expected_alignments,
                0
            )
        );

        // Seen from the original side: `H` is expected to cover the prefix
        // plus itself (0..4), and each remaining letter covers one byte.
        assert_eq!(
            n.alignments_original(),
            vec![(0, 4), (4, 5), (5, 6), (6, 7), (7, 8)]
        );
    }

    /// Empty input must stay empty: `normalize` skips the prepend entirely
    /// when `is_empty()` is true.
    #[test]
    fn test_prepend_empty_input() {
        let mut n = NormalizedString::from("");
        let prepend = Prepend::new("▁".to_string());
        prepend.normalize(&mut n).unwrap();
        assert!(n.is_empty());
        assert_eq!(n.get(), "");
    }
}
|
|