2ira's picture
offline_compression_graph_code
72c0672 verified
use napi::bindgen_prelude::*;
use napi_derive::napi;
use tokenizers as tk;
use tokenizers::Encoding;
use crate::encoding::JsEncoding;
#[napi]
pub fn slice(s: String, begin_index: Option<i32>, end_index: Option<i32>) -> Result<String> {
let len = s.chars().count();
let get_index = |x: i32| -> usize {
if x >= 0 {
x as usize
} else {
(len as i32 + x) as usize
}
};
let begin_index = get_index(begin_index.unwrap_or(0));
let end_index = get_index(end_index.unwrap_or(len as i32));
if let Some(slice) = tk::tokenizer::normalizer::get_range_of(&s, begin_index..end_index) {
Ok(slice.to_string())
} else {
Err(Error::new(
Status::GenericFailure,
"Error in offsets".to_string(),
))
}
}
#[napi]
pub fn merge_encodings(
encodings: Vec<&JsEncoding>,
growing_offsets: Option<bool>,
) -> Result<JsEncoding> {
let growing_offsets = growing_offsets.unwrap_or(false);
let encodings: Vec<_> = encodings
.into_iter()
.map(|enc| enc.encoding.to_owned().unwrap())
.collect();
let new_encoding = Encoding::merge(encodings, growing_offsets);
let js_encoding = JsEncoding {
encoding: Some(new_encoding),
};
Ok(js_encoding)
}