Spaces:
Running on Zero
Running on Zero
File size: 453 Bytes
6ea3105 ebc3bf5 6ea3105 ebc3bf5 6ea3105 ebc3bf5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | from models.model_loader import get_tokenizer_only
def count_tokens(text: str) -> int:
    """Return the number of token ids the tokenizer produces for ``text``.

    The text is encoded with ``add_special_tokens=False`` and the length of
    the resulting id sequence is returned.
    """
    token_ids = get_tokenizer_only().encode(text, add_special_tokens=False)
    return len(token_ids)
def get_token_strings(text: str) -> list[str]:
    """Return one decoded string per token of ``text``, in encoding order.

    The text is encoded with ``add_special_tokens=False``; each resulting id
    is then decoded on its own, as a single-element list, so the output has
    exactly one entry per token id.
    """
    tok = get_tokenizer_only()
    pieces = []
    for token_id in tok.encode(text, add_special_tokens=False):
        # Decode each id individually so the per-token boundaries are kept.
        pieces.append(tok.decode([token_id]))
    return pieces
|