pika-tokenizer / script.js
qikp's picture
Update script.js
deaaa92 verified
import { AutoTokenizer } from "https://nobsdelivr.private.coffee/npm/@huggingface/transformers@3.8.1";
// Background colors cycled through, in shuffled order, for consecutive tokens.
const TOKEN_COLORS = [
  "#ff0000",
  "#ff8700",
  "#ffd300",
  "#deff0a",
  "#a1ff0a",
  "#0aff99",
  "#0aefff",
  "#147df5",
  "#580aff",
  "#be0aff",
];

/**
 * Returns a shuffled copy of `items` using the Fisher–Yates algorithm.
 *
 * Sorting with a random comparator (`sort(() => Math.random() - 0.5)`) gives
 * the sort an inconsistent ordering, so the result is engine-dependent and
 * not uniformly distributed; this helper avoids that and leaves the input
 * array untouched.
 *
 * @param {Array} items - Values to shuffle.
 * @returns {Array} A new array holding the same values in random order.
 */
function shuffled(items) {
  const result = [...items];
  for (let i = result.length - 1; i > 0; i -= 1) {
    const j = Math.floor(Math.random() * (i + 1));
    [result[i], result[j]] = [result[j], result[i]];
  }
  return result;
}

const tokenizeButton = document.querySelector("#tokenize");

/**
 * Click handler for the "#tokenize" button.
 *
 * Loads the tokenizer named in the "#model" field, encodes the contents of
 * the "#text" field, writes the token count into "#totalLength", and fills
 * the "#output" frame's document body with one colored <span> per token
 * (decoded text as content, token id as tooltip).
 */
tokenizeButton.addEventListener("click", async () => {
  // Block repeated clicks while the tokenizer is being loaded and run.
  tokenizeButton.disabled = true;
  try {
    const modelName = document.querySelector("#model").value;
    const tokenizer = await AutoTokenizer.from_pretrained(modelName);

    // Strip every whitespace character except the plain space before encoding.
    const text = document.querySelector("#text").value.replace(/[^\S ]/g, "");
    const tokenized = tokenizer.encode(text);

    document.querySelector("#totalLength").textContent = `Total length: ${tokenized.length}`;

    // Reshuffled on every run so the palette order varies between clicks.
    const colors = shuffled(TOKEN_COLORS);

    // Build all spans off-document, then swap them in with a single DOM update.
    const fragment = document.createDocumentFragment();
    tokenized.forEach((token, index) => {
      const span = document.createElement("span");
      // Each token is decoded on its own so it gets its own span.
      span.textContent = tokenizer.decode([token]);
      span.style.backgroundColor = colors[index % colors.length];
      span.style.fontFamily = "monospace";
      span.title = String(token);
      fragment.appendChild(span);
    });

    const outputBody = document.querySelector("#output").contentDocument.body;
    // Replaces whatever a previous run rendered with the new token spans.
    outputBody.replaceChildren(fragment);
  } catch (error) {
    // Surface the failure (e.g. unknown model id, network error) in the console.
    console.error("Tokenization failed:", error);
  } finally {
    // Always re-enable the button, even when loading or tokenizing failed;
    // otherwise a single error would leave it disabled until page reload.
    tokenizeButton.disabled = false;
  }
});