ping98k committed
Commit 7376f34 · parent c67d118

Update K-Means clustering implementation to utilize new tokenizer and model for enhanced text generation and reasoning capabilities

Files changed (1): index.html (+44 -11)
index.html CHANGED

@@ -65,7 +65,7 @@
   </div>
   <script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
   <script type="module">
-  import { pipeline, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
+  import { pipeline, TextStreamer, AutoTokenizer, AutoModelForCausalLM } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0';
   import { UMAP } from "https://cdn.jsdelivr.net/npm/umap-js@1.4.0/+esm";

   const embed = await pipeline(
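The first hunk is only the import line: the CDN pin moves from transformers 3.5.2 to 3.6.0, and AutoTokenizer / AutoModelForCausalLM come in alongside pipeline. Everything below assumes WebGPU is available. As a minimal sketch (the navigator.gpu check, the wasm fallback, and the q4 dtype are assumptions, not part of this commit), a guard for browsers without WebGPU could look like:

    // Sketch only, not part of the commit.
    import { AutoModelForCausalLM } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.6.0";

    // navigator.gpu is the standard WebGPU entry point; when it is missing,
    // transformers.js can fall back to its WASM backend instead.
    const device = "gpu" in navigator ? "webgpu" : "wasm";
    const dtype = device === "webgpu" ? "q4f16" : "q4"; // q4 on WASM is an assumption

    const model = await AutoModelForCausalLM.from_pretrained(
      "onnx-community/Qwen3-0.6B-ONNX",
      { device, dtype },
    );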
@@ -73,11 +73,8 @@
     "onnx-community/Qwen3-Embedding-0.6B-ONNX",
     { device: "webgpu", dtype: "q4f16" },
   );
-  const generator = await pipeline(
-    "text-generation",
-    "onnx-community/Qwen3-0.6B-ONNX",
-    { device: "webgpu", dtype: "q4f16" },
-  );
+  const tokenizer = await AutoTokenizer.from_pretrained("onnx-community/Qwen3-0.6B-ONNX");
+  const model = await AutoModelForCausalLM.from_pretrained("onnx-community/Qwen3-0.6B-ONNX", { device: "webgpu", dtype: "q4f16" });

   const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";
   document.getElementById("run").onclick = async () => {
@@ -170,15 +167,51 @@
       const joined = clustered[c].join("\n");
       const messages = [
         { role: "system", content: "You are a helpful assistant." },
-        { role: "user", content: `/no_think Given the following texts, provide a short, descriptive name for this group:\n\n${joined}` }
+        { role: "user", content: `Given the following texts, provide a short, descriptive name for this group:\n\n${joined}` }
       ];
-      const output = await generator(messages, {
+      const reasonEnabled = false;
+      const inputs = tokenizer.apply_chat_template(messages, {
+        add_generation_prompt: true,
+        return_dict: true,
+        enable_thinking: reasonEnabled,
+      });
+      const [START_THINKING_TOKEN_ID, END_THINKING_TOKEN_ID] = tokenizer.encode("<think></think>", { add_special_tokens: false });
+      let state = "answering";
+      let startTime;
+      let numTokens = 0;
+      let tps;
+      const token_callback_function = (tokens) => {
+        startTime ??= performance.now();
+        if (numTokens++ > 0) {
+          tps = (numTokens / (performance.now() - startTime)) * 1000;
+        }
+        switch (Number(tokens[0])) {
+          case START_THINKING_TOKEN_ID:
+            state = "thinking";
+            break;
+          case END_THINKING_TOKEN_ID:
+            state = "answering";
+            break;
+        }
+        console.log(state, tokens, tokenizer.decode(tokens));
+      };
+      const callback_function = (output) => {
+        // You can update UI here if desired
+        console.log({ output, tps, numTokens, state });
+      };
+      const streamer = new TextStreamer(tokenizer, {
+        skip_prompt: true,
+        skip_special_tokens: true,
+        callback_function,
+        token_callback_function,
+      });
+      const outputTokens = await model.generate({
+        ...inputs,
         max_new_tokens: 32,
         do_sample: false,
-        streamer: new TextStreamer(generator.tokenizer, { skip_prompt: true, skip_special_tokens: true, enable_thinking: false })
+        streamer,
       });
-      let name = output[0]?.generated_text?.at(-1)?.content || `Cluster ${c + 1}`;
-      name = name.replace(/^[\s\n]+|[\s\n]+$/g, "");
+      let name = tokenizer.decode(outputTokens[0], { skip_special_tokens: false }).trim();
       clusterNames.push(name.length > 0 ? name : `Cluster ${c + 1}`);
     }
     progressBarInner.style.width = "100%";
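One behavior worth double-checking in the new decode step: in transformers.js, model.generate returns the prompt tokens followed by the continuation, so decoding outputTokens[0] with skip_special_tokens: false leaves the chat-template text and special tokens in name before the fallback kicks in. A hedged alternative (the Tensor.slice call mirrors the pattern in onnx-community model-card examples; treat the exact signature as an assumption) keeps only the newly generated tokens:

    // Sketch, not part of the commit: drop the prompt before decoding.
    const promptLength = inputs.input_ids.dims.at(-1); // token count of the prompt
    const newTokens = outputTokens.slice(null, [promptLength, null]);
    const name = tokenizer
      .batch_decode(newTokens, { skip_special_tokens: true })[0]
      .trim();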