captainspock committed on
Commit
f0c0712
·
verified ·
1 Parent(s): e695b00

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +22 -13
index.html CHANGED
@@ -293,7 +293,7 @@
293
  </div>
294
  </div>
295
  <script type="module">
296
- import { AutoModelForCausalLM, AutoTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
297
  // --- Game Constants ---
298
  const CONFIG = {
299
  width: 1000,
@@ -1147,21 +1147,30 @@
1147
  let ttft = 0;
1148
  let tps = 0;
1149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1150
  const output = await model.generate({
1151
  ...inputs,
1152
  max_new_tokens: 128,
1153
  do_sample: false,
1154
- callback_function: (tokens) => {
1155
- // Record TTFT on first token (before incrementing counter)
1156
- if (numTokens === 0) {
1157
- ttft = performance.now() - generationStart;
1158
- }
1159
- numTokens++;
1160
- // Calculate TPS after incrementing
1161
- if (numTokens > 0) {
1162
- tps = (numTokens / (performance.now() - generationStart)) * 1000;
1163
- }
1164
- }
1165
  });
1166
 
1167
  const generationEnd = performance.now();
@@ -1174,7 +1183,7 @@
1174
  document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
1175
  document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
1176
 
1177
- const decoded = tokenizer.decode(output.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
1178
  // 5. Parse Output
1179
  // Format: <start_function_call>call:add{...}<end_function_call>
1180
  const startTag = "<start_function_call>";
 
293
  </div>
294
  </div>
295
  <script type="module">
296
+ import { AutoModelForCausalLM, AutoTokenizer, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
297
  // --- Game Constants ---
298
  const CONFIG = {
299
  width: 1000,
 
1147
  let ttft = 0;
1148
  let tps = 0;
1149
 
1150
+ // Token callback for TTFT measurement
1151
+ const token_callback_function = (tokens) => {
1152
+ if (numTokens === 0) {
1153
+ ttft = performance.now() - generationStart;
1154
+ }
1155
+ numTokens++;
1156
+ if (numTokens > 0) {
1157
+ tps = (numTokens / (performance.now() - generationStart)) * 1000;
1158
+ }
1159
+ };
1160
+
1161
+ // Streamer for token-level callbacks
1162
+ const streamer = new TextStreamer(tokenizer, {
1163
+ skip_prompt: true,
1164
+ skip_special_tokens: false,
1165
+ token_callback_function
1166
+ });
1167
+
1168
  const output = await model.generate({
1169
  ...inputs,
1170
  max_new_tokens: 128,
1171
  do_sample: false,
1172
+ streamer,
1173
+ return_dict_in_generate: true
 
 
 
 
 
 
 
 
 
1174
  });
1175
 
1176
  const generationEnd = performance.now();
 
1183
  document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
1184
  document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
1185
 
1186
+ const decoded = tokenizer.decode(output.sequences.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
1187
  // 5. Parse Output
1188
  // Format: <start_function_call>call:add{...}<end_function_call>
1189
  const startTag = "<start_function_call>";