captainspock committed on
Commit
f0c0712
·
verified ·
1 Parent(s): e695b00

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +22 -13
index.html CHANGED
@@ -293,7 +293,7 @@
293
  </div>
294
  </div>
295
  <script type="module">
296
- import { AutoModelForCausalLM, AutoTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
297
  // --- Game Constants ---
298
  const CONFIG = {
299
  width: 1000,
@@ -1147,21 +1147,30 @@
1147
  let ttft = 0;
1148
  let tps = 0;
1149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1150
  const output = await model.generate({
1151
  ...inputs,
1152
  max_new_tokens: 128,
1153
  do_sample: false,
1154
- callback_function: (tokens) => {
1155
- // Record TTFT on first token (before incrementing counter)
1156
- if (numTokens === 0) {
1157
- ttft = performance.now() - generationStart;
1158
- }
1159
- numTokens++;
1160
- // Calculate TPS after incrementing
1161
- if (numTokens > 0) {
1162
- tps = (numTokens / (performance.now() - generationStart)) * 1000;
1163
- }
1164
- }
1165
  });
1166
 
1167
  const generationEnd = performance.now();
@@ -1174,7 +1183,7 @@
1174
  document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
1175
  document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
1176
 
1177
- const decoded = tokenizer.decode(output.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
1178
  // 5. Parse Output
1179
  // Format: <start_function_call>call:add{...}<end_function_call>
1180
  const startTag = "<start_function_call>";
 
293
  </div>
294
  </div>
295
  <script type="module">
296
+ import { AutoModelForCausalLM, AutoTokenizer, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
297
  // --- Game Constants ---
298
  const CONFIG = {
299
  width: 1000,
 
1147
  let ttft = 0;
1148
  let tps = 0;
1149
 
1150
+ // Token callback for TTFT measurement
1151
+ const token_callback_function = (tokens) => {
1152
+ if (numTokens === 0) {
1153
+ ttft = performance.now() - generationStart;
1154
+ }
1155
+ numTokens++;
1156
+ if (numTokens > 0) {
1157
+ tps = (numTokens / (performance.now() - generationStart)) * 1000;
1158
+ }
1159
+ };
1160
+
1161
+ // Streamer for token-level callbacks
1162
+ const streamer = new TextStreamer(tokenizer, {
1163
+ skip_prompt: true,
1164
+ skip_special_tokens: false,
1165
+ token_callback_function
1166
+ });
1167
+
1168
  const output = await model.generate({
1169
  ...inputs,
1170
  max_new_tokens: 128,
1171
  do_sample: false,
1172
+ streamer,
1173
+ return_dict_in_generate: true
 
 
 
 
 
 
 
 
 
1174
  });
1175
 
1176
  const generationEnd = performance.now();
 
1183
  document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
1184
  document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
1185
 
1186
+ const decoded = tokenizer.decode(output.sequences.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
1187
  // 5. Parse Output
1188
  // Format: <start_function_call>call:add{...}<end_function_call>
1189
  const startTag = "<start_function_call>";