Update index.html: measure TTFT/TPS via a TextStreamer token callback
Browse files- index.html +22 -13
index.html
CHANGED
|
@@ -293,7 +293,7 @@
|
|
| 293 |
</div>
|
| 294 |
</div>
|
| 295 |
<script type="module">
|
| 296 |
-
import { AutoModelForCausalLM, AutoTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
|
| 297 |
// --- Game Constants ---
|
| 298 |
const CONFIG = {
|
| 299 |
width: 1000,
|
|
@@ -1147,21 +1147,30 @@
|
|
| 1147 |
let ttft = 0;
|
| 1148 |
let tps = 0;
|
| 1149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1150 |
const output = await model.generate({
|
| 1151 |
...inputs,
|
| 1152 |
max_new_tokens: 128,
|
| 1153 |
do_sample: false,
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
if (numTokens === 0) {
|
| 1157 |
-
ttft = performance.now() - generationStart;
|
| 1158 |
-
}
|
| 1159 |
-
numTokens++;
|
| 1160 |
-
// Calculate TPS after incrementing
|
| 1161 |
-
if (numTokens > 0) {
|
| 1162 |
-
tps = (numTokens / (performance.now() - generationStart)) * 1000;
|
| 1163 |
-
}
|
| 1164 |
-
}
|
| 1165 |
});
|
| 1166 |
|
| 1167 |
const generationEnd = performance.now();
|
|
@@ -1174,7 +1183,7 @@
|
|
| 1174 |
document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
|
| 1175 |
document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
|
| 1176 |
|
| 1177 |
-
const decoded = tokenizer.decode(output.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
|
| 1178 |
// 5. Parse Output
|
| 1179 |
// Format: <start_function_call>call:add{...}<end_function_call>
|
| 1180 |
const startTag = "<start_function_call>";
|
|
|
|
| 293 |
</div>
|
| 294 |
</div>
|
| 295 |
<script type="module">
|
| 296 |
+
import { AutoModelForCausalLM, AutoTokenizer, TextStreamer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
|
| 297 |
// --- Game Constants ---
|
| 298 |
const CONFIG = {
|
| 299 |
width: 1000,
|
|
|
|
| 1147 |
let ttft = 0;
|
| 1148 |
let tps = 0;
|
| 1149 |
|
| 1150 |
+
// Token callback: records TTFT on the first call and refreshes the running TPS on every call
|
| 1151 |
+
const token_callback_function = (tokens) => {
|
| 1152 |
+
if (numTokens === 0) {
|
| 1153 |
+
ttft = performance.now() - generationStart;
|
| 1154 |
+
}
|
| 1155 |
+
numTokens++;
|
| 1156 |
+
if (numTokens > 0) {
|
| 1157 |
+
tps = (numTokens / (performance.now() - generationStart)) * 1000;
|
| 1158 |
+
}
|
| 1159 |
+
};
|
| 1160 |
+
|
| 1161 |
+
// Streamer that passes tokens to the callback above (configured with skip_prompt: true)
|
| 1162 |
+
const streamer = new TextStreamer(tokenizer, {
|
| 1163 |
+
skip_prompt: true,
|
| 1164 |
+
skip_special_tokens: false,
|
| 1165 |
+
token_callback_function
|
| 1166 |
+
});
|
| 1167 |
+
|
| 1168 |
const output = await model.generate({
|
| 1169 |
...inputs,
|
| 1170 |
max_new_tokens: 128,
|
| 1171 |
do_sample: false,
|
| 1172 |
+
streamer,
|
| 1173 |
+
return_dict_in_generate: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1174 |
});
|
| 1175 |
|
| 1176 |
const generationEnd = performance.now();
|
|
|
|
| 1183 |
document.getElementById("metric-ttft").innerText = `${ttft.toFixed(0)}ms`;
|
| 1184 |
document.getElementById("metric-tps").innerText = `${tps.toFixed(1)} t/s`;
|
| 1185 |
|
| 1186 |
+
const decoded = tokenizer.decode(output.sequences.slice(0, [inputs.input_ids.dims[1], null]), { skip_special_tokens: false });
|
| 1187 |
// 5. Parse Output
|
| 1188 |
// Format: <start_function_call>call:add{...}<end_function_call>
|
| 1189 |
const startTag = "<start_function_call>";
|