// tool-research / app.js
// evalstate's picture
// evalstate HF Staff
// Upload 4 files
// 6ba7af7 verified
// Report payload read from the `REPORT_DATA` property of `window`.
// NOTE(review): presumably assigned by a separate script loaded before this one — confirm.
const data = window.REPORT_DATA;
/**
 * Hand-written groups of models that are shown as a single entry.
 *
 * Each row below is `[id, label, members, note]`; the rows are expanded into
 * `{ id, label, members, note }` objects, where `members` lists full model names.
 */
const groupDefinitions = [
  [
    "kimi-k2",
    "Kimi-K2.5 / K2.6",
    ["moonshotai/Kimi-K2.5", "moonshotai/Kimi-K2.6"],
    "Shared TypeScript-style tool declaration output across K2.5 and K2.6.",
  ],
  [
    "minimax-m2",
    "MiniMax-M2.5 / M2.7",
    ["MiniMaxAI/MiniMax-M2.5", "MiniMaxAI/MiniMax-M2.7"],
    "Same XML/tag-style tool rendering; model-name boilerplate differs.",
  ],
  [
    "qwen-3",
    "Qwen3.5 / Qwen3.6",
    ["Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3.6-35B-A3B"],
    "Shared XML/tag-style tool rendering in the sampled cases.",
  ],
].map(([id, title, members, note]) => ({ id, label: title, members, note }));
/** Lookup from a model's full name (`model.model`) to its entry in `data.models`. */
const modelByName = new Map();
for (const entry of data.models) {
  modelByName.set(entry.model, entry);
}

/** Every model name that is claimed by one of the hand-written group definitions. */
const groupedNames = new Set();
for (const { members } of groupDefinitions) {
  for (const memberName of members) {
    groupedNames.add(memberName);
  }
}

/**
 * All selectable groups, sorted by label.
 *
 * Models not claimed by a group definition become single-member groups. Every
 * group gets a `models` array holding the report entries found for its
 * members; groups for which no entry was found are dropped.
 */
const modelGroups = (() => {
  const singletons = [];
  for (const entry of data.models) {
    if (groupedNames.has(entry.model)) continue;
    singletons.push({
      id: entry.model,
      label: shortModel(entry.model),
      members: [entry.model],
      note: "",
    });
  }

  const resolved = [];
  for (const group of [...singletons, ...groupDefinitions]) {
    const models = group.members.map((name) => modelByName.get(name)).filter(Boolean);
    if (models.length) {
      resolved.push({ ...group, models });
    }
  }

  return resolved.sort((left, right) => left.label.localeCompare(right.label));
})();
/**
 * Mutable UI state read by the render functions and written by the event
 * handlers.
 */
const state = {
  // Active page; only the "#matrix" hash selects the matrix page at load time.
  page: location.hash !== "#matrix" ? "overview" : "matrix",
  // Id of the selected group (undefined when there are no groups).
  group: modelGroups[0]?.id,
  // Id of the selected test case.
  caseId: "minimal_string_required",
  // Key into `data.snippets` for the rendered prompt being shown ("" = none).
  examplePath: "",
  // Dialect filter for the model list; "all" disables filtering.
  dialect: "all",
  // Which pane is visible: "output" shows the rendered prompt, anything else the input schema.
  view: "output",
};
/** Case ids offered as tabs, in display order. */
const featuredCases = [
  "minimal_string_required",
  "nested_arrays_enums",
  "nullable_anyof_refs",
  "oneof_allof_constraints",
];

/** Short display labels keyed by case id. */
const caseLabels = Object.fromEntries([
  ["minimal_string_required", "Simple required string"],
  ["nested_arrays_enums", "Nested arrays + enums"],
  ["nullable_anyof_refs", "Refs + nullable anyOf"],
  ["oneof_allof_constraints", "oneOf/allOf stress"],
]);

/** Feature keys listed, in this order, by the feature panel. */
const featureNames = [
  "tool_name", "description", "parameters", "properties",
  "required", "array", "enum", "anyOf",
  "oneOf", "ref", "default", "additionalProperties",
];
/**
 * Shorthand for `document.getElementById`.
 *
 * @param {string} id - Element id to look up.
 * @returns {*} Whatever `document.getElementById(id)` returns.
 */
function $(id) {
  const element = document.getElementById(id);
  return element;
}
/**
 * Turn an identifier-like value into display text.
 *
 * @param {*} value - Value to format; `null`/`undefined` become an em dash.
 * @returns {string} The value as a string with every underscore replaced by a space.
 */
function label(value) {
  const text = value == null ? "—" : String(value);
  return text.split("_").join(" ");
}
/**
 * Strip everything up to and including the last "/" from a model name.
 *
 * @param {string} model - Full model name, e.g. "org/name".
 * @returns {string} The part after the last "/", or the whole string when there is none.
 */
function shortModel(model) {
  const lastSlash = model.lastIndexOf("/");
  return model.slice(lastSlash + 1);
}
/**
 * Extract the part of a model name before the first "/".
 *
 * @param {string} model - Full model name, e.g. "org/name".
 * @returns {string} The prefix before the first "/"; the whole name when that prefix is empty.
 */
function family(model) {
  const firstSlash = model.indexOf("/");
  const prefix = firstSlash === -1 ? model : model.slice(0, firstSlash);
  return prefix || model;
}
/**
 * Representative model for the current selection.
 *
 * @returns {object|undefined} The first model of the selected group, falling
 *   back to the first entry of `data.models`.
 */
function selectedModel() {
  // selectedGroup() returns undefined when there are no groups at all; the
  // optional chain lets the data.models fallback apply instead of throwing.
  return selectedGroup()?.models[0] || data.models[0];
}
/**
 * Resolve `state.group` to a group object.
 *
 * @returns {object|undefined} The group whose id equals `state.group`, or the
 *   first group when none matches.
 */
function selectedGroup() {
  for (const candidate of modelGroups) {
    if (candidate.id === state.group) return candidate;
  }
  return modelGroups[0];
}
/**
 * Look up a case record in `data.cases`.
 *
 * @param {string} id - Case id to search for.
 * @returns {object|undefined} The matching case, or the first case when no id matches.
 */
function caseInfo(id) {
  for (const entry of data.cases) {
    if (entry.id === id) return entry;
  }
  return data.cases[0];
}
/**
 * Decide whether a feature status counts as a positive result.
 *
 * @param {*} status - Status value from a feature packet.
 * @returns {boolean} True only for strings that start with "present" or "transformed".
 */
function isGood(status) {
  if (typeof status !== "string") return false;
  return ["present", "transformed"].some((prefix) => status.startsWith(prefix));
}
/**
 * Map a feature status to the CSS modifier used on its badge.
 *
 * @param {*} status - Status value from a feature packet.
 * @returns {string} "na" for "not_applicable", "good" when `isGood(status)` holds, otherwise "warn".
 */
function statusClass(status) {
  if (status === "not_applicable") {
    return "na";
  }
  if (isGood(status)) {
    return "good";
  }
  return "warn";
}
/**
 * Human-readable name for a dialect key.
 *
 * @param {*} dialect - Dialect key from a model record.
 * @returns {string} The display name for a known dialect, otherwise `label(dialect)`.
 */
function dialectLabel(dialect) {
  const labels = {
    typescript: "TypeScript-style declarations",
    xml: "XML/tag-style instructions",
    json: "JSON tool block",
    "custom-channel": "Custom channel syntax",
    "message-json": "Message functions JSON",
    builtin: "Builtin tool block",
    other: "Other",
  };
  // Own-property check: a bare `labels[dialect]` would also find inherited
  // members such as "constructor" or "toString" and return a function.
  const known = Object.prototype.hasOwnProperty.call(labels, dialect);
  return known ? labels[dialect] : label(dialect);
}
/**
 * Format a model's coverage as "supported/total".
 *
 * The counter used depends on `model.support_variant`; unrecognised variants
 * report the largest of the standard, custom and message counters.
 *
 * @param {object} model - Model record carrying the `*_supported` counters.
 * @returns {string} Coverage text such as "3/4".
 */
function supportCoverage(model) {
  const variant = model.support_variant;
  if (variant === "tools_argument") return `${model.standard_supported}/${model.case_count}`;
  if (variant === "message_functions_json") return `${model.message_supported}/${model.case_count}`;
  if (variant === "special_modes") return `${model.special_supported}/${model.special_total}`;
  if (variant?.startsWith("custom_")) return `${model.custom_supported}/${model.case_count}`;

  // Math.max yields NaN as soon as one argument is undefined, so a missing
  // counter is treated as zero rather than poisoning the result.
  const best = Math.max(
    model.standard_supported ?? 0,
    model.custom_supported ?? 0,
    model.message_supported ?? 0,
  );
  return `${best}/${model.case_count}`;
}
/**
 * Groups that pass the current dialect filter.
 *
 * @returns {object[]} A new array: every group when `state.dialect` is "all",
 *   otherwise the groups with at least one model of that dialect.
 */
function filteredModels() {
  const wanted = state.dialect;
  if (wanted === "all") {
    return [...modelGroups];
  }
  return modelGroups.filter(({ models }) => models.some(({ dialect }) => dialect === wanted));
}
/**
 * Rebuild the model list: one button per group that passes the dialect filter.
 *
 * The button for the selected group is marked "active" only while the matrix
 * page is showing. All data-derived values are HTML-escaped before being
 * written into the markup.
 */
function renderModelList() {
  const cards = filteredModels().map((group) => {
    const [model] = group.models;
    const isActive = state.page === "matrix" && group.id === state.group;
    const dialectClass = escapeHtml(model.dialect);
    return `
<button class="model-card ${dialectClass} ${isActive ? "active" : ""}" type="button" data-group="${escapeHtml(group.id)}">
<strong>${escapeHtml(group.label)}</strong>
<span>${escapeHtml(dialectLabel(model.dialect))}</span>
</button>
`;
  });
  $("model-list").innerHTML = cards.join("");
}
/**
 * Pick the rendered-prompt path to show first for a model.
 *
 * Preference order: the current case under the model's own support variant,
 * any variant of the current case, the first representative path, any variant
 * of any case, and finally the first key of `data.snippets`.
 *
 * @param {object} model - Model record with `cases` and `support_variant`.
 * @returns {string|undefined} The chosen path.
 */
function chooseDefaultExample(model) {
  const firstRendered = (variants) =>
    Object.values(variants).find((variant) => variant.rendered_path)?.rendered_path;

  const forCase = model.cases[state.caseId];

  const preferred = forCase?.[model.support_variant]?.rendered_path;
  if (preferred) return preferred;

  const sameCase = firstRendered(forCase || {});
  if (sameCase) return sameCase;

  if (model.representative_paths?.length) return model.representative_paths[0];

  for (const variants of Object.values(model.cases)) {
    const anyCase = firstRendered(variants);
    if (anyCase) return anyCase;
  }

  return Object.keys(data.snippets)[0];
}
/**
 * Rendered-prompt paths that may be shown for the current case.
 *
 * @param {object} model - Model record with `cases` and `support_variant`.
 * @returns {string[]} De-duplicated paths: the model's own support variant
 *   first (when it has a path), then every variant flagged `meaningful`.
 */
function availableExamples(model) {
  const variants = model.cases[state.caseId] || {};
  const unique = new Set();

  const preferred = variants[model.support_variant]?.rendered_path;
  if (preferred) unique.add(preferred);

  Object.values(variants)
    .filter((variant) => variant.meaningful && variant.rendered_path)
    .forEach((variant) => unique.add(variant.rendered_path));

  return [...unique];
}
/**
 * Fill in the header for the selected group using its first model.
 *
 * @param {object} group - Selected group; `group.models[0]` supplies the
 *   dialect, support path and coverage shown.
 */
function renderHeader(group) {
  const model = group.models[0];
  const isGrouped = group.models.length > 1;
  const dialectText = dialectLabel(model.dialect);

  // The family is the same whether or not the group has several members, so
  // no conditional is needed here.
  $("active-family").textContent = family(model.model);
  $("active-model").textContent = group.label;

  const summary = $("active-summary");
  summary.textContent =
    `${dialectText}. ${group.note || model.support_detail} This view shows one representative rendered prompt; evidence packets for each grouped model are kept below.`;
  summary.style.borderLeftColor = `var(--${model.dialect})`;

  $("active-dialect").textContent = dialectText;
  $("active-path").textContent = model.support_path;
  $("active-coverage").textContent = `${supportCoverage(model)} ${isGrouped ? "each" : "cases"}`;
}
/**
 * Make `state.examplePath` valid for the current case and render the output pane.
 *
 * With no available example the path is cleared, the "unsupported" message is
 * shown and the view mode is refreshed. Otherwise a path that is not among
 * the available ones is replaced by the first available one before rendering.
 *
 * @param {object} model - Representative model of the selected group.
 */
function renderExamples(model) {
  const candidates = availableExamples(model);

  if (candidates.length > 0) {
    if (!candidates.includes(state.examplePath)) {
      state.examplePath = candidates[0];
    }
    renderOutput(model);
    return;
  }

  state.examplePath = "";
  $("rendered-output").innerHTML = unsupportedMessage(model);
  renderViewMode();
}
/**
 * Render the case tabs plus the title, description and input schema of the
 * selected case.
 *
 * Only featured cases that exist on the model get a tab. Case ids and labels
 * are HTML-escaped before being written into the tab markup.
 *
 * @param {object} model - Representative model of the selected group.
 */
function renderCases(model) {
  const displayName = (id) => caseLabels[id] || caseInfo(id).label;

  const tabs = featuredCases
    .filter((id) => model.cases[id])
    .map((id) => {
      const supported = hasMeaningfulOutput(model, id);
      const title = supported
        ? "Rendered output available"
        : "Input example only; no meaningful rendered output for this model/path";
      const classes = `${id === state.caseId ? "active" : ""} ${supported ? "" : "unsupported"}`;
      return `<button type="button" class="${classes}" data-case="${escapeHtml(id)}" title="${title}">${escapeHtml(displayName(id))}</button>`;
    });

  $("case-tabs").innerHTML = tabs.join("");
  // textContent assignments need no escaping.
  $("case-description").textContent = caseInfo(state.caseId).description;
  $("case-title").textContent = displayName(state.caseId);
  renderInputSchema();
}
/**
 * Report whether a case produced usable output for a model.
 *
 * @param {object} model - Model record with a `cases` map.
 * @param {string} caseId - Case to inspect.
 * @returns {boolean} True when some variant of the case is flagged
 *   `meaningful` and has a `rendered_path`.
 */
function hasMeaningfulOutput(model, caseId) {
  const variants = Object.values(model.cases[caseId] || {});
  for (const variant of variants) {
    if (variant.meaningful && variant.rendered_path) {
      return true;
    }
  }
  return false;
}
/**
 * Show the selected case's tool definitions as highlighted, pretty-printed JSON.
 */
function renderInputSchema() {
  const { schema } = caseInfo(state.caseId);
  const pretty = JSON.stringify(schema.tools, null, 2);
  $("input-schema").innerHTML = highlightInputJson(pretty);
}
/**
 * Render the output pane for `state.examplePath`.
 *
 * An empty path shows the "unsupported" message; otherwise the snippet stored
 * under that path (or a fixed placeholder sentence) is highlighted and shown.
 *
 * @param {object} model - Model used to build the "unsupported" message.
 */
function renderOutput(model) {
  const pane = $("rendered-output");

  if (!state.examplePath) {
    pane.innerHTML = unsupportedMessage(model);
    return;
  }

  const snippet = data.snippets[state.examplePath] || "No rendered prompt available for this selection.";
  pane.innerHTML = highlightRendered(snippet, caseInfo(state.caseId));
}
/**
 * Build the HTML shown when the current case has no usable output for a model.
 *
 * @param {object} model - Model record; its support variant's `error` for the
 *   current case, when present, is included in the message.
 * @returns {string} HTML markup with all interpolated values escaped.
 */
function unsupportedMessage(model) {
  const caseId = state.caseId;
  const failure = model.cases[caseId]?.[model.support_variant]?.error;
  const reason = failure ? ` Renderer error: ${escapeHtml(failure)}.` : "";
  const caseName = escapeHtml(caseLabels[caseId] || caseInfo(caseId).label);
  const pathName = escapeHtml(model.support_path);
  return `<span class="empty-render">No meaningful model-visible output was produced for <strong>${caseName}</strong> using ${pathName}.${reason} Use <strong>Tool Definition</strong> to inspect the JSON schema that failed this renderer.</span>`;
}
/**
 * Show either the rendered output or the input schema, according to
 * `state.view`, and update every `[data-view]` button to match.
 */
function renderViewMode() {
  const showOutput = state.view === "output";
  $("rendered-output").hidden = !showOutput;
  $("input-schema").hidden = showOutput;

  document.querySelectorAll("[data-view]").forEach((tab) => {
    const isCurrent = tab.dataset.view === state.view;
    tab.classList.toggle("active", isCurrent);
    tab.setAttribute("aria-selected", String(isCurrent));
  });
}
/**
 * Toggle between the overview panel and the matrix view according to
 * `state.page`, and bring the URL hash in line via `history.replaceState`.
 */
function renderPage() {
  const onOverview = state.page === "overview";
  const wantedHash = onOverview ? "#overview" : "#matrix";

  $("overview-panel").hidden = !onOverview;
  $("matrix-view").hidden = onOverview;
  $("overview-link").classList.toggle("active", onOverview);

  if (location.hash === wantedHash) return;
  history.replaceState(null, "", wantedHash);
}
/**
 * Escape JSON text for HTML and wrap recognised schema keys in a
 * `schema-token` span.
 *
 * @param {string} text - JSON source text.
 * @returns {string} HTML-safe text with quoted schema keys highlighted.
 */
function highlightInputJson(text) {
  // Runs on the escaped text, so quotes appear as the &quot; entity.
  const schemaKey =
    /(&quot;(name|description|parameters|properties|required|type|enum|items|anyOf|oneOf|allOf|\$defs|\$ref|default|additionalProperties|minimum)&quot;)/g;
  const wrap = (match) => `<span class="schema-token">${match}</span>`;
  return escapeHtml(text).replace(schemaKey, wrap);
}
/**
 * Highlight a rendered prompt using the file's fixed boilerplate pattern.
 *
 * @param {string} text - Rendered prompt text.
 * @param {object} item - Case record handed through to `highlightRenderedTokens`.
 * @returns {string} Whatever `highlightRenderedTokens` returns.
 */
function highlightRendered(text, item) {
  return highlightRenderedTokens(
    text,
    item,
    /knowledge cutoff|current date|general guidelines|multimodal|you are |system_prompt|reasoning mode|today date|user<|<\|user|accurately answer|be very attentive/i,
  );
}
/**
 * Escape a rendered prompt for HTML and decorate it with highlight spans.
 *
 * Three layers are applied: tool names from `item.tool_names` get a
 * `tool-token` span, a fixed set of schema keywords gets a
 * `schema-token-output` span, and each line matching `boilerplatePattern` is
 * wrapped in a `boilerplate-token` span.
 *
 * @param {string} text - Raw rendered prompt.
 * @param {object} item - Case record; `tool_names` is optional.
 * @param {RegExp} boilerplatePattern - Pattern identifying boilerplate lines,
 *   written against the raw (unescaped) text.
 * @returns {string} HTML markup.
 */
function highlightRenderedTokens(text, item, boilerplatePattern) {
  const source = String(text);
  let escaped = escapeHtml(source);

  // All tool names are matched in one pass, longest first. Replacing them one
  // after another could rewrite markup injected for an earlier name, and an
  // empty name would splice a span between every character.
  const names = [...new Set((item.tool_names || []).map((name) => escapeHtml(name)))]
    .filter((name) => name.length > 0)
    .sort((a, b) => b.length - a.length);
  if (names.length) {
    const alternation = names
      .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join("|");
    escaped = escaped.replace(new RegExp(alternation, "g"), '<span class="tool-token">$&</span>');
  }

  escaped = escaped.replace(
    /\b(parameters|properties|required|enum|anyOf|oneOf|allOf|additionalProperties|tool_call|AVAILABLE_TOOLS|namespace functions|DSML|tool_declare)\b/g,
    '<span class="schema-token-output">$1</span>',
  );

  // Boilerplate is detected on the raw line. The escaped line has "<" turned
  // into "&lt;" and carries injected span tags, so pattern parts such as
  // "user<" could never match real text but could match the markup.
  // Escaping and span insertion never add or remove newlines, so raw and
  // escaped lines stay aligned by index.
  const rawLines = source.split("\n");
  return escaped
    .split("\n")
    .map((line, index) => {
      boilerplatePattern.lastIndex = 0; // keep global/sticky patterns stateless
      return boilerplatePattern.test(rawLines[index] ?? "")
        ? `<span class="boilerplate-token">${line}</span>`
        : line;
    })
    .join("\n");
}
/**
 * Choose the variant record to describe for the current case.
 *
 * @param {object} model - Model record with `cases` and `support_variant`.
 * @returns {object|undefined} The model's own support variant if present,
 *   else the first variant flagged `meaningful`, else the first variant.
 */
function currentVariant(model) {
  const variants = model.cases[state.caseId] || {};

  const preferred = variants[model.support_variant];
  if (preferred) return preferred;

  const candidates = Object.values(variants);
  return candidates.find((variant) => variant.meaningful) || candidates[0];
}
/**
 * Render one row per known feature with its status for the current case.
 *
 * Features missing from the variant's `features` map are shown as
 * "not_applicable". Labels and status text are HTML-escaped because the
 * status comes from report data.
 *
 * @param {object} model - Representative model of the selected group.
 */
function renderFeatures(model) {
  const target = $("feature-list");
  const variant = currentVariant(model);
  if (!variant) {
    target.innerHTML = `<p class="muted">No feature packet for this case.</p>`;
    return;
  }

  const rows = featureNames.map((name) => {
    const status = variant.features?.[name] || "not_applicable";
    return `
<div class="feature-row">
<strong>${escapeHtml(label(name))}</strong>
<span class="status ${statusClass(status)}">${escapeHtml(label(status))}</span>
</div>
`;
  });
  target.innerHTML = rows.join("");
}
/**
 * Render the model's special-mode probes (as buttons carrying a `data-path`)
 * followed by its highlighted notes.
 *
 * Every data-derived value is HTML-escaped, including the path placed in the
 * `data-path` attribute.
 *
 * @param {object} model - Model record; `special_modes` and
 *   `highlighted_notes` are both optional.
 */
function renderSpecialModes(model) {
  const modes = model.special_modes || [];
  const notes = model.highlighted_notes || [];
  const target = $("special-list");

  if (!modes.length && !notes.length) {
    target.innerHTML = `<p class="muted">No special/builtin probes or highlighted special-tool notes in this evidence set.</p>`;
    return;
  }

  const modeHtml = modes
    .map((mode) => {
      const tone = mode.meaningful && mode.counts_as_special_tool ? "good" : "warn";
      const kind = escapeHtml(label(mode.category || mode.style_family));
      const tokens = escapeHtml(mode.token_count ?? "—");
      return `
<button class="special-row" type="button" data-path="${escapeHtml(mode.rendered_path || "")}">
<strong>${escapeHtml(label(mode.probe_id))}</strong>
<span class="status ${tone}">${kind} · ${tokens} tokens</span>
<small>${escapeHtml(mode.description || "")}</small>
</button>
`;
    })
    .join("");

  const notesHtml = notes
    .map((note) => `
<div class="special-row note-row">
<strong>${escapeHtml(note.title)}</strong>
<span class="status warn">${escapeHtml(note.category)}</span>
<small>${escapeHtml(note.detail)}</small>
${note.evidence ? `<small class="evidence-path">${escapeHtml(note.evidence)}</small>` : ""}
</div>
`)
    .join("");

  target.innerHTML = modeHtml + notesHtml;
}
/**
 * Pick the text used to display an evidence item.
 *
 * @param {object} item - Evidence record.
 * @returns {*} The first truthy value among `snippet`, `observation` and
 *   `artifact`; otherwise the item pretty-printed as JSON.
 */
function stringifyEvidence(item) {
  for (const field of ["snippet", "observation", "artifact"]) {
    if (item[field]) return item[field];
  }
  return JSON.stringify(item, null, 2);
}
/**
 * Render the findings packet of every model in the group.
 *
 * Each model gets a section headed by its name; its claims become
 * `<details>` elements with the first one open. Model names, evidence class,
 * confidence, claim text and evidence are all HTML-escaped.
 *
 * @param {object} group - Selected group; `group.models` is iterated.
 */
function renderClaims(group) {
  const renderClaim = (claim, index) => {
    const evidenceHtml = (claim.evidence || [])
      .map((item) => `<div class="evidence-item">${escapeHtml(stringifyEvidence(item))}</div>`)
      .join("");
    return `
<details class="claim" ${index === 0 ? "open" : ""}>
<summary>
<strong>${escapeHtml(label(claim.evidence_class))} · ${escapeHtml(claim.confidence || "confidence unknown")}</strong>
<p>${escapeHtml(claim.claim)}</p>
</summary>
<div class="claim-body">
${evidenceHtml}
</div>
</details>
`;
  };

  const sections = group.models.map((model) => {
    const heading = `<h4>${escapeHtml(model.model)}</h4>`;
    const claims = model.findings_packet?.claims || [];
    if (!claims.length) {
      return `<section class="packet-group">${heading}<p class="muted">No findings JSON packet found for this model.</p></section>`;
    }
    return `
<section class="packet-group">
${heading}
${claims.map(renderClaim).join("")}
</section>
`;
  });

  $("claims").innerHTML = sections.join("");
}
/**
 * Escape a value for insertion into HTML text or a double-quoted attribute.
 *
 * @param {*} text - Value to escape; converted with `String()` first.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml(text) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;" };
  return String(text).replace(/[&<>"]/g, (char) => entities[char]);
}
/**
 * Redraw the whole UI from `state`.
 *
 * Group-level renderers receive the selected group; model-level renderers
 * receive that group's first model.
 */
function renderAll() {
  const group = selectedGroup();
  const [model] = group.models;

  // Page chrome and navigation.
  renderPage();
  renderModelList();
  renderHeader(group);

  // Panels for the representative model.
  renderExamples(model);
  renderCases(model);
  renderFeatures(model);
  renderSpecialModes(model);

  // Evidence for every model in the group, then pane visibility.
  renderClaims(group);
  renderViewMode();
}
/**
 * Attach all event listeners: the overview link, the delegated click handlers
 * on the filter, model list, case tabs, view tabs and special list, and the
 * window `hashchange` listener.
 */
function bindEvents() {
  // Delegated click: runs `handler` with the closest ancestor of the click
  // target that matches `selector`; clicks outside any match are ignored.
  const onDelegatedClick = (element, selector, handler) => {
    element.addEventListener("click", (event) => {
      const button = event.target.closest(selector);
      if (!button) return;
      handler(button);
    });
  };

  $("overview-link").addEventListener("click", () => {
    state.page = "overview";
    renderAll();
  });

  onDelegatedClick($("dialect-filter"), "[data-dialect]", (button) => {
    state.dialect = button.dataset.dialect;
    $("dialect-filter")
      .querySelectorAll("[data-dialect]")
      .forEach((item) => {
        item.classList.toggle("active", item.dataset.dialect === state.dialect);
      });

    const visible = filteredModels();
    const stillVisible = visible.some((group) => group.id === state.group);
    if (!visible.length || stillVisible) {
      // The selection survives the filter (or nothing is left): only the list changes.
      renderModelList();
      return;
    }

    // The selected group was filtered out; move to the first visible one.
    const [firstGroup] = visible;
    state.group = firstGroup.id;
    state.examplePath = chooseDefaultExample(firstGroup.models[0]);
    renderAll();
  });

  onDelegatedClick($("model-list"), "[data-group]", (button) => {
    state.page = "matrix";
    state.group = button.dataset.group;
    state.examplePath = chooseDefaultExample(selectedModel());
    renderAll();
  });

  onDelegatedClick($("case-tabs"), "[data-case]", (button) => {
    state.caseId = button.dataset.case;
    const model = selectedModel();
    const preferred = model.cases[state.caseId]?.[model.support_variant]?.rendered_path;
    if (preferred) state.examplePath = preferred;
    renderAll();
  });

  onDelegatedClick(document.querySelector(".view-tabs"), "[data-view]", (button) => {
    state.view = button.dataset.view;
    renderViewMode();
  });

  onDelegatedClick($("special-list"), "[data-path]", (button) => {
    // Rows without a rendered path carry an empty data-path and do nothing.
    if (!button.dataset.path) return;
    state.examplePath = button.dataset.path;
    renderOutput(selectedModel());
  });

  window.addEventListener("hashchange", () => {
    state.page = location.hash === "#matrix" ? "matrix" : "overview";
    renderAll();
  });
}
/**
 * Convert `data.overview.summary` from a small Markdown subset to HTML and
 * write it into the "exec-summary" element. Does nothing when there is no
 * summary.
 *
 * Supported constructs: `#`–`###` headings (rendered one level lower, with
 * the first level-1 heading replaced by a fixed title), a line wrapped in
 * underscores (rendered as the summary date), `- ` bullets, pipe tables with
 * a divider row, paragraphs, and inline `**bold**` / `` `code` ``. All text
 * is HTML-escaped before inline markup is applied.
 */
function renderMarkdownSummary() {
  const md = data.overview?.summary;
  if (!md) return;

  let titleRewritten = false;
  const lines = md.split("\n");
  const parts = [];
  let paragraph = [];
  let list = [];

  const inline = (text) =>
    escapeHtml(text)
      .replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>")
      .replace(/`([^`]+)`/g, "<code>$1</code>");

  const flushParagraph = () => {
    if (!paragraph.length) return;
    parts.push(`<p>${inline(paragraph.join(" "))}</p>`);
    paragraph = [];
  };

  const flushList = () => {
    if (!list.length) return;
    parts.push(`<ul>${list.map((item) => `<li>${inline(item)}</li>`).join("")}</ul>`);
    list = [];
  };

  const isTableDivider = (line) => /^\|\s*[-:| ]+\s*\|$/.test(line);

  // Strip the leading pipe and an optional trailing pipe before splitting.
  // Slicing off the last split element unconditionally would drop the final
  // cell of any row written without a closing pipe.
  const cells = (line) =>
    line
      .replace(/^\|/, "")
      .replace(/\|\s*$/, "")
      .split("|")
      .map((cell) => inline(cell.trim()));

  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index].trim();

    if (!line) {
      flushParagraph();
      flushList();
      continue;
    }

    // Table: a pipe line immediately followed by a divider row.
    if (line.startsWith("|") && lines[index + 1] && isTableDivider(lines[index + 1].trim())) {
      flushParagraph();
      flushList();
      const headers = cells(line).map((cell) => `<th>${cell}</th>`).join("");
      index += 2; // skip the header and divider rows
      const rows = [];
      while (index < lines.length && lines[index].trim().startsWith("|")) {
        rows.push(`<tr>${cells(lines[index].trim()).map((cell) => `<td>${cell}</td>`).join("")}</tr>`);
        index += 1;
      }
      index -= 1; // the loop header advances past the last table row
      parts.push(`<div class="table-wrap"><table><thead><tr>${headers}</tr></thead><tbody>${rows.join("")}</tbody></table></div>`);
      continue;
    }

    const heading = line.match(/^(#{1,3})\s+(.+)$/);
    if (heading) {
      flushParagraph();
      flushList();
      const level = heading[1].length;
      let text = heading[2];
      if (level === 1 && !titleRewritten) {
        text = "Tool Schema Rendering Atlas - Summary";
        titleRewritten = true;
      }
      const tag = level === 1 ? "h2" : level === 2 ? "h3" : "h4";
      parts.push(`<${tag}>${inline(text)}</${tag}>`);
      continue;
    }

    const date = line.match(/^_(.+)_$/);
    if (date) {
      flushParagraph();
      flushList();
      parts.push(`<p class="summary-date">${inline(date[1])}</p>`);
      continue;
    }

    const bullet = line.match(/^-\s+(.+)$/);
    if (bullet) {
      flushParagraph();
      list.push(bullet[1]);
      continue;
    }

    flushList();
    paragraph.push(line);
  }

  flushParagraph();
  flushList();

  const html = parts.join("");
  $("exec-summary").innerHTML = `<p class="eyebrow">Executive summary</p>${html}`;
}
// Startup sequence: render the summary, attach the event listeners, then draw
// the initial view from `state`.
renderMarkdownSummary();
bindEvents();
renderAll();