<!-- Hosting-page residue (not part of the presentation):
sutama's picture · Upload CabinLavatoryPrediction LoRA adapter, checkpoint, code, and evaluation artifacts · e74a796 verified
-->
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>MWave Design Review Presentation</title>
<style>
/* Design tokens: brand colors, neutral tones and the shared hairline border color. */
:root {
  --boeing:#0039A6; --boeing2:#0067B1; --red:#C8102E; --black:#080A0F;
  --ink:#111827; --steel:#5B6472; --mist:#E7ECF3; --light:#F7F9FC; --line:rgba(8,10,15,.14);
}

* { box-sizing:border-box; }

/* Page canvas: two soft radial highlights layered over a diagonal gradient. */
body {
  margin:0; color:var(--ink);
  font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif;
  background:radial-gradient(circle at 10% 0%, rgba(0,103,177,.22), transparent 28%),
    radial-gradient(circle at 86% 6%, rgba(200,16,46,.12), transparent 24%),
    linear-gradient(135deg,#eef4fb,#ffffff 44%,#e9eef6);
}
a { color:var(--boeing); font-weight:800; }
.deck { max-width:1480px; margin:0 auto; padding:28px; }

/* Hero banner. */
.hero {
  min-height:620px; color:#fff; border-radius:36px; padding:52px;
  background:linear-gradient(118deg,rgba(3,11,29,.98),rgba(0,57,166,.94) 54%,rgba(0,103,177,.72));
  position:relative; overflow:hidden;
}
/* Decorative ring in the top-right corner. It is painted above the un-positioned
   hero text, so pointer events are disabled to keep that text selectable. */
.hero:after {
  content:""; position:absolute; right:-120px; top:-160px; width:640px; height:640px;
  border-radius:50%; border:1px solid rgba(255,255,255,.18);
  box-shadow:inset 0 0 0 82px rgba(255,255,255,.05), inset 0 0 0 180px rgba(255,255,255,.035);
  pointer-events:none;
}
.kicker { letter-spacing:.18em; text-transform:uppercase; font-size:12px; color:#dbeafe; }

/* Headings. */
h1 { font-size:clamp(46px,7vw,100px); line-height:.92; margin:26px 0 18px; max-width:1050px; letter-spacing:-.06em; }
h2 { font-size:38px; letter-spacing:-.04em; margin:0 0 18px; color:var(--black); }
h3 { margin:0 0 8px; font-size:22px; color:var(--black); }

.hero p { color:#eaf1fb; font-size:21px; line-height:1.65; max-width:980px; }
.hero-grid { display:grid; grid-template-columns:repeat(4,1fr); gap:18px; position:relative; z-index:1; margin-top:42px; }
/* The -webkit- prefixed declaration is a fallback for Safari versions that lack the unprefixed property. */
.hero-card { padding:22px; border:1px solid rgba(255,255,255,.22); background:rgba(255,255,255,.1); border-radius:24px; -webkit-backdrop-filter:blur(16px); backdrop-filter:blur(16px); }
.big { font-size:38px; font-weight:900; letter-spacing:-.04em; }
.label { color:#cbd5e1; font-size:13px; margin-top:6px; }

/* Sticky section navigation. */
nav { position:sticky; top:0; z-index:20; display:flex; gap:8px; flex-wrap:wrap; margin:18px 0; padding:12px; border:1px solid var(--line); border-radius:22px; background:rgba(255,255,255,.88); -webkit-backdrop-filter:blur(18px); backdrop-filter:blur(18px); }
nav button, .pill { border:0; border-radius:999px; padding:11px 15px; background:#eef3fb; color:#0f172a; font-weight:900; cursor:pointer; }
nav button:hover, .pill.active { background:var(--boeing); color:#fff; }

/* Content panels. scroll-margin-top keeps a section heading clear of the sticky
   nav when the section is scrolled into view. */
section { background:rgba(255,255,255,.92); border:1px solid var(--line); border-radius:30px; padding:32px; margin-top:24px; box-shadow:0 16px 44px rgba(17,24,39,.06); scroll-margin-top:96px; }

/* Generic grids and cards. */
.grid { display:grid; gap:18px; }
.grid-2 { grid-template-columns:repeat(2,minmax(0,1fr)); }
.grid-3 { grid-template-columns:repeat(3,minmax(0,1fr)); }
.grid-4 { grid-template-columns:repeat(4,minmax(0,1fr)); }
.card { border:1px solid var(--line); background:linear-gradient(180deg,#fff,#f6f9fd); border-radius:24px; padding:22px; }
.card strong { color:var(--boeing); }
.muted { color:var(--steel); line-height:1.66; }

/* Numbered story-line rows. */
.story { display:grid; grid-template-columns:72px 1fr; gap:18px; margin-bottom:18px; }
.num { width:54px; height:54px; border-radius:50%; display:grid; place-items:center; background:var(--black); color:#fff; font-weight:900; font-size:22px; }

/* Data tables. */
table { width:100%; border-collapse:collapse; overflow:hidden; border-radius:18px; background:#fff; }
th,td { padding:13px 14px; border-bottom:1px solid #e5e7eb; text-align:left; vertical-align:top; }
th { background:#08111f; color:#fff; font-size:13px; }

/* Embedded SVG chart cards. */
.chart-grid { display:grid; grid-template-columns:1fr; gap:22px; }
.chart-card { border:1px solid var(--line); background:#fff; border-radius:26px; padding:22px; }
.chart-card object { width:100%; height:620px; border:0; background:#fff; border-radius:18px; }
.chart-card p { margin:0 0 12px; color:var(--steel); }

/* Metadata explorer: selector column plus JSON output pane. */
.metadata-panel { display:grid; grid-template-columns:360px 1fr; gap:18px; }
select, textarea { width:100%; border:1px solid #d6dee9; border-radius:16px; padding:12px; font:inherit; background:#fff; }
pre { margin:0; white-space:pre-wrap; background:#05070c; color:#d1fae5; border-radius:18px; padding:18px; min-height:360px; overflow:auto; font-family:"Cascadia Mono","SFMono-Regular",monospace; font-size:13px; }

.callout { border-left:6px solid var(--red); background:#fff5f6; padding:18px; border-radius:18px; }
.files { columns:2; }
.footer { text-align:center; color:#64748b; padding:28px; }

/* Print: drop interactive-only regions and avoid splitting panels across pages. */
@media print {
  nav, .metadata-panel, .no-print { display:none !important; }
  body { background:#fff; }
  section, .hero { break-inside:avoid; page-break-inside:avoid; box-shadow:none; }
  .chart-card object { height:520px; }
}

/* Narrow viewports: collapse every multi-column layout to a single column. */
@media(max-width:980px) {
  .hero-grid,.grid-2,.grid-3,.grid-4,.metadata-panel { grid-template-columns:1fr; }
  .deck { padding:14px; }
  .hero { padding:28px; }
  .files { columns:1; }
}
</style>
</head>
<body>
<div class="deck">
<header class="hero">
<div class="kicker">MWave Radar LLM · Aircraft Lavatory Design Review</div>
<h1>从毫米波雷达到机载厕所状态智能:一次可验证的 LLM 微调实验</h1>
<p>这份汇报面向设计专家评审,重点不是展示训练日志,而是讲清楚:为什么需要模型、模型学到了什么、哪些结果能支持设计决策、哪些风险需要在机载部署前继续验证。</p>
<div class="hero-grid">
<div class="hero-card"><div class="big">25,130</div><div class="label">训练结构样本</div></div>
<div class="hero-card"><div class="big">4,030</div><div class="label">验证结构样本</div></div>
<div class="hero-card"><div class="big">25,130</div><div class="label">QA 训练样本</div></div>
<div class="hero-card"><div class="big">Qwen3.5-9B</div><div class="label">4-bit QLoRA 微调</div></div>
</div>
</header>
<nav class="no-print">
  <!-- In-page section jumps; each button calls the global go(id) helper. An explicit
       type="button" keeps them from acting as submit buttons if ever placed in a form. -->
  <button type="button" onclick="go('story')">Story Line</button>
  <button type="button" onclick="go('method')">任务与方法</button>
  <button type="button" onclick="go('results')">结果证据</button>
  <button type="button" onclick="go('charts')">SVG 图表</button>
  <button type="button" onclick="go('metadata')">Metadata Explorer</button>
  <button type="button" onclick="go('deployment')">部署方案</button>
  <button type="button" onclick="go('sources')">源数据</button>
</nav>
<section id="story">
<h2>评审 Story Line</h2>
<div class="story"><div class="num">1</div><div><h3>设计问题</h3><p class="muted">飞机厕所内不能依赖摄像头,毫米波雷达提供隐私友好的时空轨迹,但原始序列难以直接变成可用的客舱状态。</p></div></div>
<div class="story"><div class="num">2</div><div><h3>模型角色</h3><p class="muted">LLM 不替代雷达信号处理,而是读取结构化窗口和中间表征,输出严格 JSON:当前行为、下一行为、阶段、剩余时间、序列,以及 QA 状态。</p></div></div>
<div class="story"><div class="num">3</div><div><h3>实验判断</h3><p class="muted">同一验证集上比较 base Qwen3.5-9B 与微调后模型,指标覆盖 schema 合规、行为识别、流程理解、时间估计和 QA 推理。</p></div></div>
<div class="story"><div class="num">4</div><div><h3>设计结论</h3><p class="muted">微调显著提升结构化输出和 QA 可用性,但时间误差、少数类行为和异常提示仍应进入下一轮系统设计验证。</p></div></div>
</section>
<section id="method">
<h2>任务与评估方法</h2>
<div class="grid grid-3">
<div class="card"><h3>结构化预测</h3><p class="muted">输入雷达时序窗口和中间层表征,输出 current_behavior、next_possible_behavior、stage_index、remaining time、sequence_so_far 等字段。</p></div>
<div class="card"><h3>QA 状态回答</h3><p class="muted">作为独立维度评估 occupied、time_to_free_minutes、used_areas、is_abnormal,避免只看结构化任务而忽略最终用户问题。</p></div>
<div class="card"><h3>指标选择</h3><p class="muted">分类用 accuracy/F1,schema 用 JSON parse 和 required field complete,时间用 MAE,序列用 exact/prefix/last-label match。</p></div>
</div>
</section>
<section id="results">
<h2>关键结果</h2>
<div class="grid grid-4">
<div class="card"><div class="big">67.0%</div><strong>当前行为准确率</strong><p class="muted">Base 为 48.1%。</p></div>
<div class="card"><div class="big">95.1%</div><strong>结构化字段完整率</strong><p class="muted">Base 为 0.0%,说明微调主要解决 schema 对齐。</p></div>
<div class="card"><div class="big">89.5%</div><strong>QA 异常 F1</strong><p class="muted">Base 为 45.4%。</p></div>
<div class="card"><div class="big">65.3s</div><strong>完整流程剩余时间 MAE</strong><p class="muted">该误差直接影响预计空出时间体验。</p></div>
</div>
<div style="margin-top:20px; overflow:auto">
<!-- Base vs fine-tuned comparison. Header cells carry scope="col" so assistive
     technology can associate each data cell with its column. -->
<table>
  <thead>
    <tr><th scope="col">评估项</th><th scope="col">Base</th><th scope="col">Fine-tuned</th><th scope="col">变化</th><th scope="col">设计含义</th></tr>
  </thead>
  <tbody>
    <tr><td>结构化 schema 完整率</td><td>0.0%</td><td>95.1%</td><td>95.1%</td><td>决定输出能否进入机载系统状态总线。</td></tr>
    <tr><td>当前行为准确率</td><td>48.1%</td><td>67.0%</td><td>18.9%</td><td>核心状态识别能力,从 radar 表征映射到行为标签。</td></tr>
    <tr><td>当前行为 Macro-F1</td><td>11.1%</td><td>49.1%</td><td>38.0%</td><td>衡量少数类是否被覆盖,适合设计评审关注长尾行为。</td></tr>
    <tr><td>下一行为准确率</td><td>39.2%</td><td>65.0%</td><td>25.8%</td><td>影响短期流程预测和预计空出时间。</td></tr>
    <tr><td>阶段 index 准确率</td><td>0.0%</td><td>65.5%</td><td>65.5%</td><td>反映模型是否理解完整厕所使用流程位置。</td></tr>
    <tr><td>QA 异常 F1</td><td>45.4%</td><td>89.5%</td><td>44.1%</td><td>用于乘务关注提示,需单独评估。</td></tr>
    <tr><td>QA 区域 F1</td><td>70.5%</td><td>100.0%</td><td>29.5%</td><td>用于理解马桶、洗手池、门、垃圾桶等区域使用状态。</td></tr>
  </tbody>
</table>
</div>
<div class="callout" style="margin-top:20px">
<strong>评审判断:</strong>当前实验已经证明 9B LoRA 可以把通用 LLM 拉到可用的结构化状态输出轨道上;但机载上线前仍应增加跨乘客、跨机型、跨雷达安装角度的数据验证,并对异常提示设定保守阈值。
</div>
</section>
<section>
<h2>哪些行为受益最大</h2>
<p class="muted">下表展示 F1 改善最大的行为类别。设计评审时应同时看 support,避免把少量样本上的提升误判为稳定能力。</p>
<!-- Per-behavior F1 gains. Header cells carry scope="col" so assistive
     technology can associate each data cell with its column. -->
<table>
  <thead>
    <tr><th scope="col">行为</th><th scope="col">验证样本数</th><th scope="col">Base F1</th><th scope="col">Fine-tuned F1</th><th scope="col">提升</th></tr>
  </thead>
  <tbody>
    <tr><td>离开</td><td>129</td><td>0.0%</td><td>77.6%</td><td>77.6%</td></tr>
    <tr><td>进入</td><td>147</td><td>4.0%</td><td>77.5%</td><td>73.5%</td></tr>
    <tr><td>坐下</td><td>114</td><td>0.0%</td><td>65.5%</td><td>65.5%</td></tr>
    <tr><td>马桶垫纸</td><td>156</td><td>10.9%</td><td>70.3%</td><td>59.3%</td></tr>
    <tr><td>洗手</td><td>204</td><td>5.7%</td><td>60.2%</td><td>54.5%</td></tr>
    <tr><td>卷筒厕纸</td><td>129</td><td>2.9%</td><td>52.7%</td><td>49.8%</td></tr>
    <tr><td>垃圾桶</td><td>93</td><td>0.0%</td><td>45.5%</td><td>45.5%</td></tr>
    <tr><td>起身</td><td>108</td><td>7.8%</td><td>49.8%</td><td>42.1%</td></tr>
  </tbody>
</table>
</section>
<section id="charts">
<h2>SVG 矢量图表</h2>
<p class="muted">所有图表都在 <code>assets/charts/</code> 下以 SVG 保存,可缩放、可放进设计文档;SVG 内包含 metadata,HTML 下方可查看。</p>
<div class="chart-grid"><article class="chart-card" data-chart="assets/charts/01_base_vs_finetuned_scores.svg">
<div><h3>01 base vs finetuned scores</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/01_base_vs_finetuned_scores.svg" aria-label="01 base vs finetuned scores"></object>
</article>
<article class="chart-card" data-chart="assets/charts/02_train_label_distribution.svg">
<div><h3>02 train label distribution</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/02_train_label_distribution.svg" aria-label="02 train label distribution"></object>
</article>
<article class="chart-card" data-chart="assets/charts/03_behavior_f1_delta.svg">
<div><h3>03 behavior f1 delta</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/03_behavior_f1_delta.svg" aria-label="03 behavior f1 delta"></object>
</article>
<article class="chart-card" data-chart="assets/charts/04_time_error_mae.svg">
<div><h3>04 time error mae</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/04_time_error_mae.svg" aria-label="04 time error mae"></object>
</article>
<article class="chart-card" data-chart="assets/charts/05_finetuned_behavior_confusion.svg">
<div><h3>05 finetuned behavior confusion</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/05_finetuned_behavior_confusion.svg" aria-label="05 finetuned behavior confusion"></object>
</article>
<article class="chart-card" data-chart="assets/charts/06_deployment_architecture.svg">
<div><h3>06 deployment architecture</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div>
<object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" aria-label="06 deployment architecture"></object>
</article></div>
</section>
<section id="metadata" class="no-print">
  <h2>Metadata Explorer</h2>
  <p class="muted">选择图表或数据对象,查看生成依据、源文件路径和图表内嵌 metadata。此功能完全离线,不依赖外网。</p>
  <div class="metadata-panel">
    <div class="card">
      <!-- The visible heading doubles as the accessible name of the dropdown below. -->
      <h3 id="metaSelectLabel">选择对象</h3>
      <!-- Options are filled in by the inline script at the end of the document. -->
      <select id="metaSelect" aria-labelledby="metaSelectLabel"></select>
      <p class="muted">提示:SVG 图表也可以直接用文本编辑器打开,查看其中的 <code>&lt;metadata&gt;</code> 和每个 bar/cell 的 <code>data-meta</code>。</p>
    </div>
    <!-- Receives the pretty-printed JSON for the selected entry. -->
    <pre id="metaOut"></pre>
  </div>
</section>
<section id="deployment">
  <h2>部署到飞机厕所的技术方案</h2>
  <div class="grid grid-3">
    <div class="card"><h3>边缘优先</h3><p class="muted">厕所侧完成雷达预处理和特征抽取,客舱边缘计算单元加载量化 LLM/LoRA,减少原始数据移动。</p></div>
    <div class="card"><h3>状态输出</h3><p class="muted">对外只输出 occupied、estimated free time、used areas、abnormal flag 和行为阶段,不输出身份或可逆人体点云。</p></div>
    <div class="card"><h3>失效保护</h3><p class="muted">JSON 校验失败、置信不足或异常连续触发时回退规则模型,并只给出“需关注”级提示。</p></div>
  </div>
  <!-- Same architecture diagram as in the charts section; labelled the same way
       so the embedded SVG has an accessible name here too. -->
  <object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" aria-label="06 deployment architecture" style="width:100%;height:480px;border:0;margin-top:18px"></object>
</section>
<section id="sources">
<h2>可拷走的源数据与材料</h2>
<p class="muted">整个 <code>mwave_design_review_package</code> 目录可直接复制到其他电脑。HTML、PDF、SVG 和 CSV/JSONL 都使用相对路径或独立文件。</p>
<ul class="files"><li><a href='source_data/derived/finetuned_current_behavior_confusion_top18.csv'>source_data/derived/finetuned_current_behavior_confusion_top18.csv</a></li>
<li><a href='source_data/derived/finetuned_qa_error_samples.csv'>source_data/derived/finetuned_qa_error_samples.csv</a></li>
<li><a href='source_data/derived/metric_comparison.csv'>source_data/derived/metric_comparison.csv</a></li>
<li><a href='source_data/derived/per_behavior_scores.csv'>source_data/derived/per_behavior_scores.csv</a></li>
<li><a href='source_data/metrics/base_qa_metrics.json'>source_data/metrics/base_qa_metrics.json</a></li>
<li><a href='source_data/metrics/base_struct_metrics.json'>source_data/metrics/base_struct_metrics.json</a></li>
<li><a href='source_data/metrics/finetuned_qa_metrics.json'>source_data/metrics/finetuned_qa_metrics.json</a></li>
<li><a href='source_data/metrics/finetuned_struct_metrics.json'>source_data/metrics/finetuned_struct_metrics.json</a></li>
<li><a href='source_data/model_training_metadata.json'>source_data/model_training_metadata.json</a></li>
<li><a href='source_data/predictions/base_qa_predictions.jsonl'>source_data/predictions/base_qa_predictions.jsonl</a></li>
<li><a href='source_data/predictions/base_struct_predictions.jsonl'>source_data/predictions/base_struct_predictions.jsonl</a></li>
<li><a href='source_data/predictions/finetuned_qa_predictions.jsonl'>source_data/predictions/finetuned_qa_predictions.jsonl</a></li>
<li><a href='source_data/predictions/finetuned_struct_predictions.jsonl'>source_data/predictions/finetuned_struct_predictions.jsonl</a></li>
<li><a href='source_data/presentation_metadata.json'>source_data/presentation_metadata.json</a></li>
<li><a href='source_data/summary.json'>source_data/summary.json</a></li>
<li><a href='source_data/val_qa.jsonl'>source_data/val_qa.jsonl</a></li>
<li><a href='source_data/val_struct.jsonl'>source_data/val_struct.jsonl</a></li></ul>
</section>
<div class="footer">Generated locally · 2026-05-06 05:50:52 · Boeing blue / red / black / gray design system</div>
</div>
<script id="metadata-json" type="application/json">{"summary": {"normalization": {"反复折返": "折返"}, "train_struct": {"num_examples": 25130, "num_sample_ids": 38, "label_counts": {"冲水": 588, "卷筒厕纸": 807, "坐下": 651, "坐用马桶": 10705, "垃圾桶": 1176, "折返": 90, "整理": 372, "洗手": 1599, "犹豫": 195, "离开": 795, "站用马桶": 726, "起身": 630, "进入": 903, "门锁": 1483, "靠近洗手池": 753, "靠近门": 765, "靠近马桶": 606, "马桶垫纸": 939, "马桶盖": 1347}}, "val_struct": {"num_examples": 4030, "num_sample_ids": 6, "label_counts": {"冲水": 81, "刷牙": 316, "卷筒厕纸": 129, "坐下": 114, "坐用马桶": 1569, "垃圾桶": 93, "折返": 42, "洗手": 204, "犹豫": 135, "离开": 129, "站用马桶": 54, "起身": 108, "进入": 147, "门锁": 231, "靠近洗手池": 108, "靠近门": 108, "靠近马桶": 90, "马桶垫纸": 156, "马桶盖": 216}}, "train_qa": {"num_examples": 25130}, "val_qa": {"num_examples": 4030}, "abnormal_elapsed_thresholds_p95": {"进入": 6.0, "门锁": 5.449999999999932, "靠近马桶": 5.0, "马桶盖": 6.0, "马桶垫纸": 28.5, "坐下": 7.25, "坐用马桶": 479.5, "卷筒厕纸": 19.5, "起身": 6.0, "冲水": 6.0, "靠近洗手池": 5.0, "洗手": 25.5, "垃圾桶": 15.5, "靠近门": 27.09999999999991, "离开": 6.0, "整理": 35.0, "站用马桶": 54.0, "犹豫": 27.5, "折返": 23.5, "__default__": 341.5}, "qa_schema": ["occupied", "time_to_free_minutes", "used_areas", "is_abnormal"]}, "metrics": {"base_struct": {"run_name": "base", "task_type": "struct", "input_file": null, "predictions_file": "outputs/predictions/base_struct_predictions.jsonl", "metrics": {"num_examples": 4030, "json_parse_rate": 0.9803970223325063, "required_field_complete_rate": 0.0, "current_behavior_accuracy": 0.4806378132118451, "current_behavior_macro_f1": 0.11147386989988774, "next_possible_behavior_accuracy": 0.3916898910564986, "next_possible_behavior_macro_f1": 0.10808824311037209, "is_transition_accuracy": 0.8180207542394331, "is_transition_macro_f1": 0.5801234900916015, "stage_index_accuracy": 0.0, "total_stages_accuracy": 0.0, "elapsed_seconds_in_current_behavior_mae": null, "elapsed_seconds_in_current_behavior_coverage": 0.0, "estimated_remaining_seconds_mae": null, "estimated_remaining_seconds_coverage": 
0.0, "full_remaining_seconds_mae": null, "full_remaining_seconds_coverage": 0.0, "expected_end_time_mae": 13.538461538461538, "expected_end_time_coverage": 0.012903225806451613, "sequence_exact_match": 0.0, "sequence_last_label_accuracy": 0.0, "sequence_prefix_label_match": 0.0}}, "finetuned_struct": {"run_name": "finetuned", "task_type": "struct", "input_file": "data/processed/val_struct.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 0.9513647642679901, "current_behavior_accuracy": 0.6700573813249869, "current_behavior_macro_f1": 0.4914548748892143, "next_possible_behavior_accuracy": 0.6497130933750652, "next_possible_behavior_macro_f1": 0.39767345664466514, "is_transition_accuracy": 0.784037558685446, "is_transition_macro_f1": 0.6452678421374449, "stage_index_accuracy": 0.6546687532603026, "total_stages_accuracy": 0.7595200834637454, "elapsed_seconds_in_current_behavior_mae": 15.573813249869588, "elapsed_seconds_in_current_behavior_coverage": 0.9513647642679901, "estimated_remaining_seconds_mae": 39.19822639540949, "estimated_remaining_seconds_coverage": 0.9513647642679901, "full_remaining_seconds_mae": 65.30829420970267, "full_remaining_seconds_coverage": 0.9513647642679901, "expected_end_time_mae": 39.19822639540949, "expected_end_time_coverage": 0.9513647642679901, "sequence_exact_match": 0.6106699751861042, "sequence_last_label_accuracy": 0.6238213399503723, "sequence_prefix_label_match": 0.9061194732732504}}, "base_qa": {"run_name": "base", "task_type": "qa", "input_file": "data/processed/val_qa.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 1.0, "occupied_accuracy": 0.9970223325062034, "occupied_f1": 0.9748419035252549, "is_abnormal_accuracy": 0.830272952853598, "is_abnormal_f1": 0.4536334056399132, "time_to_free_minutes_mae": 5.129823538461539, "used_areas_micro_precision": 0.9994097973637616, 
"used_areas_micro_recall": 0.5448889842325432, "used_areas_micro_f1": 0.7052616965153409}}, "finetuned_qa": {"run_name": "finetuned", "task_type": "qa", "input_file": "data/processed/val_qa.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 1.0, "occupied_accuracy": 1.0, "occupied_f1": 1.0, "is_abnormal_accuracy": 0.9320099255583126, "is_abnormal_f1": 0.89464454388459, "time_to_free_minutes_mae": 7.4441687344913216e-06, "used_areas_micro_precision": 1.0, "used_areas_micro_recall": 1.0, "used_areas_micro_f1": 1.0}}}, "chart_manifest": [{"file": "assets/charts/01_base_vs_finetuned_scores.svg", "title": "01 base vs finetuned scores", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/02_train_label_distribution.svg", "title": "02 train label distribution", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/03_behavior_f1_delta.svg", "title": "03 behavior f1 delta", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/04_time_error_mae.svg", "title": "04 time error mae", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/05_finetuned_behavior_confusion.svg", "title": "05 finetuned behavior confusion", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/06_deployment_architecture.svg", "title": "06 deployment architecture", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata 
explorer to inspect embedded <metadata> and per-element data-meta."}], "derived_files": ["source_data/derived/finetuned_current_behavior_confusion_top18.csv", "source_data/derived/finetuned_qa_error_samples.csv", "source_data/derived/metric_comparison.csv", "source_data/derived/per_behavior_scores.csv"]}</script>
<script>
// Parse the payload embedded in the <script id="metadata-json"> data block.
const metadata = JSON.parse(document.getElementById('metadata-json').textContent);

// Entries offered by the Metadata Explorer dropdown: the whole payload, one
// entry per chart in chart_manifest, the four metric sets, and the data summary.
const options = [
  {label:'Package overview', value:metadata},
  ...metadata.chart_manifest.map(c => ({label:'Chart: '+c.title, value:c})),
  {label:'Metrics: base struct', value:metadata.metrics.base_struct},
  {label:'Metrics: fine-tuned struct', value:metadata.metrics.finetuned_struct},
  {label:'Metrics: base QA', value:metadata.metrics.base_qa},
  {label:'Metrics: fine-tuned QA', value:metadata.metrics.finetuned_qa},
  {label:'Data summary', value:metadata.summary},
];

/**
 * Scroll the element with the given id into view.
 *
 * Invoked by the nav buttons' inline onclick attributes, so it must remain a
 * global function declaration.
 *
 * @param {string} id - id of the element to scroll to; unknown ids are ignored
 *   instead of throwing a TypeError.
 */
function go(id){
  const target = document.getElementById(id);
  if (!target) return;
  // Skip the animated scroll when the user has asked for reduced motion.
  const reduceMotion = typeof window.matchMedia === 'function'
    && window.matchMedia('(prefers-reduced-motion: reduce)').matches;
  target.scrollIntoView({behavior: reduceMotion ? 'auto' : 'smooth'});
}

const select = document.getElementById('metaSelect');
const out = document.getElementById('metaOut');

// One <option> per entry; the option value is the entry's index in `options`.
options.forEach((item, idx) => {
  const option = document.createElement('option');
  option.value = String(idx);
  option.textContent = item.label;
  select.appendChild(option);
});

/**
 * Show the currently selected entry as pretty-printed JSON in the output pane.
 * Clears the pane if the selected index does not map to an entry.
 */
function renderMeta(){
  const entry = options[Number(select.value)];
  out.textContent = entry ? JSON.stringify(entry.value, null, 2) : '';
}

select.addEventListener('change', renderMeta);
// Initial render for the option that is selected by default.
renderMeta();
</script>
</body>
</html>