Text Generation
PEFT
Safetensors
Chinese
English
qwen
qlora
radar
aircraft-cabin
structured-prediction
qa
conversational
Instructions to use sutama/CabinLavatoryPrediction with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use sutama/CabinLavatoryPrediction with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3.5-9B")
model = PeftModel.from_pretrained(base_model, "sutama/CabinLavatoryPrediction")
```
- Notebooks
- Google Colab
- Kaggle
| <html lang="zh-CN"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>MWave Design Review Presentation</title> | |
| <style> | |
| :root { | |
| --boeing:#0039A6; --boeing2:#0067B1; --red:#C8102E; --black:#080A0F; | |
| --ink:#111827; --steel:#5B6472; --mist:#E7ECF3; --light:#F7F9FC; --line:rgba(8,10,15,.14); | |
| } | |
| * { box-sizing:border-box; } | |
| body { | |
| margin:0; color:var(--ink); | |
| font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif; | |
| background:radial-gradient(circle at 10% 0%, rgba(0,103,177,.22), transparent 28%), | |
| radial-gradient(circle at 86% 6%, rgba(200,16,46,.12), transparent 24%), | |
| linear-gradient(135deg,#eef4fb,#ffffff 44%,#e9eef6); | |
| } | |
| a { color:var(--boeing); font-weight:800; } | |
| .deck { max-width:1480px; margin:0 auto; padding:28px; } | |
| .hero { | |
| min-height:620px; color:#fff; border-radius:36px; padding:52px; | |
| background:linear-gradient(118deg,rgba(3,11,29,.98),rgba(0,57,166,.94) 54%,rgba(0,103,177,.72)); | |
| position:relative; overflow:hidden; | |
| } | |
| .hero:after { content:""; position:absolute; right:-120px; top:-160px; width:640px; height:640px; border-radius:50%; border:1px solid rgba(255,255,255,.18); box-shadow:inset 0 0 0 82px rgba(255,255,255,.05), inset 0 0 0 180px rgba(255,255,255,.035); } | |
| .kicker { letter-spacing:.18em; text-transform:uppercase; font-size:12px; color:#dbeafe; } | |
| h1 { font-size:clamp(46px,7vw,100px); line-height:.92; margin:26px 0 18px; max-width:1050px; letter-spacing:-.06em; } | |
| h2 { font-size:38px; letter-spacing:-.04em; margin:0 0 18px; color:var(--black); } | |
| h3 { margin:0 0 8px; font-size:22px; color:var(--black); } | |
| .hero p { color:#eaf1fb; font-size:21px; line-height:1.65; max-width:980px; } | |
| .hero-grid { display:grid; grid-template-columns:repeat(4,1fr); gap:18px; position:relative; z-index:1; margin-top:42px; } | |
| .hero-card { padding:22px; border:1px solid rgba(255,255,255,.22); background:rgba(255,255,255,.1); border-radius:24px; backdrop-filter:blur(16px); } | |
| .big { font-size:38px; font-weight:900; letter-spacing:-.04em; } | |
| .label { color:#cbd5e1; font-size:13px; margin-top:6px; } | |
| nav { position:sticky; top:0; z-index:20; display:flex; gap:8px; flex-wrap:wrap; margin:18px 0; padding:12px; border:1px solid var(--line); border-radius:22px; background:rgba(255,255,255,.88); backdrop-filter:blur(18px); } | |
| nav button, .pill { border:0; border-radius:999px; padding:11px 15px; background:#eef3fb; color:#0f172a; font-weight:900; cursor:pointer; } | |
| nav button:hover, .pill.active { background:var(--boeing); color:#fff; } | |
| section { background:rgba(255,255,255,.92); border:1px solid var(--line); border-radius:30px; padding:32px; margin-top:24px; box-shadow:0 16px 44px rgba(17,24,39,.06); } | |
| .grid { display:grid; gap:18px; } | |
| .grid-2 { grid-template-columns:repeat(2,minmax(0,1fr)); } | |
| .grid-3 { grid-template-columns:repeat(3,minmax(0,1fr)); } | |
| .grid-4 { grid-template-columns:repeat(4,minmax(0,1fr)); } | |
| .card { border:1px solid var(--line); background:linear-gradient(180deg,#fff,#f6f9fd); border-radius:24px; padding:22px; } | |
| .card strong { color:var(--boeing); } | |
| .muted { color:var(--steel); line-height:1.66; } | |
| .story { display:grid; grid-template-columns:72px 1fr; gap:18px; margin-bottom:18px; } | |
| .num { width:54px; height:54px; border-radius:50%; display:grid; place-items:center; background:var(--black); color:#fff; font-weight:900; font-size:22px; } | |
| table { width:100%; border-collapse:collapse; overflow:hidden; border-radius:18px; background:#fff; } | |
| th,td { padding:13px 14px; border-bottom:1px solid #e5e7eb; text-align:left; vertical-align:top; } | |
| th { background:#08111f; color:#fff; font-size:13px; } | |
| .chart-grid { display:grid; grid-template-columns:1fr; gap:22px; } | |
| .chart-card { border:1px solid var(--line); background:#fff; border-radius:26px; padding:22px; } | |
| .chart-card object { width:100%; height:620px; border:0; background:#fff; border-radius:18px; } | |
| .chart-card p { margin:0 0 12px; color:var(--steel); } | |
| .metadata-panel { display:grid; grid-template-columns:360px 1fr; gap:18px; } | |
| select, textarea { width:100%; border:1px solid #d6dee9; border-radius:16px; padding:12px; font:inherit; background:#fff; } | |
| pre { margin:0; white-space:pre-wrap; background:#05070c; color:#d1fae5; border-radius:18px; padding:18px; min-height:360px; overflow:auto; font-family:"Cascadia Mono","SFMono-Regular",monospace; font-size:13px; } | |
| .callout { border-left:6px solid var(--red); background:#fff5f6; padding:18px; border-radius:18px; } | |
| .files { columns:2; } | |
| .footer { text-align:center; color:#64748b; padding:28px; } | |
| @media print { | |
| nav, .metadata-panel, .no-print { display:none !important; } | |
| body { background:#fff; } | |
| section, .hero { break-inside:avoid; page-break-inside:avoid; box-shadow:none; } | |
| .chart-card object { height:520px; } | |
| } | |
| @media(max-width:980px) { .hero-grid,.grid-2,.grid-3,.grid-4,.metadata-panel { grid-template-columns:1fr; } .deck { padding:14px; } .hero { padding:28px; } .files { columns:1; } } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="deck"> | |
| <header class="hero"> | |
| <div class="kicker">MWave Radar LLM · Aircraft Lavatory Design Review</div> | |
| <h1>从毫米波雷达到机载厕所状态智能:一次可验证的 LLM 微调实验</h1> | |
| <p>这份汇报面向设计专家评审,重点不是展示训练日志,而是讲清楚:为什么需要模型、模型学到了什么、哪些结果能支持设计决策、哪些风险需要在机载部署前继续验证。</p> | |
| <div class="hero-grid"> | |
| <div class="hero-card"><div class="big">25,130</div><div class="label">训练结构样本</div></div> | |
| <div class="hero-card"><div class="big">4,030</div><div class="label">验证结构样本</div></div> | |
| <div class="hero-card"><div class="big">25,130</div><div class="label">QA 训练样本</div></div> | |
| <div class="hero-card"><div class="big">Qwen3.5-9B</div><div class="label">4-bit QLoRA 微调</div></div> | |
| </div> | |
| </header> | |
| <nav class="no-print"> | |
| <button onclick="go('story')">Story Line</button> | |
| <button onclick="go('method')">任务与方法</button> | |
| <button onclick="go('results')">结果证据</button> | |
| <button onclick="go('charts')">SVG 图表</button> | |
| <button onclick="go('metadata')">Metadata Explorer</button> | |
| <button onclick="go('deployment')">部署方案</button> | |
| <button onclick="go('sources')">源数据</button> | |
| </nav> | |
| <section id="story"> | |
| <h2>评审 Story Line</h2> | |
| <div class="story"><div class="num">1</div><div><h3>设计问题</h3><p class="muted">飞机厕所内不能依赖摄像头,毫米波雷达提供隐私友好的时空轨迹,但原始序列难以直接变成可用的客舱状态。</p></div></div> | |
| <div class="story"><div class="num">2</div><div><h3>模型角色</h3><p class="muted">LLM 不替代雷达信号处理,而是读取结构化窗口和中间表征,输出严格 JSON:当前行为、下一行为、阶段、剩余时间、序列,以及 QA 状态。</p></div></div> | |
| <div class="story"><div class="num">3</div><div><h3>实验判断</h3><p class="muted">同一验证集上比较 base Qwen3.5-9B 与微调后模型,指标覆盖 schema 合规、行为识别、流程理解、时间估计和 QA 推理。</p></div></div> | |
| <div class="story"><div class="num">4</div><div><h3>设计结论</h3><p class="muted">微调显著提升结构化输出和 QA 可用性,但时间误差、少数类行为和异常提示仍应进入下一轮系统设计验证。</p></div></div> | |
| </section> | |
| <section id="method"> | |
| <h2>任务与评估方法</h2> | |
| <div class="grid grid-3"> | |
| <div class="card"><h3>结构化预测</h3><p class="muted">输入雷达时序窗口和中间层表征,输出 current_behavior、next_possible_behavior、stage_index、remaining time、sequence_so_far 等字段。</p></div> | |
| <div class="card"><h3>QA 状态回答</h3><p class="muted">作为独立维度评估 occupied、time_to_free_minutes、used_areas、is_abnormal,避免只看结构化任务而忽略最终用户问题。</p></div> | |
| <div class="card"><h3>指标选择</h3><p class="muted">分类用 accuracy/F1,schema 用 JSON parse 和 required field complete,时间用 MAE,序列用 exact/prefix/last-label match。</p></div> | |
| </div> | |
| </section> | |
| <section id="results"> | |
| <h2>关键结果</h2> | |
| <div class="grid grid-4"> | |
| <div class="card"><div class="big">67.0%</div><strong>当前行为准确率</strong><p class="muted">Base 为 48.1%。</p></div> | |
| <div class="card"><div class="big">95.1%</div><strong>结构化字段完整率</strong><p class="muted">Base 为 0.0%,说明微调主要解决 schema 对齐。</p></div> | |
| <div class="card"><div class="big">89.5%</div><strong>QA 异常 F1</strong><p class="muted">Base 为 45.4%。</p></div> | |
| <div class="card"><div class="big">65.3s</div><strong>完整流程剩余时间 MAE</strong><p class="muted">该误差直接影响预计空出时间体验。</p></div> | |
| </div> | |
| <div style="margin-top:20px; overflow:auto"> | |
| <table> | |
| <tr><th>评估项</th><th>Base</th><th>Fine-tuned</th><th>变化</th><th>设计含义</th></tr> | |
| <tr><td>结构化 schema 完整率</td><td>0.0%</td><td>95.1%</td><td>95.1%</td><td>决定输出能否进入机载系统状态总线。</td></tr><tr><td>当前行为准确率</td><td>48.1%</td><td>67.0%</td><td>18.9%</td><td>核心状态识别能力,从 radar 表征映射到行为标签。</td></tr><tr><td>当前行为 Macro-F1</td><td>11.1%</td><td>49.1%</td><td>38.0%</td><td>衡量少数类是否被覆盖,适合设计评审关注长尾行为。</td></tr><tr><td>下一行为准确率</td><td>39.2%</td><td>65.0%</td><td>25.8%</td><td>影响短期流程预测和预计空出时间。</td></tr><tr><td>阶段 index 准确率</td><td>0.0%</td><td>65.5%</td><td>65.5%</td><td>反映模型是否理解完整厕所使用流程位置。</td></tr><tr><td>QA 异常 F1</td><td>45.4%</td><td>89.5%</td><td>44.1%</td><td>用于乘务关注提示,需单独评估。</td></tr><tr><td>QA 区域 F1</td><td>70.5%</td><td>100.0%</td><td>29.5%</td><td>用于理解马桶、洗手池、门、垃圾桶等区域使用状态。</td></tr> | |
| </table> | |
| </div> | |
| <div class="callout" style="margin-top:20px"> | |
| <strong>评审判断:</strong>当前实验已经证明 9B LoRA 可以把通用 LLM 拉到可用的结构化状态输出轨道上;但机载上线前仍应增加跨乘客、跨机型、跨雷达安装角度的数据验证,并对异常提示设定保守阈值。 | |
| </div> | |
| </section> | |
| <section> | |
| <h2>哪些行为受益最大</h2> | |
| <p class="muted">下表展示 F1 改善最大的行为类别。设计评审时应同时看 support,避免把少量样本上的提升误判为稳定能力。</p> | |
| <table> | |
| <tr><th>行为</th><th>验证样本数</th><th>Base F1</th><th>Fine-tuned F1</th><th>提升</th></tr> | |
| <tr><td>离开</td><td>129</td><td>0.0%</td><td>77.6%</td><td>77.6%</td></tr> | |
| <tr><td>进入</td><td>147</td><td>4.0%</td><td>77.5%</td><td>73.5%</td></tr> | |
| <tr><td>坐下</td><td>114</td><td>0.0%</td><td>65.5%</td><td>65.5%</td></tr> | |
| <tr><td>马桶垫纸</td><td>156</td><td>10.9%</td><td>70.3%</td><td>59.3%</td></tr> | |
| <tr><td>洗手</td><td>204</td><td>5.7%</td><td>60.2%</td><td>54.5%</td></tr> | |
| <tr><td>卷筒厕纸</td><td>129</td><td>2.9%</td><td>52.7%</td><td>49.8%</td></tr> | |
| <tr><td>垃圾桶</td><td>93</td><td>0.0%</td><td>45.5%</td><td>45.5%</td></tr> | |
| <tr><td>起身</td><td>108</td><td>7.8%</td><td>49.8%</td><td>42.1%</td></tr> | |
| </table> | |
| </section> | |
| <section id="charts"> | |
| <h2>SVG 矢量图表</h2> | |
| <p class="muted">所有图表都在 <code>assets/charts/</code> 下以 SVG 保存,可缩放、可放进设计文档;SVG 内包含 metadata,HTML 下方可查看。</p> | |
| <div class="chart-grid"><article class="chart-card" data-chart="assets/charts/01_base_vs_finetuned_scores.svg"> | |
| <div><h3>01 base vs finetuned scores</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/01_base_vs_finetuned_scores.svg" aria-label="01 base vs finetuned scores"></object> | |
| </article> | |
| <article class="chart-card" data-chart="assets/charts/02_train_label_distribution.svg"> | |
| <div><h3>02 train label distribution</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/02_train_label_distribution.svg" aria-label="02 train label distribution"></object> | |
| </article> | |
| <article class="chart-card" data-chart="assets/charts/03_behavior_f1_delta.svg"> | |
| <div><h3>03 behavior f1 delta</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/03_behavior_f1_delta.svg" aria-label="03 behavior f1 delta"></object> | |
| </article> | |
| <article class="chart-card" data-chart="assets/charts/04_time_error_mae.svg"> | |
| <div><h3>04 time error mae</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/04_time_error_mae.svg" aria-label="04 time error mae"></object> | |
| </article> | |
| <article class="chart-card" data-chart="assets/charts/05_finetuned_behavior_confusion.svg"> | |
| <div><h3>05 finetuned behavior confusion</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/05_finetuned_behavior_confusion.svg" aria-label="05 finetuned behavior confusion"></object> | |
| </article> | |
| <article class="chart-card" data-chart="assets/charts/06_deployment_architecture.svg"> | |
| <div><h3>06 deployment architecture</h3><p>Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded &lt;metadata&gt; and per-element data-meta.</p></div> | |
| <object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" aria-label="06 deployment architecture"></object> | |
| </article></div> | |
| </section> | |
| <section id="metadata" class="no-print"> | |
| <h2>Metadata Explorer</h2> | |
| <p class="muted">选择图表或数据对象,查看生成依据、源文件路径和图表内嵌 metadata。此功能完全离线,不依赖外网。</p> | |
| <div class="metadata-panel"> | |
| <div class="card"> | |
| <h3>选择对象</h3> | |
| <select id="metaSelect"></select> | |
| <p class="muted">提示:SVG 图表也可以直接用文本编辑器打开,查看其中的 <code>&lt;metadata&gt;</code> 和每个 bar/cell 的 <code>data-meta</code>。</p> | |
| </div> | |
| <pre id="metaOut"></pre> | |
| </div> | |
| </section> | |
| <section id="deployment"> | |
| <h2>部署到飞机厕所的技术方案</h2> | |
| <div class="grid grid-3"> | |
| <div class="card"><h3>边缘优先</h3><p class="muted">厕所侧完成雷达预处理和特征抽取,客舱边缘计算单元加载量化 LLM/LoRA,减少原始数据移动。</p></div> | |
| <div class="card"><h3>状态输出</h3><p class="muted">对外只输出 occupied、estimated free time、used areas、abnormal flag 和行为阶段,不输出身份或可逆人体点云。</p></div> | |
| <div class="card"><h3>失效保护</h3><p class="muted">JSON 校验失败、置信不足或异常连续触发时回退规则模型,并只给出“需关注”级提示。</p></div> | |
| </div> | |
| <object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" style="width:100%;height:480px;border:0;margin-top:18px"></object> | |
| </section> | |
| <section id="sources"> | |
| <h2>可拷走的源数据与材料</h2> | |
| <p class="muted">整个 <code>mwave_design_review_package</code> 目录可直接复制到其他电脑。HTML、PDF、SVG 和 CSV/JSONL 都使用相对路径或独立文件。</p> | |
| <ul class="files"><li><a href='source_data/derived/finetuned_current_behavior_confusion_top18.csv'>source_data/derived/finetuned_current_behavior_confusion_top18.csv</a></li> | |
| <li><a href='source_data/derived/finetuned_qa_error_samples.csv'>source_data/derived/finetuned_qa_error_samples.csv</a></li> | |
| <li><a href='source_data/derived/metric_comparison.csv'>source_data/derived/metric_comparison.csv</a></li> | |
| <li><a href='source_data/derived/per_behavior_scores.csv'>source_data/derived/per_behavior_scores.csv</a></li> | |
| <li><a href='source_data/metrics/base_qa_metrics.json'>source_data/metrics/base_qa_metrics.json</a></li> | |
| <li><a href='source_data/metrics/base_struct_metrics.json'>source_data/metrics/base_struct_metrics.json</a></li> | |
| <li><a href='source_data/metrics/finetuned_qa_metrics.json'>source_data/metrics/finetuned_qa_metrics.json</a></li> | |
| <li><a href='source_data/metrics/finetuned_struct_metrics.json'>source_data/metrics/finetuned_struct_metrics.json</a></li> | |
| <li><a href='source_data/model_training_metadata.json'>source_data/model_training_metadata.json</a></li> | |
| <li><a href='source_data/predictions/base_qa_predictions.jsonl'>source_data/predictions/base_qa_predictions.jsonl</a></li> | |
| <li><a href='source_data/predictions/base_struct_predictions.jsonl'>source_data/predictions/base_struct_predictions.jsonl</a></li> | |
| <li><a href='source_data/predictions/finetuned_qa_predictions.jsonl'>source_data/predictions/finetuned_qa_predictions.jsonl</a></li> | |
| <li><a href='source_data/predictions/finetuned_struct_predictions.jsonl'>source_data/predictions/finetuned_struct_predictions.jsonl</a></li> | |
| <li><a href='source_data/presentation_metadata.json'>source_data/presentation_metadata.json</a></li> | |
| <li><a href='source_data/summary.json'>source_data/summary.json</a></li> | |
| <li><a href='source_data/val_qa.jsonl'>source_data/val_qa.jsonl</a></li> | |
| <li><a href='source_data/val_struct.jsonl'>source_data/val_struct.jsonl</a></li></ul> | |
| </section> | |
| <div class="footer">Generated locally · 2026-05-06 05:50:52 · Boeing blue / red / black / gray design system</div> | |
| </div> | |
| <script id="metadata-json" type="application/json">{"summary": {"normalization": {"反复折返": "折返"}, "train_struct": {"num_examples": 25130, "num_sample_ids": 38, "label_counts": {"冲水": 588, "卷筒厕纸": 807, "坐下": 651, "坐用马桶": 10705, "垃圾桶": 1176, "折返": 90, "整理": 372, "洗手": 1599, "犹豫": 195, "离开": 795, "站用马桶": 726, "起身": 630, "进入": 903, "门锁": 1483, "靠近洗手池": 753, "靠近门": 765, "靠近马桶": 606, "马桶垫纸": 939, "马桶盖": 1347}}, "val_struct": {"num_examples": 4030, "num_sample_ids": 6, "label_counts": {"冲水": 81, "刷牙": 316, "卷筒厕纸": 129, "坐下": 114, "坐用马桶": 1569, "垃圾桶": 93, "折返": 42, "洗手": 204, "犹豫": 135, "离开": 129, "站用马桶": 54, "起身": 108, "进入": 147, "门锁": 231, "靠近洗手池": 108, "靠近门": 108, "靠近马桶": 90, "马桶垫纸": 156, "马桶盖": 216}}, "train_qa": {"num_examples": 25130}, "val_qa": {"num_examples": 4030}, "abnormal_elapsed_thresholds_p95": {"进入": 6.0, "门锁": 5.449999999999932, "靠近马桶": 5.0, "马桶盖": 6.0, "马桶垫纸": 28.5, "坐下": 7.25, "坐用马桶": 479.5, "卷筒厕纸": 19.5, "起身": 6.0, "冲水": 6.0, "靠近洗手池": 5.0, "洗手": 25.5, "垃圾桶": 15.5, "靠近门": 27.09999999999991, "离开": 6.0, "整理": 35.0, "站用马桶": 54.0, "犹豫": 27.5, "折返": 23.5, "__default__": 341.5}, "qa_schema": ["occupied", "time_to_free_minutes", "used_areas", "is_abnormal"]}, "metrics": {"base_struct": {"run_name": "base", "task_type": "struct", "input_file": null, "predictions_file": "outputs/predictions/base_struct_predictions.jsonl", "metrics": {"num_examples": 4030, "json_parse_rate": 0.9803970223325063, "required_field_complete_rate": 0.0, "current_behavior_accuracy": 0.4806378132118451, "current_behavior_macro_f1": 0.11147386989988774, "next_possible_behavior_accuracy": 0.3916898910564986, "next_possible_behavior_macro_f1": 0.10808824311037209, "is_transition_accuracy": 0.8180207542394331, "is_transition_macro_f1": 0.5801234900916015, "stage_index_accuracy": 0.0, "total_stages_accuracy": 0.0, "elapsed_seconds_in_current_behavior_mae": null, "elapsed_seconds_in_current_behavior_coverage": 0.0, "estimated_remaining_seconds_mae": null, 
"estimated_remaining_seconds_coverage": 0.0, "full_remaining_seconds_mae": null, "full_remaining_seconds_coverage": 0.0, "expected_end_time_mae": 13.538461538461538, "expected_end_time_coverage": 0.012903225806451613, "sequence_exact_match": 0.0, "sequence_last_label_accuracy": 0.0, "sequence_prefix_label_match": 0.0}}, "finetuned_struct": {"run_name": "finetuned", "task_type": "struct", "input_file": "data/processed/val_struct.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 0.9513647642679901, "current_behavior_accuracy": 0.6700573813249869, "current_behavior_macro_f1": 0.4914548748892143, "next_possible_behavior_accuracy": 0.6497130933750652, "next_possible_behavior_macro_f1": 0.39767345664466514, "is_transition_accuracy": 0.784037558685446, "is_transition_macro_f1": 0.6452678421374449, "stage_index_accuracy": 0.6546687532603026, "total_stages_accuracy": 0.7595200834637454, "elapsed_seconds_in_current_behavior_mae": 15.573813249869588, "elapsed_seconds_in_current_behavior_coverage": 0.9513647642679901, "estimated_remaining_seconds_mae": 39.19822639540949, "estimated_remaining_seconds_coverage": 0.9513647642679901, "full_remaining_seconds_mae": 65.30829420970267, "full_remaining_seconds_coverage": 0.9513647642679901, "expected_end_time_mae": 39.19822639540949, "expected_end_time_coverage": 0.9513647642679901, "sequence_exact_match": 0.6106699751861042, "sequence_last_label_accuracy": 0.6238213399503723, "sequence_prefix_label_match": 0.9061194732732504}}, "base_qa": {"run_name": "base", "task_type": "qa", "input_file": "data/processed/val_qa.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 1.0, "occupied_accuracy": 0.9970223325062034, "occupied_f1": 0.9748419035252549, "is_abnormal_accuracy": 0.830272952853598, "is_abnormal_f1": 0.4536334056399132, "time_to_free_minutes_mae": 5.129823538461539, 
"used_areas_micro_precision": 0.9994097973637616, "used_areas_micro_recall": 0.5448889842325432, "used_areas_micro_f1": 0.7052616965153409}}, "finetuned_qa": {"run_name": "finetuned", "task_type": "qa", "input_file": "data/processed/val_qa.jsonl", "predictions_file": null, "metrics": {"num_examples": 4030, "json_parse_rate": 1.0, "required_field_complete_rate": 1.0, "occupied_accuracy": 1.0, "occupied_f1": 1.0, "is_abnormal_accuracy": 0.9320099255583126, "is_abnormal_f1": 0.89464454388459, "time_to_free_minutes_mae": 7.4441687344913216e-06, "used_areas_micro_precision": 1.0, "used_areas_micro_recall": 1.0, "used_areas_micro_f1": 1.0}}}, "chart_manifest": [{"file": "assets/charts/01_base_vs_finetuned_scores.svg", "title": "01 base vs finetuned scores", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/02_train_label_distribution.svg", "title": "02 train label distribution", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/03_behavior_f1_delta.svg", "title": "03 behavior f1 delta", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/04_time_error_mae.svg", "title": "04 time error mae", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/05_finetuned_behavior_confusion.svg", "title": "05 finetuned behavior confusion", "metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}, {"file": "assets/charts/06_deployment_architecture.svg", "title": "06 deployment architecture", "metadata_note": "Open the SVG file 
in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."}], "derived_files": ["source_data/derived/finetuned_current_behavior_confusion_top18.csv", "source_data/derived/finetuned_qa_error_samples.csv", "source_data/derived/metric_comparison.csv", "source_data/derived/per_behavior_scores.csv"]}</script> | |
| <script> | |
| const metadata = JSON.parse(document.getElementById('metadata-json').textContent); | |
| const options = [ | |
| {label:'Package overview', value:metadata}, | |
| ...metadata.chart_manifest.map(c => ({label:'Chart: '+c.title, value:c})), | |
| {label:'Metrics: base struct', value:metadata.metrics.base_struct}, | |
| {label:'Metrics: fine-tuned struct', value:metadata.metrics.finetuned_struct}, | |
| {label:'Metrics: base QA', value:metadata.metrics.base_qa}, | |
| {label:'Metrics: fine-tuned QA', value:metadata.metrics.finetuned_qa}, | |
| {label:'Data summary', value:metadata.summary}, | |
| ]; | |
| function go(id){ document.getElementById(id).scrollIntoView({behavior:'smooth'}); } | |
| const select = document.getElementById('metaSelect'); | |
| const out = document.getElementById('metaOut'); | |
| options.forEach((item, idx) => { | |
| const option = document.createElement('option'); | |
| option.value = String(idx); | |
| option.textContent = item.label; | |
| select.appendChild(option); | |
| }); | |
| function renderMeta(){ out.textContent = JSON.stringify(options[Number(select.value)].value, null, 2); } | |
| select.addEventListener('change', renderMeta); | |
| renderMeta(); | |
| </script> | |
| </body> | |
| </html> | |