CabinLavatoryPrediction / code /build_report.py
sutama's picture
Upload CabinLavatoryPrediction LoRA adapter, checkpoint, code, and evaluation artifacts
e74a796 verified
#!/usr/bin/env python3
import json
from datetime import datetime
from pathlib import Path
# Parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Directory that receives the generated report.
REPORT_DIR = ROOT.joinpath("reports")
# The HTML page written by main().
OUTPUT = REPORT_DIR.joinpath("index.html")
def read_json(path, default=None):
    """Load a UTF-8 JSON file, returning ``default`` on any failure.

    Args:
        path: Location of the JSON file (anything ``Path`` accepts).
        default: Value handed back when the file cannot be opened, decoded
            or parsed.

    Returns:
        The decoded JSON value, or ``default``.
    """
    try:
        with Path(path).open(encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        # Best-effort reader: the report is built from whatever artifacts exist.
        return default
def count_lines(path):
    """Return the number of lines in a UTF-8 text file, or 0 on any failure.

    Blank lines are counted like any other line. A file that cannot be
    opened or decoded yields 0.
    """
    total = 0
    try:
        with Path(path).open(encoding="utf-8") as stream:
            for _line in stream:
                total += 1
    except Exception:
        # A failure part-way through discards the partial count as well.
        return 0
    return total
def parse_log_status():
    """Summarise pipeline progress from a log file under ``outputs/logs``.

    ``training_monitor.log`` is used when it exists; otherwise the most
    recently modified ``*.log`` file is used.

    Returns:
        dict with three keys:
        ``log_file`` - log path relative to ``ROOT`` (``None`` if no log exists),
        ``tail`` - up to the last 20 lines of that log,
        ``stage`` - one of ``not_started``, ``completed``, ``needs_attention``,
        ``training``, ``finetuned_eval`` or ``running``.
    """
    log_dir = ROOT / "outputs/logs"
    monitor = log_dir / "training_monitor.log"
    final_files = [
        ROOT / "outputs/metrics/finetuned_struct_metrics.json",
        ROOT / "outputs/metrics/finetuned_qa_metrics.json",
        ROOT / "outputs/figures/metric_comparison.csv",
    ]
    if monitor.exists():
        log = monitor
    else:
        logs = sorted(log_dir.glob("*.log"), key=lambda p: p.stat().st_mtime, reverse=True)
        if not logs:
            return {"log_file": None, "tail": [], "stage": "not_started"}
        log = logs[0]

    try:
        text = log.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # The log may be rotated or removed between the lookup and the read;
        # keep building the report with an empty tail instead of aborting.
        text = ""
    # Only the last 120 lines are inspected for stage markers.
    tail = text.splitlines()[-120:]
    joined = "\n".join(tail)

    if all(p.exists() for p in final_files):
        stage = "completed"
    elif "Traceback" in joined or "Error" in joined:
        # Checked before the command markers: the tail of a crashed run
        # typically still mentions "scripts/train_qlora.py", which previously
        # made a failure show up as "training".
        stage = "needs_attention"
    elif "scripts/train_qlora.py" in joined:
        stage = "training"
    elif "--run-name finetuned" in joined:
        stage = "finetuned_eval"
    else:
        stage = "running"
    return {"log_file": str(log.relative_to(ROOT)), "tail": tail[-20:], "stage": stage}
def metrics_from_prediction_file(path, limit=None):
    """Compute quick structural metrics from a JSONL prediction file.

    Every non-blank line is expected to hold a JSON object with a ``target``
    dict and a ``prediction`` value. A prediction counts as parsed only when
    it is itself a dict. Lines that are not valid JSON objects are skipped,
    so a file that is still being written (e.g. a truncated last line) keeps
    producing metrics for the records that are complete.

    Args:
        path: JSONL file to read.
        limit: Optional maximum number of records to load; falsy means all.

    Returns:
        dict with ``num_examples``, ``json_parse_rate``,
        ``required_field_complete_rate``, four ``*_accuracy`` values and
        ``full_remaining_seconds_mae`` (``None`` when no numeric pair exists);
        ``{}`` when the file cannot be read or holds no usable record.
    """
    required = [
        "current_behavior",
        "is_transition",
        "elapsed_seconds_in_current_behavior",
        "estimated_remaining_seconds",
        "full_remaining_seconds",
        "expected_end_time",
        "next_possible_behavior",
        "stage_index",
        "total_stages",
        "sequence_so_far",
    ]
    rows = []
    try:
        with Path(path).open(encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    row = json.loads(line)
                except (ValueError, RecursionError):
                    # Malformed or half-written record: ignore this line only.
                    continue
                if not isinstance(row, dict):
                    continue
                rows.append(row)
                if limit and len(rows) >= limit:
                    break
    except (OSError, ValueError):
        # Unreadable or undecodable file (UnicodeDecodeError is a ValueError).
        return {}
    if not rows:
        return {}

    def as_dict(value):
        # A missing or non-dict target simply never matches a prediction.
        return value if isinstance(value, dict) else {}

    # (target, prediction) pairs for the rows whose prediction parsed as a dict.
    parsed = [
        (as_dict(r.get("target")), r["prediction"])
        for r in rows
        if isinstance(r.get("prediction"), dict)
    ]

    def acc(field):
        # Accuracy over the parsed rows whose prediction contains the field.
        pairs = [(t.get(field), p.get(field)) for t, p in parsed if field in p]
        return sum(a == b for a, b in pairs) / len(pairs) if pairs else 0

    def mae(field):
        # Mean absolute error over rows where both sides are numeric.
        errors = [
            abs(float(t[field]) - float(p[field]))
            for t, p in parsed
            if isinstance(t.get(field), (int, float)) and isinstance(p.get(field), (int, float))
        ]
        return sum(errors) / len(errors) if errors else None

    return {
        "num_examples": len(rows),
        "json_parse_rate": len(parsed) / len(rows),
        "required_field_complete_rate": sum(all(f in p for f in required) for _, p in parsed) / len(rows),
        "current_behavior_accuracy": acc("current_behavior"),
        "next_possible_behavior_accuracy": acc("next_possible_behavior"),
        "is_transition_accuracy": acc("is_transition"),
        "stage_index_accuracy": acc("stage_index"),
        "full_remaining_seconds_mae": mae("full_remaining_seconds"),
    }
def load_metrics():
    """Collect metric payloads from ``outputs/metrics`` plus base partial metrics.

    Every ``*.json`` file holding a non-empty JSON object is stored under the
    key ``"<run_name>_<task_type>"`` taken from that object. Metrics computed
    directly from the base structured prediction file are added under
    ``"base_struct_partial"`` whenever that file yields any.

    Returns:
        dict mapping those keys to metric payload dicts.
    """
    metric_dir = ROOT / "outputs/metrics"
    metrics = {}
    for path in sorted(metric_dir.glob("*.json")):
        payload = read_json(path)
        # read_json hands back whatever JSON value the file holds; a list or
        # string has no run_name/task_type and used to crash on .get().
        if not payload or not isinstance(payload, dict):
            continue
        key = f"{payload.get('run_name')}_{payload.get('task_type')}"
        metrics[key] = payload
    partial = metrics_from_prediction_file(ROOT / "outputs/predictions/base_struct_predictions.jsonl")
    if partial:
        metrics["base_struct_partial"] = {
            "run_name": "base_partial",
            "task_type": "struct",
            "input_file": "outputs/predictions/base_struct_predictions.jsonl",
            "metrics": partial,
        }
    return metrics
def build_data():
    """Assemble the payload that is embedded into the HTML report.

    Returns:
        dict with the generation timestamp, the processed-data summary, the
        loaded metrics, per-run prediction line counts, the parsed log status
        and a few artifact paths.
    """
    summary = read_json(ROOT / "data/processed/summary.json", {})

    # Progress key -> prediction file whose line count feeds it.
    prediction_files = (
        ("base_struct_done", "outputs/predictions/base_struct_predictions.jsonl"),
        ("base_qa_done", "outputs/predictions/base_qa_predictions.jsonl"),
        ("finetuned_struct_done", "outputs/predictions/finetuned_struct_predictions.jsonl"),
        ("finetuned_qa_done", "outputs/predictions/finetuned_qa_predictions.jsonl"),
    )
    progress = {key: count_lines(ROOT / rel_path) for key, rel_path in prediction_files}
    # 4030 is the fallback when the summary carries no val_struct example count.
    val_struct = summary.get("val_struct", {})
    progress["val_total"] = val_struct.get("num_examples", 4030)

    artifact_paths = {
        "processed_summary": "data/processed/summary.json",
        "base_predictions": "outputs/predictions/base_struct_predictions.jsonl",
        "adapter_dir": "outputs/qwen35_9b_lora",
        "figures_dir": "outputs/figures",
    }

    report = {"generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
    report["summary"] = summary
    report["metrics"] = load_metrics()
    report["progress"] = progress
    report["log_status"] = parse_log_status()
    report["files"] = artifact_paths
    return report
# Single-file HTML/CSS/JS report page. The literal token __DATA__ inside the
# <script id="report-data"> element is replaced with the JSON payload when the
# report is written; the inline script parses that element and renders the
# charts on <canvas> elements in the browser.
# This is a raw string, so backslashes reach the browser unchanged: '\n' in the
# script below is a JavaScript newline escape, while '\\n' would be a literal
# backslash followed by "n" (chart labels therefore use a plain space).
HTML_TEMPLATE = r"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>MWave Aircraft Lavatory Radar LLM Workflow</title>
<style>
:root{
--boeing:#0039A6;
--boeing-2:#0067B1;
--red:#C8102E;
--black:#080A0F;
--ink:#111827;
--steel:#5B6472;
--mist:#E7ECF3;
--panel:#F7F9FC;
--white:#FFFFFF;
--line:rgba(8,10,15,.14);
--shadow:0 24px 80px rgba(0,23,84,.16);
}
*{box-sizing:border-box}
html{scroll-behavior:smooth}
body{
margin:0;
color:var(--ink);
font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif;
background:
radial-gradient(circle at 12% 8%, rgba(0,103,177,.22), transparent 30%),
radial-gradient(circle at 82% 18%, rgba(200,16,46,.16), transparent 26%),
linear-gradient(135deg,#f3f6fb 0%,#ffffff 42%,#e9eef6 100%);
}
.shell{max-width:1440px;margin:0 auto;padding:28px}
.hero{
position:relative;
min-height:520px;
border-radius:34px;
overflow:hidden;
color:white;
background:
linear-gradient(115deg,rgba(3,11,29,.96),rgba(0,57,166,.92) 48%,rgba(0,103,177,.72)),
repeating-linear-gradient(90deg,transparent 0 72px,rgba(255,255,255,.06) 72px 73px);
box-shadow:var(--shadow);
padding:42px;
}
.hero:after{
content:"";
position:absolute;
right:-160px;top:-110px;
width:620px;height:620px;
border:1px solid rgba(255,255,255,.18);
border-radius:50%;
box-shadow:inset 0 0 0 72px rgba(255,255,255,.04), inset 0 0 0 140px rgba(255,255,255,.035);
}
.kicker{display:inline-flex;gap:10px;align-items:center;letter-spacing:.16em;text-transform:uppercase;font-size:12px;color:#dbeafe}
.pulse{width:10px;height:10px;background:var(--red);border-radius:50%;box-shadow:0 0 0 10px rgba(200,16,46,.18)}
h1{font-size:clamp(42px,7vw,96px);line-height:.9;margin:28px 0 20px;letter-spacing:-.06em;max-width:980px}
.hero p{font-size:20px;line-height:1.65;max-width:920px;color:#e8eef8;margin:0}
.hero-grid{position:relative;z-index:1;display:grid;grid-template-columns:1.2fr .8fr;gap:24px;margin-top:38px}
.hero-card{background:rgba(255,255,255,.1);border:1px solid rgba(255,255,255,.2);border-radius:24px;padding:22px;backdrop-filter:blur(18px)}
.stat{font-size:38px;font-weight:800;letter-spacing:-.04em}
.stat-label{color:#cbd5e1;font-size:13px;margin-top:6px}
nav{position:sticky;top:0;z-index:10;margin:18px 0;padding:12px;border-radius:22px;background:rgba(255,255,255,.82);backdrop-filter:blur(18px);border:1px solid var(--line);display:flex;gap:8px;flex-wrap:wrap}
nav button,.tab-btn{
border:0;border-radius:999px;padding:12px 16px;background:#eef3fb;color:#1f2937;font-weight:800;cursor:pointer;transition:.18s ease;
}
nav button:hover,.tab-btn:hover,.tab-btn.active{background:var(--boeing);color:white;transform:translateY(-1px)}
section{margin-top:24px}
.panel{background:rgba(255,255,255,.9);border:1px solid var(--line);border-radius:28px;padding:28px;box-shadow:0 16px 44px rgba(17,24,39,.06)}
.section-title{display:flex;align-items:end;justify-content:space-between;gap:18px;margin-bottom:20px}
h2{font-size:34px;margin:0;letter-spacing:-.04em;color:var(--black)}
.muted{color:var(--steel);line-height:1.65}
.grid{display:grid;gap:18px}
.grid-4{grid-template-columns:repeat(4,minmax(0,1fr))}
.grid-3{grid-template-columns:repeat(3,minmax(0,1fr))}
.grid-2{grid-template-columns:repeat(2,minmax(0,1fr))}
.metric-card{border-radius:22px;padding:20px;background:linear-gradient(180deg,#fff,#f5f8fd);border:1px solid var(--line);position:relative;overflow:hidden}
.metric-card:before{content:"";position:absolute;left:0;top:0;width:5px;height:100%;background:var(--boeing)}
.metric-card.red:before{background:var(--red)}
.big{font-size:32px;font-weight:900;letter-spacing:-.04em}
.timeline{display:grid;grid-template-columns:repeat(6,1fr);gap:12px;margin-top:18px}
.step{position:relative;border-radius:22px;padding:18px;background:#f8fafc;border:1px solid var(--line);min-height:160px}
.step b{display:block;color:var(--boeing);font-size:18px;margin-bottom:8px}
.step .num{width:34px;height:34px;border-radius:50%;display:grid;place-items:center;background:var(--black);color:white;font-weight:900;margin-bottom:16px}
.step.active{background:linear-gradient(180deg,#fff,#eaf3ff);box-shadow:0 12px 36px rgba(0,57,166,.14)}
.chart-card{background:#fff;border:1px solid var(--line);border-radius:24px;padding:20px;min-height:360px}
canvas{width:100%;height:300px}
.tabs{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:14px}
.tab{display:none}.tab.active{display:block}
table{width:100%;border-collapse:collapse;background:#fff;border-radius:18px;overflow:hidden}
th,td{text-align:left;padding:13px 14px;border-bottom:1px solid #e5e7eb}
th{background:#0b1220;color:#fff;font-size:13px}
tr:hover td{background:#f8fbff}
.badge{display:inline-flex;align-items:center;gap:8px;border-radius:999px;padding:8px 12px;background:#eef3fb;color:var(--boeing);font-weight:900;font-size:12px}
.badge.red{background:#fff0f2;color:var(--red)}
.architecture{display:grid;grid-template-columns:1fr 90px 1fr 90px 1fr;gap:16px;align-items:center}
.arch-box{padding:22px;border-radius:24px;border:1px solid var(--line);background:#fff;min-height:180px}
.arrow{height:2px;background:linear-gradient(90deg,var(--boeing),var(--red));position:relative}
.arrow:after{content:"";position:absolute;right:-2px;top:-5px;border-left:12px solid var(--red);border-top:6px solid transparent;border-bottom:6px solid transparent}
.risk{display:grid;grid-template-columns:repeat(2,1fr);gap:12px}
.risk div{border-radius:18px;padding:16px;background:#f8fafc;border:1px solid var(--line)}
.risk b{color:var(--red)}
.terminal{background:#05070c;color:#d1fae5;border-radius:22px;padding:18px;font-family:"Cascadia Mono","SFMono-Regular",monospace;overflow:auto;max-height:330px;font-size:13px}
.footer{padding:28px;color:#64748b;text-align:center}
@media(max-width:980px){
.hero-grid,.grid-4,.grid-3,.grid-2,.timeline,.architecture,.risk{grid-template-columns:1fr}
.arrow{height:42px;width:2px;margin:auto;background:linear-gradient(180deg,var(--boeing),var(--red))}
.arrow:after{right:-5px;top:auto;bottom:-2px;border-top:12px solid var(--red);border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:0}
.shell{padding:14px}.hero{padding:26px}
}
</style>
</head>
<body>
<div class="shell">
<header class="hero">
<div class="kicker"><span class="pulse"></span> MWave Radar LLM · Aircraft Lavatory Intelligence</div>
<h1>毫米波雷达时序行为模型工作流报告</h1>
<p>从飞机厕所内毫米波雷达轨迹窗口和中间层表征出发,微调 Qwen3.5-9B,完成结构化行为预测、序列重建、剩余时间估计和 QA 状态问答,并支持部署到航空场景的边缘推理方案。</p>
<div class="hero-grid">
<div class="hero-card">
<div class="grid grid-4">
<div><div class="stat" id="heroTrain">-</div><div class="stat-label">训练结构样本</div></div>
<div><div class="stat" id="heroVal">-</div><div class="stat-label">验证结构样本</div></div>
<div><div class="stat" id="heroQA">-</div><div class="stat-label">QA 训练样本</div></div>
<div><div class="stat" id="heroProgress">-</div><div class="stat-label">整体评估进度</div></div>
</div>
</div>
<div class="hero-card">
<span class="badge red" id="runStage">RUNNING</span>
<p style="margin-top:14px;font-size:15px">报告生成时间:<b id="generatedAt"></b><br>模型:Qwen/Qwen3.5-9B + 4-bit QLoRA<br>输出:JSON schema + QA schema + charts</p>
</div>
</div>
</header>
<nav>
<button onclick="go('workflow')">Workflow</button>
<button onclick="go('data')">数据画像</button>
<button onclick="go('results')">评估结果</button>
<button onclick="go('analysis')">发现与风险</button>
<button onclick="go('deployment')">机载部署方案</button>
<button onclick="go('ops')">运行状态</button>
</nav>
<section id="workflow" class="panel">
<div class="section-title"><h2>端到端 Workflow</h2><span class="badge">Train · Evaluate · Deploy</span></div>
<div class="timeline">
<div class="step active"><div class="num">1</div><b>数据解析</b><span class="muted">读取 train/val JSONL,解析 chat 格式、轨迹窗口、背景知识和 assistant JSON。</span></div>
<div class="step active"><div class="num">2</div><b>标签规范</b><span class="muted">统一 `反复折返 -> 折返`,固定结构化输出字段,生成 QA 目标。</span></div>
<div class="step active"><div class="num">3</div><b>基线评估</b><span class="muted">Qwen3.5-9B base 先跑 val,保存微调前预测和指标。</span></div>
<div class="step active"><div class="num">4</div><b>QLoRA 微调</b><span class="muted">结构化预测与 QA 混合 SFT,4-bit NF4,LoRA 训练约 29M 参数。</span></div>
<div class="step active"><div class="num">5</div><b>微调后评估</b><span class="muted">同一套 val、同一套指标,输出 finetuned 预测、指标和图表。</span></div>
<div class="step active"><div class="num">6</div><b>机载部署</b><span class="muted">毫米波雷达边缘预处理 + 本地 LLM 推理 + 客舱系统状态输出。</span></div>
</div>
</section>
<section id="data" class="panel">
<div class="section-title"><h2>数据画像</h2><div class="tabs"><button class="tab-btn active" onclick="switchTab('dist','train',this)">Train</button><button class="tab-btn" onclick="switchTab('dist','val',this)">Val</button></div></div>
<div class="grid grid-2">
<div class="chart-card">
<h3>行为标签分布</h3>
<canvas id="labelChart"></canvas>
</div>
<div class="chart-card">
<h3>任务样本构成</h3>
<canvas id="taskChart"></canvas>
</div>
</div>
</section>
<section id="results" class="panel">
<div class="section-title"><h2>可视化结果</h2><span class="badge red">Base vs Finetuned</span></div>
<div class="grid grid-4" id="metricCards"></div>
<div class="grid grid-2" style="margin-top:18px">
<div class="chart-card"><h3>结构化指标</h3><canvas id="structMetricChart"></canvas></div>
<div class="chart-card"><h3>QA 指标</h3><canvas id="qaMetricChart"></canvas></div>
</div>
<div style="margin-top:18px">
<table id="metricTable"></table>
</div>
</section>
<section id="analysis" class="panel">
<div class="section-title"><h2>总结与发现</h2><span class="badge">Current Findings</span></div>
<div class="grid grid-3">
<div class="metric-card"><div class="big">1</div><b>base 模型不等于任务模型</b><p class="muted">未微调 Qwen3.5-9B 能理解中文指令,但结构化 schema 稳定性不足,尤其容易输出解释性文本或缺字段。</p></div>
<div class="metric-card red"><div class="big">2</div><b>QA 必须独立评估</b><p class="muted">QA 不是简单复述,需要从 `full_remaining_seconds`、`sequence_so_far` 和异常规则推导,占用、空出时间、区域使用和异常应单独打分。</p></div>
<div class="metric-card"><div class="big">3</div><b>短行为是关键风险</b><p class="muted">进入、门锁、坐下、起身、折返、犹豫等短时行为占比较低,但对流程阶段和剩余时间预测影响很大。</p></div>
</div>
<div class="risk" style="margin-top:18px">
<div><b>数据风险</b><p class="muted">训练集中坐用马桶占比最高,类别不平衡明显;若最终少数类 F1 低,需要重采样或 loss 权重。</p></div>
<div><b>工程风险</b><p class="muted">长 prompt 和 9B 推理导致全量评估耗时较长;部署时应缓存背景知识、压缩轨迹窗口并使用约束解码。</p></div>
<div><b>安全边界</b><p class="muted">系统只输出状态和行为,不输出身份识别,不存储原始可逆人体点云,降低隐私风险。</p></div>
<div><b>上线策略</b><p class="muted">先 shadow mode 与人工规则并行,确认误报/漏报边界,再进入客舱状态提示闭环。</p></div>
</div>
</section>
<section id="deployment" class="panel">
<div class="section-title"><h2>飞机厕所部署技术方案</h2><span class="badge">Edge-first · Privacy-preserving</span></div>
<div class="architecture">
<div class="arch-box"><h3>1. 传感与预处理</h3><p class="muted">毫米波雷达采集点云/轨迹;本地 MCU/边缘 SoC 做去噪、目标跟踪、窗口化、速度和区域特征提取。</p></div>
<div class="arrow"></div>
<div class="arch-box"><h3>2. 行为 LLM 推理</h3><p class="muted">Qwen3.5-9B LoRA/量化模型接收结构化窗口,输出严格 JSON:当前行为、阶段、剩余时间、序列。</p></div>
<div class="arrow"></div>
<div class="arch-box"><h3>3. 客舱系统集成</h3><p class="muted">输出占用、预计空出、异常、已使用区域。对接乘务终端、维护日志、客舱状态总线。</p></div>
</div>
<div class="grid grid-3" style="margin-top:18px">
<div class="metric-card"><b>边缘硬件</b><p class="muted">训练在地面 GPU;机载推理建议使用小型边缘 GPU/NPU 或将 LLM 部署在客舱边缘计算单元,厕所侧只传结构化特征。</p></div>
<div class="metric-card"><b>实时策略</b><p class="muted">每 0.5-1 秒更新窗口;稳定行为可降频推理,过渡态或异常候选升频推理。</p></div>
<div class="metric-card red"><b>失效保护</b><p class="muted">JSON 校验失败时回退规则模型;连续异常时只提示“需关注”,不直接做强制控制决策。</p></div>
</div>
</section>
<section id="ops" class="panel">
<div class="section-title"><h2>运行状态与文件</h2><span class="badge" id="opsBadge">Live</span></div>
<div class="grid grid-4">
<div class="metric-card"><div class="big" id="baseStructDone">0</div><b>base struct 已生成</b></div>
<div class="metric-card"><div class="big" id="baseQaDone">0</div><b>base QA 已生成</b></div>
<div class="metric-card"><div class="big" id="ftStructDone">0</div><b>finetuned struct 已生成</b></div>
<div class="metric-card"><div class="big" id="ftQaDone">0</div><b>finetuned QA 已生成</b></div>
</div>
<h3>最近日志</h3>
<pre class="terminal" id="logTail"></pre>
</section>
<div class="footer">MWave report · Boeing blue / red / black / gray visual system · generated locally</div>
</div>
<script id="report-data" type="application/json">__DATA__</script>
<script>
const DATA = JSON.parse(document.getElementById('report-data').textContent);
let distMode = 'train';
const colors = ['#0039A6','#0067B1','#C8102E','#111827','#5B6472','#8EA4C8','#B7C3D5','#7F1D1D','#334155','#64748B'];
function fmt(n){ if(n===null||n===undefined) return 'N/A'; if(typeof n==='number'){ return n>=1000 ? n.toLocaleString() : (Math.round(n*1000)/1000).toString(); } return n; }
function pct(n){ return n===null||n===undefined ? 'N/A' : Math.round(n*1000)/10 + '%'; }
function go(id){ document.getElementById(id).scrollIntoView({behavior:'smooth'}); }
function switchTab(group, mode, btn){ distMode=mode; document.querySelectorAll('.tab-btn').forEach(b=>b.classList.remove('active')); btn.classList.add('active'); drawAll(); }
function drawBar(canvas, labels, values, opts={}){
const ctx = canvas.getContext('2d'); const dpr = window.devicePixelRatio || 1;
const rect = canvas.getBoundingClientRect(); canvas.width = rect.width*dpr; canvas.height = rect.height*dpr; ctx.scale(dpr,dpr);
const w=rect.width,h=rect.height,pad=42; ctx.clearRect(0,0,w,h);
const max=Math.max(...values,1); const bw=(w-pad*2)/values.length*0.68;
ctx.strokeStyle='#E5E7EB'; ctx.lineWidth=1;
for(let i=0;i<5;i++){ const y=pad+(h-pad*2)*i/4; ctx.beginPath();ctx.moveTo(pad,y);ctx.lineTo(w-pad,y);ctx.stroke(); }
values.forEach((v,i)=>{ const x=pad+(w-pad*2)*i/values.length+(w-pad*2)/values.length*.16; const bh=(h-pad*2)*v/max; const y=h-pad-bh;
const grad=ctx.createLinearGradient(0,y,0,h-pad); grad.addColorStop(0, colors[i%colors.length]); grad.addColorStop(1,'rgba(0,57,166,.18)');
ctx.fillStyle=grad; roundRect(ctx,x,y,bw,bh,8); ctx.fill();
ctx.save(); ctx.translate(x+bw/2,h-pad+8); ctx.rotate(-Math.PI/4); ctx.fillStyle='#334155'; ctx.font='11px Bahnschrift, sans-serif'; ctx.fillText(labels[i],0,0); ctx.restore();
});
ctx.fillStyle='#111827'; ctx.font='12px Bahnschrift, sans-serif'; ctx.fillText(opts.yLabel||'',8,18);
}
function drawDonut(canvas, labels, values){
const ctx=canvas.getContext('2d'); const dpr=window.devicePixelRatio||1; const rect=canvas.getBoundingClientRect(); canvas.width=rect.width*dpr; canvas.height=rect.height*dpr; ctx.scale(dpr,dpr);
const w=rect.width,h=rect.height,cx=w*.34,cy=h*.48,r=Math.min(w,h)*.27; ctx.clearRect(0,0,w,h); const total=values.reduce((a,b)=>a+b,0)||1; let start=-Math.PI/2;
values.forEach((v,i)=>{ const end=start+Math.PI*2*v/total; ctx.beginPath(); ctx.moveTo(cx,cy); ctx.arc(cx,cy,r,start,end); ctx.closePath(); ctx.fillStyle=colors[i%colors.length]; ctx.fill(); start=end; });
ctx.beginPath(); ctx.arc(cx,cy,r*.58,0,Math.PI*2); ctx.fillStyle='#fff'; ctx.fill(); ctx.fillStyle='#111827'; ctx.font='900 24px Bahnschrift'; ctx.textAlign='center'; ctx.fillText(total.toLocaleString(),cx,cy+8);
ctx.textAlign='left'; labels.forEach((l,i)=>{ const y=50+i*28; ctx.fillStyle=colors[i%colors.length]; ctx.fillRect(w*.62,y-10,12,12); ctx.fillStyle='#334155'; ctx.font='13px Bahnschrift'; ctx.fillText(`${l}: ${values[i].toLocaleString()}`,w*.62+20,y); });
}
function roundRect(ctx,x,y,w,h,r){ ctx.beginPath(); ctx.moveTo(x+r,y); ctx.arcTo(x+w,y,x+w,y+h,r); ctx.arcTo(x+w,y+h,x,y+h,r); ctx.arcTo(x,y+h,x,y,r); ctx.arcTo(x,y,x+w,y,r); ctx.closePath(); }
function initHero(){
const s=DATA.summary||{}, p=DATA.progress||{}; const val=p.val_total||1;
document.getElementById('heroTrain').textContent=fmt(s.train_struct?.num_examples);
document.getElementById('heroVal').textContent=fmt(s.val_struct?.num_examples);
document.getElementById('heroQA').textContent=fmt(s.train_qa?.num_examples);
const done=(p.base_struct_done||0)+(p.base_qa_done||0)+(p.finetuned_struct_done||0)+(p.finetuned_qa_done||0);
document.getElementById('heroProgress').textContent=pct(done/(val*4));
document.getElementById('generatedAt').textContent=DATA.generated_at;
document.getElementById('runStage').textContent=(DATA.log_status?.stage||'unknown').toUpperCase();
document.getElementById('baseStructDone').textContent=fmt(p.base_struct_done);
document.getElementById('baseQaDone').textContent=fmt(p.base_qa_done);
document.getElementById('ftStructDone').textContent=fmt(p.finetuned_struct_done);
document.getElementById('ftQaDone').textContent=fmt(p.finetuned_qa_done);
document.getElementById('logTail').textContent=(DATA.log_status?.tail||[]).join('\n');
}
function drawData(){
const counts = DATA.summary?.[distMode+'_struct']?.label_counts || {};
const entries = Object.entries(counts).sort((a,b)=>b[1]-a[1]).slice(0,12);
drawBar(document.getElementById('labelChart'), entries.map(x=>x[0]), entries.map(x=>x[1]), {yLabel:'Top labels'});
const s=DATA.summary||{}; const vals = distMode==='train' ? [s.train_struct?.num_examples||0, s.train_qa?.num_examples||0] : [s.val_struct?.num_examples||0, s.val_qa?.num_examples||0];
drawDonut(document.getElementById('taskChart'), ['结构化预测','QA 问答'], vals);
}
function metricRows(task){
const out=[]; Object.values(DATA.metrics||{}).forEach(p=>{ if(p.task_type===task){ Object.entries(p.metrics||{}).forEach(([k,v])=>{ if(typeof v==='number') out.push({run:p.run_name, metric:k, value:v}); }); }}); return out;
}
function drawMetrics(){
const struct = metricRows('struct').filter(x=>['json_parse_rate','required_field_complete_rate','current_behavior_accuracy','next_possible_behavior_accuracy','is_transition_accuracy','stage_index_accuracy'].includes(x.metric));
const qa = metricRows('qa').filter(x=>['json_parse_rate','required_field_complete_rate','occupied_accuracy','is_abnormal_accuracy','used_areas_micro_f1'].includes(x.metric));
drawBar(document.getElementById('structMetricChart'), struct.map(x=>x.run+' '+x.metric.replaceAll('_',' ')), struct.map(x=>x.value), {yLabel:'score'});
drawBar(document.getElementById('qaMetricChart'), qa.map(x=>x.run+' '+x.metric.replaceAll('_',' ')), qa.map(x=>x.value), {yLabel:'score'});
const keyMetrics=[
['Struct behavior acc', DATA.metrics?.finetuned_struct?.metrics?.current_behavior_accuracy, ''],
['Struct schema complete', DATA.metrics?.finetuned_struct?.metrics?.required_field_complete_rate, ''],
['QA abnormal F1', DATA.metrics?.finetuned_qa?.metrics?.is_abnormal_f1, 'red'],
['QA used areas F1', DATA.metrics?.finetuned_qa?.metrics?.used_areas_micro_f1, '']
];
document.getElementById('metricCards').innerHTML=keyMetrics.map(([name,val,klass])=>`<div class="metric-card ${klass}"><div class="big">${typeof val==='number'?pct(val):'N/A'}</div><b>${name}</b></div>`).join('');
const all=[]; Object.values(DATA.metrics||{}).forEach(p=>{ Object.entries(p.metrics||{}).forEach(([k,v])=>all.push([p.run_name,p.task_type,k,v])); });
document.getElementById('metricTable').innerHTML='<tr><th>Run</th><th>Task</th><th>Metric</th><th>Value</th></tr>'+all.map(r=>`<tr><td>${r[0]}</td><td>${r[1]}</td><td>${r[2]}</td><td>${fmt(r[3])}</td></tr>`).join('');
}
function drawAll(){ initHero(); drawData(); drawMetrics(); }
window.addEventListener('resize', drawAll);
drawAll();
</script>
</body>
</html>
"""
def main():
    """Build the report data and write the standalone HTML page to ``OUTPUT``.

    Creates ``REPORT_DIR`` when needed, serialises the result of
    ``build_data()`` to JSON, substitutes it for the ``__DATA__`` placeholder
    of ``HTML_TEMPLATE`` and prints the path that was written.
    """
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    data = build_data()
    payload = json.dumps(data, ensure_ascii=False)
    # The JSON lands inside a <script> element, whose content ends at the first
    # "</script". Log tails are arbitrary text, so neutralise every "<": in JSON
    # it can only occur inside strings, where \u003c decodes back to "<".
    payload = payload.replace("<", "\\u003c")
    html = HTML_TEMPLATE.replace("__DATA__", payload)
    OUTPUT.write_text(html, encoding="utf-8")
    print(f"wrote {OUTPUT}")


if __name__ == "__main__":
    main()