| |
| import json |
| from datetime import datetime |
| from pathlib import Path |
|
|
|
|
# Project root: the directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]
# Folder that receives the generated report.
REPORT_DIR = ROOT.joinpath("reports")
# Full path of the generated HTML report.
OUTPUT = REPORT_DIR.joinpath("index.html")
|
|
|
|
def read_json(path, default=None):
    """Load a UTF-8 encoded JSON file, falling back to *default* on failure.

    Args:
        path: Location of the file; anything accepted by ``pathlib.Path``.
        default: Value returned when the file cannot be read or decoded.

    Returns:
        The decoded JSON value, or *default* when the file is missing or
        unreadable, is not valid UTF-8, or does not contain valid JSON.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
        return json.loads(text)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError is the common base
        # of UnicodeDecodeError and json.JSONDecodeError. Any other exception
        # is a programming error and is allowed to surface.
        return default
|
|
|
|
def count_lines(path):
    """Count the lines of a UTF-8 text file.

    Blank lines are counted too. The file is streamed, so it is never held
    in memory as a whole.

    Args:
        path: Location of the file; anything accepted by ``pathlib.Path``.

    Returns:
        The number of lines, or 0 when the file is missing, unreadable or
        not valid UTF-8.
    """
    try:
        with Path(path).open(encoding="utf-8") as f:
            return sum(1 for _ in f)
    except (OSError, UnicodeDecodeError):
        # Only I/O and decoding failures mean "nothing to count"; other
        # exceptions indicate a programming error and are not swallowed.
        return 0
|
|
|
|
def parse_log_status():
    """Derive a coarse pipeline stage from the output files and the latest log.

    The log inspected is ``outputs/logs/training_monitor.log`` when it exists,
    otherwise the most recently modified ``*.log`` file in ``outputs/logs``.
    Only the last 120 lines of that log are examined.

    Returns:
        A dict with three keys:

        * ``log_file``: log path relative to ``ROOT``, or ``None`` when no
          log exists.
        * ``tail``: the last 20 log lines (empty when no log exists).
        * ``stage``: one of ``"not_started"``, ``"completed"``,
          ``"needs_attention"``, ``"training"``, ``"finetuned_eval"`` or
          ``"running"``.
    """
    log_dir = ROOT / "outputs/logs"
    monitor = log_dir / "training_monitor.log"
    final_files = [
        ROOT / "outputs/metrics/finetuned_struct_metrics.json",
        ROOT / "outputs/metrics/finetuned_qa_metrics.json",
        ROOT / "outputs/figures/metric_comparison.csv",
    ]
    if monitor.exists():
        log = monitor
    else:
        logs = sorted(log_dir.glob("*.log"), key=lambda p: p.stat().st_mtime, reverse=True)
        if not logs:
            return {"log_file": None, "tail": [], "stage": "not_started"}
        log = logs[0]

    tail = log.read_text(encoding="utf-8", errors="ignore").splitlines()[-120:]
    joined = "\n".join(tail)

    if all(p.exists() for p in final_files):
        stage = "completed"
    elif "Traceback" in joined:
        # A traceback raised from the training script contains the path
        # "scripts/train_qlora.py" in its frames, so this check must come
        # before the stage markers or a crashed run is reported as "training".
        stage = "needs_attention"
    elif "scripts/train_qlora.py" in joined:
        stage = "training"
    elif "--run-name finetuned" in joined:
        stage = "finetuned_eval"
    elif "Error" in joined:
        # "Error" alone is ambiguous (it may be part of a benign message), so
        # it keeps its original low priority behind the stage markers.
        stage = "needs_attention"
    else:
        stage = "running"
    return {"log_file": str(log.relative_to(ROOT)), "tail": tail[-20:], "stage": stage}
|
|
|
|
def _load_prediction_rows(path, limit=None):
    """Read JSON-object rows from a JSONL file, skipping unusable lines.

    Blank lines, lines that are not valid JSON (for example the partially
    written last line of a file that is still being appended to) and rows
    that are not JSON objects are skipped. Reading stops once *limit* rows
    have been collected (a falsy *limit* means no limit). If the file cannot
    be opened or stops being decodable, the rows read so far are returned.
    """
    rows = []
    try:
        with Path(path).open(encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    row = json.loads(line)
                except ValueError:
                    continue
                if not isinstance(row, dict):
                    continue
                rows.append(row)
                if limit and len(rows) >= limit:
                    break
    except (OSError, UnicodeDecodeError):
        return rows
    return rows


def metrics_from_prediction_file(path, limit=None):
    """Compute structured-prediction metrics from a predictions JSONL file.

    Each usable row is a JSON object. Its ``"prediction"`` value counts as
    parsed when it is a dict, and its ``"target"`` value (treated as empty
    when missing or not a dict) supplies the reference values.

    Args:
        path: Location of the JSONL file.
        limit: Optional maximum number of rows to read; falsy means all.

    Returns:
        ``{}`` when no usable row could be read, otherwise a dict with:

        * ``num_examples``: number of rows read.
        * ``json_parse_rate``: share of rows whose prediction is a dict.
        * ``required_field_complete_rate``: share of rows whose prediction
          contains every required field.
        * ``*_accuracy``: exact-match rate over parsed rows whose prediction
          contains the field (0 when there are none).
        * ``full_remaining_seconds_mae``: mean absolute error over rows where
          both values are numeric (``None`` when there are none).
    """
    required = [
        "current_behavior",
        "is_transition",
        "elapsed_seconds_in_current_behavior",
        "estimated_remaining_seconds",
        "full_remaining_seconds",
        "expected_end_time",
        "next_possible_behavior",
        "stage_index",
        "total_stages",
        "sequence_so_far",
    ]
    rows = _load_prediction_rows(path, limit)
    if not rows:
        return {}
    parsed = [r for r in rows if isinstance(r.get("prediction"), dict)]

    def target_of(row):
        # A missing or null target must not crash scoring; it simply never
        # matches a predicted value.
        target = row.get("target")
        return target if isinstance(target, dict) else {}

    def acc(field):
        pairs = [
            (target_of(r).get(field), r["prediction"].get(field))
            for r in parsed
            if field in r["prediction"]
        ]
        return sum(a == b for a, b in pairs) / len(pairs) if pairs else 0

    def mae(field):
        errors = []
        for r in parsed:
            t = target_of(r).get(field)
            p = r["prediction"].get(field)
            if isinstance(t, (int, float)) and isinstance(p, (int, float)):
                errors.append(abs(float(t) - float(p)))
        return sum(errors) / len(errors) if errors else None

    return {
        "num_examples": len(rows),
        "json_parse_rate": len(parsed) / len(rows),
        "required_field_complete_rate": sum(all(f in r["prediction"] for f in required) for r in parsed) / len(rows),
        "current_behavior_accuracy": acc("current_behavior"),
        "next_possible_behavior_accuracy": acc("next_possible_behavior"),
        "is_transition_accuracy": acc("is_transition"),
        "stage_index_accuracy": acc("stage_index"),
        "full_remaining_seconds_mae": mae("full_remaining_seconds"),
    }
|
|
|
|
def load_metrics():
    """Collect metric payloads for the report.

    Every ``*.json`` file in ``outputs/metrics`` that holds a non-empty JSON
    object is stored under the key ``"<run_name>_<task_type>"`` taken from
    that object. Files are visited in sorted order, so a later file with the
    same key replaces an earlier one. Metrics computed from the base struct
    predictions file, when any can be computed, are added under
    ``"base_struct_partial"``.

    Returns:
        A dict mapping metric keys to payload dicts.
    """
    metric_dir = ROOT / "outputs/metrics"
    metrics = {}
    for path in sorted(metric_dir.glob("*.json")):
        payload = read_json(path)
        # Skip unreadable/empty files and files whose top-level JSON value is
        # not an object (e.g. a list), which would otherwise fail on .get().
        if not isinstance(payload, dict) or not payload:
            continue
        key = f"{payload.get('run_name')}_{payload.get('task_type')}"
        metrics[key] = payload
    partial = metrics_from_prediction_file(ROOT / "outputs/predictions/base_struct_predictions.jsonl")
    if partial:
        metrics["base_struct_partial"] = {
            "run_name": "base_partial",
            "task_type": "struct",
            "input_file": "outputs/predictions/base_struct_predictions.jsonl",
            "metrics": partial,
        }
    return metrics
|
|
|
|
def build_data():
    """Assemble the dictionary that is embedded into the HTML report.

    Returns:
        A dict with the generation timestamp, the processed-data summary,
        the loaded metrics, per-file prediction line counts plus the
        validation total, the log status, and a map of relevant file paths.
    """
    summary = read_json(ROOT / "data/processed/summary.json", {})

    # (progress key, predictions file) pairs; each count is the number of
    # lines currently present in that file.
    tracked = (
        ("base_struct_done", "outputs/predictions/base_struct_predictions.jsonl"),
        ("base_qa_done", "outputs/predictions/base_qa_predictions.jsonl"),
        ("finetuned_struct_done", "outputs/predictions/finetuned_struct_predictions.jsonl"),
        ("finetuned_qa_done", "outputs/predictions/finetuned_qa_predictions.jsonl"),
    )
    progress = {key: count_lines(ROOT / rel_path) for key, rel_path in tracked}
    val_struct = summary.get("val_struct", {})
    # Falls back to 4030 when the summary does not provide the count.
    progress["val_total"] = val_struct.get("num_examples", 4030)

    file_map = {
        "processed_summary": "data/processed/summary.json",
        "base_predictions": "outputs/predictions/base_struct_predictions.jsonl",
        "adapter_dir": "outputs/qwen35_9b_lora",
        "figures_dir": "outputs/figures",
    }
    return {
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "summary": summary,
        "metrics": load_metrics(),
        "progress": progress,
        "log_status": parse_log_status(),
        "files": file_map,
    }
|
|
|
|
| HTML_TEMPLATE = r"""<!doctype html> |
| <html lang="zh-CN"> |
| <head> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <title>MWave Aircraft Lavatory Radar LLM Workflow</title> |
| <style> |
| :root{ |
| --boeing:#0039A6; |
| --boeing-2:#0067B1; |
| --red:#C8102E; |
| --black:#080A0F; |
| --ink:#111827; |
| --steel:#5B6472; |
| --mist:#E7ECF3; |
| --panel:#F7F9FC; |
| --white:#FFFFFF; |
| --line:rgba(8,10,15,.14); |
| --shadow:0 24px 80px rgba(0,23,84,.16); |
| } |
| *{box-sizing:border-box} |
| html{scroll-behavior:smooth} |
| body{ |
| margin:0; |
| color:var(--ink); |
| font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif; |
| background: |
| radial-gradient(circle at 12% 8%, rgba(0,103,177,.22), transparent 30%), |
| radial-gradient(circle at 82% 18%, rgba(200,16,46,.16), transparent 26%), |
| linear-gradient(135deg,#f3f6fb 0%,#ffffff 42%,#e9eef6 100%); |
| } |
| .shell{max-width:1440px;margin:0 auto;padding:28px} |
| .hero{ |
| position:relative; |
| min-height:520px; |
| border-radius:34px; |
| overflow:hidden; |
| color:white; |
| background: |
| linear-gradient(115deg,rgba(3,11,29,.96),rgba(0,57,166,.92) 48%,rgba(0,103,177,.72)), |
| repeating-linear-gradient(90deg,transparent 0 72px,rgba(255,255,255,.06) 72px 73px); |
| box-shadow:var(--shadow); |
| padding:42px; |
| } |
| .hero:after{ |
| content:""; |
| position:absolute; |
| right:-160px;top:-110px; |
| width:620px;height:620px; |
| border:1px solid rgba(255,255,255,.18); |
| border-radius:50%; |
| box-shadow:inset 0 0 0 72px rgba(255,255,255,.04), inset 0 0 0 140px rgba(255,255,255,.035); |
| } |
| .kicker{display:inline-flex;gap:10px;align-items:center;letter-spacing:.16em;text-transform:uppercase;font-size:12px;color:#dbeafe} |
| .pulse{width:10px;height:10px;background:var(--red);border-radius:50%;box-shadow:0 0 0 10px rgba(200,16,46,.18)} |
| h1{font-size:clamp(42px,7vw,96px);line-height:.9;margin:28px 0 20px;letter-spacing:-.06em;max-width:980px} |
| .hero p{font-size:20px;line-height:1.65;max-width:920px;color:#e8eef8;margin:0} |
| .hero-grid{position:relative;z-index:1;display:grid;grid-template-columns:1.2fr .8fr;gap:24px;margin-top:38px} |
| .hero-card{background:rgba(255,255,255,.1);border:1px solid rgba(255,255,255,.2);border-radius:24px;padding:22px;backdrop-filter:blur(18px)} |
| .stat{font-size:38px;font-weight:800;letter-spacing:-.04em} |
| .stat-label{color:#cbd5e1;font-size:13px;margin-top:6px} |
| nav{position:sticky;top:0;z-index:10;margin:18px 0;padding:12px;border-radius:22px;background:rgba(255,255,255,.82);backdrop-filter:blur(18px);border:1px solid var(--line);display:flex;gap:8px;flex-wrap:wrap} |
| nav button,.tab-btn{ |
| border:0;border-radius:999px;padding:12px 16px;background:#eef3fb;color:#1f2937;font-weight:800;cursor:pointer;transition:.18s ease; |
| } |
| nav button:hover,.tab-btn:hover,.tab-btn.active{background:var(--boeing);color:white;transform:translateY(-1px)} |
| section{margin-top:24px} |
| .panel{background:rgba(255,255,255,.9);border:1px solid var(--line);border-radius:28px;padding:28px;box-shadow:0 16px 44px rgba(17,24,39,.06)} |
| .section-title{display:flex;align-items:end;justify-content:space-between;gap:18px;margin-bottom:20px} |
| h2{font-size:34px;margin:0;letter-spacing:-.04em;color:var(--black)} |
| .muted{color:var(--steel);line-height:1.65} |
| .grid{display:grid;gap:18px} |
| .grid-4{grid-template-columns:repeat(4,minmax(0,1fr))} |
| .grid-3{grid-template-columns:repeat(3,minmax(0,1fr))} |
| .grid-2{grid-template-columns:repeat(2,minmax(0,1fr))} |
| .metric-card{border-radius:22px;padding:20px;background:linear-gradient(180deg,#fff,#f5f8fd);border:1px solid var(--line);position:relative;overflow:hidden} |
| .metric-card:before{content:"";position:absolute;left:0;top:0;width:5px;height:100%;background:var(--boeing)} |
| .metric-card.red:before{background:var(--red)} |
| .big{font-size:32px;font-weight:900;letter-spacing:-.04em} |
| .timeline{display:grid;grid-template-columns:repeat(6,1fr);gap:12px;margin-top:18px} |
| .step{position:relative;border-radius:22px;padding:18px;background:#f8fafc;border:1px solid var(--line);min-height:160px} |
| .step b{display:block;color:var(--boeing);font-size:18px;margin-bottom:8px} |
| .step .num{width:34px;height:34px;border-radius:50%;display:grid;place-items:center;background:var(--black);color:white;font-weight:900;margin-bottom:16px} |
| .step.active{background:linear-gradient(180deg,#fff,#eaf3ff);box-shadow:0 12px 36px rgba(0,57,166,.14)} |
| .chart-card{background:#fff;border:1px solid var(--line);border-radius:24px;padding:20px;min-height:360px} |
| canvas{width:100%;height:300px} |
| .tabs{display:flex;gap:8px;flex-wrap:wrap;margin-bottom:14px} |
| .tab{display:none}.tab.active{display:block} |
| table{width:100%;border-collapse:collapse;background:#fff;border-radius:18px;overflow:hidden} |
| th,td{text-align:left;padding:13px 14px;border-bottom:1px solid #e5e7eb} |
| th{background:#0b1220;color:#fff;font-size:13px} |
| tr:hover td{background:#f8fbff} |
| .badge{display:inline-flex;align-items:center;gap:8px;border-radius:999px;padding:8px 12px;background:#eef3fb;color:var(--boeing);font-weight:900;font-size:12px} |
| .badge.red{background:#fff0f2;color:var(--red)} |
| .architecture{display:grid;grid-template-columns:1fr 90px 1fr 90px 1fr;gap:16px;align-items:center} |
| .arch-box{padding:22px;border-radius:24px;border:1px solid var(--line);background:#fff;min-height:180px} |
| .arrow{height:2px;background:linear-gradient(90deg,var(--boeing),var(--red));position:relative} |
| .arrow:after{content:"";position:absolute;right:-2px;top:-5px;border-left:12px solid var(--red);border-top:6px solid transparent;border-bottom:6px solid transparent} |
| .risk{display:grid;grid-template-columns:repeat(2,1fr);gap:12px} |
| .risk div{border-radius:18px;padding:16px;background:#f8fafc;border:1px solid var(--line)} |
| .risk b{color:var(--red)} |
| .terminal{background:#05070c;color:#d1fae5;border-radius:22px;padding:18px;font-family:"Cascadia Mono","SFMono-Regular",monospace;overflow:auto;max-height:330px;font-size:13px} |
| .footer{padding:28px;color:#64748b;text-align:center} |
| @media(max-width:980px){ |
| .hero-grid,.grid-4,.grid-3,.grid-2,.timeline,.architecture,.risk{grid-template-columns:1fr} |
| .arrow{height:42px;width:2px;margin:auto;background:linear-gradient(180deg,var(--boeing),var(--red))} |
| .arrow:after{right:-5px;top:auto;bottom:-2px;border-top:12px solid var(--red);border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:0} |
| .shell{padding:14px}.hero{padding:26px} |
| } |
| </style> |
| </head> |
| <body> |
| <div class="shell"> |
| <header class="hero"> |
| <div class="kicker"><span class="pulse"></span> MWave Radar LLM · Aircraft Lavatory Intelligence</div> |
| <h1>毫米波雷达时序行为模型工作流报告</h1> |
| <p>从飞机厕所内毫米波雷达轨迹窗口和中间层表征出发,微调 Qwen3.5-9B,完成结构化行为预测、序列重建、剩余时间估计和 QA 状态问答,并支持部署到航空场景的边缘推理方案。</p> |
| <div class="hero-grid"> |
| <div class="hero-card"> |
| <div class="grid grid-4"> |
| <div><div class="stat" id="heroTrain">-</div><div class="stat-label">训练结构样本</div></div> |
| <div><div class="stat" id="heroVal">-</div><div class="stat-label">验证结构样本</div></div> |
| <div><div class="stat" id="heroQA">-</div><div class="stat-label">QA 训练样本</div></div> |
| <div><div class="stat" id="heroProgress">-</div><div class="stat-label">整体评估进度</div></div> |
| </div> |
| </div> |
| <div class="hero-card"> |
| <span class="badge red" id="runStage">RUNNING</span> |
| <p style="margin-top:14px;font-size:15px">报告生成时间:<b id="generatedAt"></b><br>模型:Qwen/Qwen3.5-9B + 4-bit QLoRA<br>输出:JSON schema + QA schema + charts</p> |
| </div> |
| </div> |
| </header> |
| |
| <nav> |
| <button onclick="go('workflow')">Workflow</button> |
| <button onclick="go('data')">数据画像</button> |
| <button onclick="go('results')">评估结果</button> |
| <button onclick="go('analysis')">发现与风险</button> |
| <button onclick="go('deployment')">机载部署方案</button> |
| <button onclick="go('ops')">运行状态</button> |
| </nav> |
| |
| <section id="workflow" class="panel"> |
| <div class="section-title"><h2>端到端 Workflow</h2><span class="badge">Train · Evaluate · Deploy</span></div> |
| <div class="timeline"> |
| <div class="step active"><div class="num">1</div><b>数据解析</b><span class="muted">读取 train/val JSONL,解析 chat 格式、轨迹窗口、背景知识和 assistant JSON。</span></div> |
| <div class="step active"><div class="num">2</div><b>标签规范</b><span class="muted">统一 `反复折返 -> 折返`,固定结构化输出字段,生成 QA 目标。</span></div> |
| <div class="step active"><div class="num">3</div><b>基线评估</b><span class="muted">Qwen3.5-9B base 先跑 val,保存微调前预测和指标。</span></div> |
| <div class="step active"><div class="num">4</div><b>QLoRA 微调</b><span class="muted">结构化预测与 QA 混合 SFT,4-bit NF4,LoRA 训练约 29M 参数。</span></div> |
| <div class="step active"><div class="num">5</div><b>微调后评估</b><span class="muted">同一套 val、同一套指标,输出 finetuned 预测、指标和图表。</span></div> |
| <div class="step active"><div class="num">6</div><b>机载部署</b><span class="muted">毫米波雷达边缘预处理 + 本地 LLM 推理 + 客舱系统状态输出。</span></div> |
| </div> |
| </section> |
| |
| <section id="data" class="panel"> |
| <div class="section-title"><h2>数据画像</h2><div class="tabs"><button class="tab-btn active" onclick="switchTab('dist','train',this)">Train</button><button class="tab-btn" onclick="switchTab('dist','val',this)">Val</button></div></div> |
| <div class="grid grid-2"> |
| <div class="chart-card"> |
| <h3>行为标签分布</h3> |
| <canvas id="labelChart"></canvas> |
| </div> |
| <div class="chart-card"> |
| <h3>任务样本构成</h3> |
| <canvas id="taskChart"></canvas> |
| </div> |
| </div> |
| </section> |
| |
| <section id="results" class="panel"> |
| <div class="section-title"><h2>可视化结果</h2><span class="badge red">Base vs Finetuned</span></div> |
| <div class="grid grid-4" id="metricCards"></div> |
| <div class="grid grid-2" style="margin-top:18px"> |
| <div class="chart-card"><h3>结构化指标</h3><canvas id="structMetricChart"></canvas></div> |
| <div class="chart-card"><h3>QA 指标</h3><canvas id="qaMetricChart"></canvas></div> |
| </div> |
| <div style="margin-top:18px"> |
| <table id="metricTable"></table> |
| </div> |
| </section> |
| |
| <section id="analysis" class="panel"> |
| <div class="section-title"><h2>总结与发现</h2><span class="badge">Current Findings</span></div> |
| <div class="grid grid-3"> |
| <div class="metric-card"><div class="big">1</div><b>base 模型不等于任务模型</b><p class="muted">未微调 Qwen3.5-9B 能理解中文指令,但结构化 schema 稳定性不足,尤其容易输出解释性文本或缺字段。</p></div> |
| <div class="metric-card red"><div class="big">2</div><b>QA 必须独立评估</b><p class="muted">QA 不是简单复述,需要从 `full_remaining_seconds`、`sequence_so_far` 和异常规则推导,占用、空出时间、区域使用和异常应单独打分。</p></div> |
| <div class="metric-card"><div class="big">3</div><b>短行为是关键风险</b><p class="muted">进入、门锁、坐下、起身、折返、犹豫等短时行为占比较低,但对流程阶段和剩余时间预测影响很大。</p></div> |
| </div> |
| <div class="risk" style="margin-top:18px"> |
| <div><b>数据风险</b><p class="muted">训练集中坐用马桶占比最高,类别不平衡明显;若最终少数类 F1 低,需要重采样或 loss 权重。</p></div> |
| <div><b>工程风险</b><p class="muted">长 prompt 和 9B 推理导致全量评估耗时较长;部署时应缓存背景知识、压缩轨迹窗口并使用约束解码。</p></div> |
| <div><b>安全边界</b><p class="muted">系统只输出状态和行为,不输出身份识别,不存储原始可逆人体点云,降低隐私风险。</p></div> |
| <div><b>上线策略</b><p class="muted">先 shadow mode 与人工规则并行,确认误报/漏报边界,再进入客舱状态提示闭环。</p></div> |
| </div> |
| </section> |
| |
| <section id="deployment" class="panel"> |
| <div class="section-title"><h2>飞机厕所部署技术方案</h2><span class="badge">Edge-first · Privacy-preserving</span></div> |
| <div class="architecture"> |
| <div class="arch-box"><h3>1. 传感与预处理</h3><p class="muted">毫米波雷达采集点云/轨迹;本地 MCU/边缘 SoC 做去噪、目标跟踪、窗口化、速度和区域特征提取。</p></div> |
| <div class="arrow"></div> |
| <div class="arch-box"><h3>2. 行为 LLM 推理</h3><p class="muted">Qwen3.5-9B LoRA/量化模型接收结构化窗口,输出严格 JSON:当前行为、阶段、剩余时间、序列。</p></div> |
| <div class="arrow"></div> |
| <div class="arch-box"><h3>3. 客舱系统集成</h3><p class="muted">输出占用、预计空出、异常、已使用区域。对接乘务终端、维护日志、客舱状态总线。</p></div> |
| </div> |
| <div class="grid grid-3" style="margin-top:18px"> |
| <div class="metric-card"><b>边缘硬件</b><p class="muted">训练在地面 GPU;机载推理建议使用小型边缘 GPU/NPU 或将 LLM 部署在客舱边缘计算单元,厕所侧只传结构化特征。</p></div> |
| <div class="metric-card"><b>实时策略</b><p class="muted">每 0.5-1 秒更新窗口;稳定行为可降频推理,过渡态或异常候选升频推理。</p></div> |
| <div class="metric-card red"><b>失效保护</b><p class="muted">JSON 校验失败时回退规则模型;连续异常时只提示“需关注”,不直接做强制控制决策。</p></div> |
| </div> |
| </section> |
| |
| <section id="ops" class="panel"> |
| <div class="section-title"><h2>运行状态与文件</h2><span class="badge" id="opsBadge">Live</span></div> |
| <div class="grid grid-4"> |
| <div class="metric-card"><div class="big" id="baseStructDone">0</div><b>base struct 已生成</b></div> |
| <div class="metric-card"><div class="big" id="baseQaDone">0</div><b>base QA 已生成</b></div> |
| <div class="metric-card"><div class="big" id="ftStructDone">0</div><b>finetuned struct 已生成</b></div> |
| <div class="metric-card"><div class="big" id="ftQaDone">0</div><b>finetuned QA 已生成</b></div> |
| </div> |
| <h3>最近日志</h3> |
| <pre class="terminal" id="logTail"></pre> |
| </section> |
| <div class="footer">MWave report · Boeing blue / red / black / gray visual system · generated locally</div> |
| </div> |
| |
| <script id="report-data" type="application/json">__DATA__</script> |
| <script> |
| const DATA = JSON.parse(document.getElementById('report-data').textContent); |
| let distMode = 'train'; |
| const colors = ['#0039A6','#0067B1','#C8102E','#111827','#5B6472','#8EA4C8','#B7C3D5','#7F1D1D','#334155','#64748B']; |
| |
| function fmt(n){ if(n===null||n===undefined) return 'N/A'; if(typeof n==='number'){ return n>=1000 ? n.toLocaleString() : (Math.round(n*1000)/1000).toString(); } return n; } |
| function pct(n){ return n===null||n===undefined ? 'N/A' : Math.round(n*1000)/10 + '%'; } |
| function go(id){ document.getElementById(id).scrollIntoView({behavior:'smooth'}); } |
| function switchTab(group, mode, btn){ distMode=mode; document.querySelectorAll('.tab-btn').forEach(b=>b.classList.remove('active')); btn.classList.add('active'); drawAll(); } |
| |
| function drawBar(canvas, labels, values, opts={}){ |
| const ctx = canvas.getContext('2d'); const dpr = window.devicePixelRatio || 1; |
| const rect = canvas.getBoundingClientRect(); canvas.width = rect.width*dpr; canvas.height = rect.height*dpr; ctx.scale(dpr,dpr); |
| const w=rect.width,h=rect.height,pad=42; ctx.clearRect(0,0,w,h); |
| const max=Math.max(...values,1); const bw=(w-pad*2)/values.length*0.68; |
| ctx.strokeStyle='#E5E7EB'; ctx.lineWidth=1; |
| for(let i=0;i<5;i++){ const y=pad+(h-pad*2)*i/4; ctx.beginPath();ctx.moveTo(pad,y);ctx.lineTo(w-pad,y);ctx.stroke(); } |
| values.forEach((v,i)=>{ const x=pad+(w-pad*2)*i/values.length+(w-pad*2)/values.length*.16; const bh=(h-pad*2)*v/max; const y=h-pad-bh; |
| const grad=ctx.createLinearGradient(0,y,0,h-pad); grad.addColorStop(0, colors[i%colors.length]); grad.addColorStop(1,'rgba(0,57,166,.18)'); |
| ctx.fillStyle=grad; roundRect(ctx,x,y,bw,bh,8); ctx.fill(); |
| ctx.save(); ctx.translate(x+bw/2,h-pad+8); ctx.rotate(-Math.PI/4); ctx.fillStyle='#334155'; ctx.font='11px Bahnschrift, sans-serif'; ctx.fillText(labels[i],0,0); ctx.restore(); |
| }); |
| ctx.fillStyle='#111827'; ctx.font='12px Bahnschrift, sans-serif'; ctx.fillText(opts.yLabel||'',8,18); |
| } |
| function drawDonut(canvas, labels, values){ |
| const ctx=canvas.getContext('2d'); const dpr=window.devicePixelRatio||1; const rect=canvas.getBoundingClientRect(); canvas.width=rect.width*dpr; canvas.height=rect.height*dpr; ctx.scale(dpr,dpr); |
| const w=rect.width,h=rect.height,cx=w*.34,cy=h*.48,r=Math.min(w,h)*.27; ctx.clearRect(0,0,w,h); const total=values.reduce((a,b)=>a+b,0)||1; let start=-Math.PI/2; |
| values.forEach((v,i)=>{ const end=start+Math.PI*2*v/total; ctx.beginPath(); ctx.moveTo(cx,cy); ctx.arc(cx,cy,r,start,end); ctx.closePath(); ctx.fillStyle=colors[i%colors.length]; ctx.fill(); start=end; }); |
| ctx.beginPath(); ctx.arc(cx,cy,r*.58,0,Math.PI*2); ctx.fillStyle='#fff'; ctx.fill(); ctx.fillStyle='#111827'; ctx.font='900 24px Bahnschrift'; ctx.textAlign='center'; ctx.fillText(total.toLocaleString(),cx,cy+8); |
| ctx.textAlign='left'; labels.forEach((l,i)=>{ const y=50+i*28; ctx.fillStyle=colors[i%colors.length]; ctx.fillRect(w*.62,y-10,12,12); ctx.fillStyle='#334155'; ctx.font='13px Bahnschrift'; ctx.fillText(`${l}: ${values[i].toLocaleString()}`,w*.62+20,y); }); |
| } |
| function roundRect(ctx,x,y,w,h,r){ ctx.beginPath(); ctx.moveTo(x+r,y); ctx.arcTo(x+w,y,x+w,y+h,r); ctx.arcTo(x+w,y+h,x,y+h,r); ctx.arcTo(x,y+h,x,y,r); ctx.arcTo(x,y,x+w,y,r); ctx.closePath(); } |
| |
| function initHero(){ |
| const s=DATA.summary||{}, p=DATA.progress||{}; const val=p.val_total||1; |
| document.getElementById('heroTrain').textContent=fmt(s.train_struct?.num_examples); |
| document.getElementById('heroVal').textContent=fmt(s.val_struct?.num_examples); |
| document.getElementById('heroQA').textContent=fmt(s.train_qa?.num_examples); |
| const done=(p.base_struct_done||0)+(p.base_qa_done||0)+(p.finetuned_struct_done||0)+(p.finetuned_qa_done||0); |
| document.getElementById('heroProgress').textContent=pct(done/(val*4)); |
| document.getElementById('generatedAt').textContent=DATA.generated_at; |
| document.getElementById('runStage').textContent=(DATA.log_status?.stage||'unknown').toUpperCase(); |
| document.getElementById('baseStructDone').textContent=fmt(p.base_struct_done); |
| document.getElementById('baseQaDone').textContent=fmt(p.base_qa_done); |
| document.getElementById('ftStructDone').textContent=fmt(p.finetuned_struct_done); |
| document.getElementById('ftQaDone').textContent=fmt(p.finetuned_qa_done); |
| document.getElementById('logTail').textContent=(DATA.log_status?.tail||[]).join('\n'); |
| } |
| function drawData(){ |
| const counts = DATA.summary?.[distMode+'_struct']?.label_counts || {}; |
| const entries = Object.entries(counts).sort((a,b)=>b[1]-a[1]).slice(0,12); |
| drawBar(document.getElementById('labelChart'), entries.map(x=>x[0]), entries.map(x=>x[1]), {yLabel:'Top labels'}); |
| const s=DATA.summary||{}; const vals = distMode==='train' ? [s.train_struct?.num_examples||0, s.train_qa?.num_examples||0] : [s.val_struct?.num_examples||0, s.val_qa?.num_examples||0]; |
| drawDonut(document.getElementById('taskChart'), ['结构化预测','QA 问答'], vals); |
| } |
| function metricRows(task){ |
| const out=[]; Object.values(DATA.metrics||{}).forEach(p=>{ if(p.task_type===task){ Object.entries(p.metrics||{}).forEach(([k,v])=>{ if(typeof v==='number') out.push({run:p.run_name, metric:k, value:v}); }); }}); return out; |
| } |
// Draws the struct/QA score charts, the headline metric cards and the full metric table from DATA.metrics.
function drawMetrics(){
  const structKeys=['json_parse_rate','required_field_complete_rate','current_behavior_accuracy','next_possible_behavior_accuracy','is_transition_accuracy','stage_index_accuracy'];
  const qaKeys=['json_parse_rate','required_field_complete_rate','occupied_accuracy','is_abnormal_accuracy','used_areas_micro_f1'];
  const struct = metricRows('struct').filter(x=>structKeys.includes(x.metric));
  const qa = metricRows('qa').filter(x=>qaKeys.includes(x.metric));
  // drawBar renders each label with a single fillText call, so run and metric are joined with a
  // visible separator; the escaped newline used before was drawn literally as a backslash followed by "n".
  const barLabel = x => x.run+' · '+x.metric.replaceAll('_',' ');
  drawBar(document.getElementById('structMetricChart'), struct.map(barLabel), struct.map(x=>x.value), {yLabel:'score'});
  drawBar(document.getElementById('qaMetricChart'), qa.map(barLabel), qa.map(x=>x.value), {yLabel:'score'});
  const keyMetrics=[
    ['Struct behavior acc', DATA.metrics?.finetuned_struct?.metrics?.current_behavior_accuracy, ''],
    ['Struct schema complete', DATA.metrics?.finetuned_struct?.metrics?.required_field_complete_rate, ''],
    ['QA abnormal F1', DATA.metrics?.finetuned_qa?.metrics?.is_abnormal_f1, 'red'],
    ['QA used areas F1', DATA.metrics?.finetuned_qa?.metrics?.used_areas_micro_f1, '']
  ];
  document.getElementById('metricCards').innerHTML=keyMetrics.map(([name,val,klass])=>`<div class="metric-card ${klass}"><div class="big">${typeof val==='number'?pct(val):'N/A'}</div><b>${name}</b></div>`).join('');
  const all=[]; Object.values(DATA.metrics||{}).forEach(p=>{ Object.entries(p.metrics||{}).forEach(([k,v])=>all.push([p.run_name,p.task_type,k,v])); });
  document.getElementById('metricTable').innerHTML='<tr><th>Run</th><th>Task</th><th>Metric</th><th>Value</th></tr>'+all.map(r=>`<tr><td>${r[0]}</td><td>${r[1]}</td><td>${r[2]}</td><td>${fmt(r[3])}</td></tr>`).join('');
}
| function drawAll(){ initHero(); drawData(); drawMetrics(); } |
| window.addEventListener('resize', drawAll); |
| drawAll(); |
| </script> |
| </body> |
| </html> |
| """ |
|
|
|
|
def main():
    """Build the report data and write the self-contained HTML report.

    Creates ``REPORT_DIR`` if needed, substitutes the JSON-encoded report
    data for the ``__DATA__`` placeholder of ``HTML_TEMPLATE`` and writes the
    result to ``OUTPUT`` as UTF-8.
    """
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    data = build_data()
    payload = json.dumps(data, ensure_ascii=False)
    # The JSON is embedded inside a <script> element. A "<" in any string
    # (for example a log line containing "</script>" or "<!--") could end or
    # corrupt that element. "<" can only occur inside JSON strings, where the
    # \u003c escape decodes back to the same character in JSON.parse.
    payload = payload.replace("<", "\\u003c")
    html = HTML_TEMPLATE.replace("__DATA__", payload)
    OUTPUT.write_text(html, encoding="utf-8")
    print(f"wrote {OUTPUT}")


if __name__ == "__main__":
    main()
|
|