| |
| import csv |
| import html |
| import json |
| import math |
| import shutil |
| import subprocess |
| from collections import Counter, defaultdict |
| from datetime import datetime |
| from pathlib import Path |
|
|
|
|
# --- Output layout ----------------------------------------------------------
# ROOT is the second parent of this file's resolved path; every other
# location below is derived from it.
ROOT = Path(__file__).resolve().parents[1]
OUT_ROOT = ROOT.joinpath("presentation")
PACKAGE = OUT_ROOT.joinpath("mwave_design_review_package")
ASSETS = PACKAGE.joinpath("assets")
CHARTS = ASSETS.joinpath("charts")
DATA_DIR = PACKAGE.joinpath("source_data")
METRICS_DIR = DATA_DIR.joinpath("metrics")
PRED_DIR = DATA_DIR.joinpath("predictions")
DERIVED_DIR = DATA_DIR.joinpath("derived")

# --- Palette ----------------------------------------------------------------
# Named hex colours shared by the SVG charts and the HTML stylesheet.
BOEING = "#0039A6"
BOEING_2 = "#0067B1"
RED = "#C8102E"
BLACK = "#080A0F"
INK = "#111827"
STEEL = "#5B6472"
MIST = "#E7ECF3"
LIGHT = "#F7F9FC"
# Cycle of fill colours used for bars that do not carry an explicit colour.
COLORS = [
    BOEING,
    BOEING_2,
    RED,
    BLACK,
    STEEL,
    "#8EA4C8",
    "#B7C3D5",
    "#7F1D1D",
    "#334155",
    "#64748B",
]

# --- Source artefacts -------------------------------------------------------
# The four evaluation runs; metric and prediction files follow one naming rule.
_RUN_NAMES = ("base_struct", "finetuned_struct", "base_qa", "finetuned_qa")

METRIC_FILES = {
    run: ROOT / f"outputs/metrics/{run}_metrics.json" for run in _RUN_NAMES
}
PRED_FILES = {
    run: ROOT / f"outputs/predictions/{run}_predictions.jsonl" for run in _RUN_NAMES
}
VAL_FILES = dict(
    val_struct=ROOT / "data/processed/val_struct.jsonl",
    val_qa=ROOT / "data/processed/val_qa.jsonl",
    summary=ROOT / "data/processed/summary.json",
)
|
|
|
|
def read_json(path):
    """Return the parsed JSON document stored at *path* (read as UTF-8)."""
    with Path(path).open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
def read_jsonl(path):
    """Parse a JSON Lines file at *path* (UTF-8) into a list of objects.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped.
    """
    with Path(path).open(encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        return [json.loads(entry) for entry in stripped if entry]
|
|
|
|
def safe_pct(v):
    """Format the fraction *v* as a percentage with one decimal place.

    ``None`` is rendered as the placeholder ``"N/A"``.
    """
    return "N/A" if v is None else f"{v * 100:.1f}%"
|
|
|
|
def safe_num(v, digits=3):
    """Format a number for display.

    Args:
        v: Value to format. ``None`` yields ``"N/A"``; floats are rendered
            with at most *digits* decimals and trailing zeros removed; ints
            get thousands separators; anything else is passed through ``str``.
        digits: Maximum number of decimal places for floats.

    Returns:
        The formatted string.
    """
    if v is None:
        return "N/A"
    if isinstance(v, float):
        # Very small non-zero magnitudes would round to 0, so switch to
        # scientific notation for them.
        if abs(v) < 0.001 and v != 0:
            return f"{v:.2e}"
        formatted = f"{v:.{digits}f}"
        # Only trim zeros that sit behind a decimal point. With digits=0 the
        # string has no "." and trimming would eat integer digits
        # (e.g. "100" -> "1").
        if "." in formatted:
            formatted = formatted.rstrip("0").rstrip(".")
        return formatted
    if isinstance(v, int):
        return f"{v:,}"
    return str(v)
|
|
|
|
def esc(text):
    """Stringify *text* and escape it for HTML/XML, quote characters included."""
    as_string = str(text)
    return html.escape(as_string, quote=True)
|
|
|
|
def ensure_dirs():
    """Start from a clean package directory and create its leaf folders.

    Any existing PACKAGE tree is deleted first, so previous output is lost.
    """
    package_present = PACKAGE.exists()
    if package_present:
        shutil.rmtree(PACKAGE)
    # parents=True also creates ASSETS and DATA_DIR on the way down.
    for directory in (CHARTS, METRICS_DIR, PRED_DIR, DERIVED_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|
|
|
|
def copy_source_files():
    """Copy metrics, predictions and validation data into the package.

    Also writes ``model_training_metadata.json`` into DATA_DIR, listing the
    file names found directly inside the LoRA adapter directory (an empty
    list when that directory does not exist).
    """
    for run, source in METRIC_FILES.items():
        shutil.copy2(source, METRICS_DIR.joinpath(f"{run}_metrics.json"))
    for run, source in PRED_FILES.items():
        shutil.copy2(source, PRED_DIR.joinpath(f"{run}_predictions.jsonl"))
    # Validation files keep their original file names.
    for source in VAL_FILES.values():
        shutil.copy2(source, DATA_DIR.joinpath(source.name))

    adapter_dir = ROOT / "outputs/qwen35_9b_lora"
    if adapter_dir.exists():
        adapter_files = sorted(entry.name for entry in adapter_dir.glob("*") if entry.is_file())
    else:
        adapter_files = []

    payload = json.dumps(
        {
            "base_model": "Qwen/Qwen3.5-9B",
            "adapter_dir": "outputs/qwen35_9b_lora",
            "adapter_files": adapter_files,
            "training_method": "4-bit QLoRA supervised fine-tuning",
            "train_file": "data/processed/train_mixed.jsonl",
            "validation_files": ["data/processed/val_struct.jsonl", "data/processed/val_qa.jsonl"],
        },
        ensure_ascii=False,
        indent=2,
    )
    DATA_DIR.joinpath("model_training_metadata.json").write_text(payload, encoding="utf-8")
|
|
|
|
def write_csv(path, rows, fieldnames):
    """Write *rows* (dicts) to *path* as UTF-8 CSV with a header row.

    Column order follows *fieldnames*.
    """
    target = Path(path)
    # newline="" lets the csv module control line endings itself.
    with target.open("w", encoding="utf-8", newline="") as handle:
        table = csv.DictWriter(handle, fieldnames=fieldnames)
        table.writeheader()
        for record in rows:
            table.writerow(record)
|
|
|
|
def flatten_metrics(metrics):
    """Flatten per-run metric payloads into one row per (run, metric).

    Each payload must provide ``run_name``, ``task_type`` and a ``metrics``
    mapping; the keys of the outer *metrics* dict are not used.
    """
    rows = []
    for payload in metrics.values():
        run, task = payload["run_name"], payload["task_type"]
        rows.extend(
            {"run": run, "task": task, "metric": metric, "value": value}
            for metric, value in payload["metrics"].items()
        )
    return rows
|
|
|
|
def class_stats(pred_rows, field="current_behavior"):
    """Compute per-label support, accuracy, precision, recall and F1.

    Args:
        pred_rows: Iterable of records with ``target`` and ``prediction``
            entries. Either entry may be missing or not a dict (e.g. an
            unparseable model output); it is then treated as having no label.
        field: Key whose value is the class label in both dicts.

    Returns:
        Dict keyed by every non-None target label (in sorted order), each
        mapping to ``support``, ``accuracy``, ``precision``, ``recall``, ``f1``.
        ``accuracy`` is correct/support for that label, which is the same
        ratio as ``recall``.
    """

    def label_of(payload):
        return payload.get(field) if isinstance(payload, dict) else None

    support = Counter()    # records whose target is the label
    hits = Counter()       # ... and whose prediction matches it (true positives)
    predicted = Counter()  # records whose prediction is the label
    for rec in pred_rows:
        target_label = label_of(rec.get("target"))
        pred_label = label_of(rec.get("prediction"))
        if target_label is not None:
            support[target_label] += 1
            if pred_label == target_label:
                hits[target_label] += 1
        try:
            predicted[pred_label] += 1
        except TypeError:
            # Unhashable prediction (list/dict): it cannot equal any target
            # label, so it contributes to no label's predicted count.
            pass

    out = {}
    for label in sorted(support):
        tp = hits[label]
        fp = predicted[label] - tp
        fn = support[label] - tp
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        acc = tp / support[label] if support[label] else 0.0
        out[label] = {"support": support[label], "accuracy": acc, "precision": precision, "recall": recall, "f1": f1}
    return out
|
|
|
|
def confusion_rows(pred_rows, field="current_behavior", top_n=18):
    """Build a confusion matrix over the *top_n* most frequent target labels.

    Args:
        pred_rows: Iterable of records with ``target`` and ``prediction``
            entries; a missing or non-dict entry counts as having no label.
        field: Key whose value is the class label in both dicts.
        top_n: Number of most frequent non-None target labels to keep.

    Returns:
        ``(labels, matrix)``. ``labels`` is ordered by descending support.
        ``matrix`` has one dict per true label with the keys ``true_label``,
        one count per label in ``labels``, ``other`` (predictions that are
        missing, unhashable, or outside ``labels``) and ``support``.
    """

    def label_of(payload):
        return payload.get(field) if isinstance(payload, dict) else None

    pairs = [(label_of(rec.get("target")), label_of(rec.get("prediction"))) for rec in pred_rows]

    # Drop None before ranking so a missing target never takes one of the
    # top_n slots.
    support = Counter(true_label for true_label, _ in pairs if true_label is not None)
    labels = [label for label, _ in support.most_common(top_n)]
    tracked = set(labels)

    counts = defaultdict(Counter)
    for true_label, pred_label in pairs:
        if true_label not in tracked:
            continue
        try:
            known = pred_label in tracked
        except TypeError:
            # Unhashable prediction (list/dict from malformed model output):
            # it cannot be one of the labels, so it ends up in "other".
            known = False
        if known:
            counts[true_label][pred_label] += 1

    matrix = []
    for true_label in labels:
        row_counts = counts[true_label]
        total = support[true_label]
        row = {"true_label": true_label}
        for pred_label in labels:
            row[pred_label] = row_counts[pred_label]
        row["other"] = total - sum(row_counts[pred_label] for pred_label in labels)
        row["support"] = total
        matrix.append(row)
    return labels, matrix
|
|
|
|
def qa_error_samples(rows, limit=20):
    """Collect up to *limit* QA records whose prediction disagrees with the target.

    A record is an error when any of ``occupied``, ``is_abnormal``,
    ``time_to_free_minutes`` or ``used_areas`` differs (plain ``!=``
    comparison) between target and prediction. A missing or non-dict target
    or prediction is treated as an empty dict.

    Args:
        rows: Sequence of prediction records.
        limit: Maximum number of samples to return; 0 or less returns ``[]``.

    Returns:
        List of dicts with ``index`` (position in *rows*), ``error_fields``
        (``|``-joined field names), and JSON-encoded ``target`` and
        ``prediction``.
    """
    checked_fields = ("occupied", "is_abnormal", "time_to_free_minutes", "used_areas")
    samples = []
    # Checking the limit only after an append would let limit=0 return one row.
    if limit <= 0:
        return samples
    for idx, rec in enumerate(rows):
        target = rec.get("target")
        if not isinstance(target, dict):
            target = {}
        pred = rec.get("prediction")
        if not isinstance(pred, dict):
            pred = {}
        errors = [key for key in checked_fields if target.get(key) != pred.get(key)]
        if not errors:
            continue
        samples.append({
            "index": idx,
            "error_fields": "|".join(errors),
            "target": json.dumps(target, ensure_ascii=False),
            "prediction": json.dumps(pred, ensure_ascii=False),
        })
        if len(samples) >= limit:
            break
    return samples
|
|
|
|
def derive_data(metrics, predictions):
    """Write the derived CSV/JSON artefacts and return them for later stages.

    Produces, in DERIVED_DIR: the flattened metric comparison, per-behavior
    scores (base vs fine-tuned), the fine-tuned confusion matrix and a sample
    of fine-tuned QA errors. Then writes ``presentation_metadata.json`` into
    DATA_DIR; its "derived" list is globbed after the CSVs exist.

    Returns:
        Dict with ``metric_rows``, ``per_class``, ``confusion_labels``,
        ``confusion`` and ``metadata``.
    """

    def relative(path):
        return str(path.relative_to(PACKAGE))

    def stat(stats, label, name):
        # Labels missing from one run fall back to 0 for every statistic.
        return stats.get(label, {}).get(name, 0)

    # 1) Flat metric table.
    metric_rows = flatten_metrics(metrics)
    write_csv(DERIVED_DIR / "metric_comparison.csv", metric_rows, ["run", "task", "metric", "value"])

    # 2) Per-behavior scores, ordered by fine-tuned support (largest first).
    base_stats = class_stats(predictions["base_struct"])
    ft_stats = class_stats(predictions["finetuned_struct"])
    behaviors = sorted(
        set(base_stats) | set(ft_stats),
        key=lambda behavior: stat(ft_stats, behavior, "support"),
        reverse=True,
    )
    per_class = [
        {
            "label": behavior,
            "support": stat(ft_stats, behavior, "support"),
            "base_accuracy": stat(base_stats, behavior, "accuracy"),
            "finetuned_accuracy": stat(ft_stats, behavior, "accuracy"),
            "base_f1": stat(base_stats, behavior, "f1"),
            "finetuned_f1": stat(ft_stats, behavior, "f1"),
            "f1_delta": stat(ft_stats, behavior, "f1") - stat(base_stats, behavior, "f1"),
        }
        for behavior in behaviors
    ]
    write_csv(
        DERIVED_DIR / "per_behavior_scores.csv",
        per_class,
        ["label", "support", "base_accuracy", "finetuned_accuracy", "base_f1", "finetuned_f1", "f1_delta"],
    )

    # 3) Confusion matrix of the fine-tuned structured run.
    confusion_labels, confusion = confusion_rows(predictions["finetuned_struct"])
    write_csv(
        DERIVED_DIR / "finetuned_current_behavior_confusion_top18.csv",
        confusion,
        ["true_label", *confusion_labels, "other", "support"],
    )

    # 4) QA error samples of the fine-tuned QA run.
    qa_samples = qa_error_samples(predictions["finetuned_qa"])
    write_csv(DERIVED_DIR / "finetuned_qa_error_samples.csv", qa_samples, ["index", "error_fields", "target", "prediction"])

    # 5) Package-level metadata.
    source_files = {
        "metrics": [relative(METRICS_DIR / f"{name}_metrics.json") for name in METRIC_FILES],
        "predictions": [relative(PRED_DIR / f"{name}_predictions.jsonl") for name in PRED_FILES],
        "derived": [relative(p) for p in sorted(DERIVED_DIR.glob("*"))],
    }
    metadata = {
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "package_purpose": "Self-contained design review presentation for MWave aircraft lavatory radar LLM fine-tuning experiment.",
        "primary_audience": "Design experts and system reviewers",
        "storyline": [
            "Problem: radar-derived lavatory behavior state is sequential, ambiguous, and privacy-sensitive.",
            "Intervention: fine-tune Qwen3.5-9B with mixed structured prediction and QA supervision.",
            "Evidence: compare base vs fine-tuned on identical validation sets.",
            "Design implication: edge-first state output should be robustly validated before aircraft integration.",
        ],
        "source_files": source_files,
    }
    metadata_text = json.dumps(metadata, ensure_ascii=False, indent=2)
    (DATA_DIR / "presentation_metadata.json").write_text(metadata_text, encoding="utf-8")

    return {
        "metric_rows": metric_rows,
        "per_class": per_class,
        "confusion_labels": confusion_labels,
        "confusion": confusion,
        "metadata": metadata,
    }
|
|
|
|
def svg_wrap(width, height, title, desc, metadata, body):
    """Wrap *body* in a complete SVG document.

    The document carries an escaped ``<title>`` and ``<desc>``, the
    *metadata* object serialised as escaped JSON inside ``<metadata>``,
    shared ``<defs>`` (a drop-shadow filter and two gradients) and a white
    full-size background rectangle placed before *body*.
    """
    embedded = esc(json.dumps(metadata, ensure_ascii=False, indent=2))
    parts = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}" role="img" aria-labelledby="title desc">',
        f'<title id="title">{esc(title)}</title>',
        f'<desc id="desc">{esc(desc)}</desc>',
        f"<metadata>{embedded}</metadata>",
        "<defs>",
        '<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">',
        '<feDropShadow dx="0" dy="12" stdDeviation="14" flood-color="#001754" flood-opacity="0.12"/>',
        "</filter>",
        '<linearGradient id="blueGrad" x1="0" x2="0" y1="0" y2="1">',
        f'<stop offset="0" stop-color="{BOEING}"/>',
        '<stop offset="1" stop-color="#9DB7E2"/>',
        "</linearGradient>",
        '<linearGradient id="redGrad" x1="0" x2="0" y1="0" y2="1">',
        f'<stop offset="0" stop-color="{RED}"/>',
        '<stop offset="1" stop-color="#F4A6B4"/>',
        "</linearGradient>",
        "</defs>",
        '<rect width="100%" height="100%" fill="#ffffff"/>',
        f"{body}",
        "</svg>",
    ]
    # The document ends with a trailing newline after the closing tag.
    return "\n".join(parts) + "\n"
|
|
|
|
def text(x, y, content, size=16, weight=500, fill=INK, anchor="start"):
    """Return an SVG ``<text>`` element at (*x*, *y*) with escaped *content*."""
    attrs = (
        f'x="{x}" y="{y}" '
        'font-family="Aptos, Bahnschrift, Noto Sans CJK SC, sans-serif" '
        f'font-size="{size}" font-weight="{weight}" fill="{fill}" text-anchor="{anchor}"'
    )
    return f"<text {attrs}>{esc(content)}</text>"
|
|
|
|
def save_svg(name, width, height, title, desc, metadata, body):
    """Write a wrapped SVG document to ``CHARTS / name`` and return that path."""
    destination = CHARTS / name
    document = svg_wrap(width, height, title, desc, metadata, body)
    destination.write_text(document, encoding="utf-8")
    return destination
|
|
|
|
def bar_chart(name, title, subtitle, rows, value_key, label_key="label", group_key=None, width=980, height=560, value_format="percent"):
    """Render *rows* as a vertical bar chart and save it as an SVG.

    Args:
        name: File name of the SVG inside CHARTS.
        title: Chart title (also used as the SVG ``<title>``).
        subtitle: Subtitle (also used as the SVG ``<desc>``).
        rows: List of dicts, one per bar. An optional ``"color"`` entry
            overrides the default colour cycle.
        value_key: Key of the numeric bar value; values may be negative.
        label_key: Key of the bar label. ``"\\n"`` in a label starts a new
            label line under the bar.
        group_key: Optional key; consecutive rows sharing the same non-None
            value get one caption centred under them.
        width: SVG width in pixels.
        height: SVG height in pixels.
        value_format: ``"percent"`` formats values with ``safe_pct`` and makes
            the axis reach at least 100%; any other value uses ``safe_num``.

    Returns:
        Path of the written SVG file.
    """
    is_percent = value_format == "percent"
    margin = {"l": 110, "r": 46, "t": 106, "b": 110}
    left, top = margin["l"], margin["t"]
    plot_w = width - left - margin["r"]
    plot_h = height - top - margin["b"]

    values = [row[value_key] for row in rows]
    # The axis always contains zero. Its upper end is at least 1.0 (percent)
    # or 0.1, so the span below is never zero.
    max_v = max(values + [1.0 if is_percent else 0.1])
    min_v = min(values + [0.0])
    span = max_v - min_v
    # Pixel row of the zero baseline; equals the plot bottom when no value is
    # negative.
    zero_y = top + plot_h * (max_v / span)

    body = [
        text(34, 42, title, 28, 900, BLACK),
        text(34, 72, subtitle, 14, 500, STEEL),
        f'<rect x="{left}" y="{top}" width="{plot_w}" height="{plot_h}" fill="#F8FAFC" stroke="#E5E7EB" rx="18"/>',
    ]
    # Five evenly spaced grid lines from the axis maximum down to its minimum.
    for i in range(5):
        y = top + plot_h * i / 4
        val = min_v + span * (1 - i / 4)
        body.append(f'<line x1="{left}" y1="{y:.1f}" x2="{left + plot_w}" y2="{y:.1f}" stroke="#E5E7EB"/>')
        tick = safe_pct(val) if is_percent else safe_num(val, 1)
        body.append(text(left - 14, y + 5, tick, 12, 600, STEEL, "end"))
    if min_v < 0:
        # Make the zero baseline visible when bars extend below it.
        body.append(f'<line x1="{left}" y1="{zero_y:.1f}" x2="{left + plot_w}" y2="{zero_y:.1f}" stroke="{STEEL}"/>')

    n = len(rows)
    slot = plot_w / max(n, 1)
    bar_w = slot * 0.62
    label_y = top + plot_h + 28
    for i, row in enumerate(rows):
        v = row[value_key]
        # SVG rejects negative rect heights: draw |v| and hang negative bars
        # below the baseline instead.
        h = plot_h * abs(v) / span
        x = left + i * slot + (slot - bar_w) / 2
        y = zero_y - h if v >= 0 else zero_y
        color = row.get("color") or COLORS[i % len(COLORS)]
        label = str(row[label_key])
        meta = esc(json.dumps(row, ensure_ascii=False))
        shown = safe_pct(v) if is_percent else safe_num(v)
        body.append(f'<rect class="bar" data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{bar_w:.1f}" height="{h:.1f}" rx="8" fill="{color}"><title>{esc(label)}: {shown}</title></rect>')
        # SVG <text> does not break on newline characters, so each label line
        # becomes its own element; every line is cut to 18 characters.
        label_lines = [
            text(x + bar_w / 2, label_y + k * 14, part[:18], 11, 700, STEEL, "middle")
            for k, part in enumerate(label.split("\n"))
        ]
        # NOTE(review): this <animate> follows the closing </text>, so it is a
        # sibling of the label rather than its child; confirm the intended
        # animation target.
        label_lines[-1] += '<animate attributeName="opacity" from="0" to="1" dur="0.4s" fill="freeze"/>'
        body.extend(label_lines)

    if group_key:
        # Caption each run of consecutive rows sharing a non-None group value.
        run_start = 0
        for i in range(1, n + 1):
            if i < n and rows[i].get(group_key) == rows[run_start].get(group_key):
                continue
            group = rows[run_start].get(group_key)
            if group is not None:
                # Horizontal centre of the slots run_start .. i-1.
                x = left + (run_start + i) * slot / 2
                body.append(text(x, height - 26, group, 13, 900, BLACK, "middle"))
            run_start = i

    return save_svg(name, width, height, title, subtitle, {"chart_type": "bar", "rows": rows, "value_key": value_key}, "\n".join(body))
|
|
|
|
def grouped_metric_chart(metrics):
    """Draw the base vs fine-tuned overview chart and return its path.

    For every listed metric, one bar is added per run (base, then
    fine-tuned) whose value is not None.
    """
    # (display label, metric key, task suffix used in the metrics dict keys)
    spec = (
        ("Schema完整率", "required_field_complete_rate", "struct"),
        ("当前行为Acc", "current_behavior_accuracy", "struct"),
        ("当前行为Macro-F1", "current_behavior_macro_f1", "struct"),
        ("下一行为Acc", "next_possible_behavior_accuracy", "struct"),
        ("阶段Index Acc", "stage_index_accuracy", "struct"),
        ("序列Exact", "sequence_exact_match", "struct"),
        ("占用Acc", "occupied_accuracy", "qa"),
        ("异常F1", "is_abnormal_f1", "qa"),
        ("区域F1", "used_areas_micro_f1", "qa"),
    )
    rows = []
    for caption, key, task in spec:
        # Base bars are always STEEL; fine-tuned bars are coloured by task.
        variants = (
            ("base", "Base", STEEL),
            ("finetuned", "FT", BOEING if task == "struct" else RED),
        )
        for run, tag, color in variants:
            value = metrics[f"{run}_{task}"]["metrics"].get(key)
            if value is None:
                continue
            rows.append({
                "label": f"{caption}\n{tag}",
                "metric": key,
                "run": run,
                "task": task,
                "value": value,
                "color": color,
            })
    return bar_chart(
        "01_base_vs_finetuned_scores.svg",
        "Base vs Fine-tuned: 指标总览",
        "同一验证集上的结构化预测与 QA 任务对比;柱形为可检查 metadata 的 SVG 元素。",
        rows,
        "value",
        width=1280,
        height=620,
    )
|
|
|
|
def label_distribution_chart(summary):
    """Draw the training label distribution (19 most frequent labels).

    Reads ``summary["train_struct"]["label_counts"]`` and returns the path of
    the written SVG.
    """
    ranked = sorted(
        summary["train_struct"]["label_counts"].items(),
        key=lambda item: item[1],
        reverse=True,
    )
    # The label "坐用马桶" is always drawn in BOEING; the rest cycle through
    # COLORS by rank.
    rows = [
        {
            "label": label,
            "value": count,
            "color": BOEING if label == "坐用马桶" else COLORS[rank % len(COLORS)],
        }
        for rank, (label, count) in enumerate(ranked[:19])
    ]
    return bar_chart(
        "02_train_label_distribution.svg",
        "训练集行为标签分布",
        "类别分布显示任务主要由长时间状态和短过渡行为共同构成;设计评审应关注少数类边界。",
        rows,
        "value",
        width=1280,
        height=620,
        value_format="count",
    )
|
|
|
|
def per_behavior_delta_chart(per_class):
    """Draw the 16 behaviors with the largest F1 delta and return the SVG path.

    *per_class* rows need ``label``, ``support``, ``base_f1``,
    ``finetuned_f1`` and ``f1_delta``.
    """
    best = sorted(per_class, key=lambda entry: entry["f1_delta"], reverse=True)[:16]
    rows = [
        {
            "label": entry["label"],
            "value": entry["f1_delta"],
            "support": entry["support"],
            "base_f1": entry["base_f1"],
            "finetuned_f1": entry["finetuned_f1"],
            # Non-negative deltas in BOEING, negative ones in RED.
            "color": RED if entry["f1_delta"] < 0 else BOEING,
        }
        for entry in best
    ]
    return bar_chart(
        "03_behavior_f1_delta.svg",
        "行为类别 F1 改善幅度",
        "展示微调对各行为类别的收益;support 表示验证集该行为样本量。",
        rows,
        "value",
        width=1280,
        height=620,
    )
|
|
|
|
def time_mae_chart(metrics):
    """Draw the time-estimation MAE chart for the fine-tuned runs.

    Shows four MAE metrics of the fine-tuned structured run and the
    ``time_to_free_minutes_mae`` of the fine-tuned QA run. Any metric that
    is missing or None is left out of the chart.

    Returns:
        Path of the written SVG file.
    """
    struct_metrics = metrics["finetuned_struct"]["metrics"]
    qa_metrics = metrics["finetuned_qa"]["metrics"]
    # (display label, metric key, source metrics dict, bar colour)
    series = [
        ("当前行为已持续", "elapsed_seconds_in_current_behavior_mae", struct_metrics, BOEING),
        ("当前行为剩余", "estimated_remaining_seconds_mae", struct_metrics, BOEING),
        ("完整流程剩余", "full_remaining_seconds_mae", struct_metrics, BOEING),
        ("当前行为结束时刻", "expected_end_time_mae", struct_metrics, BOEING),
        ("QA空出时间(分钟)", "time_to_free_minutes_mae", qa_metrics, RED),
    ]
    rows = []
    for label, key, source, color in series:
        value = source.get(key)
        # A None value would make bar_chart's max() fail, so the QA metric
        # gets the same guard as the struct metrics.
        if value is not None:
            rows.append({"label": label, "metric": key, "value": value, "color": color})
    return bar_chart(
        "04_time_error_mae.svg",
        "时间估计误差",
        "结构化任务以秒为单位;QA 空出时间以分钟为单位,接近 0 说明该 QA 目标在当前构造规则下被模型很好拟合。",
        rows,
        "value",
        width=1100,
        height=560,
        value_format="count",
    )
|
|
|
|
def confusion_heatmap(name, title, labels, matrix):
    """Render a confusion matrix as an SVG heatmap and return its path.

    Args:
        name: File name of the SVG inside CHARTS.
        title: Chart title.
        labels: Ordered class labels; used for both axes.
        matrix: One dict per true label (same order as *labels*) holding
            ``true_label``, ``support`` and a count under each label key.
            An empty *labels* list yields a chart with an empty grid.

    Returns:
        Path of the written SVG file.
    """
    width, height = 1180, 900
    margin = {"l": 170, "r": 44, "t": 126, "b": 170}
    left, top = margin["l"], margin["t"]
    n = len(labels)
    # Guard against an empty label list, which would divide by zero here.
    divisor = max(n, 1)
    cell = min((width - left - margin["r"]) / divisor, (height - top - margin["b"]) / divisor)
    grid = cell * n
    # At least 1, so the intensity division below is always defined.
    max_count = max([row[label] for row in matrix for label in labels] + [1])

    body = [
        text(34, 44, title, 28, 900, BLACK),
        text(34, 74, "Fine-tuned current_behavior confusion matrix, top validation labels.", 14, 500, STEEL),
        f'<rect x="{left}" y="{top}" width="{grid:.1f}" height="{grid:.1f}" fill="#F8FAFC" stroke="#E5E7EB" rx="12"/>',
    ]
    # Axis tick labels: true labels on the left, predicted labels underneath.
    for i, axis_label in enumerate(labels):
        body.append(text(left - 12, top + i * cell + cell * 0.62, axis_label, 12, 700, INK, "end"))
        body.append(text(left + i * cell + cell * 0.5, top + grid + 26, axis_label, 11, 700, INK, "middle"))

    for i, row in enumerate(matrix):
        for j, pred_label in enumerate(labels):
            count = row[pred_label]
            intensity = count / max_count
            if i == j:
                # Diagonal cells use a blue ramp that darkens with count.
                blue = int(246 - 170 * intensity)
                color = f"rgb({blue},{max(39, blue + 12)},{255})"
            else:
                # Off-diagonal cells use a red tint, clamped at 230.
                warm = max(230, 248 - int(120 * intensity))
                color = f"rgb({255},{warm},{warm})"
            meta = esc(json.dumps({"true_label": row["true_label"], "predicted_label": pred_label, "count": count, "support": row["support"]}, ensure_ascii=False))
            x = left + j * cell
            y = top + i * cell
            body.append(f'<rect data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{cell:.1f}" height="{cell:.1f}" fill="{color}" stroke="#FFFFFF"><title>true={esc(row["true_label"])}, pred={esc(pred_label)}, count={count}</title></rect>')
            if count:
                body.append(text(x + cell / 2, y + cell * 0.62, count, 9, 700, BLACK, "middle"))

    # Axis captions; the vertical one is rotated around its own anchor point.
    mid_y = top + grid / 2
    body.append(text(left + grid / 2, height - 38, "Predicted behavior", 14, 900, BLACK, "middle"))
    body.append(f'<text x="34" y="{mid_y}" transform="rotate(-90 34 {mid_y})" font-family="Aptos, Bahnschrift, sans-serif" font-size="14" font-weight="900" fill="{BLACK}" text-anchor="middle">True behavior</text>')
    return save_svg(name, width, height, title, "Confusion heatmap with metadata on each cell.", {"chart_type": "confusion_matrix", "labels": labels}, "\n".join(body))
|
|
|
|
def architecture_svg():
    """Draw the four-stage deployment diagram and return the SVG path.

    Each stage is a card with a heading and description lines; every card
    except the last is followed by a connector line with an arrow head.
    """
    width, height = 1280, 500
    # (heading, description lines separated by "\n", left x of the card)
    stages = (
        ("毫米波雷达", "点云/轨迹窗口\n隐私友好,非视觉图像", 56),
        ("边缘特征层", "去噪、跟踪、区域、速度\n生成结构化中间表征", 356),
        ("Qwen3.5-9B LoRA", "本地推理输出 JSON\n行为、阶段、剩余时间、QA", 656),
        ("客舱系统", "乘务终端、维护日志\n异常关注而非强控制", 956),
    )
    body = [
        text(34, 44, "飞机厕所部署技术方案", 30, 900, BLACK),
        text(34, 76, "Edge-first, privacy-preserving, fail-safe integration concept.", 14, 500, STEEL),
    ]
    final_index = len(stages) - 1
    for i, (heading, blurb, x) in enumerate(stages):
        # Headings of cards with index 3 and above are RED, the rest BOEING.
        accent = RED if i >= 3 else BOEING
        body.append(f'<rect x="{x}" y="142" width="250" height="210" rx="28" fill="#FFFFFF" stroke="#D6DEE9" filter="url(#softShadow)"/>')
        body.append(text(x + 24, 190, heading, 22, 900, accent))
        body.extend(
            text(x + 24, 238 + k * 28, line, 16, 600, STEEL)
            for k, line in enumerate(blurb.split("\n"))
        )
        if i < final_index:
            # Connector line plus arrow head pointing at the next card.
            body.append(f'<line x1="{x+260}" y1="246" x2="{x+294}" y2="246" stroke="{BOEING}" stroke-width="4"/>')
            body.append(f'<polygon points="{x+294},246 {x+282},238 {x+282},254" fill="{RED}"/>')
    body.append(text(56, 420, "设计原则:厕所侧只上传结构化状态,不上传可逆原始人体数据;LLM JSON 校验失败时回退规则模型。", 18, 800, BLACK))
    return save_svg(
        "06_deployment_architecture.svg",
        width,
        height,
        "飞机厕所部署技术方案",
        "Edge deployment architecture.",
        {"chart_type": "architecture"},
        "\n".join(body),
    )
|
|
|
|
def build_svgs(summary, metrics, derived):
    """Generate all six SVG charts and write ``chart_manifest.json``.

    Returns:
        The manifest: one dict per chart with ``file`` (path relative to
        PACKAGE), ``title`` (file stem with underscores replaced by spaces)
        and ``metadata_note``.
    """
    note = "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."

    # Charts are produced in the order of their numeric file-name prefixes.
    generated = []
    generated.append(grouped_metric_chart(metrics))
    generated.append(label_distribution_chart(summary))
    generated.append(per_behavior_delta_chart(derived["per_class"]))
    generated.append(time_mae_chart(metrics))
    generated.append(
        confusion_heatmap(
            "05_finetuned_behavior_confusion.svg",
            "Fine-tuned 行为混淆矩阵",
            derived["confusion_labels"],
            derived["confusion"],
        )
    )
    generated.append(architecture_svg())

    chart_manifest = [
        {
            "file": str(svg_path.relative_to(PACKAGE)),
            "title": svg_path.stem.replace("_", " "),
            "metadata_note": note,
        }
        for svg_path in generated
    ]
    manifest_text = json.dumps(chart_manifest, ensure_ascii=False, indent=2)
    (CHARTS / "chart_manifest.json").write_text(manifest_text, encoding="utf-8")
    return chart_manifest
|
|
|
|
def metric_delta(metrics, task, key):
    """Return fine-tuned minus base for metric *key* of *task*.

    Looks the metric up in ``metrics["base_<task>"]`` and
    ``metrics["finetuned_<task>"]``; returns None when either value is
    missing or None.
    """
    pair = [metrics[f"{run}_{task}"]["metrics"].get(key) for run in ("base", "finetuned")]
    if any(value is None for value in pair):
        return None
    before, after = pair
    return after - before
|
|
|
|
| def build_html(summary, metrics, derived, chart_manifest): |
| struct = metrics["finetuned_struct"]["metrics"] |
| qa = metrics["finetuned_qa"]["metrics"] |
| base_struct = metrics["base_struct"]["metrics"] |
| base_qa = metrics["base_qa"]["metrics"] |
| metadata_json = json.dumps({ |
| "summary": summary, |
| "metrics": metrics, |
| "chart_manifest": chart_manifest, |
| "derived_files": [str(p.relative_to(PACKAGE)) for p in sorted(DERIVED_DIR.glob("*"))], |
| }, ensure_ascii=False) |
| metric_table_rows = [] |
| story_metrics = [ |
| ("结构化 schema 完整率", base_struct["required_field_complete_rate"], struct["required_field_complete_rate"], "决定输出能否进入机载系统状态总线。"), |
| ("当前行为准确率", base_struct["current_behavior_accuracy"], struct["current_behavior_accuracy"], "核心状态识别能力,从 radar 表征映射到行为标签。"), |
| ("当前行为 Macro-F1", base_struct["current_behavior_macro_f1"], struct["current_behavior_macro_f1"], "衡量少数类是否被覆盖,适合设计评审关注长尾行为。"), |
| ("下一行为准确率", base_struct["next_possible_behavior_accuracy"], struct["next_possible_behavior_accuracy"], "影响短期流程预测和预计空出时间。"), |
| ("阶段 index 准确率", base_struct["stage_index_accuracy"], struct["stage_index_accuracy"], "反映模型是否理解完整厕所使用流程位置。"), |
| ("QA 异常 F1", base_qa["is_abnormal_f1"], qa["is_abnormal_f1"], "用于乘务关注提示,需单独评估。"), |
| ("QA 区域 F1", base_qa["used_areas_micro_f1"], qa["used_areas_micro_f1"], "用于理解马桶、洗手池、门、垃圾桶等区域使用状态。"), |
| ] |
| for name, base, ft, meaning in story_metrics: |
| delta = ft - base if base is not None and ft is not None else None |
| metric_table_rows.append(f"<tr><td>{esc(name)}</td><td>{safe_pct(base)}</td><td>{safe_pct(ft)}</td><td>{safe_pct(delta) if delta is not None else 'N/A'}</td><td>{esc(meaning)}</td></tr>") |
|
|
| top_improvements = sorted(derived["per_class"], key=lambda x: x["f1_delta"], reverse=True)[:8] |
| top_rows = "\n".join( |
| f"<tr><td>{esc(r['label'])}</td><td>{r['support']}</td><td>{safe_pct(r['base_f1'])}</td><td>{safe_pct(r['finetuned_f1'])}</td><td>{safe_pct(r['f1_delta'])}</td></tr>" |
| for r in top_improvements |
| ) |
| chart_cards = "\n".join( |
| f'''<article class="chart-card" data-chart="{esc(item["file"])}"> |
| <div><h3>{esc(item["title"])}</h3><p>{esc(item["metadata_note"])}</p></div> |
| <object type="image/svg+xml" data="{esc(item["file"])}" aria-label="{esc(item["title"])}"></object> |
| </article>''' |
| for item in chart_manifest |
| ) |
| source_links = "\n".join( |
| f"<li><a href='{esc(str(p.relative_to(PACKAGE)))}'>{esc(str(p.relative_to(PACKAGE)))}</a></li>" |
| for p in sorted(DATA_DIR.rglob("*")) |
| if p.is_file() |
| ) |
| html_text = f'''<!doctype html> |
| <html lang="zh-CN"> |
| <head> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <title>MWave Design Review Presentation</title> |
| <style> |
| :root {{ |
| --boeing:{BOEING}; --boeing2:{BOEING_2}; --red:{RED}; --black:{BLACK}; |
| --ink:{INK}; --steel:{STEEL}; --mist:{MIST}; --light:{LIGHT}; --line:rgba(8,10,15,.14); |
| }} |
| * {{ box-sizing:border-box; }} |
| body {{ |
| margin:0; color:var(--ink); |
| font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif; |
| background:radial-gradient(circle at 10% 0%, rgba(0,103,177,.22), transparent 28%), |
| radial-gradient(circle at 86% 6%, rgba(200,16,46,.12), transparent 24%), |
| linear-gradient(135deg,#eef4fb,#ffffff 44%,#e9eef6); |
| }} |
| a {{ color:var(--boeing); font-weight:800; }} |
| .deck {{ max-width:1480px; margin:0 auto; padding:28px; }} |
| .hero {{ |
| min-height:620px; color:#fff; border-radius:36px; padding:52px; |
| background:linear-gradient(118deg,rgba(3,11,29,.98),rgba(0,57,166,.94) 54%,rgba(0,103,177,.72)); |
| position:relative; overflow:hidden; |
| }} |
| .hero:after {{ content:""; position:absolute; right:-120px; top:-160px; width:640px; height:640px; border-radius:50%; border:1px solid rgba(255,255,255,.18); box-shadow:inset 0 0 0 82px rgba(255,255,255,.05), inset 0 0 0 180px rgba(255,255,255,.035); }} |
| .kicker {{ letter-spacing:.18em; text-transform:uppercase; font-size:12px; color:#dbeafe; }} |
| h1 {{ font-size:clamp(46px,7vw,100px); line-height:.92; margin:26px 0 18px; max-width:1050px; letter-spacing:-.06em; }} |
| h2 {{ font-size:38px; letter-spacing:-.04em; margin:0 0 18px; color:var(--black); }} |
| h3 {{ margin:0 0 8px; font-size:22px; color:var(--black); }} |
| .hero p {{ color:#eaf1fb; font-size:21px; line-height:1.65; max-width:980px; }} |
| .hero-grid {{ display:grid; grid-template-columns:repeat(4,1fr); gap:18px; position:relative; z-index:1; margin-top:42px; }} |
| .hero-card {{ padding:22px; border:1px solid rgba(255,255,255,.22); background:rgba(255,255,255,.1); border-radius:24px; backdrop-filter:blur(16px); }} |
| .big {{ font-size:38px; font-weight:900; letter-spacing:-.04em; }} |
| .label {{ color:#cbd5e1; font-size:13px; margin-top:6px; }} |
| nav {{ position:sticky; top:0; z-index:20; display:flex; gap:8px; flex-wrap:wrap; margin:18px 0; padding:12px; border:1px solid var(--line); border-radius:22px; background:rgba(255,255,255,.88); backdrop-filter:blur(18px); }} |
| nav button, .pill {{ border:0; border-radius:999px; padding:11px 15px; background:#eef3fb; color:#0f172a; font-weight:900; cursor:pointer; }} |
| nav button:hover, .pill.active {{ background:var(--boeing); color:#fff; }} |
| section {{ background:rgba(255,255,255,.92); border:1px solid var(--line); border-radius:30px; padding:32px; margin-top:24px; box-shadow:0 16px 44px rgba(17,24,39,.06); }} |
| .grid {{ display:grid; gap:18px; }} |
| .grid-2 {{ grid-template-columns:repeat(2,minmax(0,1fr)); }} |
| .grid-3 {{ grid-template-columns:repeat(3,minmax(0,1fr)); }} |
| .grid-4 {{ grid-template-columns:repeat(4,minmax(0,1fr)); }} |
| .card {{ border:1px solid var(--line); background:linear-gradient(180deg,#fff,#f6f9fd); border-radius:24px; padding:22px; }} |
| .card strong {{ color:var(--boeing); }} |
| .muted {{ color:var(--steel); line-height:1.66; }} |
| .story {{ display:grid; grid-template-columns:72px 1fr; gap:18px; margin-bottom:18px; }} |
| .num {{ width:54px; height:54px; border-radius:50%; display:grid; place-items:center; background:var(--black); color:#fff; font-weight:900; font-size:22px; }} |
| table {{ width:100%; border-collapse:collapse; overflow:hidden; border-radius:18px; background:#fff; }} |
| th,td {{ padding:13px 14px; border-bottom:1px solid #e5e7eb; text-align:left; vertical-align:top; }} |
| th {{ background:#08111f; color:#fff; font-size:13px; }} |
| .chart-grid {{ display:grid; grid-template-columns:1fr; gap:22px; }} |
| .chart-card {{ border:1px solid var(--line); background:#fff; border-radius:26px; padding:22px; }} |
| .chart-card object {{ width:100%; height:620px; border:0; background:#fff; border-radius:18px; }} |
| .chart-card p {{ margin:0 0 12px; color:var(--steel); }} |
| .metadata-panel {{ display:grid; grid-template-columns:360px 1fr; gap:18px; }} |
| select, textarea {{ width:100%; border:1px solid #d6dee9; border-radius:16px; padding:12px; font:inherit; background:#fff; }} |
| pre {{ margin:0; white-space:pre-wrap; background:#05070c; color:#d1fae5; border-radius:18px; padding:18px; min-height:360px; overflow:auto; font-family:"Cascadia Mono","SFMono-Regular",monospace; font-size:13px; }} |
| .callout {{ border-left:6px solid var(--red); background:#fff5f6; padding:18px; border-radius:18px; }} |
| .files {{ columns:2; }} |
| .footer {{ text-align:center; color:#64748b; padding:28px; }} |
| @media print {{ |
| nav, .metadata-panel, .no-print {{ display:none !important; }} |
| body {{ background:#fff; }} |
| section, .hero {{ break-inside:avoid; page-break-inside:avoid; box-shadow:none; }} |
| .chart-card object {{ height:520px; }} |
| }} |
| @media(max-width:980px) {{ .hero-grid,.grid-2,.grid-3,.grid-4,.metadata-panel {{ grid-template-columns:1fr; }} .deck {{ padding:14px; }} .hero {{ padding:28px; }} .files {{ columns:1; }} }} |
| </style> |
| </head> |
| <body> |
| <div class="deck"> |
| <header class="hero"> |
| <div class="kicker">MWave Radar LLM · Aircraft Lavatory Design Review</div> |
| <h1>从毫米波雷达到机载厕所状态智能:一次可验证的 LLM 微调实验</h1> |
| <p>这份汇报面向设计专家评审,重点不是展示训练日志,而是讲清楚:为什么需要模型、模型学到了什么、哪些结果能支持设计决策、哪些风险需要在机载部署前继续验证。</p> |
| <div class="hero-grid"> |
| <div class="hero-card"><div class="big">{summary["train_struct"]["num_examples"]:,}</div><div class="label">训练结构样本</div></div> |
| <div class="hero-card"><div class="big">{summary["val_struct"]["num_examples"]:,}</div><div class="label">验证结构样本</div></div> |
| <div class="hero-card"><div class="big">{summary["train_qa"]["num_examples"]:,}</div><div class="label">QA 训练样本</div></div> |
| <div class="hero-card"><div class="big">Qwen3.5-9B</div><div class="label">4-bit QLoRA 微调</div></div> |
| </div> |
| </header> |
| |
| <nav class="no-print"> |
| <button onclick="go('story')">Story Line</button> |
| <button onclick="go('method')">任务与方法</button> |
| <button onclick="go('results')">结果证据</button> |
| <button onclick="go('charts')">SVG 图表</button> |
| <button onclick="go('metadata')">Metadata Explorer</button> |
| <button onclick="go('deployment')">部署方案</button> |
| <button onclick="go('sources')">源数据</button> |
| </nav> |
| |
| <section id="story"> |
| <h2>评审 Story Line</h2> |
| <div class="story"><div class="num">1</div><div><h3>设计问题</h3><p class="muted">飞机厕所内不能依赖摄像头,毫米波雷达提供隐私友好的时空轨迹,但原始序列难以直接变成可用的客舱状态。</p></div></div> |
| <div class="story"><div class="num">2</div><div><h3>模型角色</h3><p class="muted">LLM 不替代雷达信号处理,而是读取结构化窗口和中间表征,输出严格 JSON:当前行为、下一行为、阶段、剩余时间、序列,以及 QA 状态。</p></div></div> |
| <div class="story"><div class="num">3</div><div><h3>实验判断</h3><p class="muted">同一验证集上比较 base Qwen3.5-9B 与微调后模型,指标覆盖 schema 合规、行为识别、流程理解、时间估计和 QA 推理。</p></div></div> |
| <div class="story"><div class="num">4</div><div><h3>设计结论</h3><p class="muted">微调显著提升结构化输出和 QA 可用性,但时间误差、少数类行为和异常提示仍应进入下一轮系统设计验证。</p></div></div> |
| </section> |
| |
| <section id="method"> |
| <h2>任务与评估方法</h2> |
| <div class="grid grid-3"> |
| <div class="card"><h3>结构化预测</h3><p class="muted">输入雷达时序窗口和中间层表征,输出 current_behavior、next_possible_behavior、stage_index、remaining time、sequence_so_far 等字段。</p></div> |
| <div class="card"><h3>QA 状态回答</h3><p class="muted">作为独立维度评估 occupied、time_to_free_minutes、used_areas、is_abnormal,避免只看结构化任务而忽略最终用户问题。</p></div> |
| <div class="card"><h3>指标选择</h3><p class="muted">分类用 accuracy/F1,schema 用 JSON parse 和 required field complete,时间用 MAE,序列用 exact/prefix/last-label match。</p></div> |
| </div> |
| </section> |
| |
| <section id="results"> |
| <h2>关键结果</h2> |
| <div class="grid grid-4"> |
| <div class="card"><div class="big">{safe_pct(struct["current_behavior_accuracy"])}</div><strong>当前行为准确率</strong><p class="muted">Base 为 {safe_pct(base_struct["current_behavior_accuracy"])}。</p></div> |
| <div class="card"><div class="big">{safe_pct(struct["required_field_complete_rate"])}</div><strong>结构化字段完整率</strong><p class="muted">Base 为 {safe_pct(base_struct["required_field_complete_rate"])},说明微调主要解决 schema 对齐。</p></div> |
| <div class="card"><div class="big">{safe_pct(qa["is_abnormal_f1"])}</div><strong>QA 异常 F1</strong><p class="muted">Base 为 {safe_pct(base_qa["is_abnormal_f1"])}。</p></div> |
| <div class="card"><div class="big">{safe_num(struct["full_remaining_seconds_mae"], 1)}s</div><strong>完整流程剩余时间 MAE</strong><p class="muted">该误差直接影响预计空出时间体验。</p></div> |
| </div> |
| <div style="margin-top:20px; overflow:auto"> |
| <table> |
| <tr><th>评估项</th><th>Base</th><th>Fine-tuned</th><th>变化</th><th>设计含义</th></tr> |
| {''.join(metric_table_rows)} |
| </table> |
| </div> |
| <div class="callout" style="margin-top:20px"> |
| <strong>评审判断:</strong>当前实验已经证明 9B LoRA 可以把通用 LLM 拉到可用的结构化状态输出轨道上;但机载上线前仍应增加跨乘客、跨机型、跨雷达安装角度的数据验证,并对异常提示设定保守阈值。 |
| </div> |
| </section> |
| |
| <section> |
| <h2>哪些行为受益最大</h2> |
| <p class="muted">下表展示 F1 改善最大的行为类别。设计评审时应同时看 support,避免把少量样本上的提升误判为稳定能力。</p> |
| <table> |
| <tr><th>行为</th><th>验证样本数</th><th>Base F1</th><th>Fine-tuned F1</th><th>提升</th></tr> |
| {top_rows} |
| </table> |
| </section> |
| |
| <section id="charts"> |
| <h2>SVG 矢量图表</h2> |
| <p class="muted">所有图表都在 <code>assets/charts/</code> 下以 SVG 保存,可缩放、可放进设计文档;SVG 内包含 metadata,HTML 下方可查看。</p> |
| <div class="chart-grid">{chart_cards}</div> |
| </section> |
| |
| <section id="metadata" class="no-print"> |
| <h2>Metadata Explorer</h2> |
| <p class="muted">选择图表或数据对象,查看生成依据、源文件路径和图表内嵌 metadata。此功能完全离线,不依赖外网。</p> |
| <div class="metadata-panel"> |
| <div class="card"> |
| <h3>选择对象</h3> |
| <select id="metaSelect"></select> |
| <p class="muted">提示:SVG 图表也可以直接用文本编辑器打开,查看其中的 <code><metadata></code> 和每个 bar/cell 的 <code>data-meta</code>。</p> |
| </div> |
| <pre id="metaOut"></pre> |
| </div> |
| </section> |
| |
| <section id="deployment"> |
| <h2>部署到飞机厕所的技术方案</h2> |
| <div class="grid grid-3"> |
| <div class="card"><h3>边缘优先</h3><p class="muted">厕所侧完成雷达预处理和特征抽取,客舱边缘计算单元加载量化 LLM/LoRA,减少原始数据移动。</p></div> |
| <div class="card"><h3>状态输出</h3><p class="muted">对外只输出 occupied、estimated free time、used areas、abnormal flag 和行为阶段,不输出身份或可逆人体点云。</p></div> |
| <div class="card"><h3>失效保护</h3><p class="muted">JSON 校验失败、置信不足或异常连续触发时回退规则模型,并只给出“需关注”级提示。</p></div> |
| </div> |
| <object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" style="width:100%;height:480px;border:0;margin-top:18px"></object> |
| </section> |
| |
| <section id="sources"> |
| <h2>可拷走的源数据与材料</h2> |
| <p class="muted">整个 <code>mwave_design_review_package</code> 目录可直接复制到其他电脑。HTML、PDF、SVG 和 CSV/JSONL 都使用相对路径或独立文件。</p> |
| <ul class="files">{source_links}</ul> |
| </section> |
| |
| <div class="footer">Generated locally · {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} · Boeing blue / red / black / gray design system</div> |
| </div> |
| <script id="metadata-json" type="application/json">{metadata_json}</script> |
| <script> |
| const metadata = JSON.parse(document.getElementById('metadata-json').textContent); |
| const options = [ |
| {{label:'Package overview', value:metadata}}, |
| ...metadata.chart_manifest.map(c => ({{label:'Chart: '+c.title, value:c}})), |
| {{label:'Metrics: base struct', value:metadata.metrics.base_struct}}, |
| {{label:'Metrics: fine-tuned struct', value:metadata.metrics.finetuned_struct}}, |
| {{label:'Metrics: base QA', value:metadata.metrics.base_qa}}, |
| {{label:'Metrics: fine-tuned QA', value:metadata.metrics.finetuned_qa}}, |
| {{label:'Data summary', value:metadata.summary}}, |
| ]; |
| function go(id){{ document.getElementById(id).scrollIntoView({{behavior:'smooth'}}); }} |
| const select = document.getElementById('metaSelect'); |
| const out = document.getElementById('metaOut'); |
| options.forEach((item, idx) => {{ |
| const option = document.createElement('option'); |
| option.value = String(idx); |
| option.textContent = item.label; |
| select.appendChild(option); |
| }}); |
| function renderMeta(){{ out.textContent = JSON.stringify(options[Number(select.value)].value, null, 2); }} |
| select.addEventListener('change', renderMeta); |
| renderMeta(); |
| </script> |
| </body> |
| </html> |
| ''' |
| (PACKAGE / "index.html").write_text(html_text, encoding="utf-8") |
|
|
|
|
def build_readme():
    """Write ``README.md`` into the package directory.

    The README names the two entry points (interactive HTML and static PDF),
    describes the asset and source-data folders, and records the local time
    at which it was generated.
    """
    # Resolve the two dynamic pieces first so the template below stays static.
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    target = PACKAGE / "README.md"

    text = f"""# MWave Design Review Presentation Package

This folder is self-contained and can be copied to another computer.

Open:
- `index.html`: interactive presentation with SVG charts and metadata explorer.
- `mwave_design_review_report.pdf`: static PDF export for review circulation.

Key folders:
- `assets/charts/`: standalone SVG vector charts. Each SVG includes embedded metadata.
- `source_data/metrics/`: original evaluation metric JSON files.
- `source_data/predictions/`: original prediction JSONL files for base and fine-tuned runs.
- `source_data/derived/`: CSV files derived for the presentation, including per-behavior scores and confusion matrix.

Generated at: {generated_at}
"""
    target.write_text(text, encoding="utf-8")
|
|
|
|
def export_pdf():
    """Render the package's ``index.html`` to PDF with headless Chrome.

    The first of ``google-chrome``, ``chromium`` or ``chromium-browser`` found
    on PATH is used to print the page to ``mwave_design_review_report.pdf``
    inside PACKAGE.

    Returns:
        bool: True when Chrome ran and exited successfully; False when no
        Chrome/Chromium executable was found and the export was skipped.

    Raises:
        subprocess.CalledProcessError: Chrome exited with a non-zero status.
            Its captured stderr is printed before the error propagates.
    """
    html_path = PACKAGE / "index.html"
    pdf_path = PACKAGE / "mwave_design_review_report.pdf"
    chrome = shutil.which("google-chrome") or shutil.which("chromium") or shutil.which("chromium-browser")
    if not chrome:
        print("Chrome not found; skipped PDF export.")
        return False
    cmd = [
        chrome,
        "--headless",
        "--no-sandbox",
        "--disable-gpu",
        f"--print-to-pdf={pdf_path}",
        "--print-to-pdf-no-header",
        # as_uri() percent-encodes spaces, '#', '%' and non-ASCII characters
        # and emits the right form on Windows; plain "file://" + str(path)
        # yields a wrong or invalid URL for such paths.
        html_path.resolve().as_uri(),
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as err:
        # The output is captured, so without this the reason Chrome failed
        # would never reach the console. Re-raise to keep the original contract.
        detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        print(f"Chrome PDF export failed with exit code {err.returncode}: {detail}")
        raise
    return True
|
|
|
|
def make_zip():
    """Zip the package directory and return the archive's path.

    The archive is created under OUT_ROOT from the base name
    ``mwave_design_review_package``. Archiving is rooted at OUT_ROOT and
    starts from the package folder, so entries keep that folder as their
    top-level directory.

    Returns:
        Path: location of the archive reported by ``shutil.make_archive``.
    """
    created = shutil.make_archive(
        base_name=str(OUT_ROOT / "mwave_design_review_package"),
        format="zip",
        root_dir=OUT_ROOT,
        base_dir=PACKAGE.name,
    )
    return Path(created)
|
|
|
|
def main():
    """Run the whole package build, end to end, and print a short report.

    Order matters: directories and source copies come first, then the inputs
    are loaded, derived data and charts are produced, the HTML and README are
    written, and only then are the PDF export and zip archive attempted.
    """
    ensure_dirs()
    copy_source_files()

    # Load inputs: dataset summary, per-run metrics, per-run predictions.
    summary = read_json(ROOT / "data/processed/summary.json")
    metrics = {}
    for run_name, metric_path in METRIC_FILES.items():
        metrics[run_name] = read_json(metric_path)
    predictions = {}
    for run_name, pred_path in PRED_FILES.items():
        predictions[run_name] = read_jsonl(pred_path)

    # Derive presentation data, then render charts, page and README.
    derived = derive_data(metrics, predictions)
    chart_manifest = build_svgs(summary, metrics, derived)
    build_html(summary, metrics, derived, chart_manifest)
    build_readme()

    # Export and archive last so they pick up everything written above.
    pdf_ok = export_pdf()
    archive = make_zip()

    report = (
        f"wrote package: {PACKAGE}",
        f"wrote archive: {archive}",
        f"pdf_exported: {pdf_ok}",
    )
    for line in report:
        print(line)
|
|
|
|
# Build the package only when executed as a script, so importing this module
# (e.g. to reuse its helpers) has no side effects.
if __name__ == "__main__":
    main()
|
|