CabinLavatoryPrediction / code /build_presentation.py
sutama's picture
Upload CabinLavatoryPrediction LoRA adapter, checkpoint, code, and evaluation artifacts
e74a796 verified
#!/usr/bin/env python3
import csv
import html
import json
import math
import shutil
import subprocess
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
# --- Output layout -----------------------------------------------------------
# ROOT is the directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Everything this script generates lives under ROOT/presentation.
OUT_ROOT = ROOT / "presentation"
# PACKAGE is the directory that is later zipped; the other paths are its
# sub-folders for charts and for copied / derived data files.
PACKAGE = OUT_ROOT / "mwave_design_review_package"
ASSETS = PACKAGE / "assets"
CHARTS = ASSETS / "charts"
DATA_DIR = PACKAGE / "source_data"
METRICS_DIR = DATA_DIR / "metrics"
PRED_DIR = DATA_DIR / "predictions"
DERIVED_DIR = DATA_DIR / "derived"
# --- Colour palette (hex strings used in both the SVG charts and the HTML CSS)
BOEING = "#0039A6"
BOEING_2 = "#0067B1"
RED = "#C8102E"
BLACK = "#080A0F"
INK = "#111827"
STEEL = "#5B6472"
MIST = "#E7ECF3"
LIGHT = "#F7F9FC"
# Fallback colour cycle for bars whose row does not carry its own "color".
COLORS = [BOEING, BOEING_2, RED, BLACK, STEEL, "#8EA4C8", "#B7C3D5", "#7F1D1D", "#334155", "#64748B"]
# --- Input files ---------------------------------------------------------------
# Keys follow the "<run>_<task>" pattern (base/finetuned x struct/qa); the same
# keys are used to name the copies placed inside the package.
METRIC_FILES = {
"base_struct": ROOT / "outputs/metrics/base_struct_metrics.json",
"finetuned_struct": ROOT / "outputs/metrics/finetuned_struct_metrics.json",
"base_qa": ROOT / "outputs/metrics/base_qa_metrics.json",
"finetuned_qa": ROOT / "outputs/metrics/finetuned_qa_metrics.json",
}
PRED_FILES = {
"base_struct": ROOT / "outputs/predictions/base_struct_predictions.jsonl",
"finetuned_struct": ROOT / "outputs/predictions/finetuned_struct_predictions.jsonl",
"base_qa": ROOT / "outputs/predictions/base_qa_predictions.jsonl",
"finetuned_qa": ROOT / "outputs/predictions/finetuned_qa_predictions.jsonl",
}
# Validation data and the dataset summary; copied into the package under their
# original file names.
VAL_FILES = {
"val_struct": ROOT / "data/processed/val_struct.jsonl",
"val_qa": ROOT / "data/processed/val_qa.jsonl",
"summary": ROOT / "data/processed/summary.json",
}
def read_json(path):
    """Parse the UTF-8 JSON document stored at ``path`` and return the result."""
    raw = Path(path).read_text(encoding="utf-8")
    return json.loads(raw)
def read_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped, every other line is parsed as JSON.
    """
    with Path(path).open(encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        return [json.loads(entry) for entry in stripped if entry]
def safe_pct(v):
    """Format a fraction as a percentage with one decimal, or "N/A" for None."""
    return "N/A" if v is None else f"{v * 100:.1f}%"
def safe_num(v, digits=3):
    """Return a compact, human-readable string for ``v``.

    * ``None`` becomes ``"N/A"``.
    * Non-zero floats smaller than 0.001 in magnitude use scientific notation
      with two decimals.
    * Other floats are rounded to ``digits`` decimals and trailing zeros of
      the fractional part are removed.
    * Integers get thousands separators.
    * Anything else is passed through ``str``.
    """
    if v is None:
        return "N/A"
    if isinstance(v, float):
        if abs(v) < 0.001 and v != 0:
            return f"{v:.2e}"
        formatted = f"{v:.{digits}f}"
        # Only trim zeros that belong to a fractional part. Without this
        # guard, digits=0 turns "100" into "1" because the zeros being
        # stripped are significant integer digits.
        if "." in formatted:
            formatted = formatted.rstrip("0").rstrip(".")
        return formatted
    if isinstance(v, int):
        return f"{v:,}"
    return str(v)
def esc(text):
    """Stringify ``text`` and HTML-escape it, including both quote characters."""
    as_text = str(text)
    return html.escape(as_text, quote=True)
def ensure_dirs():
    """Start from a clean package directory.

    Any existing PACKAGE tree is deleted, then the chart, metrics,
    predictions and derived-data directories are (re)created.
    """
    if PACKAGE.exists():
        shutil.rmtree(PACKAGE)
    for directory in (CHARTS, METRICS_DIR, PRED_DIR, DERIVED_DIR):
        directory.mkdir(parents=True, exist_ok=True)
def copy_source_files():
    """Copy the raw inputs into the package and record model metadata.

    Metric and prediction files are renamed after their dictionary key,
    validation files keep their original names, and a small JSON file
    describing the model / training setup is written next to them.
    """
    copy_plan = (
        (METRIC_FILES, METRICS_DIR, "_metrics.json"),
        (PRED_FILES, PRED_DIR, "_predictions.jsonl"),
    )
    for sources, dest_dir, suffix in copy_plan:
        for run_key, src in sources.items():
            shutil.copy2(src, dest_dir / f"{run_key}{suffix}")
    for src in VAL_FILES.values():
        shutil.copy2(src, DATA_DIR / src.name)

    adapter = ROOT / "outputs/qwen35_9b_lora"
    # The adapter directory is optional; list its plain files only if present.
    if adapter.exists():
        adapter_files = sorted(p.name for p in adapter.glob("*") if p.is_file())
    else:
        adapter_files = []
    model_meta = {
        "base_model": "Qwen/Qwen3.5-9B",
        "adapter_dir": "outputs/qwen35_9b_lora",
        "adapter_files": adapter_files,
        "training_method": "4-bit QLoRA supervised fine-tuning",
        "train_file": "data/processed/train_mixed.jsonl",
        "validation_files": ["data/processed/val_struct.jsonl", "data/processed/val_qa.jsonl"],
    }
    serialized = json.dumps(model_meta, ensure_ascii=False, indent=2)
    (DATA_DIR / "model_training_metadata.json").write_text(serialized, encoding="utf-8")
def write_csv(path, rows, fieldnames):
    """Write ``rows`` (dicts) to ``path`` as UTF-8 CSV with a header row.

    ``fieldnames`` defines the column order.
    """
    with Path(path).open("w", encoding="utf-8", newline="") as handle:
        out = csv.DictWriter(handle, fieldnames=fieldnames)
        out.writeheader()
        for record in rows:
            out.writerow(record)
def flatten_metrics(metrics):
    """Turn the per-run metric payloads into a flat list of row dicts.

    Every payload contributes one row per entry of its ``"metrics"`` mapping,
    tagged with the payload's ``"run_name"`` and ``"task_type"``. The keys of
    ``metrics`` itself are not used.
    """
    flat = []
    for payload in metrics.values():
        run_name, task_type = payload["run_name"], payload["task_type"]
        flat.extend(
            {"run": run_name, "task": task_type, "metric": metric, "value": value}
            for metric, value in payload["metrics"].items()
        )
    return flat
def class_stats(pred_rows, field="current_behavior"):
    """Compute per-label support, accuracy, precision, recall and F1.

    Labels are the distinct non-None values of ``target[field]``. A record
    whose ``"prediction"`` is not a dict is treated as having no predicted
    value. "accuracy" here is the share of a label's records predicted
    correctly, so it has the same value as recall.
    """
    # Reduce every record to a (true value, predicted value) pair once.
    pairs = []
    for rec in pred_rows:
        parsed = rec.get("prediction")
        if not isinstance(parsed, dict):
            parsed = {}
        pairs.append((rec.get("target", {}).get(field), parsed.get(field)))

    summary = {}
    for label in sorted({truth for truth, _ in pairs if truth is not None}):
        support = sum(1 for truth, _ in pairs if truth == label)
        tp = sum(1 for truth, guess in pairs if truth == label and guess == label)
        fp = sum(1 for truth, guess in pairs if truth != label and guess == label)
        fn = sum(1 for truth, guess in pairs if truth == label and guess != label)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        if precision + recall:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0.0
        summary[label] = {
            "support": support,
            "accuracy": tp / support if support else 0.0,
            "precision": precision,
            "recall": recall,
            "f1": f1,
        }
    return summary
def confusion_rows(pred_rows, field="current_behavior", top_n=18):
    """Build a confusion matrix restricted to the ``top_n`` most frequent labels.

    Args:
        pred_rows: records with a ``"target"`` dict and a ``"prediction"``
            value; a prediction that is not a dict counts as "no prediction".
        field: key compared between target and prediction.
        top_n: number of true labels (by frequency) to keep.

    Returns:
        ``(labels, matrix)``. ``labels`` lists the kept true labels, most
        frequent first. ``matrix`` has one dict per true label with the keys
        ``"true_label"``, one count per entry of ``labels``, ``"other"``
        (predictions outside ``labels``, including missing or malformed ones)
        and ``"support"`` (number of records with that true label).
    """
    # Records without a target value are dropped before ranking so that a
    # missing label cannot occupy one of the top_n slots.
    pairs = []
    for rec in pred_rows:
        true_label = rec.get("target", {}).get(field)
        if true_label is None:
            continue
        parsed = rec.get("prediction")
        predicted = parsed.get(field) if isinstance(parsed, dict) else None
        pairs.append((true_label, predicted))

    support = Counter(true_label for true_label, _ in pairs)
    labels = [label for label, _ in support.most_common(top_n)]

    matrix = []
    for true_label in labels:
        predicted_values = [guess for truth, guess in pairs if truth == true_label]
        row = {"true_label": true_label}
        for pred_label in labels:
            # Plain equality instead of a Counter lookup: model output may be
            # an unhashable value (list/dict), which must land in "other"
            # rather than raise TypeError.
            row[pred_label] = sum(1 for guess in predicted_values if guess == pred_label)
        total = len(predicted_values)
        row["other"] = max(0, total - sum(row[p] for p in labels))
        row["support"] = total
        matrix.append(row)
    return labels, matrix
def qa_error_samples(rows, limit=20):
    """Collect QA records whose prediction disagrees with the target.

    Four fields are compared for exact equality. Each returned sample holds
    the record index, the mismatching field names joined by ``"|"``, and the
    target / prediction serialized as JSON text. Scanning stops once
    ``limit`` samples have been collected.
    """
    checked_keys = ("occupied", "is_abnormal", "time_to_free_minutes", "used_areas")
    collected = []
    for position, rec in enumerate(rows):
        expected = rec.get("target", {})
        raw = rec.get("prediction")
        # A prediction that is not a dict is compared as an empty one.
        actual = raw if isinstance(raw, dict) else {}
        mismatched = [k for k in checked_keys if expected.get(k) != actual.get(k)]
        if mismatched:
            collected.append(
                {
                    "index": position,
                    "error_fields": "|".join(mismatched),
                    "target": json.dumps(expected, ensure_ascii=False),
                    "prediction": json.dumps(actual, ensure_ascii=False),
                }
            )
        if len(collected) >= limit:
            break
    return collected
def derive_data(metrics, predictions):
    """Write the derived CSV / JSON files and return the data used by charts.

    Produces, inside DERIVED_DIR: the flattened metric comparison, per-label
    scores for base vs fine-tuned struct predictions, the fine-tuned
    confusion matrix and a sample of fine-tuned QA errors. A metadata JSON
    describing the package is written to DATA_DIR.

    Returns a dict with the keys ``metric_rows``, ``per_class``,
    ``confusion_labels``, ``confusion`` and ``metadata``.
    """
    metric_rows = flatten_metrics(metrics)
    write_csv(DERIVED_DIR / "metric_comparison.csv", metric_rows, ["run", "task", "metric", "value"])

    base_stats = class_stats(predictions["base_struct"])
    ft_stats = class_stats(predictions["finetuned_struct"])

    def stat(table, label, key):
        # Labels missing from one of the runs contribute 0 for every statistic.
        return table.get(label, {}).get(key, 0)

    # Most frequent labels (by fine-tuned support) first.
    ordered = sorted(
        set(base_stats) | set(ft_stats),
        key=lambda x: stat(ft_stats, x, "support"),
        reverse=True,
    )
    per_class = [
        {
            "label": label,
            "support": stat(ft_stats, label, "support"),
            "base_accuracy": stat(base_stats, label, "accuracy"),
            "finetuned_accuracy": stat(ft_stats, label, "accuracy"),
            "base_f1": stat(base_stats, label, "f1"),
            "finetuned_f1": stat(ft_stats, label, "f1"),
            "f1_delta": stat(ft_stats, label, "f1") - stat(base_stats, label, "f1"),
        }
        for label in ordered
    ]
    per_class_columns = ["label", "support", "base_accuracy", "finetuned_accuracy", "base_f1", "finetuned_f1", "f1_delta"]
    write_csv(DERIVED_DIR / "per_behavior_scores.csv", per_class, per_class_columns)

    confusion_labels, cm = confusion_rows(predictions["finetuned_struct"])
    write_csv(
        DERIVED_DIR / "finetuned_current_behavior_confusion_top18.csv",
        cm,
        ["true_label", *confusion_labels, "other", "support"],
    )

    qa_samples = qa_error_samples(predictions["finetuned_qa"])
    write_csv(DERIVED_DIR / "finetuned_qa_error_samples.csv", qa_samples, ["index", "error_fields", "target", "prediction"])

    # Paths are recorded relative to PACKAGE; the "derived" list is globbed
    # after the CSVs above have been written so that it includes them.
    metric_paths = [str((METRICS_DIR / f"{name}_metrics.json").relative_to(PACKAGE)) for name in METRIC_FILES]
    prediction_paths = [str((PRED_DIR / f"{name}_predictions.jsonl").relative_to(PACKAGE)) for name in PRED_FILES]
    derived_paths = [str(p.relative_to(PACKAGE)) for p in sorted(DERIVED_DIR.glob("*"))]
    metadata = {
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "package_purpose": "Self-contained design review presentation for MWave aircraft lavatory radar LLM fine-tuning experiment.",
        "primary_audience": "Design experts and system reviewers",
        "storyline": [
            "Problem: radar-derived lavatory behavior state is sequential, ambiguous, and privacy-sensitive.",
            "Intervention: fine-tune Qwen3.5-9B with mixed structured prediction and QA supervision.",
            "Evidence: compare base vs fine-tuned on identical validation sets.",
            "Design implication: edge-first state output should be robustly validated before aircraft integration.",
        ],
        "source_files": {
            "metrics": metric_paths,
            "predictions": prediction_paths,
            "derived": derived_paths,
        },
    }
    metadata_text = json.dumps(metadata, ensure_ascii=False, indent=2)
    (DATA_DIR / "presentation_metadata.json").write_text(metadata_text, encoding="utf-8")

    return {
        "metric_rows": metric_rows,
        "per_class": per_class,
        "confusion_labels": confusion_labels,
        "confusion": cm,
        "metadata": metadata,
    }
def svg_wrap(width, height, title, desc, metadata, body):
    """Wrap ``body`` markup in a complete SVG document string.

    The document carries an escaped ``<title>`` and ``<desc>``, the
    ``metadata`` object serialized as indented JSON (escaped) inside
    ``<metadata>``, shared filter / gradient definitions and a white
    background rectangle. ``body`` is inserted verbatim.
    """
    embedded = esc(json.dumps(metadata, ensure_ascii=False, indent=2))
    safe_title = esc(title)
    safe_desc = esc(desc)
    return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}" role="img" aria-labelledby="title desc">
<title id="title">{safe_title}</title>
<desc id="desc">{safe_desc}</desc>
<metadata>{embedded}</metadata>
<defs>
<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">
<feDropShadow dx="0" dy="12" stdDeviation="14" flood-color="#001754" flood-opacity="0.12"/>
</filter>
<linearGradient id="blueGrad" x1="0" x2="0" y1="0" y2="1">
<stop offset="0" stop-color="{BOEING}"/>
<stop offset="1" stop-color="#9DB7E2"/>
</linearGradient>
<linearGradient id="redGrad" x1="0" x2="0" y1="0" y2="1">
<stop offset="0" stop-color="{RED}"/>
<stop offset="1" stop-color="#F4A6B4"/>
</linearGradient>
</defs>
<rect width="100%" height="100%" fill="#ffffff"/>
{body}
</svg>
'''
def text(x, y, content, size=16, weight=500, fill=INK, anchor="start"):
    """Return one SVG ``<text>`` element; ``content`` is stringified and escaped."""
    attrs = (
        f'x="{x}" y="{y}" '
        'font-family="Aptos, Bahnschrift, Noto Sans CJK SC, sans-serif" '
        f'font-size="{size}" font-weight="{weight}" fill="{fill}" text-anchor="{anchor}"'
    )
    return f"<text {attrs}>{esc(content)}</text>"
def save_svg(name, width, height, title, desc, metadata, body):
    """Build the SVG document, write it to CHARTS/``name`` and return that path."""
    target = CHARTS / name
    document = svg_wrap(width, height, title, desc, metadata, body)
    target.write_text(document, encoding="utf-8")
    return target
def bar_chart(name, title, subtitle, rows, value_key, label_key="label", group_key=None, width=980, height=560, value_format="percent"):
    """Render ``rows`` as a vertical bar chart and save it as an SVG file.

    Args:
        name: file name of the SVG inside CHARTS.
        title, subtitle: heading lines; the subtitle is also used as the SVG
            description.
        rows: list of dicts, one bar each. A row may carry a ``"color"`` key;
            otherwise a colour from COLORS is used.
        value_key: key holding the numeric bar value.
        label_key: key holding the bar label (truncated to 18 characters on
            the axis).
        group_key: optional key; consecutive rows sharing its value get a
            common caption below the axis.
        width, height: canvas size in SVG user units.
        value_format: ``"percent"`` formats values with ``safe_pct`` and keeps
            the axis maximum at 1.0 or above; any other value formats with
            ``safe_num``.

    Returns:
        Path of the written SVG.

    The value axis always includes zero. When a row has a negative value the
    axis is extended downwards and bars are drawn from the zero baseline; an
    SVG ``<rect>`` with a negative height is invalid and is not rendered, so
    negative bars must not be produced by simply scaling the value. For
    non-negative data the output is the same as before this handling existed.
    """
    margin = {"l": 110, "r": 46, "t": 106, "b": 110}
    plot_w = width - margin["l"] - margin["r"]
    plot_h = height - margin["t"] - margin["b"]
    values = [r[value_key] for r in rows]
    max_v = max(values + [1.0 if value_format == "percent" else 0.1])
    if value_format == "percent":
        max_v = max(1.0, max_v)
    # min_v is 0 unless some value is negative; span is therefore always > 0.
    min_v = min(values + [0.0])
    span = max_v - min_v
    # Vertical position of value 0 (equals the plot bottom when min_v is 0).
    baseline = margin["t"] + plot_h + plot_h * min_v / span
    body = [
        text(34, 42, title, 28, 900, BLACK),
        text(34, 72, subtitle, 14, 500, STEEL),
        f'<rect x="{margin["l"]}" y="{margin["t"]}" width="{plot_w}" height="{plot_h}" fill="#F8FAFC" stroke="#E5E7EB" rx="18"/>',
    ]
    # Five horizontal grid lines with tick labels from max_v down to min_v.
    for i in range(5):
        y = margin["t"] + plot_h * i / 4
        val = min_v + span * (1 - i / 4)
        body.append(f'<line x1="{margin["l"]}" y1="{y:.1f}" x2="{margin["l"] + plot_w}" y2="{y:.1f}" stroke="#E5E7EB"/>')
        tick = safe_pct(val) if value_format == "percent" else safe_num(val, 1)
        body.append(text(margin["l"] - 14, y + 5, tick, 12, 600, STEEL, "end"))
    if min_v < 0:
        # Make the zero level visible when bars extend in both directions.
        body.append(f'<line x1="{margin["l"]}" y1="{baseline:.1f}" x2="{margin["l"] + plot_w}" y2="{baseline:.1f}" stroke="{STEEL}"/>')
    n = len(rows)
    slot = plot_w / max(n, 1)
    bar_w = slot * 0.62
    for i, row in enumerate(rows):
        v = row[value_key]
        h = plot_h * abs(v) / span
        x = margin["l"] + i * slot + (slot - bar_w) / 2
        # Positive bars grow upwards from the baseline, negative ones hang below it.
        y = baseline - h if v >= 0 else baseline
        color = row.get("color") or COLORS[i % len(COLORS)]
        label = str(row[label_key])
        meta = esc(json.dumps(row, ensure_ascii=False))
        body.append(f'<rect class="bar" data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{bar_w:.1f}" height="{h:.1f}" rx="8" fill="{color}"><title>{esc(label)}: {safe_pct(v) if value_format == "percent" else safe_num(v)}</title></rect>')
        # NOTE(review): the <animate> element is emitted after </text>, i.e. as
        # a sibling rather than a child of the label - confirm this is intended.
        body.append(text(x + bar_w / 2, margin["t"] + plot_h + 28, label[:18], 11, 700, STEEL, "middle") + f'<animate attributeName="opacity" from="0" to="1" dur="0.4s" fill="freeze"/>')
    if group_key:
        # Detect runs of consecutive rows with the same group value; the
        # sentinel row appended at the end flushes the last run.
        groups = []
        last = None
        start = 0
        for i, row in enumerate(rows + [{group_key: None}]):
            g = row.get(group_key)
            if last is None:
                last = g
            if g != last:
                groups.append((last, start, i - 1))
                start = i
                last = g
        for group, start, end in groups:
            if group is None:
                continue
            # Centre the caption under the run of bars it describes.
            x = margin["l"] + (start + end + 1) * slot / 2
            body.append(text(x, height - 26, group, 13, 900, BLACK, "middle"))
    return save_svg(name, width, height, title, subtitle, {"chart_type": "bar", "rows": rows, "value_key": value_key}, "\n".join(body))
def grouped_metric_chart(metrics):
    """Draw the base vs fine-tuned overview chart and return its path.

    For each listed metric a "Base" bar and an "FT" bar are added, in that
    order, skipping any value that is missing from the metrics payload.
    """
    order = [
        ("Schema完整率", "required_field_complete_rate", "struct"),
        ("当前行为Acc", "current_behavior_accuracy", "struct"),
        ("当前行为Macro-F1", "current_behavior_macro_f1", "struct"),
        ("下一行为Acc", "next_possible_behavior_accuracy", "struct"),
        ("阶段Index Acc", "stage_index_accuracy", "struct"),
        ("序列Exact", "sequence_exact_match", "struct"),
        ("占用Acc", "occupied_accuracy", "qa"),
        ("异常F1", "is_abnormal_f1", "qa"),
        ("区域F1", "used_areas_micro_f1", "qa"),
    ]
    rows = []
    for label, key, task in order:
        # Base bars are always grey; fine-tuned bars are coloured by task.
        variants = (
            ("base", "Base", STEEL),
            ("finetuned", "FT", BOEING if task == "struct" else RED),
        )
        for run, suffix, color in variants:
            score = metrics[f"{run}_{task}"]["metrics"].get(key)
            if score is None:
                continue
            rows.append({
                "label": f"{label}\n{suffix}",
                "metric": key,
                "run": run,
                "task": task,
                "value": score,
                "color": color,
            })
    return bar_chart(
        "01_base_vs_finetuned_scores.svg",
        "Base vs Fine-tuned: 指标总览",
        "同一验证集上的结构化预测与 QA 任务对比;柱形为可检查 metadata 的 SVG 元素。",
        rows,
        "value",
        width=1280,
        height=620,
    )
def label_distribution_chart(summary):
    """Draw the training label distribution (top 19 labels) and return its path.

    Labels come from ``summary["train_struct"]["label_counts"]`` and are
    ordered by descending count.
    """
    ranked = sorted(
        summary["train_struct"]["label_counts"].items(),
        key=lambda x: x[1],
        reverse=True,
    )
    rows = [
        {
            "label": label,
            "value": count,
            # One specific label is always drawn in the primary colour; the
            # others cycle through the palette by rank.
            "color": BOEING if label == "坐用马桶" else COLORS[rank % len(COLORS)],
        }
        for rank, (label, count) in enumerate(ranked)
    ]
    return bar_chart(
        "02_train_label_distribution.svg",
        "训练集行为标签分布",
        "类别分布显示任务主要由长时间状态和短过渡行为共同构成;设计评审应关注少数类边界。",
        rows[:19],
        "value",
        width=1280,
        height=620,
        value_format="count",
    )
def per_behavior_delta_chart(per_class):
    """Draw the 16 largest per-label F1 deltas and return the chart path.

    Bars with a non-negative delta use the primary colour, negative deltas
    use red.
    """
    strongest = sorted(per_class, key=lambda x: x["f1_delta"], reverse=True)[:16]
    rows = [
        {
            "label": entry["label"],
            "value": entry["f1_delta"],
            "support": entry["support"],
            "base_f1": entry["base_f1"],
            "finetuned_f1": entry["finetuned_f1"],
            "color": BOEING if entry["f1_delta"] >= 0 else RED,
        }
        for entry in strongest
    ]
    return bar_chart(
        "03_behavior_f1_delta.svg",
        "行为类别 F1 改善幅度",
        "展示微调对各行为类别的收益;support 表示验证集该行为样本量。",
        rows,
        "value",
        width=1280,
        height=620,
    )
def time_mae_chart(metrics):
    """Draw the time-estimation MAE chart for the fine-tuned runs.

    Four MAE values are read from the fine-tuned struct metrics and one from
    the fine-tuned QA metrics. A metric that is absent or ``None`` is left
    out of the chart; this applies to the QA value as well, so that a missing
    QA metric no longer raises or passes ``None`` on to ``bar_chart``.

    Returns the path of the written SVG.
    """
    rows = []
    mapping = [
        ("当前行为已持续", "elapsed_seconds_in_current_behavior_mae"),
        ("当前行为剩余", "estimated_remaining_seconds_mae"),
        ("完整流程剩余", "full_remaining_seconds_mae"),
        ("当前行为结束时刻", "expected_end_time_mae"),
    ]
    for label, key in mapping:
        v = metrics["finetuned_struct"]["metrics"].get(key)
        if v is not None:
            rows.append({"label": label, "metric": key, "value": v, "color": BOEING})
    # Same missing-value policy as the struct metrics above.
    qa_mae = metrics["finetuned_qa"]["metrics"].get("time_to_free_minutes_mae")
    if qa_mae is not None:
        rows.append({"label": "QA空出时间(分钟)", "metric": "time_to_free_minutes_mae", "value": qa_mae, "color": RED})
    return bar_chart(
        "04_time_error_mae.svg",
        "时间估计误差",
        "结构化任务以秒为单位;QA 空出时间以分钟为单位,接近 0 说明该 QA 目标在当前构造规则下被模型很好拟合。",
        rows,
        "value",
        width=1100,
        height=560,
        value_format="count",
    )
def confusion_heatmap(name, title, labels, matrix):
    """Render a confusion matrix as an SVG heatmap and return the file path.

    Args:
        name: file name of the SVG inside CHARTS.
        title: heading drawn above the matrix.
        labels: label order used for both axes.
        matrix: one dict per true label with a count for every entry of
            ``labels`` plus ``"true_label"`` and ``"support"``.

    Diagonal cells are shaded blue and off-diagonal cells red, with the
    intensity scaled by the cell count relative to the largest count. An
    empty ``labels`` list yields a chart without cells instead of a
    ``ZeroDivisionError``.
    """
    width, height = 1180, 900
    margin = {"l": 170, "r": 44, "t": 126, "b": 170}
    n = len(labels)
    # Guard the cell-size division so an empty matrix still renders.
    divisor = max(n, 1)
    cell = min((width - margin["l"] - margin["r"]) / divisor, (height - margin["t"] - margin["b"]) / divisor)
    max_count = max([row[label] for row in matrix for label in labels] + [1])
    body = [
        text(34, 44, title, 28, 900, BLACK),
        text(34, 74, "Fine-tuned current_behavior confusion matrix, top validation labels.", 14, 500, STEEL),
        f'<rect x="{margin["l"]}" y="{margin["t"]}" width="{cell*n:.1f}" height="{cell*n:.1f}" fill="#F8FAFC" stroke="#E5E7EB" rx="12"/>',
    ]
    # Axis labels: true labels down the left edge, predicted labels below.
    for i, true_label in enumerate(labels):
        body.append(text(margin["l"] - 12, margin["t"] + i * cell + cell * 0.62, true_label, 12, 700, INK, "end"))
        body.append(text(margin["l"] + i * cell + cell * 0.5, margin["t"] + n * cell + 26, true_label, 11, 700, INK, "middle"))
    for i, row in enumerate(matrix):
        for j, pred_label in enumerate(labels):
            count = row[pred_label]
            intensity = count / max_count
            blue = int(246 - 170 * intensity)
            color = f"rgb({blue},{max(39, blue + 12)},{255})" if i == j else f"rgb({255},{max(230, 248 - int(120 * intensity))},{max(230, 248 - int(120 * intensity))})"
            meta = esc(json.dumps({"true_label": row["true_label"], "predicted_label": pred_label, "count": count, "support": row["support"]}, ensure_ascii=False))
            x = margin["l"] + j * cell
            y = margin["t"] + i * cell
            body.append(f'<rect data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{cell:.1f}" height="{cell:.1f}" fill="{color}" stroke="#FFFFFF"><title>true={esc(row["true_label"])}, pred={esc(pred_label)}, count={count}</title></rect>')
            # Zero counts are left blank to keep the matrix readable.
            if count:
                body.append(text(x + cell / 2, y + cell * 0.62, count, 9, 700, BLACK, "middle"))
    body.append(text(margin["l"] + cell * n / 2, height - 38, "Predicted behavior", 14, 900, BLACK, "middle"))
    body.append(f'<text x="34" y="{margin["t"] + cell*n/2}" transform="rotate(-90 34 {margin["t"] + cell*n/2})" font-family="Aptos, Bahnschrift, sans-serif" font-size="14" font-weight="900" fill="{BLACK}" text-anchor="middle">True behavior</text>')
    return save_svg(name, width, height, title, "Confusion heatmap with metadata on each cell.", {"chart_type": "confusion_matrix", "labels": labels}, "\n".join(body))
def architecture_svg():
    """Draw the four-box deployment architecture diagram and return its path.

    Boxes are laid out left to right and joined by an arrow (line plus
    triangular head) between each neighbouring pair.
    """
    width, height = 1280, 500
    # (box title, description lines separated by "\n", left x coordinate)
    boxes = [
        ("毫米波雷达", "点云/轨迹窗口\n隐私友好,非视觉图像", 56),
        ("边缘特征层", "去噪、跟踪、区域、速度\n生成结构化中间表征", 356),
        ("Qwen3.5-9B LoRA", "本地推理输出 JSON\n行为、阶段、剩余时间、QA", 656),
        ("客舱系统", "乘务终端、维护日志\n异常关注而非强控制", 956),
    ]
    last_index = len(boxes) - 1
    body = [
        text(34, 44, "飞机厕所部署技术方案", 30, 900, BLACK),
        text(34, 76, "Edge-first, privacy-preserving, fail-safe integration concept.", 14, 500, STEEL),
    ]
    for position, (box_title, box_desc, left) in enumerate(boxes):
        # The first three box titles use the primary colour, the last one red.
        title_color = BOEING if position < 3 else RED
        body.append(f'<rect x="{left}" y="142" width="250" height="210" rx="28" fill="#FFFFFF" stroke="#D6DEE9" filter="url(#softShadow)"/>')
        body.append(text(left + 24, 190, box_title, 22, 900, title_color))
        line_no = 0
        for fragment in box_desc.split("\n"):
            body.append(text(left + 24, 238 + line_no * 28, fragment, 16, 600, STEEL))
            line_no += 1
        if position < last_index:
            shaft_start = left + 260
            tip = left + 294
            head_base = left + 282
            body.append(f'<line x1="{shaft_start}" y1="246" x2="{tip}" y2="246" stroke="{BOEING}" stroke-width="4"/>')
            body.append(f'<polygon points="{tip},246 {head_base},238 {head_base},254" fill="{RED}"/>')
    body.append(text(56, 420, "设计原则:厕所侧只上传结构化状态,不上传可逆原始人体数据;LLM JSON 校验失败时回退规则模型。", 18, 800, BLACK))
    return save_svg(
        "06_deployment_architecture.svg",
        width,
        height,
        "飞机厕所部署技术方案",
        "Edge deployment architecture.",
        {"chart_type": "architecture"},
        "\n".join(body),
    )
def build_svgs(summary, metrics, derived):
    """Generate all six SVG charts and write a JSON manifest describing them.

    Returns the manifest: one dict per chart with its path relative to
    PACKAGE, a title derived from the file name and a fixed note on how to
    inspect the embedded metadata.
    """
    generated = []
    generated.append(grouped_metric_chart(metrics))
    generated.append(label_distribution_chart(summary))
    generated.append(per_behavior_delta_chart(derived["per_class"]))
    generated.append(time_mae_chart(metrics))
    generated.append(
        confusion_heatmap(
            "05_finetuned_behavior_confusion.svg",
            "Fine-tuned 行为混淆矩阵",
            derived["confusion_labels"],
            derived["confusion"],
        )
    )
    generated.append(architecture_svg())

    note = "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta."
    chart_manifest = [
        {
            "file": str(svg_path.relative_to(PACKAGE)),
            "title": svg_path.stem.replace("_", " "),
            "metadata_note": note,
        }
        for svg_path in generated
    ]
    manifest_text = json.dumps(chart_manifest, ensure_ascii=False, indent=2)
    (CHARTS / "chart_manifest.json").write_text(manifest_text, encoding="utf-8")
    return chart_manifest
def metric_delta(metrics, task, key):
    """Return fine-tuned minus base for metric ``key`` of ``task``.

    Yields ``None`` when either run does not provide the metric (missing or
    ``None``).
    """
    before = metrics[f"base_{task}"]["metrics"].get(key)
    after = metrics[f"finetuned_{task}"]["metrics"].get(key)
    return None if (before is None or after is None) else after - before
def build_html(summary, metrics, derived, chart_manifest):
    """Render the self-contained presentation page to PACKAGE/index.html.

    Args:
        summary: dataset summary; ``num_examples`` of ``train_struct``,
            ``val_struct`` and ``train_qa`` are shown in the hero section.
        metrics: metric payloads keyed ``base_struct``, ``finetuned_struct``,
            ``base_qa`` and ``finetuned_qa``.
        derived: output of ``derive_data``; ``per_class`` feeds the
            "largest F1 improvement" table.
        chart_manifest: output of ``build_svgs``; one chart card per entry.

    The page embeds ``summary``, ``metrics`` and the manifest as JSON in a
    ``<script type="application/json">`` element that the metadata explorer
    parses in the browser.
    """
    struct = metrics["finetuned_struct"]["metrics"]
    qa = metrics["finetuned_qa"]["metrics"]
    base_struct = metrics["base_struct"]["metrics"]
    base_qa = metrics["base_qa"]["metrics"]
    # The JSON is placed inside a <script> element, where a literal
    # "</script>" or "<!--" inside any string would end or corrupt the
    # element. "<" can only occur inside JSON strings, so replacing it with
    # its \u escape keeps the document valid JSON and JSON.parse restores it.
    metadata_json = json.dumps({
        "summary": summary,
        "metrics": metrics,
        "chart_manifest": chart_manifest,
        "derived_files": [str(p.relative_to(PACKAGE)) for p in sorted(DERIVED_DIR.glob("*"))],
    }, ensure_ascii=False).replace("<", "\\u003c")
    metric_table_rows = []
    # (display name, base value, fine-tuned value, design meaning)
    story_metrics = [
        ("结构化 schema 完整率", base_struct["required_field_complete_rate"], struct["required_field_complete_rate"], "决定输出能否进入机载系统状态总线。"),
        ("当前行为准确率", base_struct["current_behavior_accuracy"], struct["current_behavior_accuracy"], "核心状态识别能力,从 radar 表征映射到行为标签。"),
        ("当前行为 Macro-F1", base_struct["current_behavior_macro_f1"], struct["current_behavior_macro_f1"], "衡量少数类是否被覆盖,适合设计评审关注长尾行为。"),
        ("下一行为准确率", base_struct["next_possible_behavior_accuracy"], struct["next_possible_behavior_accuracy"], "影响短期流程预测和预计空出时间。"),
        ("阶段 index 准确率", base_struct["stage_index_accuracy"], struct["stage_index_accuracy"], "反映模型是否理解完整厕所使用流程位置。"),
        ("QA 异常 F1", base_qa["is_abnormal_f1"], qa["is_abnormal_f1"], "用于乘务关注提示,需单独评估。"),
        ("QA 区域 F1", base_qa["used_areas_micro_f1"], qa["used_areas_micro_f1"], "用于理解马桶、洗手池、门、垃圾桶等区域使用状态。"),
    ]
    for name, base, ft, meaning in story_metrics:
        delta = ft - base if base is not None and ft is not None else None
        metric_table_rows.append(f"<tr><td>{esc(name)}</td><td>{safe_pct(base)}</td><td>{safe_pct(ft)}</td><td>{safe_pct(delta) if delta is not None else 'N/A'}</td><td>{esc(meaning)}</td></tr>")
    # Eight labels with the largest F1 gain.
    top_improvements = sorted(derived["per_class"], key=lambda x: x["f1_delta"], reverse=True)[:8]
    top_rows = "\n".join(
        f"<tr><td>{esc(r['label'])}</td><td>{r['support']}</td><td>{safe_pct(r['base_f1'])}</td><td>{safe_pct(r['finetuned_f1'])}</td><td>{safe_pct(r['f1_delta'])}</td></tr>"
        for r in top_improvements
    )
    chart_cards = "\n".join(
        f'''<article class="chart-card" data-chart="{esc(item["file"])}">
<div><h3>{esc(item["title"])}</h3><p>{esc(item["metadata_note"])}</p></div>
<object type="image/svg+xml" data="{esc(item["file"])}" aria-label="{esc(item["title"])}"></object>
</article>'''
        for item in chart_manifest
    )
    # Links to every file currently present under DATA_DIR, relative to PACKAGE.
    source_links = "\n".join(
        f"<li><a href='{esc(str(p.relative_to(PACKAGE)))}'>{esc(str(p.relative_to(PACKAGE)))}</a></li>"
        for p in sorted(DATA_DIR.rglob("*"))
        if p.is_file()
    )
    # Doubled braces below are literal braces for CSS / JavaScript.
    html_text = f'''<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>MWave Design Review Presentation</title>
<style>
:root {{
--boeing:{BOEING}; --boeing2:{BOEING_2}; --red:{RED}; --black:{BLACK};
--ink:{INK}; --steel:{STEEL}; --mist:{MIST}; --light:{LIGHT}; --line:rgba(8,10,15,.14);
}}
* {{ box-sizing:border-box; }}
body {{
margin:0; color:var(--ink);
font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif;
background:radial-gradient(circle at 10% 0%, rgba(0,103,177,.22), transparent 28%),
radial-gradient(circle at 86% 6%, rgba(200,16,46,.12), transparent 24%),
linear-gradient(135deg,#eef4fb,#ffffff 44%,#e9eef6);
}}
a {{ color:var(--boeing); font-weight:800; }}
.deck {{ max-width:1480px; margin:0 auto; padding:28px; }}
.hero {{
min-height:620px; color:#fff; border-radius:36px; padding:52px;
background:linear-gradient(118deg,rgba(3,11,29,.98),rgba(0,57,166,.94) 54%,rgba(0,103,177,.72));
position:relative; overflow:hidden;
}}
.hero:after {{ content:""; position:absolute; right:-120px; top:-160px; width:640px; height:640px; border-radius:50%; border:1px solid rgba(255,255,255,.18); box-shadow:inset 0 0 0 82px rgba(255,255,255,.05), inset 0 0 0 180px rgba(255,255,255,.035); }}
.kicker {{ letter-spacing:.18em; text-transform:uppercase; font-size:12px; color:#dbeafe; }}
h1 {{ font-size:clamp(46px,7vw,100px); line-height:.92; margin:26px 0 18px; max-width:1050px; letter-spacing:-.06em; }}
h2 {{ font-size:38px; letter-spacing:-.04em; margin:0 0 18px; color:var(--black); }}
h3 {{ margin:0 0 8px; font-size:22px; color:var(--black); }}
.hero p {{ color:#eaf1fb; font-size:21px; line-height:1.65; max-width:980px; }}
.hero-grid {{ display:grid; grid-template-columns:repeat(4,1fr); gap:18px; position:relative; z-index:1; margin-top:42px; }}
.hero-card {{ padding:22px; border:1px solid rgba(255,255,255,.22); background:rgba(255,255,255,.1); border-radius:24px; backdrop-filter:blur(16px); }}
.big {{ font-size:38px; font-weight:900; letter-spacing:-.04em; }}
.label {{ color:#cbd5e1; font-size:13px; margin-top:6px; }}
nav {{ position:sticky; top:0; z-index:20; display:flex; gap:8px; flex-wrap:wrap; margin:18px 0; padding:12px; border:1px solid var(--line); border-radius:22px; background:rgba(255,255,255,.88); backdrop-filter:blur(18px); }}
nav button, .pill {{ border:0; border-radius:999px; padding:11px 15px; background:#eef3fb; color:#0f172a; font-weight:900; cursor:pointer; }}
nav button:hover, .pill.active {{ background:var(--boeing); color:#fff; }}
section {{ background:rgba(255,255,255,.92); border:1px solid var(--line); border-radius:30px; padding:32px; margin-top:24px; box-shadow:0 16px 44px rgba(17,24,39,.06); }}
.grid {{ display:grid; gap:18px; }}
.grid-2 {{ grid-template-columns:repeat(2,minmax(0,1fr)); }}
.grid-3 {{ grid-template-columns:repeat(3,minmax(0,1fr)); }}
.grid-4 {{ grid-template-columns:repeat(4,minmax(0,1fr)); }}
.card {{ border:1px solid var(--line); background:linear-gradient(180deg,#fff,#f6f9fd); border-radius:24px; padding:22px; }}
.card strong {{ color:var(--boeing); }}
.muted {{ color:var(--steel); line-height:1.66; }}
.story {{ display:grid; grid-template-columns:72px 1fr; gap:18px; margin-bottom:18px; }}
.num {{ width:54px; height:54px; border-radius:50%; display:grid; place-items:center; background:var(--black); color:#fff; font-weight:900; font-size:22px; }}
table {{ width:100%; border-collapse:collapse; overflow:hidden; border-radius:18px; background:#fff; }}
th,td {{ padding:13px 14px; border-bottom:1px solid #e5e7eb; text-align:left; vertical-align:top; }}
th {{ background:#08111f; color:#fff; font-size:13px; }}
.chart-grid {{ display:grid; grid-template-columns:1fr; gap:22px; }}
.chart-card {{ border:1px solid var(--line); background:#fff; border-radius:26px; padding:22px; }}
.chart-card object {{ width:100%; height:620px; border:0; background:#fff; border-radius:18px; }}
.chart-card p {{ margin:0 0 12px; color:var(--steel); }}
.metadata-panel {{ display:grid; grid-template-columns:360px 1fr; gap:18px; }}
select, textarea {{ width:100%; border:1px solid #d6dee9; border-radius:16px; padding:12px; font:inherit; background:#fff; }}
pre {{ margin:0; white-space:pre-wrap; background:#05070c; color:#d1fae5; border-radius:18px; padding:18px; min-height:360px; overflow:auto; font-family:"Cascadia Mono","SFMono-Regular",monospace; font-size:13px; }}
.callout {{ border-left:6px solid var(--red); background:#fff5f6; padding:18px; border-radius:18px; }}
.files {{ columns:2; }}
.footer {{ text-align:center; color:#64748b; padding:28px; }}
@media print {{
nav, .metadata-panel, .no-print {{ display:none !important; }}
body {{ background:#fff; }}
section, .hero {{ break-inside:avoid; page-break-inside:avoid; box-shadow:none; }}
.chart-card object {{ height:520px; }}
}}
@media(max-width:980px) {{ .hero-grid,.grid-2,.grid-3,.grid-4,.metadata-panel {{ grid-template-columns:1fr; }} .deck {{ padding:14px; }} .hero {{ padding:28px; }} .files {{ columns:1; }} }}
</style>
</head>
<body>
<div class="deck">
<header class="hero">
<div class="kicker">MWave Radar LLM · Aircraft Lavatory Design Review</div>
<h1>从毫米波雷达到机载厕所状态智能:一次可验证的 LLM 微调实验</h1>
<p>这份汇报面向设计专家评审,重点不是展示训练日志,而是讲清楚:为什么需要模型、模型学到了什么、哪些结果能支持设计决策、哪些风险需要在机载部署前继续验证。</p>
<div class="hero-grid">
<div class="hero-card"><div class="big">{summary["train_struct"]["num_examples"]:,}</div><div class="label">训练结构样本</div></div>
<div class="hero-card"><div class="big">{summary["val_struct"]["num_examples"]:,}</div><div class="label">验证结构样本</div></div>
<div class="hero-card"><div class="big">{summary["train_qa"]["num_examples"]:,}</div><div class="label">QA 训练样本</div></div>
<div class="hero-card"><div class="big">Qwen3.5-9B</div><div class="label">4-bit QLoRA 微调</div></div>
</div>
</header>
<nav class="no-print">
<button onclick="go('story')">Story Line</button>
<button onclick="go('method')">任务与方法</button>
<button onclick="go('results')">结果证据</button>
<button onclick="go('charts')">SVG 图表</button>
<button onclick="go('metadata')">Metadata Explorer</button>
<button onclick="go('deployment')">部署方案</button>
<button onclick="go('sources')">源数据</button>
</nav>
<section id="story">
<h2>评审 Story Line</h2>
<div class="story"><div class="num">1</div><div><h3>设计问题</h3><p class="muted">飞机厕所内不能依赖摄像头,毫米波雷达提供隐私友好的时空轨迹,但原始序列难以直接变成可用的客舱状态。</p></div></div>
<div class="story"><div class="num">2</div><div><h3>模型角色</h3><p class="muted">LLM 不替代雷达信号处理,而是读取结构化窗口和中间表征,输出严格 JSON:当前行为、下一行为、阶段、剩余时间、序列,以及 QA 状态。</p></div></div>
<div class="story"><div class="num">3</div><div><h3>实验判断</h3><p class="muted">同一验证集上比较 base Qwen3.5-9B 与微调后模型,指标覆盖 schema 合规、行为识别、流程理解、时间估计和 QA 推理。</p></div></div>
<div class="story"><div class="num">4</div><div><h3>设计结论</h3><p class="muted">微调显著提升结构化输出和 QA 可用性,但时间误差、少数类行为和异常提示仍应进入下一轮系统设计验证。</p></div></div>
</section>
<section id="method">
<h2>任务与评估方法</h2>
<div class="grid grid-3">
<div class="card"><h3>结构化预测</h3><p class="muted">输入雷达时序窗口和中间层表征,输出 current_behavior、next_possible_behavior、stage_index、remaining time、sequence_so_far 等字段。</p></div>
<div class="card"><h3>QA 状态回答</h3><p class="muted">作为独立维度评估 occupied、time_to_free_minutes、used_areas、is_abnormal,避免只看结构化任务而忽略最终用户问题。</p></div>
<div class="card"><h3>指标选择</h3><p class="muted">分类用 accuracy/F1,schema 用 JSON parse 和 required field complete,时间用 MAE,序列用 exact/prefix/last-label match。</p></div>
</div>
</section>
<section id="results">
<h2>关键结果</h2>
<div class="grid grid-4">
<div class="card"><div class="big">{safe_pct(struct["current_behavior_accuracy"])}</div><strong>当前行为准确率</strong><p class="muted">Base 为 {safe_pct(base_struct["current_behavior_accuracy"])}。</p></div>
<div class="card"><div class="big">{safe_pct(struct["required_field_complete_rate"])}</div><strong>结构化字段完整率</strong><p class="muted">Base 为 {safe_pct(base_struct["required_field_complete_rate"])},说明微调主要解决 schema 对齐。</p></div>
<div class="card"><div class="big">{safe_pct(qa["is_abnormal_f1"])}</div><strong>QA 异常 F1</strong><p class="muted">Base 为 {safe_pct(base_qa["is_abnormal_f1"])}。</p></div>
<div class="card"><div class="big">{safe_num(struct["full_remaining_seconds_mae"], 1)}s</div><strong>完整流程剩余时间 MAE</strong><p class="muted">该误差直接影响预计空出时间体验。</p></div>
</div>
<div style="margin-top:20px; overflow:auto">
<table>
<tr><th>评估项</th><th>Base</th><th>Fine-tuned</th><th>变化</th><th>设计含义</th></tr>
{''.join(metric_table_rows)}
</table>
</div>
<div class="callout" style="margin-top:20px">
<strong>评审判断:</strong>当前实验已经证明 9B LoRA 可以把通用 LLM 拉到可用的结构化状态输出轨道上;但机载上线前仍应增加跨乘客、跨机型、跨雷达安装角度的数据验证,并对异常提示设定保守阈值。
</div>
</section>
<section>
<h2>哪些行为受益最大</h2>
<p class="muted">下表展示 F1 改善最大的行为类别。设计评审时应同时看 support,避免把少量样本上的提升误判为稳定能力。</p>
<table>
<tr><th>行为</th><th>验证样本数</th><th>Base F1</th><th>Fine-tuned F1</th><th>提升</th></tr>
{top_rows}
</table>
</section>
<section id="charts">
<h2>SVG 矢量图表</h2>
<p class="muted">所有图表都在 <code>assets/charts/</code> 下以 SVG 保存,可缩放、可放进设计文档;SVG 内包含 metadata,HTML 下方可查看。</p>
<div class="chart-grid">{chart_cards}</div>
</section>
<section id="metadata" class="no-print">
<h2>Metadata Explorer</h2>
<p class="muted">选择图表或数据对象,查看生成依据、源文件路径和图表内嵌 metadata。此功能完全离线,不依赖外网。</p>
<div class="metadata-panel">
<div class="card">
<h3>选择对象</h3>
<select id="metaSelect"></select>
<p class="muted">提示:SVG 图表也可以直接用文本编辑器打开,查看其中的 <code>&lt;metadata&gt;</code> 和每个 bar/cell 的 <code>data-meta</code>。</p>
</div>
<pre id="metaOut"></pre>
</div>
</section>
<section id="deployment">
<h2>部署到飞机厕所的技术方案</h2>
<div class="grid grid-3">
<div class="card"><h3>边缘优先</h3><p class="muted">厕所侧完成雷达预处理和特征抽取,客舱边缘计算单元加载量化 LLM/LoRA,减少原始数据移动。</p></div>
<div class="card"><h3>状态输出</h3><p class="muted">对外只输出 occupied、estimated free time、used areas、abnormal flag 和行为阶段,不输出身份或可逆人体点云。</p></div>
<div class="card"><h3>失效保护</h3><p class="muted">JSON 校验失败、置信不足或异常连续触发时回退规则模型,并只给出“需关注”级提示。</p></div>
</div>
<object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" style="width:100%;height:480px;border:0;margin-top:18px"></object>
</section>
<section id="sources">
<h2>可拷走的源数据与材料</h2>
<p class="muted">整个 <code>mwave_design_review_package</code> 目录可直接复制到其他电脑。HTML、PDF、SVG 和 CSV/JSONL 都使用相对路径或独立文件。</p>
<ul class="files">{source_links}</ul>
</section>
<div class="footer">Generated locally · {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} · Boeing blue / red / black / gray design system</div>
</div>
<script id="metadata-json" type="application/json">{metadata_json}</script>
<script>
const metadata = JSON.parse(document.getElementById('metadata-json').textContent);
const options = [
{{label:'Package overview', value:metadata}},
...metadata.chart_manifest.map(c => ({{label:'Chart: '+c.title, value:c}})),
{{label:'Metrics: base struct', value:metadata.metrics.base_struct}},
{{label:'Metrics: fine-tuned struct', value:metadata.metrics.finetuned_struct}},
{{label:'Metrics: base QA', value:metadata.metrics.base_qa}},
{{label:'Metrics: fine-tuned QA', value:metadata.metrics.finetuned_qa}},
{{label:'Data summary', value:metadata.summary}},
];
function go(id){{ document.getElementById(id).scrollIntoView({{behavior:'smooth'}}); }}
const select = document.getElementById('metaSelect');
const out = document.getElementById('metaOut');
options.forEach((item, idx) => {{
const option = document.createElement('option');
option.value = String(idx);
option.textContent = item.label;
select.appendChild(option);
}});
function renderMeta(){{ out.textContent = JSON.stringify(options[Number(select.value)].value, null, 2); }}
select.addEventListener('change', renderMeta);
renderMeta();
</script>
</body>
</html>
'''
    (PACKAGE / "index.html").write_text(html_text, encoding="utf-8")
def build_readme():
    """Write PACKAGE/README.md describing the package layout and build time."""
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    readme = f"""# MWave Design Review Presentation Package
This folder is self-contained and can be copied to another computer.
Open:
- `index.html`: interactive presentation with SVG charts and metadata explorer.
- `mwave_design_review_report.pdf`: static PDF export for review circulation.
Key folders:
- `assets/charts/`: standalone SVG vector charts. Each SVG includes embedded metadata.
- `source_data/metrics/`: original evaluation metric JSON files.
- `source_data/predictions/`: original prediction JSONL files for base and fine-tuned runs.
- `source_data/derived/`: CSV files derived for the presentation, including per-behavior scores and confusion matrix.
Generated at: {generated_at}
"""
    readme_path = PACKAGE / "README.md"
    readme_path.write_text(readme, encoding="utf-8")
def export_pdf():
    """Print index.html to PDF with a headless Chrome/Chromium, if available.

    Returns:
        True when the browser ran successfully, False when no browser was
        found on PATH or the browser could not be run / exited with an error.

    PDF export is an optional step: a failing browser is reported and
    treated like a missing one, so the rest of the build (including the zip
    archive) still completes.
    """
    html_path = PACKAGE / "index.html"
    pdf_path = PACKAGE / "mwave_design_review_report.pdf"
    chrome = shutil.which("google-chrome") or shutil.which("chromium") or shutil.which("chromium-browser")
    if not chrome:
        print("Chrome not found; skipped PDF export.")
        return False
    cmd = [
        chrome,
        "--headless",
        "--no-sandbox",
        "--disable-gpu",
        "--print-to-pdf=" + str(pdf_path),
        "--print-to-pdf-no-header",
        # as_uri() percent-encodes the path (spaces, non-ASCII characters)
        # and produces a correct file URL on every platform, unlike plain
        # "file://" + path concatenation.
        html_path.resolve().as_uri(),
    ]
    try:
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (subprocess.CalledProcessError, OSError) as exc:
        print(f"PDF export failed; skipped PDF export. ({exc})")
        return False
    return True
def make_zip():
    """Zip the package directory next to itself and return the archive path.

    The archive is created in OUT_ROOT and contains the package folder as
    its top-level entry.
    """
    archive_stem = str(OUT_ROOT / "mwave_design_review_package")
    created = shutil.make_archive(archive_stem, "zip", root_dir=OUT_ROOT, base_dir=PACKAGE.name)
    return Path(created)
def main():
    """Build the whole review package: data, charts, HTML, README, PDF, zip."""
    ensure_dirs()
    copy_source_files()

    summary = read_json(ROOT / "data/processed/summary.json")
    metrics = {}
    for key, path in METRIC_FILES.items():
        metrics[key] = read_json(path)
    predictions = {}
    for key, path in PRED_FILES.items():
        predictions[key] = read_jsonl(path)

    derived = derive_data(metrics, predictions)
    chart_manifest = build_svgs(summary, metrics, derived)
    build_html(summary, metrics, derived, chart_manifest)
    build_readme()
    pdf_ok = export_pdf()
    archive = make_zip()

    report = (
        f"wrote package: {PACKAGE}",
        f"wrote archive: {archive}",
        f"pdf_exported: {pdf_ok}",
    )
    for message in report:
        print(message)


if __name__ == "__main__":
    main()