File size: 42,508 Bytes
e74a796 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 | #!/usr/bin/env python3
import csv
import html
import json
import math
import shutil
import subprocess
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
OUT_ROOT = ROOT / "presentation"
PACKAGE = OUT_ROOT / "mwave_design_review_package"
ASSETS = PACKAGE / "assets"
CHARTS = ASSETS / "charts"
DATA_DIR = PACKAGE / "source_data"
METRICS_DIR = DATA_DIR / "metrics"
PRED_DIR = DATA_DIR / "predictions"
DERIVED_DIR = DATA_DIR / "derived"
BOEING = "#0039A6"
BOEING_2 = "#0067B1"
RED = "#C8102E"
BLACK = "#080A0F"
INK = "#111827"
STEEL = "#5B6472"
MIST = "#E7ECF3"
LIGHT = "#F7F9FC"
COLORS = [BOEING, BOEING_2, RED, BLACK, STEEL, "#8EA4C8", "#B7C3D5", "#7F1D1D", "#334155", "#64748B"]
METRIC_FILES = {
"base_struct": ROOT / "outputs/metrics/base_struct_metrics.json",
"finetuned_struct": ROOT / "outputs/metrics/finetuned_struct_metrics.json",
"base_qa": ROOT / "outputs/metrics/base_qa_metrics.json",
"finetuned_qa": ROOT / "outputs/metrics/finetuned_qa_metrics.json",
}
PRED_FILES = {
"base_struct": ROOT / "outputs/predictions/base_struct_predictions.jsonl",
"finetuned_struct": ROOT / "outputs/predictions/finetuned_struct_predictions.jsonl",
"base_qa": ROOT / "outputs/predictions/base_qa_predictions.jsonl",
"finetuned_qa": ROOT / "outputs/predictions/finetuned_qa_predictions.jsonl",
}
VAL_FILES = {
"val_struct": ROOT / "data/processed/val_struct.jsonl",
"val_qa": ROOT / "data/processed/val_qa.jsonl",
"summary": ROOT / "data/processed/summary.json",
}
def read_json(path):
return json.loads(Path(path).read_text(encoding="utf-8"))
def read_jsonl(path):
rows = []
with Path(path).open(encoding="utf-8") as f:
for line in f:
line = line.strip()
if line:
rows.append(json.loads(line))
return rows
def safe_pct(v):
if v is None:
return "N/A"
return f"{v * 100:.1f}%"
def safe_num(v, digits=3):
if v is None:
return "N/A"
if isinstance(v, float):
if abs(v) < 0.001 and v != 0:
return f"{v:.2e}"
return f"{v:.{digits}f}".rstrip("0").rstrip(".")
if isinstance(v, int):
return f"{v:,}"
return str(v)
def esc(text):
return html.escape(str(text), quote=True)
def ensure_dirs():
if PACKAGE.exists():
shutil.rmtree(PACKAGE)
for path in [CHARTS, METRICS_DIR, PRED_DIR, DERIVED_DIR]:
path.mkdir(parents=True, exist_ok=True)
def copy_source_files():
for name, src in METRIC_FILES.items():
shutil.copy2(src, METRICS_DIR / f"{name}_metrics.json")
for name, src in PRED_FILES.items():
shutil.copy2(src, PRED_DIR / f"{name}_predictions.jsonl")
for name, src in VAL_FILES.items():
shutil.copy2(src, DATA_DIR / src.name)
adapter = ROOT / "outputs/qwen35_9b_lora"
model_meta = {
"base_model": "Qwen/Qwen3.5-9B",
"adapter_dir": "outputs/qwen35_9b_lora",
"adapter_files": sorted(p.name for p in adapter.glob("*") if p.is_file()) if adapter.exists() else [],
"training_method": "4-bit QLoRA supervised fine-tuning",
"train_file": "data/processed/train_mixed.jsonl",
"validation_files": ["data/processed/val_struct.jsonl", "data/processed/val_qa.jsonl"],
}
(DATA_DIR / "model_training_metadata.json").write_text(json.dumps(model_meta, ensure_ascii=False, indent=2), encoding="utf-8")
def write_csv(path, rows, fieldnames):
with Path(path).open("w", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)
def flatten_metrics(metrics):
rows = []
for key, payload in metrics.items():
run = payload["run_name"]
task = payload["task_type"]
for metric, value in payload["metrics"].items():
rows.append({"run": run, "task": task, "metric": metric, "value": value})
return rows
def class_stats(pred_rows, field="current_behavior"):
labels = sorted({r.get("target", {}).get(field) for r in pred_rows if r.get("target", {}).get(field) is not None})
out = {}
for label in labels:
tp = fp = fn = correct = support = 0
for rec in pred_rows:
target = rec.get("target", {}).get(field)
pred = rec.get("prediction") if isinstance(rec.get("prediction"), dict) else {}
pred_label = pred.get(field)
if target == label:
support += 1
if pred_label == label:
correct += 1
if target == label and pred_label == label:
tp += 1
elif target != label and pred_label == label:
fp += 1
elif target == label and pred_label != label:
fn += 1
precision = tp / (tp + fp) if tp + fp else 0.0
recall = tp / (tp + fn) if tp + fn else 0.0
f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
acc = correct / support if support else 0.0
out[label] = {"support": support, "accuracy": acc, "precision": precision, "recall": recall, "f1": f1}
return out
def confusion_rows(pred_rows, field="current_behavior", top_n=18):
support = Counter(r.get("target", {}).get(field) for r in pred_rows)
labels = [x for x, _ in support.most_common(top_n) if x is not None]
matrix = []
for true_label in labels:
row = {"true_label": true_label}
total = 0
counts = Counter()
for rec in pred_rows:
pred = rec.get("prediction") if isinstance(rec.get("prediction"), dict) else {}
if rec.get("target", {}).get(field) == true_label:
total += 1
counts[pred.get(field)] += 1
for pred_label in labels:
row[pred_label] = counts[pred_label]
row["other"] = max(0, total - sum(row[p] for p in labels))
row["support"] = total
matrix.append(row)
return labels, matrix
def qa_error_samples(rows, limit=20):
samples = []
for idx, rec in enumerate(rows):
target = rec.get("target", {})
pred = rec.get("prediction") if isinstance(rec.get("prediction"), dict) else {}
errors = []
for key in ["occupied", "is_abnormal", "time_to_free_minutes", "used_areas"]:
if target.get(key) != pred.get(key):
errors.append(key)
if errors:
samples.append({
"index": idx,
"error_fields": "|".join(errors),
"target": json.dumps(target, ensure_ascii=False),
"prediction": json.dumps(pred, ensure_ascii=False),
})
if len(samples) >= limit:
break
return samples
def derive_data(metrics, predictions):
metric_rows = flatten_metrics(metrics)
write_csv(DERIVED_DIR / "metric_comparison.csv", metric_rows, ["run", "task", "metric", "value"])
base_stats = class_stats(predictions["base_struct"])
ft_stats = class_stats(predictions["finetuned_struct"])
labels = sorted(set(base_stats) | set(ft_stats), key=lambda x: ft_stats.get(x, {}).get("support", 0), reverse=True)
per_class = []
for label in labels:
per_class.append({
"label": label,
"support": ft_stats.get(label, {}).get("support", 0),
"base_accuracy": base_stats.get(label, {}).get("accuracy", 0),
"finetuned_accuracy": ft_stats.get(label, {}).get("accuracy", 0),
"base_f1": base_stats.get(label, {}).get("f1", 0),
"finetuned_f1": ft_stats.get(label, {}).get("f1", 0),
"f1_delta": ft_stats.get(label, {}).get("f1", 0) - base_stats.get(label, {}).get("f1", 0),
})
write_csv(DERIVED_DIR / "per_behavior_scores.csv", per_class, ["label", "support", "base_accuracy", "finetuned_accuracy", "base_f1", "finetuned_f1", "f1_delta"])
labels, cm = confusion_rows(predictions["finetuned_struct"])
write_csv(DERIVED_DIR / "finetuned_current_behavior_confusion_top18.csv", cm, ["true_label", *labels, "other", "support"])
qa_samples = qa_error_samples(predictions["finetuned_qa"])
write_csv(DERIVED_DIR / "finetuned_qa_error_samples.csv", qa_samples, ["index", "error_fields", "target", "prediction"])
metadata = {
"generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"package_purpose": "Self-contained design review presentation for MWave aircraft lavatory radar LLM fine-tuning experiment.",
"primary_audience": "Design experts and system reviewers",
"storyline": [
"Problem: radar-derived lavatory behavior state is sequential, ambiguous, and privacy-sensitive.",
"Intervention: fine-tune Qwen3.5-9B with mixed structured prediction and QA supervision.",
"Evidence: compare base vs fine-tuned on identical validation sets.",
"Design implication: edge-first state output should be robustly validated before aircraft integration.",
],
"source_files": {
"metrics": [str((METRICS_DIR / f"{name}_metrics.json").relative_to(PACKAGE)) for name in METRIC_FILES],
"predictions": [str((PRED_DIR / f"{name}_predictions.jsonl").relative_to(PACKAGE)) for name in PRED_FILES],
"derived": [str(p.relative_to(PACKAGE)) for p in sorted(DERIVED_DIR.glob("*"))],
},
}
(DATA_DIR / "presentation_metadata.json").write_text(json.dumps(metadata, ensure_ascii=False, indent=2), encoding="utf-8")
return {"metric_rows": metric_rows, "per_class": per_class, "confusion_labels": labels, "confusion": cm, "metadata": metadata}
def svg_wrap(width, height, title, desc, metadata, body):
meta = esc(json.dumps(metadata, ensure_ascii=False, indent=2))
return f'''<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}" role="img" aria-labelledby="title desc">
<title id="title">{esc(title)}</title>
<desc id="desc">{esc(desc)}</desc>
<metadata>{meta}</metadata>
<defs>
<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">
<feDropShadow dx="0" dy="12" stdDeviation="14" flood-color="#001754" flood-opacity="0.12"/>
</filter>
<linearGradient id="blueGrad" x1="0" x2="0" y1="0" y2="1">
<stop offset="0" stop-color="{BOEING}"/>
<stop offset="1" stop-color="#9DB7E2"/>
</linearGradient>
<linearGradient id="redGrad" x1="0" x2="0" y1="0" y2="1">
<stop offset="0" stop-color="{RED}"/>
<stop offset="1" stop-color="#F4A6B4"/>
</linearGradient>
</defs>
<rect width="100%" height="100%" fill="#ffffff"/>
{body}
</svg>
'''
def text(x, y, content, size=16, weight=500, fill=INK, anchor="start"):
return f'<text x="{x}" y="{y}" font-family="Aptos, Bahnschrift, Noto Sans CJK SC, sans-serif" font-size="{size}" font-weight="{weight}" fill="{fill}" text-anchor="{anchor}">{esc(content)}</text>'
def save_svg(name, width, height, title, desc, metadata, body):
path = CHARTS / name
path.write_text(svg_wrap(width, height, title, desc, metadata, body), encoding="utf-8")
return path
def bar_chart(name, title, subtitle, rows, value_key, label_key="label", group_key=None, width=980, height=560, value_format="percent"):
margin = {"l": 110, "r": 46, "t": 106, "b": 110}
plot_w = width - margin["l"] - margin["r"]
plot_h = height - margin["t"] - margin["b"]
max_v = max([r[value_key] for r in rows] + [1.0 if value_format == "percent" else 0.1])
if value_format == "percent":
max_v = max(1.0, max_v)
body = [
text(34, 42, title, 28, 900, BLACK),
text(34, 72, subtitle, 14, 500, STEEL),
f'<rect x="{margin["l"]}" y="{margin["t"]}" width="{plot_w}" height="{plot_h}" fill="#F8FAFC" stroke="#E5E7EB" rx="18"/>',
]
for i in range(5):
y = margin["t"] + plot_h * i / 4
val = max_v * (1 - i / 4)
body.append(f'<line x1="{margin["l"]}" y1="{y:.1f}" x2="{margin["l"] + plot_w}" y2="{y:.1f}" stroke="#E5E7EB"/>')
tick = safe_pct(val) if value_format == "percent" else safe_num(val, 1)
body.append(text(margin["l"] - 14, y + 5, tick, 12, 600, STEEL, "end"))
n = len(rows)
slot = plot_w / max(n, 1)
bar_w = slot * 0.62
for i, row in enumerate(rows):
v = row[value_key]
h = 0 if max_v == 0 else plot_h * v / max_v
x = margin["l"] + i * slot + (slot - bar_w) / 2
y = margin["t"] + plot_h - h
color = row.get("color") or COLORS[i % len(COLORS)]
label = str(row[label_key])
meta = esc(json.dumps(row, ensure_ascii=False))
body.append(f'<rect class="bar" data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{bar_w:.1f}" height="{h:.1f}" rx="8" fill="{color}"><title>{esc(label)}: {safe_pct(v) if value_format == "percent" else safe_num(v)}</title></rect>')
body.append(text(x + bar_w / 2, margin["t"] + plot_h + 28, label[:18], 11, 700, STEEL, "middle") + f'<animate attributeName="opacity" from="0" to="1" dur="0.4s" fill="freeze"/>')
if group_key:
groups = []
last = None
start = 0
for i, row in enumerate(rows + [{group_key: None}]):
g = row.get(group_key)
if last is None:
last = g
if g != last:
groups.append((last, start, i - 1))
start = i
last = g
for group, start, end in groups:
if group is None:
continue
x = margin["l"] + (start + end + 1) * slot / 2
body.append(text(x, height - 26, group, 13, 900, BLACK, "middle"))
return save_svg(name, width, height, title, subtitle, {"chart_type": "bar", "rows": rows, "value_key": value_key}, "\n".join(body))
def grouped_metric_chart(metrics):
order = [
("Schema完整率", "required_field_complete_rate", "struct"),
("当前行为Acc", "current_behavior_accuracy", "struct"),
("当前行为Macro-F1", "current_behavior_macro_f1", "struct"),
("下一行为Acc", "next_possible_behavior_accuracy", "struct"),
("阶段Index Acc", "stage_index_accuracy", "struct"),
("序列Exact", "sequence_exact_match", "struct"),
("占用Acc", "occupied_accuracy", "qa"),
("异常F1", "is_abnormal_f1", "qa"),
("区域F1", "used_areas_micro_f1", "qa"),
]
rows = []
for label, key, task in order:
base = metrics[f"base_{task}"]["metrics"].get(key)
ft = metrics[f"finetuned_{task}"]["metrics"].get(key)
if base is not None:
rows.append({"label": f"{label}\nBase", "metric": key, "run": "base", "task": task, "value": base, "color": STEEL})
if ft is not None:
rows.append({"label": f"{label}\nFT", "metric": key, "run": "finetuned", "task": task, "value": ft, "color": BOEING if task == "struct" else RED})
return bar_chart(
"01_base_vs_finetuned_scores.svg",
"Base vs Fine-tuned: 指标总览",
"同一验证集上的结构化预测与 QA 任务对比;柱形为可检查 metadata 的 SVG 元素。",
rows,
"value",
width=1280,
height=620,
)
def label_distribution_chart(summary):
rows = []
for label, count in sorted(summary["train_struct"]["label_counts"].items(), key=lambda x: x[1], reverse=True):
rows.append({"label": label, "value": count, "color": BOEING if label == "坐用马桶" else COLORS[len(rows) % len(COLORS)]})
return bar_chart(
"02_train_label_distribution.svg",
"训练集行为标签分布",
"类别分布显示任务主要由长时间状态和短过渡行为共同构成;设计评审应关注少数类边界。",
rows[:19],
"value",
width=1280,
height=620,
value_format="count",
)
def per_behavior_delta_chart(per_class):
rows = []
for row in sorted(per_class, key=lambda x: x["f1_delta"], reverse=True)[:16]:
rows.append({
"label": row["label"],
"value": row["f1_delta"],
"support": row["support"],
"base_f1": row["base_f1"],
"finetuned_f1": row["finetuned_f1"],
"color": BOEING if row["f1_delta"] >= 0 else RED,
})
return bar_chart(
"03_behavior_f1_delta.svg",
"行为类别 F1 改善幅度",
"展示微调对各行为类别的收益;support 表示验证集该行为样本量。",
rows,
"value",
width=1280,
height=620,
)
def time_mae_chart(metrics):
rows = []
mapping = [
("当前行为已持续", "elapsed_seconds_in_current_behavior_mae"),
("当前行为剩余", "estimated_remaining_seconds_mae"),
("完整流程剩余", "full_remaining_seconds_mae"),
("当前行为结束时刻", "expected_end_time_mae"),
]
for label, key in mapping:
v = metrics["finetuned_struct"]["metrics"].get(key)
if v is not None:
rows.append({"label": label, "metric": key, "value": v, "color": BOEING})
rows.append({"label": "QA空出时间(分钟)", "metric": "time_to_free_minutes_mae", "value": metrics["finetuned_qa"]["metrics"]["time_to_free_minutes_mae"], "color": RED})
return bar_chart(
"04_time_error_mae.svg",
"时间估计误差",
"结构化任务以秒为单位;QA 空出时间以分钟为单位,接近 0 说明该 QA 目标在当前构造规则下被模型很好拟合。",
rows,
"value",
width=1100,
height=560,
value_format="count",
)
def confusion_heatmap(name, title, labels, matrix):
width, height = 1180, 900
margin = {"l": 170, "r": 44, "t": 126, "b": 170}
n = len(labels)
cell = min((width - margin["l"] - margin["r"]) / n, (height - margin["t"] - margin["b"]) / n)
max_count = max([row[label] for row in matrix for label in labels] + [1])
body = [
text(34, 44, title, 28, 900, BLACK),
text(34, 74, "Fine-tuned current_behavior confusion matrix, top validation labels.", 14, 500, STEEL),
f'<rect x="{margin["l"]}" y="{margin["t"]}" width="{cell*n:.1f}" height="{cell*n:.1f}" fill="#F8FAFC" stroke="#E5E7EB" rx="12"/>',
]
for i, true_label in enumerate(labels):
body.append(text(margin["l"] - 12, margin["t"] + i * cell + cell * 0.62, true_label, 12, 700, INK, "end"))
body.append(text(margin["l"] + i * cell + cell * 0.5, margin["t"] + n * cell + 26, true_label, 11, 700, INK, "middle"))
for i, row in enumerate(matrix):
for j, pred_label in enumerate(labels):
count = row[pred_label]
intensity = count / max_count
blue = int(246 - 170 * intensity)
color = f"rgb({blue},{max(39, blue + 12)},{255})" if i == j else f"rgb({255},{max(230, 248 - int(120 * intensity))},{max(230, 248 - int(120 * intensity))})"
meta = esc(json.dumps({"true_label": row["true_label"], "predicted_label": pred_label, "count": count, "support": row["support"]}, ensure_ascii=False))
x = margin["l"] + j * cell
y = margin["t"] + i * cell
body.append(f'<rect data-meta="{meta}" x="{x:.1f}" y="{y:.1f}" width="{cell:.1f}" height="{cell:.1f}" fill="{color}" stroke="#FFFFFF"><title>true={esc(row["true_label"])}, pred={esc(pred_label)}, count={count}</title></rect>')
if count:
body.append(text(x + cell / 2, y + cell * 0.62, count, 9, 700, BLACK, "middle"))
body.append(text(margin["l"] + cell * n / 2, height - 38, "Predicted behavior", 14, 900, BLACK, "middle"))
body.append(f'<text x="34" y="{margin["t"] + cell*n/2}" transform="rotate(-90 34 {margin["t"] + cell*n/2})" font-family="Aptos, Bahnschrift, sans-serif" font-size="14" font-weight="900" fill="{BLACK}" text-anchor="middle">True behavior</text>')
return save_svg(name, width, height, title, "Confusion heatmap with metadata on each cell.", {"chart_type": "confusion_matrix", "labels": labels}, "\n".join(body))
def architecture_svg():
width, height = 1280, 500
boxes = [
("毫米波雷达", "点云/轨迹窗口\n隐私友好,非视觉图像", 56),
("边缘特征层", "去噪、跟踪、区域、速度\n生成结构化中间表征", 356),
("Qwen3.5-9B LoRA", "本地推理输出 JSON\n行为、阶段、剩余时间、QA", 656),
("客舱系统", "乘务终端、维护日志\n异常关注而非强控制", 956),
]
body = [text(34, 44, "飞机厕所部署技术方案", 30, 900, BLACK), text(34, 76, "Edge-first, privacy-preserving, fail-safe integration concept.", 14, 500, STEEL)]
for i, (title, desc, x) in enumerate(boxes):
body.append(f'<rect x="{x}" y="142" width="250" height="210" rx="28" fill="#FFFFFF" stroke="#D6DEE9" filter="url(#softShadow)"/>')
body.append(text(x + 24, 190, title, 22, 900, BOEING if i < 3 else RED))
for k, line in enumerate(desc.split("\n")):
body.append(text(x + 24, 238 + k * 28, line, 16, 600, STEEL))
if i < len(boxes) - 1:
body.append(f'<line x1="{x+260}" y1="246" x2="{x+294}" y2="246" stroke="{BOEING}" stroke-width="4"/>')
body.append(f'<polygon points="{x+294},246 {x+282},238 {x+282},254" fill="{RED}"/>')
body.append(text(56, 420, "设计原则:厕所侧只上传结构化状态,不上传可逆原始人体数据;LLM JSON 校验失败时回退规则模型。", 18, 800, BLACK))
return save_svg("06_deployment_architecture.svg", width, height, "飞机厕所部署技术方案", "Edge deployment architecture.", {"chart_type": "architecture"}, "\n".join(body))
def build_svgs(summary, metrics, derived):
paths = [
grouped_metric_chart(metrics),
label_distribution_chart(summary),
per_behavior_delta_chart(derived["per_class"]),
time_mae_chart(metrics),
confusion_heatmap("05_finetuned_behavior_confusion.svg", "Fine-tuned 行为混淆矩阵", derived["confusion_labels"], derived["confusion"]),
architecture_svg(),
]
chart_manifest = []
for path in paths:
chart_manifest.append({
"file": str(path.relative_to(PACKAGE)),
"title": path.stem.replace("_", " "),
"metadata_note": "Open the SVG file in a text editor or use the HTML metadata explorer to inspect embedded <metadata> and per-element data-meta.",
})
(CHARTS / "chart_manifest.json").write_text(json.dumps(chart_manifest, ensure_ascii=False, indent=2), encoding="utf-8")
return chart_manifest
def metric_delta(metrics, task, key):
base = metrics[f"base_{task}"]["metrics"].get(key)
ft = metrics[f"finetuned_{task}"]["metrics"].get(key)
if base is None or ft is None:
return None
return ft - base
def build_html(summary, metrics, derived, chart_manifest):
struct = metrics["finetuned_struct"]["metrics"]
qa = metrics["finetuned_qa"]["metrics"]
base_struct = metrics["base_struct"]["metrics"]
base_qa = metrics["base_qa"]["metrics"]
metadata_json = json.dumps({
"summary": summary,
"metrics": metrics,
"chart_manifest": chart_manifest,
"derived_files": [str(p.relative_to(PACKAGE)) for p in sorted(DERIVED_DIR.glob("*"))],
}, ensure_ascii=False)
metric_table_rows = []
story_metrics = [
("结构化 schema 完整率", base_struct["required_field_complete_rate"], struct["required_field_complete_rate"], "决定输出能否进入机载系统状态总线。"),
("当前行为准确率", base_struct["current_behavior_accuracy"], struct["current_behavior_accuracy"], "核心状态识别能力,从 radar 表征映射到行为标签。"),
("当前行为 Macro-F1", base_struct["current_behavior_macro_f1"], struct["current_behavior_macro_f1"], "衡量少数类是否被覆盖,适合设计评审关注长尾行为。"),
("下一行为准确率", base_struct["next_possible_behavior_accuracy"], struct["next_possible_behavior_accuracy"], "影响短期流程预测和预计空出时间。"),
("阶段 index 准确率", base_struct["stage_index_accuracy"], struct["stage_index_accuracy"], "反映模型是否理解完整厕所使用流程位置。"),
("QA 异常 F1", base_qa["is_abnormal_f1"], qa["is_abnormal_f1"], "用于乘务关注提示,需单独评估。"),
("QA 区域 F1", base_qa["used_areas_micro_f1"], qa["used_areas_micro_f1"], "用于理解马桶、洗手池、门、垃圾桶等区域使用状态。"),
]
for name, base, ft, meaning in story_metrics:
delta = ft - base if base is not None and ft is not None else None
metric_table_rows.append(f"<tr><td>{esc(name)}</td><td>{safe_pct(base)}</td><td>{safe_pct(ft)}</td><td>{safe_pct(delta) if delta is not None else 'N/A'}</td><td>{esc(meaning)}</td></tr>")
top_improvements = sorted(derived["per_class"], key=lambda x: x["f1_delta"], reverse=True)[:8]
top_rows = "\n".join(
f"<tr><td>{esc(r['label'])}</td><td>{r['support']}</td><td>{safe_pct(r['base_f1'])}</td><td>{safe_pct(r['finetuned_f1'])}</td><td>{safe_pct(r['f1_delta'])}</td></tr>"
for r in top_improvements
)
chart_cards = "\n".join(
f'''<article class="chart-card" data-chart="{esc(item["file"])}">
<div><h3>{esc(item["title"])}</h3><p>{esc(item["metadata_note"])}</p></div>
<object type="image/svg+xml" data="{esc(item["file"])}" aria-label="{esc(item["title"])}"></object>
</article>'''
for item in chart_manifest
)
source_links = "\n".join(
f"<li><a href='{esc(str(p.relative_to(PACKAGE)))}'>{esc(str(p.relative_to(PACKAGE)))}</a></li>"
for p in sorted(DATA_DIR.rglob("*"))
if p.is_file()
)
html_text = f'''<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>MWave Design Review Presentation</title>
<style>
:root {{
--boeing:{BOEING}; --boeing2:{BOEING_2}; --red:{RED}; --black:{BLACK};
--ink:{INK}; --steel:{STEEL}; --mist:{MIST}; --light:{LIGHT}; --line:rgba(8,10,15,.14);
}}
* {{ box-sizing:border-box; }}
body {{
margin:0; color:var(--ink);
font-family:"Aptos Display","Bahnschrift","DIN Alternate","Noto Sans SC",sans-serif;
background:radial-gradient(circle at 10% 0%, rgba(0,103,177,.22), transparent 28%),
radial-gradient(circle at 86% 6%, rgba(200,16,46,.12), transparent 24%),
linear-gradient(135deg,#eef4fb,#ffffff 44%,#e9eef6);
}}
a {{ color:var(--boeing); font-weight:800; }}
.deck {{ max-width:1480px; margin:0 auto; padding:28px; }}
.hero {{
min-height:620px; color:#fff; border-radius:36px; padding:52px;
background:linear-gradient(118deg,rgba(3,11,29,.98),rgba(0,57,166,.94) 54%,rgba(0,103,177,.72));
position:relative; overflow:hidden;
}}
.hero:after {{ content:""; position:absolute; right:-120px; top:-160px; width:640px; height:640px; border-radius:50%; border:1px solid rgba(255,255,255,.18); box-shadow:inset 0 0 0 82px rgba(255,255,255,.05), inset 0 0 0 180px rgba(255,255,255,.035); }}
.kicker {{ letter-spacing:.18em; text-transform:uppercase; font-size:12px; color:#dbeafe; }}
h1 {{ font-size:clamp(46px,7vw,100px); line-height:.92; margin:26px 0 18px; max-width:1050px; letter-spacing:-.06em; }}
h2 {{ font-size:38px; letter-spacing:-.04em; margin:0 0 18px; color:var(--black); }}
h3 {{ margin:0 0 8px; font-size:22px; color:var(--black); }}
.hero p {{ color:#eaf1fb; font-size:21px; line-height:1.65; max-width:980px; }}
.hero-grid {{ display:grid; grid-template-columns:repeat(4,1fr); gap:18px; position:relative; z-index:1; margin-top:42px; }}
.hero-card {{ padding:22px; border:1px solid rgba(255,255,255,.22); background:rgba(255,255,255,.1); border-radius:24px; backdrop-filter:blur(16px); }}
.big {{ font-size:38px; font-weight:900; letter-spacing:-.04em; }}
.label {{ color:#cbd5e1; font-size:13px; margin-top:6px; }}
nav {{ position:sticky; top:0; z-index:20; display:flex; gap:8px; flex-wrap:wrap; margin:18px 0; padding:12px; border:1px solid var(--line); border-radius:22px; background:rgba(255,255,255,.88); backdrop-filter:blur(18px); }}
nav button, .pill {{ border:0; border-radius:999px; padding:11px 15px; background:#eef3fb; color:#0f172a; font-weight:900; cursor:pointer; }}
nav button:hover, .pill.active {{ background:var(--boeing); color:#fff; }}
section {{ background:rgba(255,255,255,.92); border:1px solid var(--line); border-radius:30px; padding:32px; margin-top:24px; box-shadow:0 16px 44px rgba(17,24,39,.06); }}
.grid {{ display:grid; gap:18px; }}
.grid-2 {{ grid-template-columns:repeat(2,minmax(0,1fr)); }}
.grid-3 {{ grid-template-columns:repeat(3,minmax(0,1fr)); }}
.grid-4 {{ grid-template-columns:repeat(4,minmax(0,1fr)); }}
.card {{ border:1px solid var(--line); background:linear-gradient(180deg,#fff,#f6f9fd); border-radius:24px; padding:22px; }}
.card strong {{ color:var(--boeing); }}
.muted {{ color:var(--steel); line-height:1.66; }}
.story {{ display:grid; grid-template-columns:72px 1fr; gap:18px; margin-bottom:18px; }}
.num {{ width:54px; height:54px; border-radius:50%; display:grid; place-items:center; background:var(--black); color:#fff; font-weight:900; font-size:22px; }}
table {{ width:100%; border-collapse:collapse; overflow:hidden; border-radius:18px; background:#fff; }}
th,td {{ padding:13px 14px; border-bottom:1px solid #e5e7eb; text-align:left; vertical-align:top; }}
th {{ background:#08111f; color:#fff; font-size:13px; }}
.chart-grid {{ display:grid; grid-template-columns:1fr; gap:22px; }}
.chart-card {{ border:1px solid var(--line); background:#fff; border-radius:26px; padding:22px; }}
.chart-card object {{ width:100%; height:620px; border:0; background:#fff; border-radius:18px; }}
.chart-card p {{ margin:0 0 12px; color:var(--steel); }}
.metadata-panel {{ display:grid; grid-template-columns:360px 1fr; gap:18px; }}
select, textarea {{ width:100%; border:1px solid #d6dee9; border-radius:16px; padding:12px; font:inherit; background:#fff; }}
pre {{ margin:0; white-space:pre-wrap; background:#05070c; color:#d1fae5; border-radius:18px; padding:18px; min-height:360px; overflow:auto; font-family:"Cascadia Mono","SFMono-Regular",monospace; font-size:13px; }}
.callout {{ border-left:6px solid var(--red); background:#fff5f6; padding:18px; border-radius:18px; }}
.files {{ columns:2; }}
.footer {{ text-align:center; color:#64748b; padding:28px; }}
@media print {{
nav, .metadata-panel, .no-print {{ display:none !important; }}
body {{ background:#fff; }}
section, .hero {{ break-inside:avoid; page-break-inside:avoid; box-shadow:none; }}
.chart-card object {{ height:520px; }}
}}
@media(max-width:980px) {{ .hero-grid,.grid-2,.grid-3,.grid-4,.metadata-panel {{ grid-template-columns:1fr; }} .deck {{ padding:14px; }} .hero {{ padding:28px; }} .files {{ columns:1; }} }}
</style>
</head>
<body>
<div class="deck">
<header class="hero">
<div class="kicker">MWave Radar LLM · Aircraft Lavatory Design Review</div>
<h1>从毫米波雷达到机载厕所状态智能:一次可验证的 LLM 微调实验</h1>
<p>这份汇报面向设计专家评审,重点不是展示训练日志,而是讲清楚:为什么需要模型、模型学到了什么、哪些结果能支持设计决策、哪些风险需要在机载部署前继续验证。</p>
<div class="hero-grid">
<div class="hero-card"><div class="big">{summary["train_struct"]["num_examples"]:,}</div><div class="label">训练结构样本</div></div>
<div class="hero-card"><div class="big">{summary["val_struct"]["num_examples"]:,}</div><div class="label">验证结构样本</div></div>
<div class="hero-card"><div class="big">{summary["train_qa"]["num_examples"]:,}</div><div class="label">QA 训练样本</div></div>
<div class="hero-card"><div class="big">Qwen3.5-9B</div><div class="label">4-bit QLoRA 微调</div></div>
</div>
</header>
<nav class="no-print">
<button onclick="go('story')">Story Line</button>
<button onclick="go('method')">任务与方法</button>
<button onclick="go('results')">结果证据</button>
<button onclick="go('charts')">SVG 图表</button>
<button onclick="go('metadata')">Metadata Explorer</button>
<button onclick="go('deployment')">部署方案</button>
<button onclick="go('sources')">源数据</button>
</nav>
<section id="story">
<h2>评审 Story Line</h2>
<div class="story"><div class="num">1</div><div><h3>设计问题</h3><p class="muted">飞机厕所内不能依赖摄像头,毫米波雷达提供隐私友好的时空轨迹,但原始序列难以直接变成可用的客舱状态。</p></div></div>
<div class="story"><div class="num">2</div><div><h3>模型角色</h3><p class="muted">LLM 不替代雷达信号处理,而是读取结构化窗口和中间表征,输出严格 JSON:当前行为、下一行为、阶段、剩余时间、序列,以及 QA 状态。</p></div></div>
<div class="story"><div class="num">3</div><div><h3>实验判断</h3><p class="muted">同一验证集上比较 base Qwen3.5-9B 与微调后模型,指标覆盖 schema 合规、行为识别、流程理解、时间估计和 QA 推理。</p></div></div>
<div class="story"><div class="num">4</div><div><h3>设计结论</h3><p class="muted">微调显著提升结构化输出和 QA 可用性,但时间误差、少数类行为和异常提示仍应进入下一轮系统设计验证。</p></div></div>
</section>
<section id="method">
<h2>任务与评估方法</h2>
<div class="grid grid-3">
<div class="card"><h3>结构化预测</h3><p class="muted">输入雷达时序窗口和中间层表征,输出 current_behavior、next_possible_behavior、stage_index、remaining time、sequence_so_far 等字段。</p></div>
<div class="card"><h3>QA 状态回答</h3><p class="muted">作为独立维度评估 occupied、time_to_free_minutes、used_areas、is_abnormal,避免只看结构化任务而忽略最终用户问题。</p></div>
<div class="card"><h3>指标选择</h3><p class="muted">分类用 accuracy/F1,schema 用 JSON parse 和 required field complete,时间用 MAE,序列用 exact/prefix/last-label match。</p></div>
</div>
</section>
<section id="results">
<h2>关键结果</h2>
<div class="grid grid-4">
<div class="card"><div class="big">{safe_pct(struct["current_behavior_accuracy"])}</div><strong>当前行为准确率</strong><p class="muted">Base 为 {safe_pct(base_struct["current_behavior_accuracy"])}。</p></div>
<div class="card"><div class="big">{safe_pct(struct["required_field_complete_rate"])}</div><strong>结构化字段完整率</strong><p class="muted">Base 为 {safe_pct(base_struct["required_field_complete_rate"])},说明微调主要解决 schema 对齐。</p></div>
<div class="card"><div class="big">{safe_pct(qa["is_abnormal_f1"])}</div><strong>QA 异常 F1</strong><p class="muted">Base 为 {safe_pct(base_qa["is_abnormal_f1"])}。</p></div>
<div class="card"><div class="big">{safe_num(struct["full_remaining_seconds_mae"], 1)}s</div><strong>完整流程剩余时间 MAE</strong><p class="muted">该误差直接影响预计空出时间体验。</p></div>
</div>
<div style="margin-top:20px; overflow:auto">
<table>
<tr><th>评估项</th><th>Base</th><th>Fine-tuned</th><th>变化</th><th>设计含义</th></tr>
{''.join(metric_table_rows)}
</table>
</div>
<div class="callout" style="margin-top:20px">
<strong>评审判断:</strong>当前实验已经证明 9B LoRA 可以把通用 LLM 拉到可用的结构化状态输出轨道上;但机载上线前仍应增加跨乘客、跨机型、跨雷达安装角度的数据验证,并对异常提示设定保守阈值。
</div>
</section>
<section>
<h2>哪些行为受益最大</h2>
<p class="muted">下表展示 F1 改善最大的行为类别。设计评审时应同时看 support,避免把少量样本上的提升误判为稳定能力。</p>
<table>
<tr><th>行为</th><th>验证样本数</th><th>Base F1</th><th>Fine-tuned F1</th><th>提升</th></tr>
{top_rows}
</table>
</section>
<section id="charts">
<h2>SVG 矢量图表</h2>
<p class="muted">所有图表都在 <code>assets/charts/</code> 下以 SVG 保存,可缩放、可放进设计文档;SVG 内包含 metadata,HTML 下方可查看。</p>
<div class="chart-grid">{chart_cards}</div>
</section>
<section id="metadata" class="no-print">
<h2>Metadata Explorer</h2>
<p class="muted">选择图表或数据对象,查看生成依据、源文件路径和图表内嵌 metadata。此功能完全离线,不依赖外网。</p>
<div class="metadata-panel">
<div class="card">
<h3>选择对象</h3>
<select id="metaSelect"></select>
<p class="muted">提示:SVG 图表也可以直接用文本编辑器打开,查看其中的 <code><metadata></code> 和每个 bar/cell 的 <code>data-meta</code>。</p>
</div>
<pre id="metaOut"></pre>
</div>
</section>
<section id="deployment">
<h2>部署到飞机厕所的技术方案</h2>
<div class="grid grid-3">
<div class="card"><h3>边缘优先</h3><p class="muted">厕所侧完成雷达预处理和特征抽取,客舱边缘计算单元加载量化 LLM/LoRA,减少原始数据移动。</p></div>
<div class="card"><h3>状态输出</h3><p class="muted">对外只输出 occupied、estimated free time、used areas、abnormal flag 和行为阶段,不输出身份或可逆人体点云。</p></div>
<div class="card"><h3>失效保护</h3><p class="muted">JSON 校验失败、置信不足或异常连续触发时回退规则模型,并只给出“需关注”级提示。</p></div>
</div>
<object type="image/svg+xml" data="assets/charts/06_deployment_architecture.svg" style="width:100%;height:480px;border:0;margin-top:18px"></object>
</section>
<section id="sources">
<h2>可拷走的源数据与材料</h2>
<p class="muted">整个 <code>mwave_design_review_package</code> 目录可直接复制到其他电脑。HTML、PDF、SVG 和 CSV/JSONL 都使用相对路径或独立文件。</p>
<ul class="files">{source_links}</ul>
</section>
<div class="footer">Generated locally · {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} · Boeing blue / red / black / gray design system</div>
</div>
<script id="metadata-json" type="application/json">{metadata_json}</script>
<script>
const metadata = JSON.parse(document.getElementById('metadata-json').textContent);
const options = [
{{label:'Package overview', value:metadata}},
...metadata.chart_manifest.map(c => ({{label:'Chart: '+c.title, value:c}})),
{{label:'Metrics: base struct', value:metadata.metrics.base_struct}},
{{label:'Metrics: fine-tuned struct', value:metadata.metrics.finetuned_struct}},
{{label:'Metrics: base QA', value:metadata.metrics.base_qa}},
{{label:'Metrics: fine-tuned QA', value:metadata.metrics.finetuned_qa}},
{{label:'Data summary', value:metadata.summary}},
];
function go(id){{ document.getElementById(id).scrollIntoView({{behavior:'smooth'}}); }}
const select = document.getElementById('metaSelect');
const out = document.getElementById('metaOut');
options.forEach((item, idx) => {{
const option = document.createElement('option');
option.value = String(idx);
option.textContent = item.label;
select.appendChild(option);
}});
function renderMeta(){{ out.textContent = JSON.stringify(options[Number(select.value)].value, null, 2); }}
select.addEventListener('change', renderMeta);
renderMeta();
</script>
</body>
</html>
'''
(PACKAGE / "index.html").write_text(html_text, encoding="utf-8")
def build_readme():
readme = f"""# MWave Design Review Presentation Package
This folder is self-contained and can be copied to another computer.
Open:
- `index.html`: interactive presentation with SVG charts and metadata explorer.
- `mwave_design_review_report.pdf`: static PDF export for review circulation.
Key folders:
- `assets/charts/`: standalone SVG vector charts. Each SVG includes embedded metadata.
- `source_data/metrics/`: original evaluation metric JSON files.
- `source_data/predictions/`: original prediction JSONL files for base and fine-tuned runs.
- `source_data/derived/`: CSV files derived for the presentation, including per-behavior scores and confusion matrix.
Generated at: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
"""
(PACKAGE / "README.md").write_text(readme, encoding="utf-8")
def export_pdf():
html_path = PACKAGE / "index.html"
pdf_path = PACKAGE / "mwave_design_review_report.pdf"
chrome = shutil.which("google-chrome") or shutil.which("chromium") or shutil.which("chromium-browser")
if not chrome:
print("Chrome not found; skipped PDF export.")
return False
cmd = [
chrome,
"--headless",
"--no-sandbox",
"--disable-gpu",
"--print-to-pdf=" + str(pdf_path),
"--print-to-pdf-no-header",
"file://" + str(html_path.resolve()),
]
subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
def make_zip():
zip_base = OUT_ROOT / "mwave_design_review_package"
archive = shutil.make_archive(str(zip_base), "zip", root_dir=OUT_ROOT, base_dir=PACKAGE.name)
return Path(archive)
def main():
ensure_dirs()
copy_source_files()
summary = read_json(ROOT / "data/processed/summary.json")
metrics = {key: read_json(path) for key, path in METRIC_FILES.items()}
predictions = {key: read_jsonl(path) for key, path in PRED_FILES.items()}
derived = derive_data(metrics, predictions)
chart_manifest = build_svgs(summary, metrics, derived)
build_html(summary, metrics, derived, chart_manifest)
build_readme()
pdf_ok = export_pdf()
archive = make_zip()
print(f"wrote package: {PACKAGE}")
print(f"wrote archive: {archive}")
print(f"pdf_exported: {pdf_ok}")
if __name__ == "__main__":
main()
|