File size: 7,260 Bytes
c5b731e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | #!/usr/bin/env python3
"""
Full benchmark runner for FSD Model with CoT reasoning.
Runs external benchmarks, compares with/without CoT, and optimizes.
"""
import sys
import time
import torch
import numpy as np
sys.path.insert(0, '/app')
from fsd_model.config import VehicleConfig
from fsd_model.model import FullSelfDrivingModel
from fsd_model.data import FSDDataGenerator
from fsd_model.benchmarks import FSDExternalBenchmark
from fsd_model.visualization import format_parameter_count
def build_large_model(enable_cot=True):
    """Construct the scaled-up FSD model (larger than v1) and its config.

    Args:
        enable_cot: Passed straight through to ``FullSelfDrivingModel`` as
            its ``enable_cot`` argument.

    Returns:
        A ``(model, config)`` tuple holding the ``FullSelfDrivingModel``
        instance and the ``VehicleConfig`` it was constructed with.
    """
    vehicle_cfg = VehicleConfig()

    # Hyper-parameters of the large configuration, kept in the same order
    # in which they are handed to the model constructor.
    hyperparams = {
        "bev_size": 200,
        "bev_resolution": 0.25,
        "bev_feature_dim": 256,
        "num_object_classes": 10,
        "num_seg_classes": 7,
        "num_waypoints": 20,
        "planning_d_model": 256,
        "future_steps": 6,
        "num_forecast_modes": 6,
        "forecast_steps": 12,
        "num_behaviors": 10,
        "enable_cot": enable_cot,
        "cot_num_actor_queries": 64,
        "cot_num_road_queries": 32,
    }

    fsd_model = FullSelfDrivingModel(vehicle_config=vehicle_cfg, **hyperparams)
    return fsd_model, vehicle_cfg
def build_small_model(enable_cot=True):
    """Construct a test-sized FSD model for CPU benchmarking, plus its config.

    Args:
        enable_cot: Passed straight through to ``FullSelfDrivingModel`` as
            its ``enable_cot`` argument.

    Returns:
        A ``(model, config)`` tuple holding the ``FullSelfDrivingModel``
        instance and the ``VehicleConfig`` it was constructed with.
    """
    vehicle_cfg = VehicleConfig()

    # Hyper-parameters of the small configuration, kept in the same order
    # in which they are handed to the model constructor.
    hyperparams = {
        "bev_size": 100,
        "bev_resolution": 0.5,
        "bev_feature_dim": 128,
        "num_object_classes": 10,
        "num_seg_classes": 7,
        "num_waypoints": 20,
        "planning_d_model": 128,
        "future_steps": 6,
        "num_forecast_modes": 6,
        "forecast_steps": 12,
        "num_behaviors": 10,
        "enable_cot": enable_cot,
        "cot_num_actor_queries": 32,
        "cot_num_road_queries": 16,
    }

    fsd_model = FullSelfDrivingModel(vehicle_config=vehicle_cfg, **hyperparams)
    return fsd_model, vehicle_cfg
def run_benchmark_comparison(num_scenarios=48, batch_size=4, output_dir="/app"):
    """Run the external benchmarks with and without CoT and compare them.

    The function builds two small models (one with ``enable_cot=False``, one
    with ``enable_cot=True``), prints their parameter counts, runs a single
    forward pass through each as a sanity check, runs ``FSDExternalBenchmark``
    on both, prints a side-by-side comparison table, prints the parameter
    counts of the large model, and finally saves both benchmark results as
    JSON files.

    Args:
        num_scenarios: Number of scenarios handed to each benchmark run.
            Defaults to 48 (kept small so the run is fast on CPU).
        batch_size: Batch size handed to each benchmark run. Defaults to 4.
        output_dir: Directory the two result files are written into as
            ``benchmark_base.json`` and ``benchmark_cot.json``. Defaults to
            ``/app``.

    Returns:
        A ``(result_no, result_cot)`` tuple with the objects returned by
        ``FSDExternalBenchmark.run()`` for the base and the CoT model.
    """
    # NOTE(review): several labels below contain what looks like mis-decoded
    # text ("β", "Β³"). They are reproduced as found; confirm the intended
    # characters against the original file encoding before changing them.
    rule = "=" * 70
    thin_rule = "-" * 70

    print(rule)
    print(" FSD Model β External Benchmark Suite")
    print(" Comparing: Base Model vs. CoT-Enhanced Model")
    print(rule)

    # --- Build models ---
    print("\n[1/4] Building models...")
    model_no_cot, config = build_small_model(enable_cot=False)
    model_with_cot, _ = build_small_model(enable_cot=True)

    print("\n Base model (no CoT):")
    counts_no = model_no_cot.count_parameters()
    print(format_parameter_count(counts_no))

    print("\n CoT-enhanced model:")
    counts_cot = model_with_cot.count_parameters()
    print(format_parameter_count(counts_cot))

    cot_overhead = counts_cot["total"] - counts_no["total"]
    print(f"\n CoT parameter overhead: {cot_overhead:,} ({cot_overhead/counts_no['total']:.1%} increase)")

    # --- Data generator ---
    # bev_size=100 is the same value build_small_model passes to the model.
    data_gen = FSDDataGenerator(config, bev_size=100, image_size=(120, 160))

    # --- Quick forward pass sanity check ---
    # NOTE(review): the models are never switched to eval() in this function;
    # confirm that FSDExternalBenchmark (or the model itself) handles that.
    print("\n[2/4] Sanity check forward passes...")
    inputs, targets = data_gen.generate_batch(batch_size=2, scenario="urban")
    with torch.no_grad():
        out_no = model_no_cot(**inputs)
        out_cot = model_with_cot(**inputs)
    print(f" Base model outputs: {len(out_no)} keys")
    print(f" CoT model outputs: {len(out_cot)} keys")
    cot_keys = [k for k in out_cot.keys() if k.startswith("cot/")]
    print(f" CoT-specific outputs: {len(cot_keys)} keys")
    # Only the first ten CoT keys (alphabetically) are listed to keep the
    # output short.
    for k in sorted(cot_keys)[:10]:
        print(f" {k}: {out_cot[k].shape}")

    # --- Run benchmarks ---
    print(f"\n[3/4] Running external benchmarks ({num_scenarios} scenarios each)...")

    print("\n ββ Base Model (no CoT) ββ")
    bench_no = FSDExternalBenchmark(
        model_no_cot, data_gen,
        num_scenarios=num_scenarios, batch_size=batch_size,
        max_speed_ms=config.max_speed_ms,
        has_cot=False,
    )
    result_no = bench_no.run()
    print(result_no.summary())

    print("\n ββ CoT-Enhanced Model ββ")
    bench_cot = FSDExternalBenchmark(
        model_with_cot, data_gen,
        num_scenarios=num_scenarios, batch_size=batch_size,
        max_speed_ms=config.max_speed_ms,
        has_cot=True,
    )
    result_cot = bench_cot.run()
    print(result_cot.summary())

    # --- Comparison ---
    print("\n[4/4] Comparison Summary")
    print(rule)
    print(f"{'Metric':<40} {'Base':>12} {'+ CoT':>12} {'Delta':>12}")
    print(thin_rule)

    # Each row: (label, value for the base model, value for the CoT model).
    comparisons = [
        ("Planning L2 avg (m) β", result_no.planning.l2_avg, result_cot.planning.l2_avg),
        ("Collision rate avg β", result_no.planning.collision_rate_avg, result_cot.planning.collision_rate_avg),
        ("Planning score β", result_no.planning.planning_score, result_cot.planning.planning_score),
        ("NDS β", result_no.detection.NDS, result_cot.detection.NDS),
        ("mAP β", result_no.detection.mAP, result_cot.detection.mAP),
        ("CARLA driving score β", result_no.carla.driving_score, result_cot.carla.driving_score),
        ("Route completion % β", result_no.carla.route_completion, result_cot.carla.route_completion),
        ("Total collisions β", result_no.carla.num_collisions, result_cot.carla.num_collisions),
        ("Min TTC (s) β", result_no.safety.min_ttc, result_cot.safety.min_ttc),
        ("Mean TTC (s) β", result_no.safety.mean_ttc, result_cot.safety.mean_ttc),
        ("TTC <2s rate β", result_no.safety.ttc_below_2s_rate, result_cot.safety.ttc_below_2s_rate),
        ("Speed compliance β", result_no.safety.speed_compliance_rate, result_cot.safety.speed_compliance_rate),
        ("Safe following dist β", result_no.safety.safe_following_distance_rate, result_cot.safety.safe_following_distance_rate),
        ("Mean jerk (m/sΒ³) β", result_no.safety.mean_jerk, result_cot.safety.mean_jerk),
        ("Occ IoU near β", result_no.occupancy.iou_near, result_cot.occupancy.iou_near),
        ("Occ IoU far β", result_no.occupancy.iou_far, result_cot.occupancy.iou_far),
        ("FPS", result_no.fps, result_cot.fps),
    ]

    # Metrics that exist only for the CoT model; the middle element is the
    # placeholder printed in the "Base" column.
    cot_only = [
        ("CoT override accuracy β", "β", result_cot.safety.cot_override_accuracy),
        ("CoT risk AUC β", "β", result_cot.safety.cot_risk_auc),
        ("E-brake precision β", "β", result_cot.safety.emergency_brake_precision),
        ("E-brake recall β", "β", result_cot.safety.emergency_brake_recall),
        ("E-brake F1 β", "β", result_cot.safety.emergency_brake_f1),
    ]

    for name, base, cot in comparisons:
        delta = cot - base
        # Build the signed number first and pad afterwards, so the "+" stays
        # attached to the digits and every row's Delta column is exactly 12
        # characters wide. Zero and negative deltas get no "+" prefix.
        delta_text = f"{delta:+.4f}" if delta > 0 else f"{delta:.4f}"
        print(f" {name:<38} {base:>12.4f} {cot:>12.4f} {delta_text:>12}")

    print(thin_rule)
    print(" CoT-Specific Metrics:")
    for name, base, cot in cot_only:
        print(f" {name:<38} {str(base):>12} {cot:>12.4f}")
    print(rule)

    # --- Show full-size model parameter counts ---
    print("\n Full-size model (production config):")
    model_full, _ = build_large_model(enable_cot=True)
    counts_full = model_full.count_parameters()
    print(format_parameter_count(counts_full))

    # --- Save results ---
    # With the default output_dir these are the original hard-coded paths.
    base_path = f"{output_dir}/benchmark_base.json"
    cot_path = f"{output_dir}/benchmark_cot.json"
    result_no.save(base_path)
    result_cot.save(cot_path)
    print(f"\nResults saved to {base_path} and {cot_path}")

    return result_no, result_cot
if __name__ == "__main__":
    # Script entry point: run the full base-vs-CoT benchmark comparison.
    # The returned results are not used here.
    run_benchmark_comparison()
|