| | import torch |
| | import time |
| | from datetime import datetime |
| |
|
def log(m):
    """Print *m* to stdout prefixed with a 'YYYY-MM-DD HH:MM:SS' timestamp.

    Output is flushed immediately so progress is visible in real time
    (e.g. when stdout is piped or redirected to a log file).
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{stamp}] {m}", flush=True)
| |
|
class RealModelBenchmark:
    """Benchmark using actual trained models.

    Each ``benchmark_*`` method is best-effort: failures (missing weights,
    missing pattern files, import errors) are logged and recorded as degraded
    metrics instead of crashing the run. All metrics accumulate in
    ``self.results`` keyed by metric name.

    NOTE(review): the emoji-looking tokens in the log strings ("π§", "β",
    "β οΈ", ...) appear to be mojibake of the original emoji; they are
    preserved byte-for-byte here — confirm against the original encoding.
    """

    def __init__(self):
        # Metric name -> value (accuracies, timings, pattern counts).
        self.results = {}

    def benchmark_spatial_predictor(self):
        """Benchmark the actual spatial predictor model.

        Loads the trained ``SpatialRelationshipPredictor`` weights, runs a
        small set of probe inputs through it, and records:

        - ``real_spatial_accuracy``: fraction of predictions that are a valid
          class id (< 8) — no ground-truth labels exist for the probes, so
          only validity is checked.
        - ``real_inference_time``: mean per-call latency in milliseconds.

        On any failure, ``real_spatial_accuracy`` is recorded as 0.0.
        """
        log("π§ BENCHMARKING REAL SPATIAL PREDICTOR...")

        try:
            # Project-local import; a missing module is handled like any
            # other benchmark failure below.
            from phase9.phase9_3_final_training import SpatialRelationshipPredictor

            model = SpatialRelationshipPredictor()
            model.load_state_dict(torch.load("phase9/spatial_predictor_model.pth"))
            model.eval()

            # Probe cases: (object-1 id, object-2 id, bbox offset [dx, dy]).
            # assumes ids index the model's object embedding table — TODO confirm
            test_cases = [
                (0, 1, [0.3, 0.1]),
                (0, 2, [-0.2, -0.4]),
                (5, 6, [0.1, -0.5]),
                (7, 5, [0.0, 0.3]),
                (3, 2, [0.4, 0.1]),
            ]

            correct = 0
            total = len(test_cases)
            inference_times = []

            for obj1_id, obj2_id, bbox_diff in test_cases:
                obj1_tensor = torch.tensor([obj1_id], dtype=torch.long)
                obj2_tensor = torch.tensor([obj2_id], dtype=torch.long)
                bbox_tensor = torch.tensor([bbox_diff], dtype=torch.float32)

                # perf_counter() is monotonic and high-resolution; time.time()
                # is wall-clock and can jump (NTP adjustments), which corrupts
                # short latency measurements.
                start_time = time.perf_counter()
                with torch.no_grad():
                    output = model(obj1_tensor, obj2_tensor, bbox_tensor)
                    prediction = torch.argmax(output, dim=1).item()
                end_time = time.perf_counter()

                inference_times.append((end_time - start_time) * 1000)

                # A prediction is "correct" iff it is a valid class id [0, 8).
                if prediction < 8:
                    correct += 1
                    log(f" β Prediction {prediction} in {inference_times[-1]:.2f}ms")
                else:
                    log(f" β Invalid prediction {prediction}")

            accuracy = correct / total
            avg_inference_time = sum(inference_times) / len(inference_times)

            log(f"π Real Model Spatial Accuracy: {correct}/{total} ({accuracy:.1%})")
            log(f"π Average Inference Time: {avg_inference_time:.2f}ms")

            self.results["real_spatial_accuracy"] = accuracy
            self.results["real_inference_time"] = avg_inference_time

        except Exception as e:
            # Best-effort benchmark: record the failure as zero accuracy so
            # downstream reporting still has a value.
            log(f"β Real model benchmark failed: {e}")
            self.results["real_spatial_accuracy"] = 0.0

    def benchmark_phase8_patterns(self):
        """Benchmark Phase 8 spatial pattern system.

        Loads the learned spatial-pattern JSON and checks how many of a fixed
        set of object-pair queries have at least one matching pattern key.
        Records ``pattern_coverage`` (fraction matched) and
        ``total_patterns`` (pattern count). Failures are logged only.
        """
        log("πΊοΈ BENCHMARKING PHASE 8 SPATIAL PATTERNS...")

        try:
            import json

            with open("outputs/learned_spatial_patterns.json", "r") as f:
                patterns_data = json.load(f)

            spatial_patterns = patterns_data.get("spatial_patterns", {})
            object_pairs = patterns_data.get("object_pairs", {})

            log(f"π Loaded {len(spatial_patterns)} spatial patterns")
            log(f"π Loaded {len(object_pairs)} object pairs")

            test_queries = [
                "person_car",
                "building_tree",
                "sky_mountain",
                "water_animal"
            ]

            matches_found = 0
            for query in test_queries:
                # Substring match: any pattern key containing the query counts.
                pattern_exists = any(query in pattern for pattern in spatial_patterns.keys())
                if pattern_exists:
                    matches_found += 1
                    log(f" β Pattern found for: {query}")
                else:
                    log(f" β οΈ No direct pattern for: {query}")

            pattern_coverage = matches_found / len(test_queries)
            log(f"π Pattern Coverage: {matches_found}/{len(test_queries)} ({pattern_coverage:.1%})")

            self.results["pattern_coverage"] = pattern_coverage
            self.results["total_patterns"] = len(spatial_patterns)

        except Exception as e:
            log(f"β Pattern benchmark failed: {e}")

    def run_real_benchmarks(self):
        """Run all real model benchmarks and log a summary of the results."""
        log("π STARTING REAL MODEL BENCHMARKS")
        log("=" * 50)

        self.benchmark_spatial_predictor()
        self.benchmark_phase8_patterns()

        log("=" * 50)
        log("β REAL MODEL BENCHMARKS COMPLETED")

        # Summary only prints metrics that were actually recorded, so a
        # partially failed run still produces a coherent report.
        log("\nπ― REAL MODEL PERFORMANCE SUMMARY:")
        if "real_spatial_accuracy" in self.results:
            log(f" Spatial Predictor Accuracy: {self.results['real_spatial_accuracy']:.1%}")
        if "real_inference_time" in self.results:
            log(f" Spatial Inference Time: {self.results['real_inference_time']:.2f}ms")
        if "pattern_coverage" in self.results:
            log(f" Pattern Coverage: {self.results['pattern_coverage']:.1%}")
        if "total_patterns" in self.results:
            log(f" Total Patterns: {self.results['total_patterns']}")
|
def main():
    """Entry point: build the benchmark harness and run every suite."""
    RealModelBenchmark().run_real_benchmarks()
| |
|
# Script entry point: run the benchmark suite only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
| |
|