""" Batch model comparison for CVAT export datasets. For each task folder (e.g. "task_74_EMS_T1"): - Uses `annotations/instances_default.json` as ground truth - Runs OLD models and NEW models on all images in `images/` - Calculates detection/segmentation metrics vs ground truth - Creates side‑by‑side visualizations: Ground Truth | Old Models | New Models - Saves everything under `/model_comparison/` """ import os import sys import json from pathlib import Path import matplotlib.pyplot as plt # Paths SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) PROJECT_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR)) sys.path.insert(0, SCRIPT_DIR) sys.path.insert(0, PROJECT_ROOT) from old_models import process_dataset as process_old_models from new_models import process_dataset as process_new_models from compare import calculate_metrics, align_categories, draw_coco_annotations_simple # CVAT export directory CVAT_EXPORT_DIR = Path("/home/hasan/layout/compare/data/cvat_project_7_export") def discover_task_folders(base_dir): """ Discover all task folders in the CVAT export directory. A task folder is identified by having: - annotations/instances_default.json - images/ directory """ task_folders = [] base_path = Path(base_dir) if not base_path.exists(): print(f"❌ Error: CVAT export directory not found: {base_path}") return [] for item in base_path.iterdir(): if item.is_dir(): annotations_path = item / "annotations" / "instances_default.json" images_path = item / "images" if annotations_path.exists() and images_path.exists(): task_folders.append(item.name) return sorted(task_folders) def create_side_by_side_visualization(image_path, gt_coco, old_coco, new_coco, output_path): """ Create side‑by‑side visualization: GT | Old Models | New Models """ fig, axes = plt.subplots(1, 3, figsize=(30, 10)) # Left: Ground Truth draw_coco_annotations_simple(image_path, gt_coco, "Ground Truth", axes[0]) # Middle: Old Models draw_coco_annotations_simple(image_path, old_coco, "Old Models", axes[1]) # Right: New Models draw_coco_annotations_simple(image_path, new_coco, "New Models", axes[2]) plt.tight_layout() plt.savefig(output_path, dpi=150, bbox_inches="tight") plt.close() print(f" ✓ Saved comparison to: {output_path}") def process_cvat_task(task_folder_name, base_dir=None): """ Process a single CVAT task folder: - Load annotations/instances_default.json - Run old & new models - Compute metrics - Create GT | Old | New visualizations """ if base_dir is None: base_dir = CVAT_EXPORT_DIR task_path = Path(base_dir) / task_folder_name if not task_path.exists(): print(f"⚠️ Warning: Task folder not found: {task_path}") return None print("\n" + "=" * 70) print(f"Processing CVAT task: {task_folder_name}") print("=" * 70) # Paths gt_json_path = task_path / "annotations" / "instances_default.json" images_dir = task_path / "images" output_dir = task_path / "model_comparison" os.makedirs(output_dir, exist_ok=True) if not gt_json_path.exists(): print(f"❌ Error: instances_default.json not found at {gt_json_path}") return None if not images_dir.exists(): print(f"❌ Error: images directory not found at {images_dir}") return None # Load ground truth print(f"\n[1/5] Loading ground truth...") with open(gt_json_path, "r") as f: gt_coco = json.load(f) print(f" ✓ Loaded {len(gt_coco['images'])} images") print(f" ✓ Loaded {len(gt_coco['annotations'])} annotations") # Run old models print(f"\n[2/5] Running old models...") old_output_dir = output_dir / "old_models" os.makedirs(old_output_dir, exist_ok=True) try: old_coco = process_old_models(str(images_dir), str(old_output_dir)) print(f" ✓ Generated {len(old_coco['annotations'])} annotations") print(f" ✓ Categories: {[c['name'] for c in old_coco['categories']]}") except Exception as e: print(f" ❌ Error running old models: {e}") import traceback traceback.print_exc() return None # Run new models print(f"\n[3/5] Running new models...") new_output_dir = output_dir / "new_models" os.makedirs(new_output_dir, exist_ok=True) try: new_coco = process_new_models(str(images_dir), str(new_output_dir)) print(f" ✓ Generated {len(new_coco['annotations'])} annotations") except Exception as e: print(f" ❌ Error running new models: {e}") import traceback traceback.print_exc() return None # Calculate metrics print(f"\n[4/5] Calculating metrics...") # Align categories with ground truth (by name matching) old_coco_aligned = align_categories(gt_coco.copy(), old_coco.copy()) new_coco_aligned = align_categories(gt_coco.copy(), new_coco.copy()) # Metrics for old models print(f"\n Old Models Metrics:") old_metrics = calculate_metrics(gt_coco, old_coco_aligned, str(output_dir)) print(f" mAP@50: {old_metrics.get('mAP@50', 0):.4f}") print(f" mAP@[.50:.95]: {old_metrics.get('mAP@[.50:.95]', 0):.4f}") print(f" Precision: {old_metrics.get('Precision', 0):.4f}") print(f" Recall: {old_metrics.get('Recall', 0):.4f}") # Metrics for new models print(f"\n New Models Metrics:") new_metrics = calculate_metrics(gt_coco, new_coco_aligned, str(output_dir)) print(f" mAP@50: {new_metrics.get('mAP@50', 0):.4f}") print(f" mAP@[.50:.95]: {new_metrics.get('mAP@[.50:.95]', 0):.4f}") print(f" Precision: {new_metrics.get('Precision', 0):.4f}") print(f" Recall: {new_metrics.get('Recall', 0):.4f}") # Save metrics JSON metrics_path = output_dir / "metrics.json" with open(metrics_path, "w") as f: json.dump({"old_models": old_metrics, "new_models": new_metrics}, f, indent=4) print(f" ✓ Saved metrics to: {metrics_path}") # Create visualizations print(f"\n[5/5] Creating side-by-side visualizations...") vis_dir = output_dir / "visualizations" os.makedirs(vis_dir, exist_ok=True) for img_info in gt_coco["images"]: image_name = img_info["file_name"] image_path = images_dir / image_name if not image_path.exists(): continue img_id = img_info["id"] # Filter annotations for this image gt_img_coco = { "images": [img_info], "annotations": [a for a in gt_coco["annotations"] if a["image_id"] == img_id], "categories": gt_coco["categories"], } old_img_coco = { "images": [img_info], "annotations": [a for a in old_coco["annotations"] if a["image_id"] == img_id], "categories": old_coco["categories"], } new_img_coco = { "images": [img_info], "annotations": [a for a in new_coco["annotations"] if a["image_id"] == img_id], "categories": new_coco["categories"], } out_path = vis_dir / f"{Path(image_name).stem}_comparison.png" create_side_by_side_visualization( str(image_path), gt_img_coco, old_img_coco, new_img_coco, str(out_path), ) print(f" ✓ Visualizations saved to: {vis_dir}") return { "task": task_folder_name, "old_metrics": old_metrics, "new_metrics": new_metrics, "old_annotations": len(old_coco["annotations"]), "new_annotations": len(new_coco["annotations"]), "gt_annotations": len(gt_coco["annotations"]), } def main(): """Run model comparison for all CVAT export tasks.""" print("=" * 70) print("MODEL COMPARISON ON CVAT EXPORT DATASETS") print("=" * 70) # Discover all task folders print(f"\nDiscovering task folders in: {CVAT_EXPORT_DIR}") task_folders = discover_task_folders(CVAT_EXPORT_DIR) if not task_folders: print("❌ No task folders found!") return print(f"\nFound {len(task_folders)} task folders:") for folder in task_folders: print(f" - {folder}") results = [] for task_folder_name in task_folders: result = process_cvat_task(task_folder_name) if result: results.append(result) # Print summary print("\n" + "=" * 70) print("SUMMARY") print("=" * 70) for r in results: print(f"\n{r['task']}:") print(f" Ground Truth: {r['gt_annotations']} annotations") print(f" Old Models: {r['old_annotations']} annotations") print(f" mAP@50: {r['old_metrics'].get('mAP@50', 0):.4f}") print(f" Precision: {r['old_metrics'].get('Precision', 0):.4f}") print(f" Recall: {r['old_metrics'].get('Recall', 0):.4f}") print(f" New Models: {r['new_annotations']} annotations") print(f" mAP@50: {r['new_metrics'].get('mAP@50', 0):.4f}") print(f" Precision: {r['new_metrics'].get('Precision', 0):.4f}") print(f" Recall: {r['new_metrics'].get('Recall', 0):.4f}") # Save summary summary_path = Path(SCRIPT_DIR) / "cvat_export_model_comparison_summary.json" with open(summary_path, "w") as f: json.dump(results, f, indent=4) print(f"\n✓ Summary saved to: {summary_path}") print("\n" + "=" * 70) print("COMPLETE!") print("=" * 70) if __name__ == "__main__": main()