layout / compare /data /batch_process_all_datasets.py
hassanshka's picture
Add test_combined_models.py and compare/ folder (excluding cvat_project_7_export and Annika 2 folders)
0a216c0
"""
Batch process all datasets: Convert XML to COCO and create visualizations.
Processes all folders:
- Aleyna 1 (2024)
- Annika 2 (2024)
- Luise 1 (2024)
- Luise 2 (2024)
- Nuray 1 (2024)
- Nuray 2 (2024)
For each folder:
1. Converts XML annotations to COCO format
2. Creates visualizations of annotations on images
3. Saves outputs inside each folder
"""
import os
import sys
import json
from pathlib import Path
# Add current directory to path
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
from original_annotations import load_ground_truth
from visualize_ground_truth import visualize_all_images
# List of all dataset folders to process
DATASET_FOLDERS = [
"Aleyna 1 (2024)",
"Annika 2 (2024)",
"Luise 1 (2024)",
"Luise 2 (2024)",
"Nuray 1 (2024)",
"Nuray 2 (2024)",
]
def process_dataset(folder_name, base_dir=None):
"""
Process a single dataset folder.
Args:
folder_name: Name of the dataset folder
base_dir: Base directory containing the dataset folders (default: SCRIPT_DIR)
Returns:
dict with processing results
"""
if base_dir is None:
base_dir = SCRIPT_DIR
folder_path = Path(base_dir) / folder_name
if not folder_path.exists():
print(f"⚠️ Warning: Folder not found: {folder_path}")
return {
"folder": folder_name,
"status": "not_found",
"images": 0,
"annotations": 0
}
print("\n" + "=" * 70)
print(f"Processing: {folder_name}")
print("=" * 70)
# Paths
xml_path = folder_path / "Annotations" / "annotations.xml"
images_dir = folder_path / "Images"
# Check if required files/directories exist
if not xml_path.exists():
print(f"⚠️ Warning: XML file not found: {xml_path}")
return {
"folder": folder_name,
"status": "no_xml",
"images": 0,
"annotations": 0
}
if not images_dir.exists():
print(f"⚠️ Warning: Images directory not found: {images_dir}")
return {
"folder": folder_name,
"status": "no_images",
"images": 0,
"annotations": 0
}
# Step 1: Convert XML to COCO
print(f"\n[Step 1/2] Converting XML to COCO format...")
print(f" XML: {xml_path}")
print(f" Images: {images_dir}")
try:
coco_json = load_ground_truth(str(xml_path), str(images_dir))
if not coco_json:
print(f"❌ Error: Failed to parse XML")
return {
"folder": folder_name,
"status": "parse_error",
"images": 0,
"annotations": 0
}
num_images = len(coco_json["images"])
num_annotations = len(coco_json["annotations"])
print(f" βœ“ Loaded {num_images} images")
print(f" βœ“ Loaded {num_annotations} annotations")
print(f" βœ“ Categories: {len(coco_json['categories'])}")
# Save COCO JSON inside the dataset folder
coco_output_path = folder_path / "ground_truth_coco.json"
with open(coco_output_path, 'w') as f:
json.dump(coco_json, f, indent=4)
print(f" βœ“ Saved COCO JSON to: {coco_output_path}")
except Exception as e:
print(f"❌ Error converting XML to COCO: {e}")
import traceback
traceback.print_exc()
return {
"folder": folder_name,
"status": "conversion_error",
"error": str(e),
"images": 0,
"annotations": 0
}
# Step 2: Create visualizations
print(f"\n[Step 2/2] Creating visualizations...")
try:
# Create visualizations directory inside the dataset folder
vis_output_dir = folder_path / "visualizations"
visualize_all_images(coco_json, str(images_dir), str(vis_output_dir))
print(f" βœ“ Visualizations saved to: {vis_output_dir}")
except Exception as e:
print(f"⚠️ Warning: Error creating visualizations: {e}")
import traceback
traceback.print_exc()
# Don't fail the whole process if visualization fails
return {
"folder": folder_name,
"status": "success",
"images": num_images,
"annotations": num_annotations,
"categories": len(coco_json["categories"]),
"coco_json_path": str(coco_output_path),
"visualizations_path": str(vis_output_dir)
}
def main():
"""Main function to process all datasets."""
print("=" * 70)
print("BATCH PROCESSING: XML to COCO Conversion & Visualization")
print("=" * 70)
print(f"\nProcessing {len(DATASET_FOLDERS)} datasets:")
for folder in DATASET_FOLDERS:
print(f" - {folder}")
results = []
for folder_name in DATASET_FOLDERS:
result = process_dataset(folder_name)
results.append(result)
# Print summary
print("\n" + "=" * 70)
print("PROCESSING SUMMARY")
print("=" * 70)
successful = [r for r in results if r["status"] == "success"]
failed = [r for r in results if r["status"] != "success"]
print(f"\nβœ“ Successfully processed: {len(successful)}/{len(results)}")
for r in successful:
print(f" - {r['folder']}: {r['images']} images, {r['annotations']} annotations")
if failed:
print(f"\n⚠️ Failed/Skipped: {len(failed)}/{len(results)}")
for r in failed:
print(f" - {r['folder']}: {r['status']}")
# Save summary to JSON
summary_path = Path(SCRIPT_DIR) / "processing_summary.json"
with open(summary_path, 'w') as f:
json.dump({
"total_datasets": len(DATASET_FOLDERS),
"successful": len(successful),
"failed": len(failed),
"results": results
}, f, indent=4)
print(f"\nβœ“ Summary saved to: {summary_path}")
print("\n" + "=" * 70)
print("BATCH PROCESSING COMPLETE!")
print("=" * 70)
print("\nEach dataset folder now contains:")
print(" - ground_truth_coco.json (COCO format annotations)")
print(" - visualizations/ (annotated images)")
if __name__ == "__main__":
main()