# Source: Hugging Face Space "hassanshka/layout"
# Space status at capture time: Runtime error
# File size: 6,387 Bytes
# Revision: 0a216c0


"""
Batch process all datasets: Convert XML to COCO and create visualizations.

Processes all folders:
- Aleyna 1 (2024)
- Annika 2 (2024)
- Luise 1 (2024)
- Luise 2 (2024)
- Nuray 1 (2024)
- Nuray 2 (2024)

For each folder:
1. Converts XML annotations to COCO format
2. Creates visualizations of annotations on images
3. Saves outputs inside each folder
"""
import os
import sys
import json
from pathlib import Path

# Directory containing this script. It is put at the front of sys.path before
# the project imports below so that `original_annotations` and
# `visualize_ground_truth` can be imported (presumably they live next to this
# file -- confirm). It also serves as the default base directory for dataset
# folders and as the location of processing_summary.json.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)

from original_annotations import load_ground_truth
from visualize_ground_truth import visualize_all_images


# Names of all dataset folders to process, in processing order.
# process_dataset() resolves each name relative to its base directory
# (SCRIPT_DIR unless another base_dir is passed).
DATASET_FOLDERS = [
    "Aleyna 1 (2024)",
    "Annika 2 (2024)",
    "Luise 1 (2024)",
    "Luise 2 (2024)",
    "Nuray 1 (2024)",
    "Nuray 2 (2024)",
]


def _failure_result(folder_name, status, error=None):
    """Build the result dict for a dataset that was skipped or failed."""
    result = {
        "folder": folder_name,
        "status": status,
    }
    if error is not None:
        result["error"] = error
    result["images"] = 0
    result["annotations"] = 0
    return result


def process_dataset(folder_name, base_dir=None):
    """
    Process a single dataset folder.

    Reads ``<folder>/Annotations/annotations.xml`` and ``<folder>/Images``,
    converts the annotations with ``load_ground_truth``, writes the result to
    ``<folder>/ground_truth_coco.json`` and then calls
    ``visualize_all_images`` with ``<folder>/visualizations`` as output
    directory.

    Args:
        folder_name: Name of the dataset folder
        base_dir: Base directory containing the dataset folders (default: SCRIPT_DIR)

    Returns:
        dict with processing results. It always contains "folder", "status",
        "images" and "annotations". "status" is one of:

        - "not_found": the dataset folder is missing or not a directory
        - "no_xml": the annotations XML is missing or not a regular file
        - "no_images": the Images directory is missing or not a directory
        - "parse_error": ``load_ground_truth`` returned a falsy value
        - "conversion_error": conversion or saving raised; "error" holds the
          message
        - "success": the COCO JSON was written; the dict then also contains
          "categories", "coco_json_path" and "visualizations_path", plus
          "visualization_error" if the (best-effort) visualization step
          raised
    """
    # Local import keeps this block self-contained; only needed on failures.
    import traceback

    if base_dir is None:
        base_dir = SCRIPT_DIR

    folder_path = Path(base_dir) / folder_name

    # is_dir()/is_file() instead of exists(): a path of the wrong type (e.g. a
    # regular file called "Images") must be rejected here rather than blowing
    # up later as a generic conversion error.
    if not folder_path.is_dir():
        print(f"⚠️  Warning: Folder not found: {folder_path}")
        return _failure_result(folder_name, "not_found")

    print("\n" + "=" * 70)
    print(f"Processing: {folder_name}")
    print("=" * 70)

    # Paths
    xml_path = folder_path / "Annotations" / "annotations.xml"
    images_dir = folder_path / "Images"

    # Check if required files/directories exist
    if not xml_path.is_file():
        print(f"⚠️  Warning: XML file not found: {xml_path}")
        return _failure_result(folder_name, "no_xml")

    if not images_dir.is_dir():
        print(f"⚠️  Warning: Images directory not found: {images_dir}")
        return _failure_result(folder_name, "no_images")

    # Step 1: Convert XML to COCO
    print("\n[Step 1/2] Converting XML to COCO format...")
    print(f"  XML: {xml_path}")
    print(f"  Images: {images_dir}")

    try:
        coco_json = load_ground_truth(str(xml_path), str(images_dir))

        if not coco_json:
            print("❌ Error: Failed to parse XML")
            return _failure_result(folder_name, "parse_error")

        num_images = len(coco_json["images"])
        num_annotations = len(coco_json["annotations"])
        num_categories = len(coco_json["categories"])

        print(f"  ✓ Loaded {num_images} images")
        print(f"  ✓ Loaded {num_annotations} annotations")
        print(f"  ✓ Categories: {num_categories}")

        # Save COCO JSON inside the dataset folder
        coco_output_path = folder_path / "ground_truth_coco.json"
        with open(coco_output_path, 'w', encoding="utf-8") as f:
            json.dump(coco_json, f, indent=4)
        print(f"  ✓ Saved COCO JSON to: {coco_output_path}")

    except Exception as e:
        # Batch boundary: report and continue with the next dataset.
        print(f"❌ Error converting XML to COCO: {e}")
        traceback.print_exc()
        return _failure_result(folder_name, "conversion_error", error=str(e))

    # Step 2: Create visualizations
    print("\n[Step 2/2] Creating visualizations...")

    # Create visualizations directory inside the dataset folder
    vis_output_dir = folder_path / "visualizations"
    visualization_error = None

    try:
        visualize_all_images(coco_json, str(images_dir), str(vis_output_dir))
    except Exception as e:
        print(f"⚠️  Warning: Error creating visualizations: {e}")
        traceback.print_exc()
        # Don't fail the whole process if visualization fails, but record the
        # problem so the summary does not silently claim a complete run.
        visualization_error = str(e)
    else:
        print(f"  ✓ Visualizations saved to: {vis_output_dir}")

    result = {
        "folder": folder_name,
        "status": "success",
        "images": num_images,
        "annotations": num_annotations,
        "categories": num_categories,
        "coco_json_path": str(coco_output_path),
        "visualizations_path": str(vis_output_dir)
    }
    if visualization_error is not None:
        result["visualization_error"] = visualization_error
    return result


def main():
    """
    Main function to process all datasets.

    Runs process_dataset() for every entry of DATASET_FOLDERS, prints a
    summary of successful and failed/skipped datasets, and writes the same
    information to ``processing_summary.json`` in SCRIPT_DIR.
    """
    banner = "=" * 70

    print(banner)
    print("BATCH PROCESSING: XML to COCO Conversion & Visualization")
    print(banner)
    print(f"\nProcessing {len(DATASET_FOLDERS)} datasets:")
    for folder in DATASET_FOLDERS:
        print(f"  - {folder}")

    results = [process_dataset(folder_name) for folder_name in DATASET_FOLDERS]

    # Print summary
    print("\n" + banner)
    print("PROCESSING SUMMARY")
    print(banner)

    successful = [r for r in results if r["status"] == "success"]
    failed = [r for r in results if r["status"] != "success"]

    print(f"\n✓ Successfully processed: {len(successful)}/{len(results)}")
    for r in successful:
        print(f"  - {r['folder']}: {r['images']} images, {r['annotations']} annotations")

    if failed:
        print(f"\n⚠️  Failed/Skipped: {len(failed)}/{len(results)}")
        for r in failed:
            # Conversion errors carry the exception text; show it so the
            # summary is actionable without scrolling back through the log.
            detail = f" ({r['error']})" if r.get("error") else ""
            print(f"  - {r['folder']}: {r['status']}{detail}")

    # Save summary to JSON
    summary_path = Path(SCRIPT_DIR) / "processing_summary.json"
    with open(summary_path, 'w', encoding="utf-8") as f:
        json.dump({
            "total_datasets": len(DATASET_FOLDERS),
            "successful": len(successful),
            "failed": len(failed),
            "results": results
        }, f, indent=4)

    print(f"\n✓ Summary saved to: {summary_path}")
    print("\n" + banner)
    print("BATCH PROCESSING COMPLETE!")
    print(banner)

    # Only advertise outputs that were actually produced: nothing when every
    # dataset failed, and a qualified statement when only some succeeded.
    if successful:
        if failed:
            scope = "Each successfully processed dataset folder"
        else:
            scope = "Each dataset folder"
        print(f"\n{scope} now contains:")
        print("  - ground_truth_coco.json (COCO format annotations)")
        print("  - visualizations/ (annotated images)")


# Run the batch only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()