Spaces:

hassanshka
/

layout

Runtime error

App Files Files Community

layout / compare /data /batch_process_all_datasets.py

hassanshka

Add test_combined_models.py and compare/ folder (excluding cvat_project_7_export and Annika 2 folders)

0a216c0 about 1 month ago

raw

history blame contribute delete

6.39 kB

	"""
	Batch process all datasets: Convert XML to COCO and create visualizations.

	Processes all folders:
	- Aleyna 1 (2024)
	- Annika 2 (2024)
	- Luise 1 (2024)
	- Luise 2 (2024)
	- Nuray 1 (2024)
	- Nuray 2 (2024)

	For each folder:
	1. Converts XML annotations to COCO format
	2. Creates visualizations of annotations on images
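	3. Saves outputs inside each folder (ground_truth_coco.json and visualizations/)

	Finally, writes processing_summary.json next to this script.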
	"""
	import os
	import sys
	import json
	from pathlib import Path

	# Add current directory to path
	SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
	sys.path.insert(0, SCRIPT_DIR)

	from original_annotations import load_ground_truth
	from visualize_ground_truth import visualize_all_images


	# Dataset folder names, processed in this order by main().
	# Each name is resolved relative to the base directory given to process_dataset().
	DATASET_FOLDERS = [
	    "Aleyna 1 (2024)",
	    "Annika 2 (2024)",
	    "Luise 1 (2024)",
	    "Luise 2 (2024)",
	    "Nuray 1 (2024)",
	    "Nuray 2 (2024)",
	]


	def _skipped_result(folder_name, status, **extra):
	    """Build the result dict for a dataset that produced no output.

	    Key order is folder, status, any extras, images, annotations, so the
	    summary JSON keeps the same layout for every non-success outcome.
	    """
	    result = {"folder": folder_name, "status": status}
	    result.update(extra)
	    result["images"] = 0
	    result["annotations"] = 0
	    return result


	def process_dataset(folder_name, base_dir=None):
	    """
	    Process a single dataset folder.

	    Converts ``<folder>/Annotations/annotations.xml`` to COCO JSON (written to
	    ``<folder>/ground_truth_coco.json``) and then renders visualizations into
	    ``<folder>/visualizations``.

	    Args:
	        folder_name: Name of the dataset folder
	        base_dir: Base directory containing the dataset folders (default: SCRIPT_DIR)

	    Returns:
	        dict with processing results. It always contains "folder", "status",
	        "images" and "annotations". "status" is one of "not_found", "no_xml",
	        "no_images", "parse_error", "conversion_error" (which also carries
	        "error") or "success". A success result additionally contains
	        "categories", "coco_json_path", "visualizations_path" and
	        "visualizations_created"; when visualization failed it also carries
	        "visualization_error".
	    """
	    if base_dir is None:
	        base_dir = SCRIPT_DIR

	    folder_path = Path(base_dir) / folder_name

	    if not folder_path.exists():
	        print(f"⚠️ Warning: Folder not found: {folder_path}")
	        return _skipped_result(folder_name, "not_found")

	    print("\n" + "=" * 70)
	    print(f"Processing: {folder_name}")
	    print("=" * 70)

	    # Paths
	    xml_path = folder_path / "Annotations" / "annotations.xml"
	    images_dir = folder_path / "Images"

	    # Check if required files/directories exist
	    if not xml_path.exists():
	        print(f"⚠️ Warning: XML file not found: {xml_path}")
	        return _skipped_result(folder_name, "no_xml")

	    if not images_dir.exists():
	        print(f"⚠️ Warning: Images directory not found: {images_dir}")
	        return _skipped_result(folder_name, "no_images")

	    # Step 1: Convert XML to COCO
	    print("\n[Step 1/2] Converting XML to COCO format...")
	    print(f" XML: {xml_path}")
	    print(f" Images: {images_dir}")

	    try:
	        coco_json = load_ground_truth(str(xml_path), str(images_dir))

	        # A falsy return value is treated as a parse failure.
	        if not coco_json:
	            print("❌ Error: Failed to parse XML")
	            return _skipped_result(folder_name, "parse_error")

	        num_images = len(coco_json["images"])
	        num_annotations = len(coco_json["annotations"])
	        num_categories = len(coco_json["categories"])

	        print(f" ✓ Loaded {num_images} images")
	        print(f" ✓ Loaded {num_annotations} annotations")
	        print(f" ✓ Categories: {num_categories}")

	        # Save COCO JSON inside the dataset folder. The encoding is explicit so
	        # the file does not depend on the platform's locale default.
	        coco_output_path = folder_path / "ground_truth_coco.json"
	        with open(coco_output_path, "w", encoding="utf-8") as f:
	            json.dump(coco_json, f, indent=4)
	        print(f" ✓ Saved COCO JSON to: {coco_output_path}")

	    except Exception as e:
	        # Per-dataset boundary: report the failure and let the batch continue.
	        print(f"❌ Error converting XML to COCO: {e}")
	        import traceback
	        traceback.print_exc()
	        return _skipped_result(folder_name, "conversion_error", error=str(e))

	    # Step 2: Create visualizations
	    print("\n[Step 2/2] Creating visualizations...")

	    # Create visualizations directory inside the dataset folder
	    vis_output_dir = folder_path / "visualizations"
	    visualization_error = None

	    try:
	        visualize_all_images(coco_json, str(images_dir), str(vis_output_dir))
	        print(f" ✓ Visualizations saved to: {vis_output_dir}")

	    except Exception as e:
	        print(f"⚠️ Warning: Error creating visualizations: {e}")
	        import traceback
	        traceback.print_exc()
	        # Don't fail the whole process if visualization fails, but record the
	        # failure so the result does not claim the visualizations exist.
	        visualization_error = str(e)

	    result = {
	        "folder": folder_name,
	        "status": "success",
	        "images": num_images,
	        "annotations": num_annotations,
	        "categories": num_categories,
	        "coco_json_path": str(coco_output_path),
	        "visualizations_path": str(vis_output_dir),
	        "visualizations_created": visualization_error is None,
	    }
	    if visualization_error is not None:
	        result["visualization_error"] = visualization_error
	    return result


	def main():
	    """Run process_dataset over every entry in DATASET_FOLDERS and report the outcome.

	    Prints a per-dataset summary to stdout and writes the collected results
	    to ``processing_summary.json`` in SCRIPT_DIR.
	    """
	    rule = "=" * 70

	    print(rule)
	    print("BATCH PROCESSING: XML to COCO Conversion & Visualization")
	    print(rule)
	    print(f"\nProcessing {len(DATASET_FOLDERS)} datasets:")
	    for name in DATASET_FOLDERS:
	        print(f" - {name}")

	    # Datasets are processed in list order; each call prints its own progress.
	    results = [process_dataset(name) for name in DATASET_FOLDERS]

	    # Print summary
	    print("\n" + rule)
	    print("PROCESSING SUMMARY")
	    print(rule)

	    # Split the outcomes in one pass; anything that is not "success" counts as failed.
	    successful, failed = [], []
	    for outcome in results:
	        bucket = successful if outcome["status"] == "success" else failed
	        bucket.append(outcome)

	    print(f"\n✓ Successfully processed: {len(successful)}/{len(results)}")
	    for outcome in successful:
	        print(f" - {outcome['folder']}: {outcome['images']} images, {outcome['annotations']} annotations")

	    if failed:
	        print(f"\n⚠️ Failed/Skipped: {len(failed)}/{len(results)}")
	        for outcome in failed:
	            print(f" - {outcome['folder']}: {outcome['status']}")

	    # Save summary to JSON
	    summary = {
	        "total_datasets": len(DATASET_FOLDERS),
	        "successful": len(successful),
	        "failed": len(failed),
	        "results": results
	    }
	    summary_path = Path(SCRIPT_DIR) / "processing_summary.json"
	    with open(summary_path, 'w') as f:
	        json.dump(summary, f, indent=4)

	    print(f"\n✓ Summary saved to: {summary_path}")
	    print("\n" + rule)
	    print("BATCH PROCESSING COMPLETE!")
	    print(rule)
	    print("\nEach dataset folder now contains:")
	    print(" - ground_truth_coco.json (COCO format annotations)")
	    print(" - visualizations/ (annotated images)")


	# Run the batch job only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()