Spaces:

realruneet
/

Campus-AI

Running

App Files Files Community

Campus-AI / scripts /count_splits.py

realruneett

Final Release: CampusGen AI Pipeline & Compositor

a8aea21 about 1 month ago

raw

history blame contribute delete

2.19 kB


	import os
	from pathlib import Path

	# Config
	# All dataset folders are resolved relative to the current working directory.
	data_root = Path("data")
	# One folder per split, each a direct child of the data root.
	train_dir, val_dir, test_dir = (
	    data_root / split_name for split_name in ("train", "val", "test")
	)
	# Lower-case file suffixes (leading dot included) that count as images.
	IMG_EXTENSIONS = {".bmp", ".jpeg", ".jpg", ".png", ".webp"}

	def count_images_in_dir(d: Path, *, extensions=None) -> int:
	    """Count the image files that sit directly inside *d* (non-recursive).

	    Args:
	        d: Directory to inspect.
	        extensions: Optional collection of lower-case suffixes (with the
	            leading dot) that identify an image. Defaults to the
	            module-level ``IMG_EXTENSIONS``.

	    Returns:
	        The number of regular files in *d* whose suffix, compared
	        case-insensitively, is in *extensions*. Returns 0 when *d* does
	        not exist or is not a directory.
	    """
	    if extensions is None:
	        extensions = IMG_EXTENSIONS
	    # is_dir() also covers the "path exists but is a regular file" case,
	    # which would otherwise raise NotADirectoryError when listing it.
	    if not d.is_dir():
	        return 0
	    # Only regular files are counted, so a sub-folder that happens to be
	    # named like "foo.png" does not inflate the total.
	    return sum(
	        1
	        for entry in d.iterdir()
	        if entry.is_file() and entry.suffix.lower() in extensions
	    )

	def _find_image_dirs(base: Path) -> set:
	    """Return relative POSIX paths of every folder under *base* holding images.

	    A folder qualifies when at least one of its direct files has a suffix
	    in ``IMG_EXTENSIONS`` (case-insensitive). Images located directly in
	    *base* yield the entry ``"."``. A missing *base* gives an empty set.
	    """
	    found = set()
	    if not base.is_dir():
	        return found
	    for root, _dirs, files in os.walk(base):
	        if any(Path(name).suffix.lower() in IMG_EXTENSIONS for name in files):
	            # as_posix() gives forward slashes on Windows while leaving a
	            # backslash that is part of a POSIX folder name untouched, so
	            # the category can still be joined back onto a split dir.
	            found.add(Path(root).relative_to(base).as_posix())
	    return found


	# Find all categories from processed dir (source of truth)
	processed_dir = data_root / "processed"
	categories = _find_image_dirs(processed_dir)

	if not categories:
	    # Fallback: the processed dir is missing or holds no images, so derive
	    # the categories from the split folders directly.
	    for split_dir in (train_dir, val_dir, test_dir):
	        categories |= _find_image_dirs(split_dir)

	# Print a per-category table of image counts for each split, followed by
	# a grand-total row. Columns are separated by a plain " | "; the previous
	# "\|" was an invalid escape sequence that printed a stray backslash.
	print(f"{'Category':<40} | {'Train':<6} | {'Val':<5} | {'Test':<5} | {'Total':<6} | {'% Train':<8}")
	print("-" * 100)

	grand_totals = {"train": 0, "val": 0, "test": 0, "total": 0}

	# Sorted so the report order is stable between runs.
	for cat in sorted(categories):
	    c_train = count_images_in_dir(train_dir / cat)
	    c_val = count_images_in_dir(val_dir / cat)
	    c_test = count_images_in_dir(test_dir / cat)
	    total = c_train + c_val + c_test

	    grand_totals["train"] += c_train
	    grand_totals["val"] += c_val
	    grand_totals["test"] += c_test
	    grand_totals["total"] += total

	    # Guard against categories that have no images in any split.
	    pct_train = (c_train / total * 100) if total > 0 else 0.0

	    print(f"{cat:<40} | {c_train:<6} | {c_val:<5} | {c_test:<5} | {total:<6} | {pct_train:.1f}%")

	print("-" * 100)
	t_train = grand_totals["train"]
	t_total = grand_totals["total"]
	# Same zero-division guard for the overall training share.
	t_pct = (t_train / t_total * 100) if t_total > 0 else 0.0
	print(f"{'TOTAL':<40} | {t_train:<6} | {grand_totals['val']:<5} | {grand_totals['test']:<5} | {t_total:<6} | {t_pct:.1f}%")