Spaces:

ddebree
/

JEPA-demo

Running

App Files Files Community

JEPA-demo / src /bulk_eval.py

ddebree

Upload folder using huggingface_hub

2bc3168 verified 23 days ago

raw

history blame contribute delete

10.9 kB

	from __future__ import annotations

	import argparse
	from datetime import datetime
	import json
	from pathlib import Path

	import numpy as np
	import pandas as pd
	from sklearn.metrics import confusion_matrix

	from .ijepa_localization import IJepaPatchLocalizer
	from .obstacle_dataset import DEFAULT_OBSTACLE_DATASET, load_balanced_obstacle_rows, load_obstacle_image, parse_yolo_boxes
	from .prototypes import build_class_prototypes, guess_objects_with_prototypes
	from .small_head import guess_objects_with_head, train_small_head


	def run_bulk_eval(
	    dataset_name: str,
	    split: str,
	    model_name: str,
	    eval_samples: int,
	    support_samples: int,
	    seed: int,
	    output: str,
	) -> Path:
	    """Score prototype and small-head guesses on held-out images and save them.

	    Support rows are loaded first (seed offset by 10,000) and used to build
	    the class prototypes and train the small head. Evaluation rows are then
	    selected so that none of their file names occur in the support set. One
	    record is emitted per parsed YOLO box of every evaluation image, and the
	    records are written to CSV together with the summary files.

	    Args:
	        dataset_name: Dataset identifier forwarded to the loader helpers.
	        split: Dataset split forwarded to the loader helpers.
	        model_name: Model name handed to ``IJepaPatchLocalizer``.
	        eval_samples: Number of evaluation rows to select.
	        support_samples: Number of support rows requested from the loader.
	        seed: Base random seed for the evaluation and support draws.
	        output: Output specification, resolved by ``resolve_output_path``.

	    Returns:
	        Path of the per-object CSV file that was written.
	    """
	    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
	    output_path = resolve_output_path(output, run_id)
	    localizer = IJepaPatchLocalizer(model_name=model_name)

	    # A different seed is used for the support draw than for the eval draw.
	    support_rows = load_balanced_obstacle_rows(dataset_name, split, support_samples, random_seed=seed + 10_000)
	    support_files = {support_row["file_name"] for support_row in support_rows}
	    eval_rows = load_disjoint_eval_rows(dataset_name, split, eval_samples, seed, support_files)
	    prototypes = build_class_prototypes(dataset_name, split, support_rows, localizer)
	    head = train_small_head(dataset_name, split, support_rows, localizer)

	    nothing = (None, None, None)
	    records = []
	    for sample_index, row in enumerate(eval_rows):
	        image = load_obstacle_image(dataset_name, row, split)
	        boxes = parse_yolo_boxes(row)
	        prototype_guesses = guess_objects_with_prototypes(image, boxes, localizer, prototypes)
	        head_guesses = guess_objects_with_head(image, boxes, localizer, head)
	        # Guesses are matched back to boxes through their object_index.
	        head_lookup = {item.object_index: item for item in head_guesses}
	        prototype_lookup = {item.object_index: item for item in prototype_guesses}

	        # Object numbering starts at 1.
	        for object_index, box in enumerate(boxes, start=1):
	            proto = prototype_lookup.get(object_index)
	            scored = head_lookup.get(object_index)
	            proto_label, proto_similarity, proto_agreement = (
	                (proto.ijepa_guess, proto.similarity, proto.agreement) if proto else nothing
	            )
	            head_label, head_confidence, head_agreement = (
	                (scored.head_guess, scored.confidence, scored.agreement) if scored else nothing
	            )
	            train_objects, train_accuracy, parameter_count = (
	                (head.train_objects, head.train_accuracy, head.parameter_count) if head else nothing
	            )
	            # Key order determines the column order of the CSV.
	            record = {
	                "sample": sample_index,
	                "run_id": run_id,
	                "support_samples": support_samples,
	                "eval_samples": eval_samples,
	                "seed": seed,
	                "file_name": row["file_name"],
	                "object": object_index,
	                "yolo_label": box.class_name,
	                "prototype_guess": proto_label,
	                "prototype_similarity": proto_similarity,
	                "prototype_agreement": proto_agreement,
	                "head_guess": head_label,
	                "head_confidence": head_confidence,
	                "head_agreement": head_agreement,
	                "head_train_objects": train_objects,
	                "head_train_accuracy": train_accuracy,
	                "head_parameter_count": parameter_count,
	                "prototype_classes": len(prototypes),
	            }
	            records.append(record)

	    output_path.parent.mkdir(parents=True, exist_ok=True)
	    df = pd.DataFrame(records)
	    df.to_csv(output_path, index=False)
	    write_summaries(df, output_path)
	    return output_path


	def load_disjoint_eval_rows(
	    dataset_name: str,
	    split: str,
	    eval_samples: int,
	    seed: int,
	    support_files: set[str],
	) -> list:
	    """Select evaluation rows whose files are not part of the support set.

	    Args:
	        dataset_name: Dataset identifier forwarded to the row loader.
	        split: Dataset split forwarded to the row loader.
	        eval_samples: Number of rows to return.
	        seed: Random seed forwarded to the row loader.
	        support_files: File names that must not appear in the result.

	    Returns:
	        The first ``eval_samples`` candidate rows, in loader order, whose
	        ``"file_name"`` is not in ``support_files``. A request for zero
	        rows returns an empty list.

	    Raises:
	        ValueError: If fewer than ``eval_samples`` disjoint rows are found.
	    """
	    candidates = load_balanced_obstacle_rows(
	        dataset_name,
	        split,
	        # Over-request so enough rows remain after the support files are
	        # dropped; the extra 100 looks like additional headroom.
	        eval_samples + len(support_files) + 100,
	        random_seed=seed,
	    )
	    selected = []
	    for row in candidates:
	        # Test the quota before appending: checking only after the append
	        # would hand back one row even when none were requested.
	        if len(selected) >= eval_samples:
	            break
	        if row["file_name"] not in support_files:
	            selected.append(row)
	    if len(selected) < eval_samples:
	        raise ValueError(f"Could only find {len(selected)} disjoint eval rows; requested {eval_samples}.")
	    return selected


	def write_summaries(df: pd.DataFrame, output_path: Path) -> None:
	    """Write the summary, per-class, confusion and report files for a run.

	    Nothing is written when ``df`` is empty. Otherwise five companion files
	    are created next to ``output_path`` (names come from ``sibling_path``):
	    a one-row summary CSV, a per-class CSV, one confusion-matrix CSV for the
	    prototype guesses and one for the head guesses, and a JSON report that
	    bundles the summary, the per-class table, the most frequent confusions
	    and the paths of the written files.

	    Args:
	        df: Per-object records as produced by ``run_bulk_eval``.
	        output_path: Path of the per-object CSV the companions belong to.
	    """
	    if df.empty:
	        return

	    (
	        summary_path,
	        per_class_path,
	        prototype_confusion_path,
	        head_confusion_path,
	        report_path,
	    ) = (
	        sibling_path(output_path, suffix)
	        for suffix in (
	            "_summary.csv",
	            "_per_class.csv",
	            "_prototype_confusion.csv",
	            "_head_confusion.csv",
	            "_report.json",
	        )
	    )

	    run_label = str(df["run_id"].iloc[0]) if "run_id" in df else None
	    # Key order determines the column order of the summary CSV.
	    summary = {
	        "run_id": run_label,
	        "support_samples": first_int(df, "support_samples"),
	        "eval_samples": first_int(df, "eval_samples"),
	        "seed": first_int(df, "seed"),
	        "objects": int(len(df)),
	        "classes": int(df["yolo_label"].nunique()),
	        "prototype_accuracy": safe_accuracy(df, "prototype_agreement"),
	        "head_accuracy": safe_accuracy(df, "head_agreement"),
	        "prototype_macro_accuracy": macro_accuracy(df, "prototype_agreement"),
	        "head_macro_accuracy": macro_accuracy(df, "head_agreement"),
	        "head_train_objects": first_int(df, "head_train_objects"),
	        "head_parameter_count": first_int(df, "head_parameter_count"),
	        "prototype_classes": first_int(df, "prototype_classes"),
	    }
	    pd.DataFrame([summary]).to_csv(summary_path, index=False)

	    by_label = df.groupby("yolo_label").agg(
	        objects=("yolo_label", "size"),
	        prototype_accuracy=("prototype_agreement", safe_mean),
	        head_accuracy=("head_agreement", safe_mean),
	        avg_prototype_similarity=("prototype_similarity", "mean"),
	        avg_head_confidence=("head_confidence", "mean"),
	    )
	    # Lowest head accuracy first; ties are ordered by descending object count.
	    per_class = by_label.reset_index().sort_values(
	        ["head_accuracy", "objects"],
	        ascending=[True, False],
	    )
	    per_class.to_csv(per_class_path, index=False)

	    # Both matrices are computed before either one is written.
	    prototype_confusion = confusion_frame(df["yolo_label"], df["prototype_guess"])
	    head_confusion = confusion_frame(df["yolo_label"], df["head_guess"])
	    prototype_confusion.to_csv(prototype_confusion_path)
	    head_confusion.to_csv(head_confusion_path)

	    written_files = {
	        "objects": str(output_path),
	        "summary": str(summary_path),
	        "per_class": str(per_class_path),
	        "prototype_confusion": str(prototype_confusion_path),
	        "head_confusion": str(head_confusion_path),
	    }
	    # NaN is swapped for None so the per-class table serializes as JSON null.
	    per_class_records = per_class.replace({np.nan: None}).to_dict(orient="records")
	    report = {
	        "summary": summary,
	        "per_class": per_class_records,
	        "top_prototype_confusions": top_confusions(df, "prototype_guess"),
	        "top_head_confusions": top_confusions(df, "head_guess"),
	        "files": written_files,
	    }
	    report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")


	def sibling_path(output_path: Path, suffix: str) -> Path:
	    """Return the path of a companion file located next to ``output_path``.

	    The five known suffixes map to fixed file names that do not include the
	    stem of ``output_path``. Any other suffix is appended to that stem.

	    Args:
	        output_path: Path of the main output file.
	        suffix: Companion identifier such as ``"_summary.csv"``.

	    Returns:
	        A path in the same directory as ``output_path``.
	    """
	    fixed_names = {
	        "_summary.csv": "summary.csv",
	        "_per_class.csv": "per_class.csv",
	        "_prototype_confusion.csv": "prototype_confusion.csv",
	        "_head_confusion.csv": "head_confusion.csv",
	        "_report.json": "report.json",
	    }
	    if suffix in fixed_names:
	        file_name = fixed_names[suffix]
	    else:
	        file_name = f"{output_path.stem}{suffix}"
	    return output_path.with_name(file_name)


	def resolve_output_path(output: str, run_id: str) -> Path:
	    """Turn the ``--output`` value into the path of the per-object CSV.

	    Rules, checked in this order:

	    * ``"auto"`` gives ``outputs/run_<run_id>/objects.csv``.
	    * A value containing ``{timestamp}`` or ``{run_id}`` is formatted with
	      ``run_id``; without a file suffix it is treated as a directory and
	      ``objects.csv`` is appended.
	    * A value without a file suffix is treated as a directory.
	    * A file path that already exists gets ``_<run_id>`` appended to its
	      stem.
	    * Anything else is used unchanged.

	    Args:
	        output: Raw output specification.
	        run_id: Identifier of the current run.

	    Returns:
	        The resolved CSV path.
	    """
	    if output == "auto":
	        return Path("outputs") / f"run_{run_id}" / "objects.csv"

	    has_placeholder = "{timestamp}" in output or "{run_id}" in output
	    if has_placeholder:
	        expanded = Path(output.format(timestamp=run_id, run_id=run_id))
	        return expanded if expanded.suffix else expanded / "objects.csv"

	    target = Path(output)
	    if not target.suffix:
	        return target / "objects.csv"
	    if target.exists():
	        return target.with_name(f"{target.stem}_{run_id}{target.suffix}")
	    return target


	def safe_mean(values) -> float:
	    """Return the fraction of truthy entries among the non-missing values.

	    Args:
	        values: Any input accepted by ``pd.Series``.

	    Returns:
	        The mean of the values cast to ``bool`` after missing entries are
	        dropped, or ``NaN`` when nothing is left.
	    """
	    present = pd.Series(values).dropna()
	    return float(present.astype(bool).mean()) if not present.empty else np.nan


	def safe_accuracy(df: pd.DataFrame, column: str) -> float \| None:
	clean = df[column].dropna()
	if clean.empty:
	return None
	return float(clean.astype(bool).mean())


	def macro_accuracy(df: pd.DataFrame, column: str) -> float \| None:
	clean = df.dropna(subset=[column])
	if clean.empty:
	return None
	return float(clean.groupby("yolo_label")[column].apply(safe_mean).mean())


	def confusion_frame(y_true, y_pred) -> pd.DataFrame:
	    """Build a labelled confusion matrix from true and predicted labels.

	    Pairs in which either the true label or the prediction is missing are
	    ignored. Rows correspond to true labels and columns to predictions;
	    both axes use the sorted union of all labels that remain.

	    Args:
	        y_true: True labels.
	        y_pred: Predicted labels, aligned with ``y_true``.

	    Returns:
	        A square frame of counts, or an empty frame when no complete pair
	        exists.
	    """
	    clean = pd.DataFrame({"true": y_true, "pred": y_pred}).dropna()
	    labels = sorted(set(clean["true"]).union(set(clean["pred"])))
	    if clean.empty:
	        # confusion_matrix raises ValueError for an empty label list, which
	        # would abort summary writing whenever one predictor produced no
	        # guesses at all.
	        return pd.DataFrame(index=labels, columns=labels)
	    matrix = confusion_matrix(clean["true"], clean["pred"], labels=labels)
	    return pd.DataFrame(matrix, index=labels, columns=labels)


	def top_confusions(df: pd.DataFrame, prediction_column: str, limit: int = 10) -> list[dict]:
	    """List the most frequent (true label, wrong prediction) pairs.

	    Rows without a prediction are ignored, as are rows where the prediction
	    equals ``yolo_label``.

	    Args:
	        df: Frame with a ``yolo_label`` column and the prediction column.
	        prediction_column: Name of the column holding the predictions.
	        limit: Maximum number of pairs to return.

	    Returns:
	        Dictionaries with the keys ``yolo_label``, ``predicted`` and
	        ``count``, ordered by descending count. The list is empty when
	        there are no wrong predictions.
	    """
	    predicted = df.dropna(subset=[prediction_column])
	    is_wrong = predicted["yolo_label"] != predicted[prediction_column]
	    mismatched = predicted[is_wrong]
	    if mismatched.empty:
	        return []

	    pair_sizes = mismatched.groupby(["yolo_label", prediction_column]).size()
	    pair_counts = pair_sizes.reset_index(name="count")
	    ranked = pair_counts.sort_values("count", ascending=False).head(limit)
	    return [
	        {
	            "yolo_label": entry["yolo_label"],
	            "predicted": entry[prediction_column],
	            "count": int(entry["count"]),
	        }
	        for _, entry in ranked.iterrows()
	    ]


	def first_int(df: pd.DataFrame, column: str) -> int \| None:
	clean = df[column].dropna()
	if clean.empty:
	return None
	return int(clean.iloc[0])


	def build_parser() -> argparse.ArgumentParser:
	    """Create the command-line parser for the bulk evaluation.

	    The destinations of the options match the parameter names of
	    ``run_bulk_eval``. ``--reference-samples`` is accepted as an alias of
	    ``--support-samples``.

	    Returns:
	        The configured parser.
	    """
	    parser = argparse.ArgumentParser(description="Run bulk I-JEPA prototype/head evaluation.")
	    add = parser.add_argument

	    add("--dataset-name", default=DEFAULT_OBSTACLE_DATASET)
	    add("--split", default="train")
	    add("--model-name", default="facebook/ijepa_vith14_1k")
	    add("--eval-samples", type=int, default=50)
	    add(
	        "--support-samples",
	        "--reference-samples",
	        dest="support_samples",
	        type=int,
	        default=80,
	        help="Images used to build class prototypes and train the tiny classifier.",
	    )
	    add("--seed", type=int, default=7)
	    add(
	        "--output",
	        default="auto",
	        help="Output CSV path or run directory. Supports {timestamp} or {run_id}. Default: outputs/run_<timestamp>/objects.csv",
	    )
	    return parser


	def main() -> None:
	    """Parse the command line, run the bulk evaluation and report the files.

	    The parsed arguments are passed to ``run_bulk_eval`` as keyword
	    arguments. The path of the per-object CSV is printed, followed by each
	    companion file that exists on disk.
	    """
	    args = build_parser().parse_args()
	    output = run_bulk_eval(**vars(args))
	    print(f"Saved bulk evaluation: {output}")
	    for suffix in (
	        "_summary.csv",
	        "_per_class.csv",
	        "_prototype_confusion.csv",
	        "_head_confusion.csv",
	        "_report.json",
	    ):
	        companion = sibling_path(output, suffix)
	        # write_summaries writes nothing when the evaluation produced no
	        # objects, so only announce companions that are really on disk.
	        if companion.exists():
	            print(f"Saved: {companion}")


	# Run the command-line entry point only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()