"""
Evaluate the current predictor on a separate local field-test dataset.
Expected folder layout:
data/local_test/
plastic/
paper/
organic/
metal/
glass/
unknown/ optional
residu/ optional alias for unknown
Usage:
python scripts/evaluate_local.py --data_dir data/local_test --output_dir reports/local_eval
"""
import argparse
import csv
import json
import os
import sys
from pathlib import Path
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
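# Matplotlib needs a writable config/cache directory; pointing MPLCONFIGDIR into the
# repository keeps plotting working in environments where the default location
# (under the user's home directory) may not be writable.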
MPL_CONFIG_DIR = os.path.join(PROJECT_ROOT, ".cache", "matplotlib")
os.makedirs(MPL_CONFIG_DIR, exist_ok=True)
os.environ.setdefault("MPLCONFIGDIR", MPL_CONFIG_DIR)
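# Make the project root importable so `from app.predictor import ...` resolves
# when this script is run directly from the scripts/ folder.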
sys.path.insert(0, PROJECT_ROOT)
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from app.predictor import CLASS_NAMES, WastePredictor
KNOWN_CLASSES = CLASS_NAMES
UNKNOWN_ALIASES = {"unknown", "residu", "residue", "other", "lainnya"}
EVAL_LABELS = KNOWN_CLASSES + ["unknown"]
INPUT_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
AUTO_APPROVE_THRESHOLD = float(os.getenv("AUTO_APPROVE_THRESHOLD", "0.85"))
AMBIGUITY_GAP_THRESHOLD = float(os.getenv("AMBIGUITY_GAP_THRESHOLD", "0.15"))
UNKNOWN_ON_AMBIGUOUS = os.getenv("UNKNOWN_ON_AMBIGUOUS", "true").lower() == "true"
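# Evaluation-side decision rule (applied in choose_decision_class below): the model's
# top class is kept only if its confidence reaches AUTO_APPROVE_THRESHOLD and, when
# UNKNOWN_ON_AMBIGUOUS is enabled, its margin over the runner-up is at least
# AMBIGUITY_GAP_THRESHOLD; anything else is scored as "unknown".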
def normalize_label(label: str) -> str:
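    """Lower-case a folder name and map unknown/residu-style aliases to 'unknown'."""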
normalized = label.strip().lower()
if normalized in UNKNOWN_ALIASES:
return "unknown"
return normalized
def collect_images(data_dir: Path) -> list[tuple[Path, str]]:
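    """Walk the class subfolders of data_dir and return (image_path, label) pairs,
    skipping folders whose normalized name is not an evaluation label.
    """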
rows = []
for class_dir in sorted(data_dir.iterdir()):
if not class_dir.is_dir():
continue
label = normalize_label(class_dir.name)
if label not in EVAL_LABELS:
print(f"Skipping unsupported folder: {class_dir}")
continue
for image_path in sorted(class_dir.rglob("*")):
if image_path.is_file() and image_path.suffix.lower() in INPUT_EXTS:
rows.append((image_path, label))
if not rows:
raise ValueError(f"No evaluation images found under {data_dir}")
return rows
def choose_decision_class(result: dict) -> str:
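    """Return the label used for scoring: the model's top class when it is both
    confident enough and unambiguous, otherwise 'unknown'.
    """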
ranked_scores = sorted(result["all_scores"].items(), key=lambda item: item[1], reverse=True)
top_class, top_score = ranked_scores[0]
second_score = ranked_scores[1][1] if len(ranked_scores) > 1 else top_score
confidence_gap = float(top_score) - float(second_score)
if float(result["confidence"]) < AUTO_APPROVE_THRESHOLD:
return "unknown"
if UNKNOWN_ON_AMBIGUOUS and confidence_gap < AMBIGUITY_GAP_THRESHOLD:
return "unknown"
return top_class
def save_confusion_matrix(y_true: list[str], y_pred: list[str], output_path: Path) -> None:
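    """Render a row-normalized confusion matrix over EVAL_LABELS and save it as a PNG."""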
cm = confusion_matrix(y_true, y_pred, labels=EVAL_LABELS, normalize="true")
fig, ax = plt.subplots(figsize=(8, 7))
image = ax.imshow(cm, cmap="Blues", vmin=0, vmax=1)
ax.set_xticks(range(len(EVAL_LABELS)))
ax.set_yticks(range(len(EVAL_LABELS)))
ax.set_xticklabels(EVAL_LABELS, rotation=45, ha="right")
ax.set_yticklabels(EVAL_LABELS)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Local Field-Test Confusion Matrix")
plt.colorbar(image, ax=ax)
for row in range(len(EVAL_LABELS)):
for col in range(len(EVAL_LABELS)):
value = cm[row, col]
ax.text(
col,
row,
f"{value:.2f}",
ha="center",
va="center",
fontsize=8,
color="white" if value > 0.5 else "black",
)
plt.tight_layout()
fig.savefig(output_path, dpi=150)
plt.close(fig)
def main() -> None:
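    """Run the predictor over every local test image and write metrics, a confusion
    matrix plot, per-image predictions, and a dump of misclassified samples.
    """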
parser = argparse.ArgumentParser(description="Evaluate model on a local field-test dataset.")
parser.add_argument("--data_dir", default="data/local_test")
parser.add_argument("--output_dir", default="reports/local_eval")
args = parser.parse_args()
data_dir = Path(args.data_dir)
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
predictor = WastePredictor()
predictor.load()
samples = collect_images(data_dir)
predictions = []
y_true = []
y_pred = []
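    # Run inference on each image, recording both the gated decision used for scoring
    # and the raw model output for later inspection.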
for image_path, actual_label in samples:
result = predictor.predict(image_path.read_bytes())
predicted_label = choose_decision_class(result)
predicted_label = normalize_label(predicted_label)
y_true.append(actual_label)
y_pred.append(predicted_label)
predictions.append(
{
"image_path": str(image_path),
"actual": actual_label,
"predicted": predicted_label,
"model_class": result["class"],
"confidence": result["confidence"],
"raw_class": result.get("raw_class"),
"raw_confidence": result.get("raw_confidence"),
"all_scores": result["all_scores"],
"correct": actual_label == predicted_label,
}
)
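    # Per-class precision/recall/F1 over the fixed label set; zero_division=0 reports 0
    # instead of warning when a label has no predicted (or true) samples.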
report = classification_report(
y_true,
y_pred,
labels=EVAL_LABELS,
output_dict=True,
zero_division=0,
)
readable_report = classification_report(
y_true,
y_pred,
labels=EVAL_LABELS,
zero_division=0,
)
(output_dir / "metrics.json").write_text(json.dumps(report, indent=2), encoding="utf-8")
(output_dir / "classification_report.txt").write_text(readable_report, encoding="utf-8")
save_confusion_matrix(y_true, y_pred, output_dir / "confusion_matrix.png")
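    # Persist one row per image for manual review; the per-class score dict is omitted
    # from the CSV (it is kept in mistakes.json for misclassified rows).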
with (output_dir / "predictions.csv").open("w", newline="", encoding="utf-8") as file:
writer = csv.DictWriter(
file,
fieldnames=[
"image_path",
"actual",
"predicted",
"model_class",
"confidence",
"raw_class",
"raw_confidence",
"correct",
],
)
writer.writeheader()
for row in predictions:
writer.writerow({key: row.get(key) for key in writer.fieldnames})
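    # Misclassified samples are dumped separately, including their full score
    # distribution, so they can be reviewed or re-labelled.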
mistakes = [row for row in predictions if not row["correct"]]
(output_dir / "mistakes.json").write_text(json.dumps(mistakes, indent=2), encoding="utf-8")
print(readable_report)
print(f"\nEvaluated images : {len(samples)}")
print(f"Mistakes : {len(mistakes)}")
print(f"Output dir : {output_dir.resolve()}")
if __name__ == "__main__":
main()