"""
Evaluate the current predictor on a separate local field-test dataset.

Expected folder layout:
    data/local_test/
        plastic/
        paper/
        organic/
        metal/
        glass/
        unknown/   optional
        residu/    optional alias for unknown

Usage:
    python scripts/evaluate_local.py --data_dir data/local_test --output_dir reports/local_eval
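
Optional environment overrides (defaults shown, read at startup below):
    AUTO_APPROVE_THRESHOLD=0.85
    AMBIGUITY_GAP_THRESHOLD=0.15
    UNKNOWN_ON_AMBIGUOUS=true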
"""

import argparse
import csv
import json
import os
import sys
from pathlib import Path

# Resolve the repo root so the script works from any working directory, and
# point matplotlib at a writable cache directory *before* it is imported below.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
MPL_CONFIG_DIR = os.path.join(PROJECT_ROOT, ".cache", "matplotlib")
os.makedirs(MPL_CONFIG_DIR, exist_ok=True)
os.environ.setdefault("MPLCONFIGDIR", MPL_CONFIG_DIR)
sys.path.insert(0, PROJECT_ROOT)

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

from app.predictor import CLASS_NAMES, WastePredictor

# Coerce to a list so the concatenation below works even if CLASS_NAMES is a tuple.
KNOWN_CLASSES = list(CLASS_NAMES)
UNKNOWN_ALIASES = {"unknown", "residu", "residue", "other", "lainnya"}
EVAL_LABELS = KNOWN_CLASSES + ["unknown"]
INPUT_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
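
# Decision gates for this offline evaluation. Assumption: these env-var
# defaults are meant to mirror the auto-approve gating the service applies at
# inference time; override them if your deployment uses different thresholds.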
AUTO_APPROVE_THRESHOLD = float(os.getenv("AUTO_APPROVE_THRESHOLD", "0.85"))
AMBIGUITY_GAP_THRESHOLD = float(os.getenv("AMBIGUITY_GAP_THRESHOLD", "0.15"))
UNKNOWN_ON_AMBIGUOUS = os.getenv("UNKNOWN_ON_AMBIGUOUS", "true").lower() == "true"


def normalize_label(label: str) -> str:
    """Lower-case a folder/class name and map unknown aliases to "unknown"."""
    normalized = label.strip().lower()
    if normalized in UNKNOWN_ALIASES:
        return "unknown"
    return normalized


def collect_images(data_dir: Path) -> list[tuple[Path, str]]:
    """Walk one subfolder per class and return (image_path, normalized_label) pairs."""
    rows = []
    for class_dir in sorted(data_dir.iterdir()):
        if not class_dir.is_dir():
            continue

        label = normalize_label(class_dir.name)
        if label not in EVAL_LABELS:
            print(f"Skipping unsupported folder: {class_dir}")
            continue

        for image_path in sorted(class_dir.rglob("*")):
            if image_path.is_file() and image_path.suffix.lower() in INPUT_EXTS:
                rows.append((image_path, label))

    if not rows:
        raise ValueError(f"No evaluation images found under {data_dir}")

    return rows


def choose_decision_class(result: dict) -> str:
    """Gate the raw prediction: return the top class only when it is confident.

    Falls back to "unknown" when the top score is below AUTO_APPROVE_THRESHOLD,
    or when the gap between the top two scores is below AMBIGUITY_GAP_THRESHOLD
    and UNKNOWN_ON_AMBIGUOUS is enabled.
    """
    ranked_scores = sorted(result["all_scores"].items(), key=lambda item: item[1], reverse=True)
    top_class, top_score = ranked_scores[0]
    second_score = ranked_scores[1][1] if len(ranked_scores) > 1 else top_score
    confidence_gap = float(top_score) - float(second_score)

    if float(result["confidence"]) < AUTO_APPROVE_THRESHOLD:
        return "unknown"
    if UNKNOWN_ON_AMBIGUOUS and confidence_gap < AMBIGUITY_GAP_THRESHOLD:
        return "unknown"
    return top_class
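# Worked example (hypothetical scores): with the default thresholds, a result
# of {"plastic": 0.90, "paper": 0.80, ...} clears AUTO_APPROVE_THRESHOLD
# (0.90 >= 0.85), but the 0.10 top-two gap is below AMBIGUITY_GAP_THRESHOLD
# (0.15), so choose_decision_class() returns "unknown".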


def save_confusion_matrix(y_true: list[str], y_pred: list[str], output_path: Path) -> None:
    """Render a row-normalized confusion matrix and save it to output_path."""
    cm = confusion_matrix(y_true, y_pred, labels=EVAL_LABELS, normalize="true")
    # Labels absent from y_true produce all-NaN rows under normalize="true";
    # zero them so the cell annotations below render cleanly.
    cm = np.nan_to_num(cm)
    fig, ax = plt.subplots(figsize=(8, 7))
    image = ax.imshow(cm, cmap="Blues", vmin=0, vmax=1)
    ax.set_xticks(range(len(EVAL_LABELS)))
    ax.set_yticks(range(len(EVAL_LABELS)))
    ax.set_xticklabels(EVAL_LABELS, rotation=45, ha="right")
    ax.set_yticklabels(EVAL_LABELS)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title("Local Field-Test Confusion Matrix")
    plt.colorbar(image, ax=ax)

    for row in range(len(EVAL_LABELS)):
        for col in range(len(EVAL_LABELS)):
            value = cm[row, col]
            ax.text(
                col,
                row,
                f"{value:.2f}",
                ha="center",
                va="center",
                fontsize=8,
                color="white" if value > 0.5 else "black",
            )

    plt.tight_layout()
    fig.savefig(output_path, dpi=150)
    plt.close(fig)


def main() -> None:
    parser = argparse.ArgumentParser(description="Evaluate model on a local field-test dataset.")
    parser.add_argument("--data_dir", default="data/local_test")
    parser.add_argument("--output_dir", default="reports/local_eval")
    args = parser.parse_args()

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    predictor = WastePredictor()
    predictor.load()

    samples = collect_images(data_dir)
    predictions = []
    y_true = []
    y_pred = []

    for image_path, actual_label in samples:
        result = predictor.predict(image_path.read_bytes())
        predicted_label = normalize_label(choose_decision_class(result))

        y_true.append(actual_label)
        y_pred.append(predicted_label)
        predictions.append(
            {
                "image_path": str(image_path),
                "actual": actual_label,
                "predicted": predicted_label,
                "model_class": result["class"],
                "confidence": result["confidence"],
                "raw_class": result.get("raw_class"),
                "raw_confidence": result.get("raw_confidence"),
                "all_scores": result["all_scores"],
                "correct": actual_label == predicted_label,
            }
        )

    report = classification_report(
        y_true,
        y_pred,
        labels=EVAL_LABELS,
        output_dict=True,
        zero_division=0,
    )
    readable_report = classification_report(
        y_true,
        y_pred,
        labels=EVAL_LABELS,
        zero_division=0,
    )

    (output_dir / "metrics.json").write_text(json.dumps(report, indent=2), encoding="utf-8")
    (output_dir / "classification_report.txt").write_text(readable_report, encoding="utf-8")
    save_confusion_matrix(y_true, y_pred, output_dir / "confusion_matrix.png")

    with (output_dir / "predictions.csv").open("w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(
            file,
            fieldnames=[
                "image_path",
                "actual",
                "predicted",
                "model_class",
                "confidence",
                "raw_class",
                "raw_confidence",
                "correct",
            ],
        )
        writer.writeheader()
        for row in predictions:
            writer.writerow({key: row.get(key) for key in writer.fieldnames})

    mistakes = [row for row in predictions if not row["correct"]]
    (output_dir / "mistakes.json").write_text(json.dumps(mistakes, indent=2), encoding="utf-8")

    print(readable_report)
    print(f"\nEvaluated images : {len(samples)}")
    print(f"Mistakes         : {len(mistakes)}")
    print(f"Output dir       : {output_dir.resolve()}")


if __name__ == "__main__":
    main()