Spaces:

budijuarto
/

egg-damage-top3-classifier

Sleeping

File size: 10,442 Bytes

0258b57

from __future__ import annotations

from pathlib import Path
from typing import Any, Iterable

import matplotlib

matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image, ImageOps
from sklearn.calibration import calibration_curve
from sklearn.metrics import auc, precision_recall_curve, roc_curve

from .data_discovery import CANONICAL_LABELS


sns.set_theme(style="whitegrid", context="notebook")


def markdown_table(df: pd.DataFrame) -> str:
    if df.empty:
        return "_No rows._"
    safe = df.copy()
    safe = safe.fillna("")
    headers = [str(col) for col in safe.columns]
    lines = ["| " + " | ".join(headers) + " |", "| " + " | ".join(["---"] * len(headers)) + " |"]
    for row in safe.itertuples(index=False):
        values = [str(value).replace("\n", " ") for value in row]
        lines.append("| " + " | ".join(values) + " |")
    return "\n".join(lines)


def _savefig(path: str | Path) -> None:
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(path, dpi=160, bbox_inches="tight")
    plt.close()


def plot_class_distribution(df: pd.DataFrame, output_path: str | Path) -> None:
    plt.figure(figsize=(8, 4.8))
    order = ["train", "val", "test"]
    sns.countplot(data=df, x="split", hue="label", order=[s for s in order if s in set(df["split"])])
    plt.title("Class Distribution by Split")
    plt.xlabel("Split")
    plt.ylabel("Images")
    _savefig(output_path)


def plot_confusion_matrix(
    matrix: np.ndarray,
    output_path: str | Path,
    title: str,
    class_names: Iterable[str] = CANONICAL_LABELS,
) -> None:
    plt.figure(figsize=(5.6, 4.8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=list(class_names),
        yticklabels=list(class_names),
        cbar=False,
    )
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    _savefig(output_path)


def plot_roc_curve_single(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    output_path: str | Path,
    title: str,
) -> float | None:
    if len(np.unique(y_true)) < 2:
        return None
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    plt.figure(figsize=(5.8, 4.8))
    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}", linewidth=2)
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
    plt.title(title)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    _savefig(output_path)
    return float(roc_auc)


def plot_precision_recall_curve_single(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    output_path: str | Path,
    title: str,
) -> float | None:
    if len(np.unique(y_true)) < 2:
        return None
    precision, recall, _ = precision_recall_curve(y_true, y_prob)
    pr_auc = auc(recall, precision)
    plt.figure(figsize=(5.8, 4.8))
    plt.plot(recall, precision, label=f"PR AUC = {pr_auc:.3f}", linewidth=2)
    plt.title(title)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend(loc="lower left")
    _savefig(output_path)
    return float(pr_auc)


def plot_combined_roc(prediction_frames: list[tuple[str, pd.DataFrame]], output_path: str | Path) -> None:
    plt.figure(figsize=(7.2, 5.6))
    plotted = False
    for model_name, frame in prediction_frames:
        y_true = frame["y_true"].to_numpy()
        y_prob = frame["prob_damaged"].to_numpy()
        if len(np.unique(y_true)) < 2:
            continue
        fpr, tpr, _ = roc_curve(y_true, y_prob)
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, linewidth=2, label=f"{model_name} ({roc_auc:.3f})")
        plotted = True
    if not plotted:
        plt.text(0.5, 0.5, "ROC unavailable: only one class present", ha="center", va="center")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
    plt.title("Test ROC Comparison")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right", fontsize=8)
    _savefig(output_path)


def plot_metric_bars(metrics_df: pd.DataFrame, output_path: str | Path) -> None:
    metric_cols = ["accuracy", "precision", "recall", "f1", "roc_auc", "balanced_accuracy"]
    test_df = metrics_df[metrics_df["split"] == "test"].copy()
    if test_df.empty:
        test_df = metrics_df.copy()
    melt = test_df.melt(id_vars=["model_name"], value_vars=metric_cols, var_name="metric", value_name="score")
    plt.figure(figsize=(11, 6))
    sns.barplot(data=melt, x="metric", y="score", hue="model_name")
    plt.ylim(0, 1.02)
    plt.title("Model Metrics Comparison")
    plt.xlabel("")
    plt.ylabel("Score")
    plt.xticks(rotation=25, ha="right")
    plt.legend(loc="lower right", fontsize=8)
    _savefig(output_path)


def plot_training_curves(history_df: pd.DataFrame, output_path: str | Path, model_name: str) -> None:
    if history_df.empty:
        return
    fig, axes = plt.subplots(1, 2, figsize=(11, 4.5))
    axes[0].plot(history_df["epoch"], history_df["train_loss"], marker="o", label="Train")
    axes[0].plot(history_df["epoch"], history_df["val_loss"], marker="o", label="Validation")
    axes[0].set_title(f"{model_name}: Loss")
    axes[0].set_xlabel("Epoch")
    axes[0].set_ylabel("Loss")
    axes[0].legend()
    axes[1].plot(history_df["epoch"], history_df["train_accuracy"], marker="o", label="Train")
    axes[1].plot(history_df["epoch"], history_df["val_accuracy"], marker="o", label="Validation")
    if "val_f1" in history_df:
        axes[1].plot(history_df["epoch"], history_df["val_f1"], marker="s", label="Val F1")
    axes[1].set_title(f"{model_name}: Accuracy / F1")
    axes[1].set_xlabel("Epoch")
    axes[1].set_ylabel("Score")
    axes[1].set_ylim(0, 1.02)
    axes[1].legend()
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(output_path, dpi=160, bbox_inches="tight")
    plt.close(fig)


def plot_sample_grid(
    pred_df: pd.DataFrame,
    output_path: str | Path,
    title: str,
    max_images: int = 12,
) -> None:
    sample = pred_df.head(max_images).copy()
    cols = min(4, max(len(sample), 1))
    rows = int(np.ceil(max(len(sample), 1) / cols))
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 3.2, rows * 3.4))
    axes_arr = np.asarray(axes).reshape(-1)
    for ax in axes_arr:
        ax.axis("off")
    if sample.empty:
        axes_arr[0].text(0.5, 0.5, "No samples", ha="center", va="center")
    for ax, row in zip(axes_arr, sample.itertuples(index=False)):
        img = Image.open(row.filepath)
        img = ImageOps.exif_transpose(img).convert("RGB")
        ax.imshow(img)
        ax.set_title(
            f"T: {row.label}\nP: {row.pred_label} ({row.confidence:.2f})",
            fontsize=9,
        )
        ax.axis("off")
    fig.suptitle(title, fontsize=13)
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fig.tight_layout()
    fig.savefig(output_path, dpi=160, bbox_inches="tight")
    plt.close(fig)


def plot_calibration(
    y_true: np.ndarray,
    y_prob: np.ndarray,
    output_path: str | Path,
    title: str,
) -> None:
    if len(np.unique(y_true)) < 2:
        return
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=8, strategy="uniform")
    plt.figure(figsize=(5.8, 4.8))
    plt.plot(prob_pred, prob_true, marker="o", linewidth=2)
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
    plt.title(title)
    plt.xlabel("Mean Predicted Probability")
    plt.ylabel("Fraction of Positives")
    _savefig(output_path)


def write_markdown_report(
    config: dict[str, Any],
    splits_df: pd.DataFrame,
    metrics_df: pd.DataFrame,
    leaderboard_df: pd.DataFrame,
    misclassified_df: pd.DataFrame,
    output_path: str | Path,
) -> None:
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    best = leaderboard_df.iloc[0].to_dict() if not leaderboard_df.empty else {}
    split_summary = markdown_table(
        splits_df.groupby(["split", "label"]).size().unstack(fill_value=0).reset_index()
    )
    metric_cols = [
        "model_name",
        "split",
        "accuracy",
        "precision",
        "recall",
        "f1",
        "roc_auc",
        "balanced_accuracy",
        "specificity",
        "sensitivity",
    ]
    metrics_md = markdown_table(metrics_df[[c for c in metric_cols if c in metrics_df.columns]])
    error_text = "No misclassified test samples were recorded."
    if not misclassified_df.empty:
        by_model = misclassified_df.groupby("model_name").size().sort_values(ascending=False)
        examples = misclassified_df.head(8)[["model_name", "label", "pred_label", "confidence", "filepath"]]
        error_text = (
            "Misclassifications by model:\n\n"
            + markdown_table(by_model.reset_index(name="count"))
            + "\n\nExample errors:\n\n"
            + markdown_table(examples)
        )
    content = f"""# Egg Damage Classification Report

## Dataset Overview

- Dataset path: `{config['paths']['data_dir']}`
- Task: binary classification, `Damaged` vs `Not Damaged`
- Split strategy: existing split folders are respected; otherwise stratified 70/15/15 splitting is used.
- Training balance: `{config.get('balance', {}).get('strategy', 'disabled')}`.

## Split Summary

{split_summary}

## Preprocessing

- Classical models: grayscale resize to {config['preprocessing']['image_size']}x{config['preprocessing']['image_size']}, HOG or LBP features, standardized SVM inputs.
- Deep models: ImageNet normalization, realistic train-only flips, small rotations, mild affine jitter, and light color jitter.
- SVM training curves are marked N/A because these models are not epoch-trained.

## Metrics

{metrics_md}

## Best Model

- Model: `{best.get('model_name', 'N/A')}`
- Test F1: `{best.get('f1', 'N/A')}`
- Test ROC-AUC: `{best.get('roc_auc', 'N/A')}`
- Test balanced accuracy: `{best.get('balanced_accuracy', 'N/A')}`
- Model path: `{best.get('model_path', 'N/A')}`

## Error Patterns

{error_text}

## Deployment

Run `python scripts/launch_gradio.py --config configs/default.yaml` to launch the local Gradio app. The app loads the best ranked model automatically and can switch among trained models.
"""
    output_path.write_text(content, encoding="utf-8")