File size: 4,930 Bytes
0e1a1b4
 
 
762e478
0e1a1b4
cd3dc59
 
0e1a1b4
 
cd3dc59
0e1a1b4
 
 
 
 
 
 
 
cd3dc59
0e1a1b4
762e478
0e1a1b4
 
 
 
 
 
 
 
 
 
cd3dc59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e1a1b4
cd3dc59
0e1a1b4
cd3dc59
0e1a1b4
 
 
 
cd3dc59
0e1a1b4
 
cd3dc59
 
0e1a1b4
 
 
cd3dc59
0e1a1b4
 
cd3dc59
0e1a1b4
 
 
 
 
cd3dc59
762e478
 
 
0e1a1b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd3dc59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e1a1b4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Evaluate best (or last) checkpoint on the test set.
Reports genre, style, artist top-1 and artist top-5 accuracy.
Usage: python scripts/eval_cnn.py [--arch cnn|cnnrnn] [--last]
"""
from __future__ import annotations

import sys
from pathlib import Path
from typing import Any, Dict, TypedDict

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset

ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT / "src"))

from config import INDEX_SELECTED, WIKIART_ROOT, checkpoint_dir_for_arch, N_STYLE, N_ARTIST, N_GENRE, BATCH_SIZE
from dataset import WikiArtDataset
from model import ResNet50BiLSTMThreeHeads, ResNet50ThreeHeads

# Reuse the exact train/val/test split and eval-time transforms from the
# training script so reported test metrics match the training setup.
# scripts/ is not a package, so train_cnn.py is loaded by file path.
import importlib.util
spec = importlib.util.spec_from_file_location("train_cnn", ROOT / "scripts" / "train_cnn.py")
train_cnn = importlib.util.module_from_spec(spec)
spec.loader.exec_module(train_cnn)  # runs train_cnn's module level; any import-time side effects execute here
get_transforms = train_cnn.get_transforms  # called with train=False below for deterministic preprocessing
stratified_split = train_cnn.stratified_split  # returns (train, val, test) index lists -- presumably deterministic; confirm in train_cnn.py

class EvalMetrics(TypedDict):
    """Result bundle returned by compute_test_metrics().

    Accuracy fields are fractions in [0, 1]; consumers (e.g. the model-card
    exporter) format them for display.
    """

    arch: str             # architecture actually stored in the checkpoint ("cnn" or "cnnrnn")
    checkpoint_name: str  # "best.pt" or "last.pt"
    checkpoint_path: str  # absolute path of the evaluated checkpoint
    epoch: Any            # value of ckpt["epoch"], or None if absent
    test_n: int           # number of test samples evaluated
    genre_top1: float     # top-1 genre accuracy
    style_top1: float     # top-1 style accuracy
    artist_top1: float    # top-1 artist accuracy
    artist_top5: float    # top-5 artist accuracy


def _select_device() -> torch.device:
    """Pick the best available device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    # getattr guard: the mps backend attribute is missing on older torch builds.
    if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


def compute_test_metrics(*, arch: str, last: bool = False) -> EvalMetrics:
    """
    Run the same evaluation as the CLI and return metrics as floats in [0, 1].
    Used by scripts/export_hf_model_card.py for Hub model card YAML.

    Args:
        arch: Architecture whose checkpoint directory is searched ("cnn" or "cnnrnn").
        last: Evaluate last.pt instead of best.pt.

    Returns:
        EvalMetrics with top-1 genre/style/artist and top-5 artist accuracy.

    Raises:
        FileNotFoundError: If the checkpoint, index CSV, or wikiart root is missing.
        RuntimeError: If the test split is empty.
    """
    ckpt_name = "last.pt" if last else "best.pt"
    ckpt_path = checkpoint_dir_for_arch(arch) / ckpt_name
    if not ckpt_path.exists():
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")
    if not INDEX_SELECTED.exists() or not WIKIART_ROOT.exists():
        raise FileNotFoundError("Index or wikiart root missing.")

    device = _select_device()

    # weights_only=False: the checkpoint stores plain metadata (counts, epoch)
    # alongside the state dict. Only load checkpoints you trust.
    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    n_genre = ckpt["n_genre"]
    n_style = ckpt["n_style"]
    n_artist = ckpt["n_artist"]
    # Prefer the architecture recorded in the checkpoint over the CLI flag.
    ckpt_arch = ckpt.get("arch", arch)

    # Local import keeps pandas off the import path of callers that never run eval.
    import pandas as pd

    df = pd.read_csv(INDEX_SELECTED)
    _, _, idx_test = stratified_split(df)
    ds = WikiArtDataset(INDEX_SELECTED, WIKIART_ROOT, transform=get_transforms(train=False))
    test_loader = DataLoader(Subset(ds, idx_test), batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    if ckpt_arch == "cnnrnn":
        model = ResNet50BiLSTMThreeHeads(n_genre=n_genre, n_style=n_style, n_artist=n_artist).to(device)
    else:
        model = ResNet50ThreeHeads(n_genre=n_genre, n_style=n_style, n_artist=n_artist).to(device)
    model.load_state_dict(ckpt["model_state_dict"])
    model.eval()

    correct_g = correct_s = correct_a = correct_a5 = total = 0
    with torch.no_grad():
        for images, style_id, artist_id, genre_id in test_loader:
            images = images.to(device)
            style_id = style_id.to(device)
            artist_id = artist_id.to(device)
            genre_id = genre_id.to(device)
            # Model returns one logits tensor per head: (genre, style, artist).
            logits_g, logits_s, logits_a = model(images)
            n = images.size(0)
            total += n
            correct_g += (logits_g.argmax(1) == genre_id).sum().item()
            correct_s += (logits_s.argmax(1) == style_id).sum().item()
            correct_a += (logits_a.argmax(1) == artist_id).sum().item()
            # Top-5 artist accuracy: hit if the true id appears anywhere in the top 5.
            _, top5 = logits_a.topk(5, dim=1)
            correct_a5 += (top5 == artist_id.unsqueeze(1)).any(1).sum().item()

    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # which would turn an empty split into a ZeroDivisionError below.
    if total == 0:
        raise RuntimeError("Test split is empty; no samples were evaluated.")
    return {
        "arch": str(ckpt_arch),
        "checkpoint_name": ckpt_name,
        "checkpoint_path": str(ckpt_path),
        "epoch": ckpt.get("epoch", None),
        "test_n": int(total),
        "genre_top1": correct_g / total,
        "style_top1": correct_s / total,
        "artist_top1": correct_a / total,
        "artist_top5": correct_a5 / total,
    }


def main() -> None:
    """CLI entry point: evaluate a checkpoint and print test-set accuracies."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--arch", type=str, default="cnn", choices=["cnn", "cnnrnn"], help="Model architecture")
    parser.add_argument("--last", action="store_true", help="Evaluate last.pt instead of best.pt")
    ns = parser.parse_args()

    try:
        metrics = compute_test_metrics(arch=ns.arch, last=ns.last)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}")
        sys.exit(1)

    header = (
        f"Arch: {metrics['arch']}  Checkpoint: {metrics['checkpoint_name']}  "
        f"(epoch {metrics['epoch']!r})  Test n={metrics['test_n']}"
    )
    print(header)
    # Label/key pairs keep the four accuracy lines in one formatting path.
    for label, key in (
        ("genre acc (top-1)", "genre_top1"),
        ("style acc (top-1)", "style_top1"),
        ("artist acc (top-1)", "artist_top1"),
        ("artist acc (top-5)", "artist_top5"),
    ):
        print(f"  {label}: {metrics[key]:.2%}")


if __name__ == "__main__":
    main()