"""
로컬(dist) 또는 Hugging Face Hub에서 모델/프로세서를 로드해
MNIST test set을 평가(evaluate)하는 스크립트.

사용법
- 로컬(dist)에서 평가:
  python -m examples.eval --source local --path dist/my-mnist-hf

- Hub에서 평가(공개 repo):
  python -m examples.eval --source hub --path YOUR_ID/my-mnist-hf

- Hub에서 평가(private repo 또는 토큰 강제):
  export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx
  python -m examples.eval --source hub --path YOUR_ID/my-mnist-hf --use-token

옵션
- 일부만 빠르게:
  python -m examples.eval --source local --path dist/my-mnist-hf --limit 1000
"""
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| from pathlib import Path |
| from typing import Tuple |
|
|
| import torch |
| from torch.utils.data import DataLoader |
| from torchvision.datasets import MNIST |
| from transformers import AutoImageProcessor, AutoModelForImageClassification |
|
|
|
|
| def _resolve_token(use_token: bool) -> str | None: |
| if not use_token: |
| return None |
| token = os.environ.get("HF_TOKEN") |
| if not token: |
| raise RuntimeError( |
| "use_token=True 인데 환경변수 HF_TOKEN이 없습니다.\n" |
| "예) export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx" |
| ) |
| return token |
|
|
|
|
| def _validate_local_dir(path: str) -> None: |
| p = Path(path) |
| if not p.exists(): |
| raise FileNotFoundError(f"Local path not found: {p.resolve()}") |
| if not p.is_dir(): |
| raise NotADirectoryError(f"Local path is not a directory: {p.resolve()}") |
|
|
|
|
@torch.no_grad()
def evaluate(
    *,
    source: str,
    path: str,
    data_dir: str,
    batch_size: int,
    num_workers: int,
    limit: int,
    use_token: bool,
    device: str,
) -> None:
    """Evaluate the MNIST test set and print average loss + accuracy.

    Loads the processor/model from a local directory (``source='local'``)
    or a Hub repo id (``source='hub'``), then runs the full (or
    ``limit``-truncated) MNIST test split through the model.
    """
    source = source.lower().strip()
    if source not in ("local", "hub"):
        raise ValueError("--source must be one of {'local', 'hub'}")

    if source == "local":
        _validate_local_dir(path)

    # A token is only relevant for Hub downloads (private repos / forced auth).
    token = _resolve_token(use_token) if source == "hub" else None

    # Processor and model come from the same path (local dir or Hub repo id).
    processor = AutoImageProcessor.from_pretrained(
        path, trust_remote_code=True, token=token
    )
    model = AutoModelForImageClassification.from_pretrained(
        path, trust_remote_code=True, token=token
    )
    model.eval()

    # Resolve the torch device; 'auto' prefers CUDA when available.
    if device == "auto":
        dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        dev = torch.device(device)
    model.to(dev)

    # Raw test split (PIL images); the processor handles tensor conversion.
    ds = MNIST(root=data_dir, train=False, download=True)
    if limit > 0:
        # Evaluate only the first `limit` samples for a quick smoke run.
        ds = torch.utils.data.Subset(ds, range(min(limit, len(ds))))

    def collate_fn(batch) -> Tuple[dict, torch.Tensor]:
        # batch: list of (PIL image, int label) pairs from MNIST.
        images, labels = zip(*batch)
        encoded = processor(list(images), return_tensors="pt")
        return encoded, torch.tensor(labels, dtype=torch.long)

    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=(dev.type == "cuda"),
        collate_fn=collate_fn,
    )

    total = 0
    correct = 0
    loss_sum = 0.0

    for step, (batch, y) in enumerate(loader, start=1):
        batch = {k: v.to(dev) for k, v in batch.items()}
        y = y.to(dev)

        # Passing labels makes the model compute its own loss.
        out = model(**batch, labels=y)

        pred = out.logits.argmax(dim=-1)
        correct += (pred == y).sum().item()
        total += y.numel()
        # Weight by batch size so the final mean is per-sample.
        loss_sum += out.loss.item() * y.size(0)

        if step % 50 == 0:
            acc = correct / total if total else 0.0
            avg_loss = loss_sum / total if total else 0.0
            print(f"[step {step:4d}] running acc={acc:.4f}, avg_loss={avg_loss:.4f}")

    acc = correct / total if total else 0.0
    avg_loss = loss_sum / total if total else 0.0

    print("=" * 60)
    print("Evaluation Done")
    print(f"source : {source}")
    print(f"path : {path}")
    print(f"data_dir : {Path(data_dir).resolve()}")
    print(f"device : {dev}")
    print(f"num_samples: {total}")
    print(f"avg_loss : {avg_loss:.6f}")
    print(f"accuracy : {acc:.6f}")
    print("=" * 60)
|
|
|
|
def build_argparser() -> argparse.ArgumentParser:
    """Build the command-line parser for the evaluation script."""
    parser = argparse.ArgumentParser(description="Evaluate MNIST test set for local(dist) or hub.")
    parser.add_argument("--source", choices=["local", "hub"], required=True)
    parser.add_argument("--path", required=True)
    parser.add_argument("--data-dir", default="data", help="MNIST 다운로드/캐시 폴더. 기본: data")
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--num-workers", type=int, default=2)
    parser.add_argument("--limit", type=int, default=0, help="0이면 전체 평가, 양수면 앞에서 limit개만 평가")
    parser.add_argument("--use-token", action="store_true")
    parser.add_argument("--device", default="auto", help="auto | cpu | cuda | cuda:0 같은 torch device string")
    return parser
|
|
|
|
def main() -> None:
    """CLI entry point: parse arguments and run the evaluation."""
    ns = build_argparser().parse_args()
    evaluate(
        source=ns.source,
        path=ns.path,
        data_dir=ns.data_dir,
        batch_size=ns.batch_size,
        num_workers=ns.num_workers,
        limit=ns.limit,
        use_token=bool(ns.use_token),
        device=ns.device,
    )


if __name__ == "__main__":
    main()
|
|