dsaint31 committed on
Commit
6d79480
·
verified ·
1 Parent(s): fab639f

Release custom MNIST model

Browse files
__init__.py CHANGED
@@ -18,7 +18,8 @@ AutoModelForImageClassification.register(
18
  try:
19
  AutoImageProcessor.register(
20
  MyMNISTConfig,
21
- MyMNISTImageProcessor,
 
22
  exist_ok=True
23
  )
24
  except TypeError:
 
18
  try:
19
  AutoImageProcessor.register(
20
  MyMNISTConfig,
21
+ # MyMNISTImageProcessor, # To avoid FutureWarning
22
+ slow_image_processor_class=MyMNISTImageProcessor,
23
  exist_ok=True
24
  )
25
  except TypeError:
examples/__pycache__/dataset_mnist.cpython-312.pyc CHANGED
Binary files a/examples/__pycache__/dataset_mnist.cpython-312.pyc and b/examples/__pycache__/dataset_mnist.cpython-312.pyc differ
 
examples/__pycache__/infer.cpython-312.pyc CHANGED
Binary files a/examples/__pycache__/infer.cpython-312.pyc and b/examples/__pycache__/infer.cpython-312.pyc differ
 
examples/__pycache__/metrics.cpython-312.pyc CHANGED
Binary files a/examples/__pycache__/metrics.cpython-312.pyc and b/examples/__pycache__/metrics.cpython-312.pyc differ
 
examples/__pycache__/train_local.cpython-312.pyc CHANGED
Binary files a/examples/__pycache__/train_local.cpython-312.pyc and b/examples/__pycache__/train_local.cpython-312.pyc differ
 
examples/eval.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # hf_custom_proj/examples/eval.py
2
+ """
3
+ 로컬(dist) 또는 Hugging Face Hub에서 모델/프로세서를 로드해
4
+ MNIST test set을 평가(evaluate)하는 스크립트.
5
+
6
+ 사용법
7
+ - 로컬(dist)에서 평가:
8
+ python -m examples.eval --source local --path dist/my-mnist-hf
9
+
10
+ - Hub에서 평가(공개 repo):
11
+ python -m examples.eval --source hub --path YOUR_ID/my-mnist-hf
12
+
13
+ - Hub에서 평가(private repo 또는 토큰 강제):
14
+ export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx
15
+ python -m examples.eval --source hub --path YOUR_ID/my-mnist-hf --use-token
16
+
17
+ 옵션
18
+ - 일부만 빠르게:
19
+ python -m examples.eval --source local --path dist/my-mnist-hf --limit 1000
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import argparse
25
+ import os
26
+ from pathlib import Path
27
+ from typing import Tuple
28
+
29
+ import torch
30
+ from torch.utils.data import DataLoader
31
+ from torchvision.datasets import MNIST
32
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
33
+
34
+
35
def _resolve_token(use_token: bool) -> str | None:
    """Return the Hugging Face token from ``HF_TOKEN`` when one is requested.

    Args:
        use_token: When false, no token is looked up and ``None`` is returned.

    Returns:
        The value of the ``HF_TOKEN`` environment variable with surrounding
        whitespace removed, or ``None`` when ``use_token`` is false.

    Raises:
        RuntimeError: If ``use_token`` is true but ``HF_TOKEN`` is unset,
            empty, or contains only whitespace.
    """
    if not use_token:
        return None

    # Strip so that a value such as "hf_xxx\n" (e.g. exported from a file) is
    # usable, and a blank value is reported here instead of being sent on as
    # an invalid credential.
    token = os.environ.get("HF_TOKEN", "").strip()
    if not token:
        raise RuntimeError(
            "use_token=True 인데 환경변수 HF_TOKEN이 없습니다.\n"
            "예) export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxx"
        )
    return token
45
+
46
+
47
def _validate_local_dir(path: str) -> None:
    """Ensure ``path`` names an existing directory.

    Args:
        path: Filesystem path to check.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
        NotADirectoryError: If ``path`` exists but is not a directory.
    """
    candidate = Path(path)

    # Happy path first: an existing directory needs no further checks.
    if candidate.is_dir():
        return

    if candidate.exists():
        raise NotADirectoryError(
            f"Local path is not a directory: {candidate.resolve()}"
        )
    raise FileNotFoundError(f"Local path not found: {candidate.resolve()}")
53
+
54
+
55
class _ProcessorCollator:
    """Collate ``(image, label)`` samples into processor output plus labels.

    This lives at module level rather than as a closure inside ``evaluate``
    because ``DataLoader`` workers started with the ``spawn`` method must
    pickle the collate callable, and local functions cannot be pickled.
    """

    def __init__(self, processor) -> None:
        # NOTE(review): the processor itself must also be picklable for
        # spawn-based workers — confirm for custom remote-code processors.
        self.processor = processor

    def __call__(self, batch) -> Tuple[dict, torch.Tensor]:
        images, labels = zip(*batch)
        # All image preprocessing is delegated to the processor.
        features = self.processor(list(images), return_tensors="pt")
        # Labels are passed to the model as int64 class indices.
        targets = torch.tensor(labels, dtype=torch.long)
        return features, targets


def _select_device(device: str) -> torch.device:
    """Turn the device string into a ``torch.device``; ``"auto"`` prefers CUDA."""
    if device == "auto":
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return torch.device(device)


@torch.no_grad()
def evaluate(
    *,
    source: str,
    path: str,
    data_dir: str,
    batch_size: int,
    num_workers: int,
    limit: int,
    use_token: bool,
    device: str,
) -> None:
    """Evaluate a model on the MNIST test set and print mean loss and accuracy.

    Args:
        source: ``"local"`` or ``"hub"`` (case and surrounding whitespace are
            ignored).
        path: Local directory or Hub repo id passed to ``from_pretrained``.
        data_dir: Root directory handed to ``torchvision.datasets.MNIST``.
        batch_size: Batch size for the ``DataLoader``.
        num_workers: Number of ``DataLoader`` worker processes.
        limit: When positive, evaluate only the first ``limit`` samples;
            otherwise evaluate the whole test set.
        use_token: When true and ``source`` is ``"hub"``, read the token from
            the environment via ``_resolve_token``.
        device: ``"auto"`` or any string accepted by ``torch.device``.

    Raises:
        ValueError: If ``source`` is neither ``"local"`` nor ``"hub"``.
        FileNotFoundError, NotADirectoryError: From ``_validate_local_dir``
            when ``source`` is ``"local"`` and ``path`` is not a directory.
        RuntimeError: From ``_resolve_token`` when a token is requested but
            not available.
    """
    source = source.lower().strip()
    if source not in ("local", "hub"):
        raise ValueError("--source must be one of {'local', 'hub'}")

    if source == "local":
        _validate_local_dir(path)

    # A token is only ever resolved for Hub loads.
    token = _resolve_token(use_token) if source == "hub" else None

    # Load processor and model from the same location.
    processor = AutoImageProcessor.from_pretrained(
        path,
        trust_remote_code=True,
        token=token,
    )
    model = AutoModelForImageClassification.from_pretrained(
        path,
        trust_remote_code=True,
        token=token,
    )
    model.eval()

    dev = _select_device(device)
    model.to(dev)
    on_cuda = dev.type == "cuda"

    # MNIST test split, optionally truncated to the first `limit` samples.
    ds = MNIST(root=data_dir, train=False, download=True)
    if limit > 0:
        ds = torch.utils.data.Subset(ds, range(min(limit, len(ds))))

    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=on_cuda,
        collate_fn=_ProcessorCollator(processor),
    )

    # Running totals across all batches.
    total = 0
    correct = 0
    loss_sum = 0.0

    for step, (batch, y) in enumerate(loader, start=1):
        # Batches are pinned when on CUDA, so the copies can be asynchronous.
        batch = {k: v.to(dev, non_blocking=on_cuda) for k, v in batch.items()}
        y = y.to(dev, non_blocking=on_cuda)

        # Passing labels makes the model return a loss alongside the logits.
        out = model(**batch, labels=y)
        loss = out.loss
        logits = out.logits

        pred = logits.argmax(dim=-1)
        correct += (pred == y).sum().item()
        total += y.numel()
        # Weight the batch loss by batch size so the final average is per sample.
        loss_sum += loss.item() * y.size(0)

        if step % 50 == 0:
            acc = correct / total if total else 0.0
            avg_loss = loss_sum / total if total else 0.0
            print(f"[step {step:4d}] running acc={acc:.4f}, avg_loss={avg_loss:.4f}")

    acc = correct / total if total else 0.0
    avg_loss = loss_sum / total if total else 0.0

    print("=" * 60)
    print("Evaluation Done")
    print(f"source : {source}")
    print(f"path : {path}")
    print(f"data_dir : {Path(data_dir).resolve()}")
    print(f"device : {dev}")
    print(f"num_samples: {total}")
    print(f"avg_loss : {avg_loss:.6f}")
    print(f"accuracy : {acc:.6f}")
    print("=" * 60)
163
+
164
+
165
def build_argparser() -> argparse.ArgumentParser:
    """Create the command-line parser for the evaluation script.

    Returns:
        A parser with the required ``--source`` and ``--path`` options and
        the optional ``--data-dir``, ``--batch-size``, ``--num-workers``,
        ``--limit``, ``--use-token`` and ``--device`` options.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate MNIST test set for local(dist) or hub."
    )

    # (flag, add_argument keyword options), in the order shown by --help.
    option_table = (
        ("--source", {"choices": ["local", "hub"], "required": True}),
        ("--path", {"required": True}),
        ("--data-dir", {"default": "data", "help": "MNIST 다운로드/캐시 폴더. 기본: data"}),
        ("--batch-size", {"type": int, "default": 256}),
        ("--num-workers", {"type": int, "default": 2}),
        ("--limit", {"type": int, "default": 0, "help": "0이면 전체 평가, 양수면 앞에서 limit개만 평가"}),
        ("--use-token", {"action": "store_true"}),
        ("--device", {"default": "auto", "help": "auto | cpu | cuda | cuda:0 같은 torch device string"}),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser
180
+
181
+
182
def main() -> None:
    """Parse the command line and run ``evaluate`` with the parsed options."""
    ns = build_argparser().parse_args()

    # Map parsed attributes onto evaluate()'s keyword-only parameters.
    settings = {
        "source": ns.source,
        "path": ns.path,
        "data_dir": ns.data_dir,
        "batch_size": ns.batch_size,
        "num_workers": ns.num_workers,
        "limit": ns.limit,
        "use_token": bool(ns.use_token),
        "device": ns.device,
    }
    evaluate(**settings)


# Run the CLI only when executed as a script or via ``python -m``.
if __name__ == "__main__":
    main()
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8254addc5e867aedf73e99ef4998482b7b25c4dd2e97436aabf5baf264717a5
3
  size 1302056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76fa806ff07f1193e9f4bd00a0c80e3711ea506920a5f73cda09fb9c1e2aafb
3
  size 1302056