JangTaeng committed on
Commit
62c8941
·
verified ·
1 Parent(s): 63e7e05

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +70 -39
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,17 +2,24 @@
2
  AlexNet — 허깅페이스 Spaces 데모
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
 
 
 
 
 
5
  실행: Spaces에서 자동 실행 (app.py 이름 필수)
6
- 로컬: pip install gradio torch pillow torchvision
7
  python app.py
8
  """
9
 
10
  import json
 
11
  import torch
12
  import torch.nn as nn
 
 
13
  import gradio as gr
14
  from PIL import Image
15
- import torchvision.transforms as T
16
 
17
 
18
  # ──────────────────────────────────────────────────────────────
@@ -22,16 +29,12 @@ import torchvision.transforms as T
22
  class ConvBlock(nn.Module):
23
  """
24
  groups 인자로 논문의 GPU 분할 전략을 제어하는 범용 블록.
 
 
25
 
26
- groups=1 : cross-GPU — Conv1·Conv3 (전체 채널 연결)
27
- groups=2 : parallel — Conv2·Conv4·Conv5 (채널을 반씩 독립 연산)
28
-
29
- Conv1이 groups=1인 이유:
30
- in_channels=3(RGB)는 groups=2로 나눌 수 없음 (3 % 2 != 0).
31
-
32
- padding 계산 근거 (Conv1):
33
- padding=0 → (224-11)/4+1 = 54.25 → 내림 54 → Pool 후 26 → ... → FC 입력 6400 (오류)
34
- padding=2 → (224-11+4)/4+1 = 55 → Pool 후 27 → ... → FC 입력 9216 (정상)
35
  """
36
  def __init__(self, in_ch, out_ch, kernel_size,
37
  stride=1, padding=0, groups=1,
@@ -68,20 +71,11 @@ class AlexNet(nn.Module):
68
  """
69
  def __init__(self, num_labels=1000, dropout=0.5):
70
  super().__init__()
71
-
72
- # Conv1: padding=2 필수 — 55x55 출력 보장
73
- self.conv1 = ConvBlock(
74
- 3, 96, 11, stride=4, padding=2, groups=1,
75
- use_lrn=True, use_pool=True,
76
- )
77
- self.conv2 = ConvBlock(
78
- 96, 256, 5, padding=2, groups=2,
79
- use_lrn=True, use_pool=True,
80
- )
81
- self.conv3 = ConvBlock(256, 384, 3, padding=1, groups=1)
82
- self.conv4 = ConvBlock(384, 384, 3, padding=1, groups=2)
83
- self.conv5 = ConvBlock(384, 256, 3, padding=1, groups=2, use_pool=True)
84
-
85
  self.classifier = nn.Sequential(
86
  nn.Dropout(p=dropout),
87
  nn.Linear(256 * 6 * 6, 4096),
@@ -114,24 +108,60 @@ class AlexNet(nn.Module):
114
 
115
 
116
  # ──────────────────────────────────────────────────────────────
117
- # 2. 모델 + 클래스 레이블 로드
118
  # ──────────────────────────────────────────────────────────────
119
 
120
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
123
  model.eval()
124
 
 
 
 
 
 
 
 
 
 
125
  try:
126
  with open("config.json") as f:
127
  cfg = json.load(f)
128
  ID2LABEL = {int(k): v for k, v in cfg.get("id2label", {}).items()}
 
 
129
  except Exception:
130
- ID2LABEL = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
 
133
  # ──────────────────────────────────────────────────────────────
134
- # 3. 전처리 파이프라인
 
135
  # ──────────────────────────────────────────────────────────────
136
 
137
  TRANSFORM = T.Compose([
@@ -144,7 +174,7 @@ TRANSFORM = T.Compose([
144
 
145
 
146
  # ──────────────────────────────────────────────────────────────
147
- # 4. 추론 함수
148
  # ──────────────────────────────────────────────────────────────
149
 
150
  def predict(image: Image.Image) -> dict:
@@ -162,16 +192,19 @@ def predict(image: Image.Image) -> dict:
162
 
163
 
164
  # ──────────────────────────────────────────────────────────────
165
- # 5. Gradio UI
166
  # ──────────────────────────────────────────────────────────────
167
 
 
 
 
168
  with gr.Blocks(title="AlexNet β€” λ…Όλ¬Έ μž¬ν˜„") as demo:
169
- gr.Markdown("""
170
  ## AlexNet β€” λ…Όλ¬Έ μ™„μ „ μž¬ν˜„ 데λͺ¨
171
  **λ…Όλ¬Έ**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)
172
 
173
- > 이미지λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ Top-5 클래슀λ₯Ό μ˜ˆμΈ‘ν•©λ‹ˆλ‹€.
174
- > β€» ν˜„μž¬ λͺ¨λΈμ€ 랜덀 μ΄ˆκΈ°ν™” μƒνƒœμž…λ‹ˆλ‹€. ImageNet ν•™μŠ΅ κ°€μ€‘μΉ˜λ₯Ό λ‘œλ“œν•˜λ©΄ μ‹€μ œ 예츑이 κ°€λŠ₯ν•©λ‹ˆλ‹€.
175
  """)
176
 
177
  with gr.Row():
@@ -185,15 +218,13 @@ with gr.Blocks(title="AlexNet β€” λ…Όλ¬Έ μž¬ν˜„") as demo:
185
  gr.Markdown("""
186
  | λ ˆμ΄μ–΄ | 좜λ ₯ shape | padding | groups | λΉ„κ³  |
187
  |--------|-----------------|---------|--------|------|
188
- | Conv1 | (B, 96, 55β†’27) | **2** | 1 | 11x11 stride4, LRN, MaxPool |
189
  | Conv2 | (B, 256, 27β†’13) | 2 | 2 | 5x5, LRN, MaxPool |
190
  | Conv3 | (B, 384, 13) | 1 | 1 | 3x3, cross-GPU |
191
  | Conv4 | (B, 384, 13) | 1 | 2 | 3x3 |
192
  | Conv5 | (B, 256, 13β†’6) | 1 | 2 | 3x3, MaxPool |
193
- | FC1Β·2 | (B, 4096) | β€” | β€” | Dropout 0.5 |
194
- | FC3 | (B, 1000) | β€” | β€” | 좜λ ₯μΈ΅ |
195
-
196
- **μˆ˜μ • 포인트**: Conv1 padding=0이면 좜λ ₯ 54(λ‚΄λ¦Ό)β†’FC μž…λ ₯ 6400 였λ₯˜. padding=2둜 55β†’FC μž…λ ₯ 9216 정상.
197
  """)
198
 
199
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
 
2
  AlexNet — 허깅페이스 Spaces 데모
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
5
+ 변경사항:
6
+ - ImageNet id2label 자동 로드 (ViT config에서 가져옴)
7
+ - torchvision 사전학습 가중치 (FC 레이어) 로드
8
+ - 가중치 로드 실패 시 랜덤 초기화로 폴백
9
+
10
  실행: Spaces에서 자동 실행 (app.py 이름 필수)
11
+ 로컬: pip install gradio torch pillow torchvision requests
12
  python app.py
13
  """
14
 
15
  import json
16
+ import requests
17
  import torch
18
  import torch.nn as nn
19
+ import torchvision.models as tv
20
+ import torchvision.transforms as T
21
  import gradio as gr
22
  from PIL import Image
 
23
 
24
 
25
  # ──────────────────────────────────────────────────────────────
 
29
  class ConvBlock(nn.Module):
30
  """
31
  groups 인자로 논문의 GPU 분할 전략을 제어하는 범용 블록.
32
+ groups=1: cross-GPU (전체 채널 연결) — Conv1·Conv3
33
+ groups=2: parallel (채널 반씩 독립) — Conv2·Conv4·Conv5
34
 
35
+ Conv1 padding=2 이유:
36
+ padding=0 → 출력 54(내림) → FC 입력 6400 오류
37
+ padding=2 → 출력 55(정확) → FC 입력 9216 정상
 
 
 
 
 
 
38
  """
39
  def __init__(self, in_ch, out_ch, kernel_size,
40
  stride=1, padding=0, groups=1,
 
71
  """
72
  def __init__(self, num_labels=1000, dropout=0.5):
73
  super().__init__()
74
+ self.conv1 = ConvBlock( 3, 96, 11, stride=4, padding=2, groups=1, use_lrn=True, use_pool=True)
75
+ self.conv2 = ConvBlock( 96, 256, 5, padding=2, groups=2, use_lrn=True, use_pool=True)
76
+ self.conv3 = ConvBlock(256, 384, 3, padding=1, groups=1)
77
+ self.conv4 = ConvBlock(384, 384, 3, padding=1, groups=2)
78
+ self.conv5 = ConvBlock(384, 256, 3, padding=1, groups=2, use_pool=True)
 
 
 
 
 
 
 
 
 
79
  self.classifier = nn.Sequential(
80
  nn.Dropout(p=dropout),
81
  nn.Linear(256 * 6 * 6, 4096),
 
108
 
109
 
110
  # ──────────────────────────────────────────────────────────────
111
+ # 2. 모델 생성 + 사전학습 가중치 로드
112
  # ──────────────────────────────────────────────────────────────
113
 
114
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
115
+ model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
116
+
117
+ WEIGHTS_LOADED = False
118
+ try:
119
+ # torchvision AlexNet 사전학습 가중치에서 FC 레이어만 복사
120
+ # (Conv 레이어는 groups 구조가 달라 직접 로드 불가)
121
+ pretrained = tv.alexnet(weights=tv.AlexNet_Weights.DEFAULT)
122
+ model.classifier.load_state_dict(pretrained.classifier.state_dict())
123
+ WEIGHTS_LOADED = True
124
+ print("μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜(FC) λ‘œλ“œ μ™„λ£Œ")
125
+ except Exception as e:
126
+ print(f"κ°€μ€‘μΉ˜ λ‘œλ“œ μ‹€νŒ¨, 랜덀 μ΄ˆκΈ°ν™” μœ μ§€: {e}")
127
 
 
128
  model.eval()
129
 
130
+
131
+ # ──────────────────────────────────────────────────────────────
132
+ # 3. ImageNet id2label 로드
133
+ # 우선순위: config.json → ViT config(허깅페이스) → 인덱스 표시
134
+ # ──────────────────────────────────────────────────────────────
135
+
136
+ ID2LABEL = {}
137
+
138
+ # 1순위: config.json
139
  try:
140
  with open("config.json") as f:
141
  cfg = json.load(f)
142
  ID2LABEL = {int(k): v for k, v in cfg.get("id2label", {}).items()}
143
+ if ID2LABEL:
144
+ print(f"config.jsonμ—μ„œ {len(ID2LABEL)}개 클래슀 λ‘œλ“œ")
145
  except Exception:
146
+ pass
147
+
148
+ # 2순위: 허깅페이스 ViT config (ImageNet 1000개 라벨 동일)
149
+ if not ID2LABEL:
150
+ try:
151
+ resp = requests.get(
152
+ "https://huggingface.co/google/vit-base-patch16-224/raw/main/config.json",
153
+ timeout=10,
154
+ )
155
+ vit_cfg = resp.json()
156
+ ID2LABEL = {int(k): v for k, v in vit_cfg.get("id2label", {}).items()}
157
+ print(f"ν—ˆκΉ…νŽ˜μ΄μŠ€μ—μ„œ {len(ID2LABEL)}개 클래슀 λ‘œλ“œ")
158
+ except Exception as e:
159
+ print(f"클래슀 이름 λ‘œλ“œ μ‹€νŒ¨: {e}")
160
 
161
 
162
  # ──────────────────────────────────────────────────────────────
163
+ # 4. 전처리 파이프라인
164
+ # 논문 2절: 256 리사이즈 → 224 center crop → 픽셀 평균 차감
165
  # ──────────────────────────────────────────────────────────────
166
 
167
  TRANSFORM = T.Compose([
 
174
 
175
 
176
  # ──────────────────────────────────────────────────────────────
177
+ # 5. 추론 함수
178
  # ──────────────────────────────────────────────────────────────
179
 
180
  def predict(image: Image.Image) -> dict:
 
192
 
193
 
194
  # ──────────────────────────────────────────────────────────────
195
+ # 6. Gradio UI
196
  # ──────────────────────────────────────────────────────────────
197
 
198
+ weight_status = "FC μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ λ‘œλ“œλ¨ (torchvision)" if WEIGHTS_LOADED else "랜덀 μ΄ˆκΈ°ν™” μƒνƒœ"
199
+ label_status = f"ImageNet {len(ID2LABEL)}개 클래슀 이름 λ‘œλ“œλ¨" if ID2LABEL else "클래슀 이름 μ—†μŒ (인덱슀 ν‘œμ‹œ)"
200
+
201
  with gr.Blocks(title="AlexNet β€” λ…Όλ¬Έ μž¬ν˜„") as demo:
202
+ gr.Markdown(f"""
203
  ## AlexNet β€” λ…Όλ¬Έ μ™„μ „ μž¬ν˜„ 데λͺ¨
204
  **λ…Όλ¬Έ**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)
205
 
206
+ - κ°€μ€‘μΉ˜: {weight_status}
207
+ - 클래슀: {label_status}
208
  """)
209
 
210
  with gr.Row():
 
218
  gr.Markdown("""
219
  | λ ˆμ΄μ–΄ | 좜λ ₯ shape | padding | groups | λΉ„κ³  |
220
  |--------|-----------------|---------|--------|------|
221
+ | Conv1 | (B, 96, 55β†’27) | 2 | 1 | 11x11 stride4, LRN, MaxPool |
222
  | Conv2 | (B, 256, 27β†’13) | 2 | 2 | 5x5, LRN, MaxPool |
223
  | Conv3 | (B, 384, 13) | 1 | 1 | 3x3, cross-GPU |
224
  | Conv4 | (B, 384, 13) | 1 | 2 | 3x3 |
225
  | Conv5 | (B, 256, 13β†’6) | 1 | 2 | 3x3, MaxPool |
226
+ | FC1Β·2 | (B, 4096) | β€” | β€” | Dropout 0.5, μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ |
227
+ | FC3 | (B, 1000) | β€” | β€” | 좜λ ₯μΈ΅, μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ |
 
 
228
  """)
229
 
230
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
requirements.txt CHANGED
@@ -2,3 +2,4 @@ torch>=2.0.0
2
  torchvision>=0.15.0
3
  gradio>=4.0.0
4
  pillow>=9.0.0
 
 
2
  torchvision>=0.15.0
3
  gradio>=4.0.0
4
  pillow>=9.0.0
5
+ requests>=2.28.0