JangTaeng committed on
Commit
63e7e05
·
verified ·
1 Parent(s): 306acbb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -63
app.py CHANGED
@@ -3,7 +3,7 @@ AlexNet — 허깅페이스 Spaces 데모
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
5
  실행: Spaces에서 자동 실행 (app.py 이름 필수)
6
- 로컬: pip install gradio torch pillow
7
  python app.py
8
  """
9
 
@@ -23,19 +23,24 @@ class ConvBlock(nn.Module):
23
  """
24
  groups 인자로 논문의 GPU 분할 전략을 제어하는 범용 블록.
25
 
26
- groups=1 : cross-GPU (전체 채널 μ—°κ²°) β€” Conv1Β·Conv3Β·FC
27
- groups=2 : parallel (채널을 λ°˜μ”© 독립 μ—°μ‚°) β€” Conv2Β·Conv4Β·Conv5
28
 
29
  Conv1이 groups=1인 이유:
30
- in_channels=3 (RGB)λŠ” groups=2둜 λ‚˜λˆŒ 수 μ—†μŒ (3 % 2 != 0).
31
- 논문도 μ‹€μ œλ‘œ Conv1은 3채널 μž…λ ₯ 전체λ₯Ό λ°›μ•„ 96μ±„λ„λ‘œ λ³€ν™˜ν•œ λ’€
32
- Conv2λΆ€ν„° GPUλ³„λ‘œ 채널을 뢄리함.
 
 
33
  """
34
- def __init__(self, in_ch, out_ch, kernel_size, stride=1, padding=0,
35
- groups=1, use_lrn=False, use_pool=False):
 
36
  super().__init__()
37
- self.conv = nn.Conv2d(in_ch, out_ch, kernel_size,
38
- stride=stride, padding=padding, groups=groups)
 
 
39
  self.relu = nn.ReLU(inplace=True)
40
  self.lrn = nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2) if use_lrn else None
41
  self.pool = nn.MaxPool2d(kernel_size=3, stride=2) if use_pool else None
@@ -51,30 +56,32 @@ class AlexNet(nn.Module):
51
  """
52
  논문 Figure 2 완전 재현.
53
 
54
- groups μ „λž΅:
55
- Conv1 groups=1 (in=3, RGBλŠ” 2둜 λ‚˜λˆŒ 수 μ—†μŒ)
56
- Conv2Β·4Β·5 groups=2 (parallel β€” GPU λΆ„ν• )
57
- Conv3Β·FC groups=1 (cross-GPU β€” 전체 채널 μ—°κ²°)
58
-
59
- λ ˆμ΄μ–΄λ³„ shape:
60
- μž…λ ₯ (B, 3, 224, 224)
61
- conv1 + pool (B, 96, 27, 27)
62
- conv2 + pool (B, 256, 13, 13)
63
- conv3 (B, 384, 13, 13) ← cross-GPU
64
- conv4 (B, 384, 13, 13)
65
- conv5 + pool (B, 256, 6, 6)
66
- FC1Β·2Β·3 (B, 4096) β†’ (B, 4096) β†’ (B, num_labels)
67
  """
68
  def __init__(self, num_labels=1000, dropout=0.5):
69
  super().__init__()
70
- # Conv1: in=3(RGB) β†’ groups=1 ν•„μˆ˜ (3은 2둜 λ‚˜λˆŒ 수 μ—†μŒ)
71
- self.conv1 = ConvBlock( 3, 96, 11, stride=4, groups=1, use_lrn=True, use_pool=True)
72
- # Conv2Β·4Β·5: in_chκ°€ 짝수 β†’ groups=2 둜 GPU λΆ„ν•  μž¬ν˜„
73
- self.conv2 = ConvBlock( 96, 256, 5, padding=2, groups=2, use_lrn=True, use_pool=True)
74
- # Conv3: cross-GPU
75
- self.conv3 = ConvBlock(256, 384, 3, padding=1, groups=1)
76
- self.conv4 = ConvBlock(384, 384, 3, padding=1, groups=2)
77
- self.conv5 = ConvBlock(384, 256, 3, padding=1, groups=2, use_pool=True)
 
 
 
 
 
 
78
  self.classifier = nn.Sequential(
79
  nn.Dropout(p=dropout),
80
  nn.Linear(256 * 6 * 6, 4096),
@@ -115,26 +122,22 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
115
  model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
116
  model.eval()
117
 
118
- # config.json μ—μ„œ id2label 읽기 (μ—†μœΌλ©΄ 인덱슀둜 ν‘œμ‹œ)
119
  try:
120
  with open("config.json") as f:
121
  cfg = json.load(f)
122
- ID2LABEL = cfg.get("id2label", {})
123
- ID2LABEL = {int(k): v for k, v in ID2LABEL.items()}
124
  except Exception:
125
  ID2LABEL = {}
126
 
127
 
128
  # ──────────────────────────────────────────────────────────────
129
  # 3. μ „μ²˜λ¦¬ νŒŒμ΄ν”„λΌμΈ
130
- # λ…Όλ¬Έ 2절: 256Γ—256 λ‹€μš΄μƒ˜ν”Œ β†’ 224Γ—224 center crop β†’ ν”½μ…€ 평균 차감
131
  # ──────────────────────────────────────────────────────────────
132
 
133
  TRANSFORM = T.Compose([
134
  T.Resize(256),
135
  T.CenterCrop(224),
136
  T.ToTensor(),
137
- # ImageNet ν”½μ…€ 평균 차감 (λ…Όλ¬Έ 2절: "subtracting the mean activity")
138
  T.Normalize(mean=[0.485, 0.456, 0.406],
139
  std=[0.229, 0.224, 0.225]),
140
  ])
@@ -145,26 +148,13 @@ TRANSFORM = T.Compose([
145
  # ──────────────────────────────────────────────────────────────
146
 
147
  def predict(image: Image.Image) -> dict:
148
- """
149
- PIL 이미지λ₯Ό λ°›μ•„ Top-5 클래슀 ν™•λ₯ μ„ λ°˜ν™˜ν•©λ‹ˆλ‹€.
150
-
151
- Args:
152
- image: Gradioκ°€ λ„˜κ²¨μ£ΌλŠ” PIL.Image 객체
153
-
154
- Returns:
155
- {클래슀λͺ…: ν™•λ₯ } λ”•μ…”λ„ˆλ¦¬ β€” Gradio Label μ»΄ν¬λ„ŒνŠΈμš©
156
- """
157
  if image is None:
158
  return {}
159
-
160
- tensor = TRANSFORM(image).unsqueeze(0).to(DEVICE) # (1, 3, 224, 224)
161
-
162
  with torch.no_grad():
163
- logits = model(tensor) # (1, 1000)
164
-
165
- probs = torch.softmax(logits, dim=-1)[0] # (1000,)
166
  top5_probs, top5_idx = probs.topk(5)
167
-
168
  return {
169
  ID2LABEL.get(idx.item(), f"class_{idx.item()}"): round(prob.item(), 4)
170
  for prob, idx in zip(top5_probs, top5_idx)
@@ -193,17 +183,17 @@ with gr.Blocks(title="AlexNet β€” λ…Όλ¬Έ μž¬ν˜„") as demo:
193
 
194
  with gr.Accordion("λͺ¨λΈ ꡬ쑰 (λ…Όλ¬Έ Figure 2)", open=False):
195
  gr.Markdown("""
196
- | λ ˆμ΄μ–΄ | 좜λ ₯ shape | νŠΉμ΄μ‚¬ν•­ |
197
- |--------|-----------------|----------------------------------|
198
- | Conv1 | (B, 96, 27, 27) | 11Γ—11, stride 4, LRN, MaxPool, groups=2 |
199
- | Conv2 | (B, 256, 13, 13) | 5Γ—5, LRN, MaxPool, groups=2 |
200
- | Conv3 | (B, 384, 13, 13) | 3Γ—3, **cross-GPU** (groups=1) |
201
- | Conv4 | (B, 384, 13, 13) | 3Γ—3, groups=2 |
202
- | Conv5 | (B, 256, 6, 6) | 3Γ—3, MaxPool, groups=2 |
203
- | FC1Β·2 | (B, 4096) | Dropout 0.5 |
204
- | FC3 | (B, 1000) | 좜λ ₯μΈ΅ |
205
-
206
- 총 νŒŒλΌλ―Έν„°: μ•½ **6,000만 개**
207
  """)
208
 
209
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
 
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
5
  실행: Spaces에서 자동 실행 (app.py 이름 필수)
6
+ 로컬: pip install gradio torch pillow torchvision
7
  python app.py
8
  """
9
 
 
23
  """
24
  groups 인자로 논문의 GPU 분할 전략을 제어하는 범용 블록.
25
 
26
+ groups=1 : cross-GPU — Conv1·Conv3 (전체 채널 연결)
27
+ groups=2 : parallel — Conv2·Conv4·Conv5 (채널을 반씩 독립 연산)
28
 
29
  Conv1이 groups=1인 이유:
30
+ in_channels=3(RGB)는 groups=2로 나눌 수 없음 (3 % 2 != 0).
31
+
32
+ padding 계산 근거 (Conv1):
33
+ padding=0 → (224-11)/4+1 = 54.25 → 내림 54 → Pool 후 26 → ... → FC 입력 6400 (오류)
34
+ padding=2 → (224-11+4)/4+1 = 55 → Pool 후 27 → ... → FC 입력 9216 (정상)
35
  """
36
+ def __init__(self, in_ch, out_ch, kernel_size,
37
+ stride=1, padding=0, groups=1,
38
+ use_lrn=False, use_pool=False):
39
  super().__init__()
40
+ self.conv = nn.Conv2d(
41
+ in_ch, out_ch, kernel_size,
42
+ stride=stride, padding=padding, groups=groups,
43
+ )
44
  self.relu = nn.ReLU(inplace=True)
45
  self.lrn = nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2) if use_lrn else None
46
  self.pool = nn.MaxPool2d(kernel_size=3, stride=2) if use_pool else None
 
56
  """
57
  논문 Figure 2 완전 재현.
58
 
59
+ 레이어별 출력 shape:
60
+ 입력 (B, 3, 224, 224)
61
+ Conv1 -> Pool1 (B, 96, 55, 55) -> (B, 96, 27, 27)
62
+ Conv2 -> Pool2 (B, 256, 27, 27) -> (B, 256, 13, 13)
63
+ Conv3 (B, 384, 13, 13) <- cross-GPU
64
+ Conv4 (B, 384, 13, 13)
65
+ Conv5 -> Pool5 (B, 256, 13, 13) -> (B, 256, 6, 6)
66
+ Flatten (B, 9216)
67
+ FC1->FC2->FC3 (B, 4096) -> (B, 4096) -> (B, 1000)
 
 
 
 
68
  """
69
  def __init__(self, num_labels=1000, dropout=0.5):
70
  super().__init__()
71
+
72
+ # Conv1: padding=2 필수 — 55x55 출력 보장
73
+ self.conv1 = ConvBlock(
74
+ 3, 96, 11, stride=4, padding=2, groups=1,
75
+ use_lrn=True, use_pool=True,
76
+ )
77
+ self.conv2 = ConvBlock(
78
+ 96, 256, 5, padding=2, groups=2,
79
+ use_lrn=True, use_pool=True,
80
+ )
81
+ self.conv3 = ConvBlock(256, 384, 3, padding=1, groups=1)
82
+ self.conv4 = ConvBlock(384, 384, 3, padding=1, groups=2)
83
+ self.conv5 = ConvBlock(384, 256, 3, padding=1, groups=2, use_pool=True)
84
+
85
  self.classifier = nn.Sequential(
86
  nn.Dropout(p=dropout),
87
  nn.Linear(256 * 6 * 6, 4096),
 
122
  model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
123
  model.eval()
124
 
 
125
  try:
126
  with open("config.json") as f:
127
  cfg = json.load(f)
128
+ ID2LABEL = {int(k): v for k, v in cfg.get("id2label", {}).items()}
 
129
  except Exception:
130
  ID2LABEL = {}
131
 
132
 
133
  # ──────────────────────────────────────────────────────────────
134
  # 3. 전처리 파이프라인
 
135
  # ──────────────────────────────────────────────────────────────
136
 
137
  TRANSFORM = T.Compose([
138
  T.Resize(256),
139
  T.CenterCrop(224),
140
  T.ToTensor(),
 
141
  T.Normalize(mean=[0.485, 0.456, 0.406],
142
  std=[0.229, 0.224, 0.225]),
143
  ])
 
148
  # ──────────────────────────────────────────────────────────────
149
 
150
  def predict(image: Image.Image) -> dict:
 
 
 
 
 
 
 
 
 
151
  if image is None:
152
  return {}
153
+ tensor = TRANSFORM(image).unsqueeze(0).to(DEVICE)
 
 
154
  with torch.no_grad():
155
+ logits = model(tensor)
156
+ probs = torch.softmax(logits, dim=-1)[0]
 
157
  top5_probs, top5_idx = probs.topk(5)
 
158
  return {
159
  ID2LABEL.get(idx.item(), f"class_{idx.item()}"): round(prob.item(), 4)
160
  for prob, idx in zip(top5_probs, top5_idx)
 
183
 
184
  with gr.Accordion("λͺ¨λΈ ꡬ쑰 (λ…Όλ¬Έ Figure 2)", open=False):
185
  gr.Markdown("""
186
+ | λ ˆμ΄μ–΄ | 좜λ ₯ shape | padding | groups | λΉ„κ³  |
187
+ |--------|-----------------|---------|--------|------|
188
+ | Conv1 | (B, 96, 55β†’27) | **2** | 1 | 11x11 stride4, LRN, MaxPool |
189
+ | Conv2 | (B, 256, 27β†’13) | 2 | 2 | 5x5, LRN, MaxPool |
190
+ | Conv3 | (B, 384, 13) | 1 | 1 | 3x3, cross-GPU |
191
+ | Conv4 | (B, 384, 13) | 1 | 2 | 3x3 |
192
+ | Conv5 | (B, 256, 13β†’6) | 1 | 2 | 3x3, MaxPool |
193
+ | FC1Β·2 | (B, 4096) | β€” | β€” | Dropout 0.5 |
194
+ | FC3 | (B, 1000) | β€” | β€” | 좜λ ₯μΈ΅ |
195
+
196
+ **μˆ˜μ • 포인트**: Conv1 padding=0이면 좜λ ₯ 54(λ‚΄λ¦Ό)β†’FC μž…λ ₯ 6400 였λ₯˜. padding=2둜 55β†’FC μž…λ ₯ 9216 정상.
197
  """)
198
 
199
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)