JangTaeng commited on
Commit
80572ca
·
verified ·
1 Parent(s): d37480f

Upload 4 files

Browse files
Files changed (4) hide show
  1. README_spaces.md +44 -0
  2. app.py +206 -0
  3. config.json +9 -0
  4. requirements.txt +4 -0
README_spaces.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AlexNet 논문 재현
3
+ emoji: 🧠
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ tags:
12
+ - image-classification
13
+ - alexnet
14
+ - paper-reproduction
15
+ - pytorch
16
+ ---
17
+
18
+ # AlexNet — 논문 완전 재현
19
+
20
+ **논문**: [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html)
21
+ **저자**: Alex Krizhevsky, Ilya Sutskever, Geoffrey E. Hinton (NeurIPS 2012)
22
+
23
+ ## 파일 구성
24
+
25
+ | 파일 | 역할 |
26
+ |------|------|
27
+ | `app.py` | Gradio 데모 + 모델 전체 코드 |
28
+ | `config.json` | 모델 하이퍼파라미터 |
29
+ | `requirements.txt` | 패키지 목록 |
30
+
31
+ ## 로컬 실행
32
+
33
+ ```bash
34
+ pip install -r requirements.txt
35
+ python app.py
36
+ ```
37
+
38
+ ## 논문 구현 포인트
39
+
40
+ - **GPU 분할** (3.2절): `groups=2` → Conv1·2·4·5에서 채널을 반씩 나눠 독립 연산
41
+ - **Cross-GPU** (3.5절): `groups=1` → Conv3·FC는 전체 채널 연결
42
+ - **LRN** (3.3절): Conv1·2 뒤에만 적용
43
+ - **Dropout** (4.2절): FC1·FC2에만 p=0.5 적용
44
+ - **가중치 초기화** (5절): N(0, 0.01), 일부 레이어 bias=1
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AlexNet — 허깅페이스 Spaces 데모
3
+ 논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
+
5
+ 실행: Spaces에서 자동 실행 (app.py 이름 필수)
6
+ 로컬: pip install gradio torch torchvision pillow
7
+ python app.py
8
+ """
9
+
10
+ import json
11
+ import torch
12
+ import torch.nn as nn
13
+ import gradio as gr
14
+ from PIL import Image
15
+ import torchvision.transforms as T
16
+
17
+
18
+ # ──────────────────────────────────────────────────────────────
19
+ # 1. 모델 정의
20
+ # ──────────────────────────────────────────────────────────────
21
+
22
class ParallelConvBlock(nn.Module):
    """Convolution stage: Conv2d -> ReLU -> optional LRN -> optional max-pool.

    With the default ``groups=2`` the convolution is split into two
    independent halves, mirroring the paper's two-GPU layout in which each
    half of the output channels only sees half of the input channels.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Zero padding applied on each side of the input.
        use_lrn: If true, append ``LocalResponseNorm(5, alpha=1e-4,
            beta=0.75, k=2)`` after the ReLU.
        use_pool: If true, append ``MaxPool2d(kernel_size=3, stride=2)``.
        groups: Number of channel groups for the convolution. Defaults to 2;
            pass 1 for a layer that must see every input channel (both
            ``in_ch`` and ``out_ch`` have to be divisible by ``groups``).
    """

    def __init__(self, in_ch, out_ch, kernel_size, stride=1, padding=0,
                 use_lrn=False, use_pool=False, groups=2):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size,
                              stride=stride, padding=padding, groups=groups)
        self.relu = nn.ReLU(inplace=True)
        self.lrn = (
            nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2)
            if use_lrn else None
        )
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2) if use_pool else None

    def forward(self, x):
        """Apply conv + ReLU, then LRN and pooling when they are enabled."""
        x = self.relu(self.conv(x))
        # Compare against None explicitly instead of relying on the
        # truthiness of an nn.Module instance.
        if self.lrn is not None:
            x = self.lrn(x)
        if self.pool is not None:
            x = self.pool(x)
        return x


class CrossConvBlock(nn.Module):
    """Conv3: ``groups=1`` so every output channel sees every input channel.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel size.
        padding: Zero padding applied on each side of the input.
    """

    def __init__(self, in_ch, out_ch, kernel_size, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding, groups=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution followed by ReLU."""
        return self.relu(self.conv(x))


class AlexNet(nn.Module):
    """
    Reproduction of the network in Figure 2 of the AlexNet paper.

    Per-layer output shapes for a 224x224 input:
        input          (B, 3, 224, 224)
        conv1 + pool   (B, 96, 27, 27)
        conv2 + pool   (B, 256, 13, 13)
        conv3          (B, 384, 13, 13)   <- cross-GPU (groups=1)
        conv4          (B, 384, 13, 13)
        conv5 + pool   (B, 256, 6, 6)
        FC1, FC2, FC3  (B, 4096) -> (B, 4096) -> (B, num_labels)

    Args:
        num_labels: Size of the output layer.
        dropout: Dropout probability applied before FC1 and FC2.
    """

    def __init__(self, num_labels=1000, dropout=0.5):
        super().__init__()
        # conv1 reads the 3-channel image, so it cannot be a 2-group
        # convolution (nn.Conv2d requires in_channels % groups == 0); in the
        # paper both GPUs see the whole image in this layer anyway.
        # padding=2 makes a 224x224 input produce the 55x55 map (27x27 after
        # pooling) that the 256*6*6 classifier input below relies on.
        self.conv1 = ParallelConvBlock(3, 96, 11, stride=4, padding=2,
                                       use_lrn=True, use_pool=True, groups=1)
        self.conv2 = ParallelConvBlock(96, 256, 5, padding=2,
                                       use_lrn=True, use_pool=True)
        self.conv3 = CrossConvBlock(256, 384, 3, padding=1)
        self.conv4 = ParallelConvBlock(384, 384, 3, padding=1)
        self.conv5 = ParallelConvBlock(384, 256, 3, padding=1, use_pool=True)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_labels),
        )
        self._init_weights()

    def _init_weights(self):
        """Initialise weights from N(0, 0.01) and biases as in paper section 5.

        Biases are 1 for conv2, conv4, conv5 and the two hidden
        fully-connected layers, and 0 everywhere else (conv1, conv3 and the
        output layer).
        """
        bias_one = {self.conv2.conv, self.conv4.conv, self.conv5.conv}
        # Every Linear except the last one in the classifier is a hidden layer.
        bias_one.update(
            layer for layer in self.classifier[:-1] if isinstance(layer, nn.Linear)
        )
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
                nn.init.constant_(m.bias, 1.0 if m in bias_one else 0.0)

    def forward(self, x):
        """Map a (B, 3, 224, 224) batch to (B, num_labels) logits."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = torch.flatten(x, 1)  # (B, 256 * 6 * 6)
        return self.classifier(x)
100
+
101
+
102
+ # ──────────────────────────────────────────────────────────────
103
+ # 2. 모델 + 클래스 레이블 로드
104
+ # ──────────────────────────────────────────────────────────────
105
+
106
# Run on the GPU when CUDA reports one as available, otherwise on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

# No checkpoint is loaded here: the network keeps the weights produced by its
# own initialiser. eval() puts the Dropout layers into inference mode.
model = AlexNet(num_labels=1000, dropout=0.5)
model = model.to(DEVICE)
model.eval()
110
+
111
def load_id2label(path="config.json"):
    """Read the ``id2label`` mapping from a JSON config file.

    Loading is deliberately best-effort: when the file is missing, unreadable,
    not valid JSON, or its ``id2label`` entry is not a mapping with
    integer-like keys, an empty dict is returned so callers can fall back to
    showing the bare class index.

    Args:
        path: Location of the JSON config file.

    Returns:
        A ``{class index (int): label}`` dict, or ``{}`` when nothing usable
        was found.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            raw = json.load(f).get("id2label", {})
        # JSON object keys are always strings; convert them to class indices.
        return {int(k): v for k, v in raw.items()}
    except (OSError, ValueError, AttributeError, TypeError):
        # OSError: file missing/unreadable. ValueError: malformed JSON or a
        # non-numeric key. AttributeError/TypeError: unexpected JSON shape.
        return {}


# Read id2label from config.json (predictions show the class index if absent).
ID2LABEL = load_id2label()
119
+
120
+
121
+ # ──────────────────────────────────────────────────────────────
122
+ # 3. 전처리 파이프라인
123
+ # 논문 2절: 256×256 다운샘플 → 224×224 center crop → 픽셀 평균 차감
124
+ # ──────────────────────────────────────────────────────────────
125
+
126
# Per-channel statistics handed to Normalize below. Paper section 2 only
# subtracts the mean activity; Normalize additionally divides by the std.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Inference-time preprocessing: resize, central 224x224 crop, tensor
# conversion, then per-channel normalisation.
_preprocess_steps = [
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
TRANSFORM = T.Compose(_preprocess_steps)
134
+
135
+
136
+ # ──────────────────────────────────────────────────────────────
137
+ # 4. 추론 함수
138
+ # ──────────────────────────────────────────────────────────────
139
+
140
def predict(image: "Image.Image | None") -> dict:
    """
    Return the top-5 class probabilities for a PIL image.

    Args:
        image: PIL.Image object handed over by Gradio, or None when the
            input component is empty.

    Returns:
        A {class name: probability} dict for the Gradio Label component
        (probabilities rounded to 4 decimals); an empty dict when ``image``
        is None.
    """
    if image is None:
        return {}

    # Uploads can be grayscale, palette or RGBA images, but the preprocessing
    # pipeline normalises exactly three channels - convert first.
    rgb_image = image.convert("RGB")
    batch = TRANSFORM(rgb_image).unsqueeze(0).to(DEVICE)  # (1, 3, 224, 224)

    with torch.no_grad():
        logits = model(batch)                       # (1, num_labels)
        probs = torch.softmax(logits, dim=-1)[0]    # (num_labels,)

    # Never ask topk for more entries than there are classes.
    top_probs, top_idx = probs.topk(min(5, probs.numel()))

    return {
        ID2LABEL.get(idx, f"class_{idx}"): round(prob, 4)
        for prob, idx in zip(top_probs.tolist(), top_idx.tolist())
    }
165
+
166
+
167
+ # ──────────────────────────────────────────────────────────────
168
+ # 5. Gradio UI
169
+ # ──────────────────────────────────────────────────────────────
170
+
171
# Build the demo page: image upload on the left, top-5 label output on the
# right, and a collapsible table summarising the architecture.
# The user-facing strings below were mis-decoded (mojibake); they are restored
# to the intended Korean text and symbols.
with gr.Blocks(title="AlexNet — 논문 재현") as demo:
    gr.Markdown("""
    ## AlexNet — 논문 완전 재현 데모
    **논문**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)

    > 이미지를 업로드하면 Top-5 클래스를 예측합니다.
    > ※ 현재 모델은 랜덤 초기화 상태입니다. ImageNet 학습 가중치를 로드하면 실제 예측이 가능합니다.
    """)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="입력 이미지")
            run_btn = gr.Button("예측하기", variant="primary")
        with gr.Column():
            label_output = gr.Label(num_top_classes=5, label="Top-5 예측")

    with gr.Accordion("모델 구조 (논문 Figure 2)", open=False):
        gr.Markdown("""
        | 레이어 | 출력 shape | 특이사항 |
        |--------|-----------------|----------------------------------|
        | Conv1 | (B, 96, 27, 27) | 11×11, stride 4, LRN, MaxPool, groups=2 |
        | Conv2 | (B, 256, 13, 13) | 5×5, LRN, MaxPool, groups=2 |
        | Conv3 | (B, 384, 13, 13) | 3×3, **cross-GPU** (groups=1) |
        | Conv4 | (B, 384, 13, 13) | 3×3, groups=2 |
        | Conv5 | (B, 256, 6, 6) | 3×3, MaxPool, groups=2 |
        | FC1·2 | (B, 4096) | Dropout 0.5 |
        | FC3 | (B, 1000) | 출력층 |

        총 파라미터: 약 **6,000만 개**
        """)

    # Run inference both on an explicit button click and whenever the image
    # component's value changes.
    run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
    image_input.change(fn=predict, inputs=image_input, outputs=label_output)

if __name__ == "__main__":
    demo.launch()
config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "alexnet",
3
+ "num_labels": 1000,
4
+ "dropout": 0.5,
5
+ "image_size": 224,
6
+ "num_channels": 3,
7
+ "architectures": ["AlexNetForImageClassification"],
8
+ "transformers_version": "4.40.0"
9
+ }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch>=2.0.0
2
+ torchvision>=0.15.0
3
+ gradio>=4.0.0
4
+ pillow>=9.0.0