Spaces:

JangTaeng
/

AlexNetCode

Sleeping

App Files Files Community

JangTaeng committed on Apr 10

Commit

80572ca

verified ·

1 Parent(s): d37480f

Upload 4 files

Browse files

Files changed (4) hide show

README_spaces.md +44 -0
app.py +206 -0
config.json +9 -0
requirements.txt +4 -0

README_spaces.md ADDED Viewed

	@@ -0,0 +1,44 @@

+---
+title: AlexNet 논문 재현
+emoji: 🧠
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 4.0.0
+app_file: app.py
+pinned: false
+license: mit
+tags:
+  - image-classification
+  - alexnet
+  - paper-reproduction
+  - pytorch
+---
+# AlexNet — 논문 완전 재현
+**논문**: [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html)
+**저자**: Alex Krizhevsky, Ilya Sutskever, Geoffrey E. Hinton (NeurIPS 2012)
+## 파일 구성
+| 파일 | 역할 |
+|------|------|
+| `app.py` | Gradio 데모 + 모델 전체 코드 |
+| `config.json` | 모델 하이퍼파라미터 |
+| `requirements.txt` | 패키지 목록 |
+## 로컬 실행
+```bash
+pip install -r requirements.txt
+python app.py
+```
+## 논문 구현 포인트
+- **GPU 분할** (3.2절): `groups=2` → Conv2·4·5에서 채널을 반씩 나눠 독립 연산
+- **Cross-GPU** (3.5절): `groups=1` → Conv1·Conv3·FC는 전체 채널 연결 (Conv1은 입력이 3채널이라 2그룹으로 나눌 수 없음)
+- **LRN** (3.3절): Conv1·2 뒤에만 적용
+- **Dropout** (4.2절): FC1·FC2에만 p=0.5 적용
+- **가중치 초기화** (5절): N(0, 0.01), 일부 레이어 bias=1

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+"""
+AlexNet — 허깅페이스 Spaces 데모
+논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
+실행: Spaces에서 자동 실행 (app.py 이름 필수)
+로컬: pip install gradio torch torchvision pillow
+      python app.py
+"""
+import json
+import torch
+import torch.nn as nn
+import gradio as gr
+from PIL import Image
+import torchvision.transforms as T
+# ──────────────────────────────────────────────────────────────
+# 1. 모델 정의
+# ──────────────────────────────────────────────────────────────
+class ParallelConvBlock(nn.Module):
+    """Conv1·2·4·5: groups=2 로 논문의 GPU 분할 구조 재현."""
+    def __init__(self, in_ch, out_ch, kernel_size, stride=1, padding=0,
+                 use_lrn=False, use_pool=False):
+        super().__init__()
+        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size,
+                              stride=stride, padding=padding, groups=2)
+        self.relu = nn.ReLU(inplace=True)
+        self.lrn  = nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2) if use_lrn  else None
+        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)                if use_pool else None
+    def forward(self, x):
+        x = self.relu(self.conv(x))
+        if self.lrn:  x = self.lrn(x)
+        if self.pool: x = self.pool(x)
+        return x
+class CrossConvBlock(nn.Module):
+    """Conv3: groups=1 로 cross-GPU 전체 채널 연결."""
+    def __init__(self, in_ch, out_ch, kernel_size, padding=0):
+        super().__init__()
+        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding, groups=1)
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        return self.relu(self.conv(x))
+class AlexNet(nn.Module):
+    """
+    논문 Figure 2 완전 재현.
+    레이어별 shape:
+      입력          (B,   3, 224, 224)
+      conv1 + pool  (B,  96,  27,  27)
+      conv2 + pool  (B, 256,  13,  13)
+      conv3         (B, 384,  13,  13)  ← cross-GPU
+      conv4         (B, 384,  13,  13)
+      conv5 + pool  (B, 256,   6,   6)
+      FC1·2·3       (B, 4096) → (B, 4096) → (B, num_labels)
+    """
+    def __init__(self, num_labels=1000, dropout=0.5):
+        super().__init__()
+        self.conv1 = ParallelConvBlock(  3,  96, 11, stride=4, use_lrn=True,  use_pool=True)
+        self.conv2 = ParallelConvBlock( 96, 256,  5, padding=2, use_lrn=True,  use_pool=True)
+        self.conv3 = CrossConvBlock   (256, 384,  3, padding=1)
+        self.conv4 = ParallelConvBlock(384, 384,  3, padding=1)
+        self.conv5 = ParallelConvBlock(384, 256,  3, padding=1, use_pool=True)
+        self.classifier = nn.Sequential(
+            nn.Dropout(p=dropout),
+            nn.Linear(256 * 6 * 6, 4096),
+            nn.ReLU(inplace=True),
+            nn.Dropout(p=dropout),
+            nn.Linear(4096, 4096),
+            nn.ReLU(inplace=True),
+            nn.Linear(4096, num_labels),
+        )
+        self._init_weights()
+    def _init_weights(self):
+        bias_one = {self.conv2.conv, self.conv4.conv, self.conv5.conv}
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 1.0 if m in bias_one else 0.0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 1.0)
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        x = x.view(x.size(0), -1)
+        return self.classifier(x)
+# ──────────────────────────────────────────────────────────────
+# 2. 모델 + 클래스 레이블 로드
+# ──────────────────────────────────────────────────────────────
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
+model.eval()
+# config.json 에서 id2label 읽기 (없으면 인덱스로 표시)
+try:
+    with open("config.json") as f:
+        cfg = json.load(f)
+    ID2LABEL = cfg.get("id2label", {})
+    ID2LABEL = {int(k): v for k, v in ID2LABEL.items()}
+except Exception:
+    ID2LABEL = {}
+# ──────────────────────────────────────────────────────────────
+# 3. 전처리 파이프라인
+#    논문 2절: 256×256 다운샘플 → 224×224 center crop → 픽셀 평균 차감
+# ──────────────────────────────────────────────────────────────
+# Preprocessing applied to every input image before it reaches the model:
+# resize, centre-crop to 224, convert to a tensor, then normalise per channel.
+TRANSFORM = T.Compose([
+    T.Resize(256),
+    T.CenterCrop(224),
+    T.ToTensor(),
+    # Normalize subtracts the listed per-channel mean AND divides by the listed
+    # std, so this is more than the plain mean subtraction described in
+    # Section 2 of the paper.
+    # NOTE(review): the values look like the usual ImageNet RGB statistics — confirm.
+    T.Normalize(mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]),
+])
+# ──────────────────────────────────────────────────────────────
+# 4. 추론 함수
+# ──────────────────────────────────────────────────────────────
+def predict(image: Image.Image) -> dict:
+    """
+    PIL 이미지를 받아 Top-5 클래스 확률을 반환합니다.
+    Args:
+        image: Gradio가 넘겨주는 PIL.Image 객체
+    Returns:
+        {클래스명: 확률} 딕셔너리 — Gradio Label 컴포넌트용
+    """
+    if image is None:
+        return {}
+    tensor = TRANSFORM(image).unsqueeze(0).to(DEVICE)  # (1, 3, 224, 224)
+    with torch.no_grad():
+        logits = model(tensor)                          # (1, 1000)
+    probs = torch.softmax(logits, dim=-1)[0]            # (1000,)
+    top5_probs, top5_idx = probs.topk(5)
+    return {
+        ID2LABEL.get(idx.item(), f"class_{idx.item()}"): round(prob.item(), 4)
+        for prob, idx in zip(top5_probs, top5_idx)
+    }
+# ──────────────────────────────────────────────────────────────
+# 5. Gradio UI
+# ──────────────────────────────────────────────────────────────
+with gr.Blocks(title="AlexNet — 논문 재현") as demo:
+    gr.Markdown("""
+    ## AlexNet — 논문 완전 재현 데모
+    **논문**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)
+    > 이미지를 업로드하면 Top-5 클래스를 예측합니다.
+    > ※ 현재 모델은 랜덤 초기화 상태입니다. ImageNet 학습 가중치를 로드하면 실제 예측이 가능합니다.
+    """)
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="입력 이미지")
+            run_btn = gr.Button("예측하기", variant="primary")
+        with gr.Column():
+            label_output = gr.Label(num_top_classes=5, label="Top-5 예측")
+    with gr.Accordion("모델 구조 (논문 Figure 2)", open=False):
+        gr.Markdown("""
+        | 레이어 | 출력 shape       | 특이사항                          |
+        |--------|-----------------|----------------------------------|
+        | Conv1  | (B, 96, 27, 27)  | 11×11, stride 4, LRN, MaxPool, groups=2 |
+        | Conv2  | (B, 256, 13, 13) | 5×5, LRN, MaxPool, groups=2      |
+        | Conv3  | (B, 384, 13, 13) | 3×3, **cross-GPU** (groups=1)    |
+        | Conv4  | (B, 384, 13, 13) | 3×3, groups=2                    |
+        | Conv5  | (B, 256, 6, 6)   | 3×3, MaxPool, groups=2           |
+        | FC1·2  | (B, 4096)        | Dropout 0.5                      |
+        | FC3    | (B, 1000)        | 출력층                            |
+        총 파라미터: 약 **6,000만 개**
+        """)
+    run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
+    image_input.change(fn=predict, inputs=image_input, outputs=label_output)
+if __name__ == "__main__":
+    demo.launch()

config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "model_type": "alexnet",
+  "num_labels": 1000,
+  "dropout": 0.5,
+  "image_size": 224,
+  "num_channels": 3,
+  "architectures": ["AlexNetForImageClassification"],
+  "transformers_version": "4.40.0"
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch>=2.0.0
+torchvision>=0.15.0
+gradio>=4.0.0
+pillow>=9.0.0