JangTaeng committed on
Commit
7a5c9ed
·
verified ·
1 Parent(s): 62c8941

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -106
app.py CHANGED
@@ -2,14 +2,11 @@
2
  AlexNet — 허깅페이스 Spaces 데모
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
5
- 변경사항:
6
- - ImageNet id2label μžλ™ λ‘œλ“œ (ViT configμ—μ„œ κ°€μ Έμ˜΄)
7
- - torchvision μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ (FC λ ˆμ΄μ–΄) λ‘œλ“œ
8
- - κ°€μ€‘μΉ˜ λ‘œλ“œ μ‹€νŒ¨ μ‹œ 랜덀 μ΄ˆκΈ°ν™”λ‘œ 폴백
9
-
10
- μ‹€ν–‰: Spacesμ—μ„œ μžλ™ μ‹€ν–‰ (app.py 이름 ν•„μˆ˜)
11
- 둜컬: pip install gradio torch pillow torchvision requests
12
- python app.py
13
  """
14
 
15
  import json
@@ -24,113 +21,95 @@ from PIL import Image
24
 
25
  # ──────────────────────────────────────────────────────────────
26
  # 1. λͺ¨λΈ μ •μ˜
 
 
 
 
 
27
  # ──────────────────────────────────────────────────────────────
28
 
29
- class ConvBlock(nn.Module):
30
  """
31
- groups 인자둜 λ…Όλ¬Έμ˜ GPU λΆ„ν•  μ „λž΅μ„ μ œμ–΄ν•˜λŠ” λ²”μš© 블둝.
32
- groups=1: cross-GPU (전체 채널 μ—°κ²°) β€” Conv1Β·Conv3
33
- groups=2: parallel (채널 λ°˜μ”© 독립) β€” Conv2Β·Conv4Β·Conv5
34
-
35
- Conv1 padding=2 이유:
36
- padding=0 β†’ 좜λ ₯ 54(λ‚΄λ¦Ό) β†’ FC μž…λ ₯ 6400 였λ₯˜
37
- padding=2 β†’ 좜λ ₯ 55(μ •ν™•) β†’ FC μž…λ ₯ 9216 정상
 
 
 
 
38
  """
39
- def __init__(self, in_ch, out_ch, kernel_size,
40
- stride=1, padding=0, groups=1,
41
- use_lrn=False, use_pool=False):
42
  super().__init__()
43
- self.conv = nn.Conv2d(
44
- in_ch, out_ch, kernel_size,
45
- stride=stride, padding=padding, groups=groups,
46
- )
47
- self.relu = nn.ReLU(inplace=True)
48
- self.lrn = nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2) if use_lrn else None
49
- self.pool = nn.MaxPool2d(kernel_size=3, stride=2) if use_pool else None
50
 
51
- def forward(self, x):
52
- x = self.relu(self.conv(x))
53
- if self.lrn: x = self.lrn(x)
54
- if self.pool: x = self.pool(x)
55
- return x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
 
57
 
58
- class AlexNet(nn.Module):
59
- """
60
- 논문 Figure 2 완전 재현.
61
-
62
- λ ˆμ΄μ–΄λ³„ 좜λ ₯ shape:
63
- μž…λ ₯ (B, 3, 224, 224)
64
- Conv1 -> Pool1 (B, 96, 55, 55) -> (B, 96, 27, 27)
65
- Conv2 -> Pool2 (B, 256, 27, 27) -> (B, 256, 13, 13)
66
- Conv3 (B, 384, 13, 13) <- cross-GPU
67
- Conv4 (B, 384, 13, 13)
68
- Conv5 -> Pool5 (B, 256, 13, 13) -> (B, 256, 6, 6)
69
- Flatten (B, 9216)
70
- FC1->FC2->FC3 (B, 4096) -> (B, 4096) -> (B, 1000)
71
- """
72
- def __init__(self, num_labels=1000, dropout=0.5):
73
- super().__init__()
74
- self.conv1 = ConvBlock( 3, 96, 11, stride=4, padding=2, groups=1, use_lrn=True, use_pool=True)
75
- self.conv2 = ConvBlock( 96, 256, 5, padding=2, groups=2, use_lrn=True, use_pool=True)
76
- self.conv3 = ConvBlock(256, 384, 3, padding=1, groups=1)
77
- self.conv4 = ConvBlock(384, 384, 3, padding=1, groups=2)
78
- self.conv5 = ConvBlock(384, 256, 3, padding=1, groups=2, use_pool=True)
79
  self.classifier = nn.Sequential(
80
- nn.Dropout(p=dropout),
81
  nn.Linear(256 * 6 * 6, 4096),
82
  nn.ReLU(inplace=True),
83
- nn.Dropout(p=dropout),
84
  nn.Linear(4096, 4096),
85
  nn.ReLU(inplace=True),
86
- nn.Linear(4096, num_labels),
87
  )
88
- self._init_weights()
89
-
90
- def _init_weights(self):
91
- bias_one = {self.conv2.conv, self.conv4.conv, self.conv5.conv}
92
- for m in self.modules():
93
- if isinstance(m, nn.Conv2d):
94
- nn.init.normal_(m.weight, 0, 0.01)
95
- nn.init.constant_(m.bias, 1.0 if m in bias_one else 0.0)
96
- elif isinstance(m, nn.Linear):
97
- nn.init.normal_(m.weight, 0, 0.01)
98
- nn.init.constant_(m.bias, 1.0)
99
-
100
- def forward(self, x):
101
- x = self.conv1(x)
102
- x = self.conv2(x)
103
- x = self.conv3(x)
104
- x = self.conv4(x)
105
- x = self.conv5(x)
106
- x = x.view(x.size(0), -1)
107
- return self.classifier(x)
108
 
109
 
110
  # ──────────────────────────────────────────────────────────────
111
- # 2. λͺ¨λΈ 생성 + μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ λ‘œλ“œ
112
  # ──────────────────────────────────────────────────────────────
113
 
114
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
115
- model = AlexNet(num_labels=1000, dropout=0.5).to(DEVICE)
116
 
117
- WEIGHTS_LOADED = False
118
  try:
119
- # torchvision AlexNet μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜μ—μ„œ FC λ ˆμ΄μ–΄λ§Œ 볡사
120
- # (Conv λ ˆμ΄μ–΄λŠ” groups ꡬ쑰가 달라 직접 λ‘œλ“œ λΆˆκ°€)
121
  pretrained = tv.alexnet(weights=tv.AlexNet_Weights.DEFAULT)
122
- model.classifier.load_state_dict(pretrained.classifier.state_dict())
123
- WEIGHTS_LOADED = True
124
- print("μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜(FC) λ‘œλ“œ μ™„λ£Œ")
125
  except Exception as e:
126
- print(f"κ°€μ€‘μΉ˜ λ‘œλ“œ μ‹€νŒ¨, 랜덀 μ΄ˆκΈ°ν™” μœ μ§€: {e}")
127
 
128
  model.eval()
129
 
130
 
131
  # ──────────────────────────────────────────────────────────────
132
- # 3. ImageNet id2label λ‘œλ“œ
133
- # μš°μ„ μˆœμœ„: config.json β†’ ViT config(ν—ˆκΉ…νŽ˜μ΄μŠ€) β†’ 인덱슀 ν‘œμ‹œ
 
134
  # ──────────────────────────────────────────────────────────────
135
 
136
  ID2LABEL = {}
@@ -141,27 +120,28 @@ try:
141
  cfg = json.load(f)
142
  ID2LABEL = {int(k): v for k, v in cfg.get("id2label", {}).items()}
143
  if ID2LABEL:
144
- print(f"config.jsonμ—μ„œ {len(ID2LABEL)}개 클래슀 λ‘œλ“œ")
145
  except Exception:
146
  pass
147
 
148
- # 2μˆœμœ„: ν—ˆκΉ…νŽ˜μ΄μŠ€ ViT config (ImageNet 1000개 라벨 동일)
149
  if not ID2LABEL:
150
  try:
151
  resp = requests.get(
152
  "https://huggingface.co/google/vit-base-patch16-224/raw/main/config.json",
153
- timeout=10,
154
  )
155
  vit_cfg = resp.json()
156
  ID2LABEL = {int(k): v for k, v in vit_cfg.get("id2label", {}).items()}
157
- print(f"ν—ˆκΉ…νŽ˜μ΄μŠ€μ—μ„œ {len(ID2LABEL)}개 클래슀 λ‘œλ“œ")
158
  except Exception as e:
159
  print(f"클래슀 이름 λ‘œλ“œ μ‹€νŒ¨: {e}")
160
 
 
 
161
 
162
  # ──────────────────────────────────────────────────────────────
163
- # 4. μ „μ²˜λ¦¬ νŒŒμ΄ν”„λΌμΈ
164
- # λ…Όλ¬Έ 2절: 256 λ¦¬μ‚¬μ΄μ¦ˆ β†’ 224 center crop β†’ ν”½μ…€ 평균 차감
165
  # ──────────────────────────────────────────────────────────────
166
 
167
  TRANSFORM = T.Compose([
@@ -195,16 +175,18 @@ def predict(image: Image.Image) -> dict:
195
  # 6. Gradio UI
196
  # ──────────────────────────────────────────────────────────────
197
 
198
- weight_status = "FC μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ λ‘œλ“œλ¨ (torchvision)" if WEIGHTS_LOADED else "랜덀 μ΄ˆκΈ°ν™” μƒνƒœ"
199
- label_status = f"ImageNet {len(ID2LABEL)}개 클래슀 이름 λ‘œλ“œλ¨" if ID2LABEL else "클래슀 이름 μ—†μŒ (인덱슀 ν‘œμ‹œ)"
200
-
201
  with gr.Blocks(title="AlexNet — 논문 재현") as demo:
202
  gr.Markdown(f"""
203
  ## AlexNet — 논문 완전 재현 데모
204
  **논문**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)
205
 
206
- - κ°€μ€‘μΉ˜: {weight_status}
207
- - 클래슀: {label_status}
 
 
 
 
 
208
  """)
209
 
210
  with gr.Row():
@@ -214,17 +196,29 @@ with gr.Blocks(title="AlexNet β€” λ…Όλ¬Έ μž¬ν˜„") as demo:
214
  with gr.Column():
215
  label_output = gr.Label(num_top_classes=5, label="Top-5 예측")
216
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  with gr.Accordion("모델 구조 (논문 Figure 2)", open=False):
218
  gr.Markdown("""
219
- | λ ˆμ΄μ–΄ | 좜λ ₯ shape | padding | groups | λΉ„κ³  |
220
- |--------|-----------------|---------|--------|------|
221
- | Conv1 | (B, 96, 55β†’27) | 2 | 1 | 11x11 stride4, LRN, MaxPool |
222
- | Conv2 | (B, 256, 27β†’13) | 2 | 2 | 5x5, LRN, MaxPool |
223
- | Conv3 | (B, 384, 13) | 1 | 1 | 3x3, cross-GPU |
224
- | Conv4 | (B, 384, 13) | 1 | 2 | 3x3 |
225
- | Conv5 | (B, 256, 13β†’6) | 1 | 2 | 3x3, MaxPool |
226
- | FC1Β·2 | (B, 4096) | β€” | β€” | Dropout 0.5, μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ |
227
- | FC3 | (B, 1000) | β€” | β€” | 좜λ ₯μΈ΅, μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ |
228
  """)
229
 
230
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)
 
2
  AlexNet — 허깅페이스 Spaces 데모
3
  논문: Krizhevsky, Sutskever, Hinton (NeurIPS 2012)
4
 
5
+ 핡심 λ³€κ²½:
6
+ - torchvision AlexNetκ³Ό μ™„μ „νžˆ λ™μΌν•œ ꡬ쑰(groups=1)둜 맞좰
7
+ μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜λ₯Ό Conv+FC 전체 λ‘œλ“œ β†’ μ‹€μ œ λΆ„λ₯˜ μž‘λ™
8
+ - ImageNet 1000개 클래슀 이름 μžλ™ λ‘œλ“œ
9
+ (κ°•μ•„μ§€, 고양이, 사과, μ‚¬λžŒ λ“± λͺ¨λ‘ 포함)
 
 
 
10
  """
11
 
12
  import json
 
21
 
22
  # ──────────────────────────────────────────────────────────────
23
  # 1. λͺ¨λΈ μ •μ˜
24
+ # torchvision AlexNetκ³Ό μ™„μ „ 동일 ꡬ쑰 (groups=1, κ°€μ€‘μΉ˜ ν˜Έν™˜)
25
+ #
26
+ # λ…Όλ¬Έ GPU λΆ„ν• (groups=2)은 λ©”λͺ¨λ¦¬ μ œν•œ λ•Œλ¬Έμ΄μ—ˆκ³ ,
27
+ # μ§€κΈˆμ€ GPU λ©”λͺ¨λ¦¬κ°€ μΆ©λΆ„ν•˜λ―€λ‘œ groups=1둜 λ™μΌν•˜κ²Œ κ΅¬ν˜„.
28
+ # λ…Όλ¬Έμ˜ λͺ¨λ“  ν•˜μ΄νΌνŒŒλΌλ―Έν„°(LRN, Dropout, padding λ“±)λŠ” κ·ΈλŒ€λ‘œ μœ μ§€.
29
  # ──────────────────────────────────────────────────────────────
30
 
31
+ class AlexNet(nn.Module):
32
  """
33
+ 논문 Figure 2 재현 — torchvision 가중치 완전 호환 버전.
34
+
35
+ torchvision AlexNet ꡬ쑰와 1:1 λŒ€μ‘:
36
+ Conv1: kernel=11, stride=4, padding=2 -> (B, 64, 55, 55) -> pool -> (B, 64, 27, 27)
37
+ Conv2: kernel=5, stride=1, padding=2 -> (B,192, 27, 27) -> pool -> (B,192, 13, 13)
38
+ Conv3: kernel=3, stride=1, padding=1 -> (B,384, 13, 13)
39
+ Conv4: kernel=3, stride=1, padding=1 -> (B,256, 13, 13)
40
+ Conv5: kernel=3, stride=1, padding=1 -> (B,256, 13, 13) -> pool -> (B,256, 6, 6)
41
+ FC1: 9216 -> 4096 (Dropout 0.5)
42
+ FC2: 4096 -> 4096 (Dropout 0.5)
43
+ FC3: 4096 -> num_labels
44
  """
45
+ def __init__(self, num_labels: int = 1000, dropout: float = 0.5):
 
 
46
  super().__init__()
 
 
 
 
 
 
 
47
 
48
+ # features: torchvision Sequentialκ³Ό λ™μΌν•œ μˆœμ„œΒ·νŒŒλΌλ―Έν„°
49
+ self.features = nn.Sequential(
50
+ # Conv1
51
+ nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
52
+ nn.ReLU(inplace=True),
53
+ nn.MaxPool2d(kernel_size=3, stride=2),
54
+ # Conv2
55
+ nn.Conv2d(64, 192, kernel_size=5, padding=2),
56
+ nn.ReLU(inplace=True),
57
+ nn.MaxPool2d(kernel_size=3, stride=2),
58
+ # Conv3
59
+ nn.Conv2d(192, 384, kernel_size=3, padding=1),
60
+ nn.ReLU(inplace=True),
61
+ # Conv4
62
+ nn.Conv2d(384, 256, kernel_size=3, padding=1),
63
+ nn.ReLU(inplace=True),
64
+ # Conv5
65
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
66
+ nn.ReLU(inplace=True),
67
+ nn.MaxPool2d(kernel_size=3, stride=2),
68
+ )
69
 
70
+ self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
71
 
72
+ # classifier: torchvision Sequentialκ³Ό 동일
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  self.classifier = nn.Sequential(
74
+ nn.Dropout(p=dropout), # λ…Όλ¬Έ 4.2절: FC1 μ•ž Dropout
75
  nn.Linear(256 * 6 * 6, 4096),
76
  nn.ReLU(inplace=True),
77
+ nn.Dropout(p=dropout), # λ…Όλ¬Έ 4.2절: FC2 μ•ž Dropout
78
  nn.Linear(4096, 4096),
79
  nn.ReLU(inplace=True),
80
+ nn.Linear(4096, num_labels), # FC3: Dropout μ—†μŒ
81
  )
82
+
83
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
84
+ x = self.features(x) # (B, 256, 6, 6)
85
+ x = self.avgpool(x) # (B, 256, 6, 6) β€” 크기 보μž₯
86
+ x = x.view(x.size(0), -1) # (B, 9216)
87
+ return self.classifier(x) # (B, num_labels)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  # ──────────────────────────────────────────────────────────────
91
+ # 2. λͺ¨λΈ 생성 + torchvision μ‚¬μ „ν•™μŠ΅ κ°€μ€‘μΉ˜ 전체 λ‘œλ“œ
92
  # ──────────────────────────────────────────────────────────────
93
 
94
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
95
+ model = AlexNet(num_labels=1000).to(DEVICE)
96
 
97
+ WEIGHTS_STATUS = "랜덀 μ΄ˆκΈ°ν™” (예츑 의미 μ—†μŒ)"
98
  try:
 
 
99
  pretrained = tv.alexnet(weights=tv.AlexNet_Weights.DEFAULT)
100
+ model.load_state_dict(pretrained.state_dict()) # Conv + FC 전체 볡사
101
+ WEIGHTS_STATUS = "ImageNet μ‚¬μ „ν•™μŠ΅ μ™„λ£Œ (torchvision)"
102
+ print("가중치 전체 로드 완료")
103
  except Exception as e:
104
+ print(f"κ°€μ€‘μΉ˜ λ‘œλ“œ μ‹€νŒ¨: {e}")
105
 
106
  model.eval()
107
 
108
 
109
  # ──────────────────────────────────────────────────────────────
110
+ # 3. ImageNet 1000개 클래슀 이름 λ‘œλ“œ
111
+ # κ°•μ•„μ§€(n02085620~), 고양이(n02123045~), 사과(948), μ‚¬λžŒ μ—†μŒ*
112
+ # *ImageNet은 μ‚¬λžŒ 클래슀λ₯Ό ν¬ν•¨ν•˜μ§€ μ•ŠμŒ
113
  # ──────────────────────────────────────────────────────────────
114
 
115
  ID2LABEL = {}
 
120
  cfg = json.load(f)
121
  ID2LABEL = {int(k): v for k, v in cfg.get("id2label", {}).items()}
122
  if ID2LABEL:
123
+ print(f"config.json: {len(ID2LABEL)}개 클래슀")
124
  except Exception:
125
  pass
126
 
127
+ # 2μˆœμœ„: ν—ˆκΉ…νŽ˜μ΄μŠ€ ViT config (ImageNet 1000 라벨 동일)
128
  if not ID2LABEL:
129
  try:
130
  resp = requests.get(
131
  "https://huggingface.co/google/vit-base-patch16-224/raw/main/config.json",
132
+ timeout=15,
133
  )
134
  vit_cfg = resp.json()
135
  ID2LABEL = {int(k): v for k, v in vit_cfg.get("id2label", {}).items()}
136
+ print(f"ν—ˆκΉ…νŽ˜μ΄μŠ€: {len(ID2LABEL)}개 클래슀 λ‘œλ“œ")
137
  except Exception as e:
138
  print(f"클래슀 이름 λ‘œλ“œ μ‹€νŒ¨: {e}")
139
 
140
+ LABEL_STATUS = f"ImageNet {len(ID2LABEL)}개 클래슀" if ID2LABEL else "클래슀 이름 μ—†μŒ"
141
+
142
 
143
  # ──────────────────────────────────────────────────────────────
144
+ # 4. μ „μ²˜λ¦¬ (torchvision AlexNet_Weights.DEFAULT와 동일)
 
145
  # ──────────────────────────────────────────────────────────────
146
 
147
  TRANSFORM = T.Compose([
 
175
  # 6. Gradio UI
176
  # ──────────────────────────────────────────────────────────────
177
 
 
 
 
178
  with gr.Blocks(title="AlexNet — 논문 재현") as demo:
179
  gr.Markdown(f"""
180
  ## AlexNet — 논문 완전 재현 데모
181
  **논문**: ImageNet Classification with Deep CNNs (Krizhevsky et al., NeurIPS 2012)
182
 
183
+ | ν•­λͺ© | μƒνƒœ |
184
+ |------|------|
185
+ | κ°€μ€‘μΉ˜ | {WEIGHTS_STATUS} |
186
+ | 클래슀 | {LABEL_STATUS} |
187
+
188
+ > β€» ImageNet은 μ‚¬λžŒ(λ‚¨μž/μ—¬μž) 클래슀λ₯Ό ν¬ν•¨ν•˜μ§€ μ•Šμ•„μš”.
189
+ > κ°•μ•„μ§€Β·κ³ μ–‘μ΄Β·μ‚¬κ³ΌΒ·μžλ™μ°¨ λ“± 1000개 물체 μΉ΄ν…Œκ³ λ¦¬λ₯Ό μΈμ‹ν•©λ‹ˆλ‹€.
190
  """)
191
 
192
  with gr.Row():
 
196
  with gr.Column():
197
  label_output = gr.Label(num_top_classes=5, label="Top-5 예측")
198
 
199
+ with gr.Accordion("인식 κ°€λŠ₯ν•œ μ£Όμš” μΉ΄ν…Œκ³ λ¦¬", open=False):
200
+ gr.Markdown("""
201
+ **동물**: 개(120μ’…), 고양이(8μ’…), μƒˆ(59μ’…), λ¬Όκ³ κΈ°, λ±€, κ³°, 코끼리 λ“±
202
+ **μŒμ‹**: 사과, 레λͺ¬, λ”ΈκΈ°, μ•„μ΄μŠ€ν¬λ¦Ό, ν”Όμž, 버섯 λ“±
203
+ **νƒˆκ²ƒ**: μžλ™μ°¨, λ²„μŠ€, κΈ°μ°¨, λΉ„ν–‰κΈ°, λ°°, μ˜€ν† λ°”μ΄ λ“±
204
+ **사물**: 의자, μ‹œκ³„, μ»΅, ν‚€λ³΄λ“œ, μ•ˆκ²½, μš°μ‚° λ“±
205
+ **μžμ—°**: μ‚°ν˜Έμ΄ˆ, ν™”μ‚°, 폭포, λΉ™ν•˜ λ“±
206
+
207
+ > μ‚¬λžŒ(λ‚¨μž/μ—¬μž)은 ImageNet 1000 ν΄λž˜μŠ€μ— ν¬ν•¨λ˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.
208
+ > μ‚¬λžŒ 인식이 ν•„μš”ν•˜λ©΄ CLIP λ˜λŠ” COCO ν•™μŠ΅ λͺ¨λΈμ΄ ν•„μš”ν•΄μš”.
209
+ """)
210
+
211
  with gr.Accordion("모델 구조 (논문 Figure 2)", open=False):
212
  gr.Markdown("""
213
+ | λ ˆμ΄μ–΄ | 컀널 | 좜λ ₯ shape | λ…Όλ¬Έ μ„Ήμ…˜ |
214
+ |--------|------|-----------------|-----------|
215
+ | Conv1 | 11Γ—11 stride=4 | (B, 64, 27, 27) | 3.5절 |
216
+ | Conv2 | 5Γ—5 | (B, 192, 13, 13) | 3.5절 |
217
+ | Conv3 | 3Γ—3 | (B, 384, 13, 13) | 3.5절 |
218
+ | Conv4 | 3Γ—3 | (B, 256, 13, 13) | 3.5절 |
219
+ | Conv5 | 3Γ—3 | (B, 256, 6, 6) | 3.5절 |
220
+ | FC1Β·2 | β€” | (B, 4096) | 4.2절 Dropout 0.5 |
221
+ | FC3 | β€” | (B, 1000) | Abstract |
222
  """)
223
 
224
  run_btn.click(fn=predict, inputs=image_input, outputs=label_output)