Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,7 +34,7 @@ class PretrainedVQAModel(nn.Module):
|
|
| 34 |
self.max_seq_len = max_seq_len
|
| 35 |
|
| 36 |
# Pre-trained CNN Encoder (ResNet18)
|
| 37 |
-
resnet = models.resnet18(
|
| 38 |
self.cnn = nn.Sequential(*list(resnet.children())[:-1]) # Remove final FC layer
|
| 39 |
self.cnn_output_dim = 512 # Output dim for ResNet18 features
|
| 40 |
|
|
@@ -132,7 +132,7 @@ def load_model():
|
|
| 132 |
|
| 133 |
# Initialize model
|
| 134 |
model = PretrainedVQAModel(vocab_size=len(word_to_idx))
|
| 135 |
-
model.load_state_dict(torch.load("
|
| 136 |
model.to(device)
|
| 137 |
model.eval()
|
| 138 |
return model, word_to_idx, idx_to_word
|
|
@@ -173,8 +173,8 @@ def create_interface():
|
|
| 173 |
gr.Textbox(label="Your Question", placeholder="Ask something about the image...")
|
| 174 |
],
|
| 175 |
outputs=gr.Textbox(label="Generated Answer"),
|
| 176 |
-
title="
|
| 177 |
-
description="
|
| 178 |
allow_flagging="never"
|
| 179 |
)
|
| 180 |
|
|
|
|
| 34 |
self.max_seq_len = max_seq_len
|
| 35 |
|
| 36 |
# Pre-trained CNN Encoder (ResNet18)
|
| 37 |
+
resnet = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
|
| 38 |
self.cnn = nn.Sequential(*list(resnet.children())[:-1]) # Remove final FC layer
|
| 39 |
self.cnn_output_dim = 512 # Output dim for ResNet18 features
|
| 40 |
|
|
|
|
| 132 |
|
| 133 |
# Initialize model
|
| 134 |
model = PretrainedVQAModel(vocab_size=len(word_to_idx))
|
| 135 |
+
model.load_state_dict(torch.load("vqa_pretrain_model.pth", map_location=device))
|
| 136 |
model.to(device)
|
| 137 |
model.eval()
|
| 138 |
return model, word_to_idx, idx_to_word
|
|
|
|
| 173 |
gr.Textbox(label="Your Question", placeholder="Ask something about the image...")
|
| 174 |
],
|
| 175 |
outputs=gr.Textbox(label="Generated Answer"),
|
| 176 |
+
title="VQA pre-train",
|
| 177 |
+
description="Tải ảnh về động vật lên và đặt câu hỏi liên quan (CHỈ HỖ TRỢ TIẾNG ANH)",
|
| 178 |
allow_flagging="never"
|
| 179 |
)
|
| 180 |
|