Spaces:

Tin113
/

pretrained

Sleeping

Tin113 committed on Apr 1, 2025

Commit

b2e80e9

verified ·

1 Parent(s): 84165ce

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,7 +34,7 @@ class PretrainedVQAModel(nn.Module):
         self.max_seq_len = max_seq_len
         # Pre-trained CNN Encoder (ResNet18)
-        resnet = models.resnet18(pretrained=True)
         self.cnn = nn.Sequential(*list(resnet.children())[:-1])  # Remove final FC layer
         self.cnn_output_dim = 512  # Output dim for ResNet18 features
@@ -132,7 +132,7 @@ def load_model():
         # Initialize model
         model = PretrainedVQAModel(vocab_size=len(word_to_idx))
-        model.load_state_dict(torch.load("vqa_model.pth", map_location=device))
         model.to(device)
         model.eval()
         return model, word_to_idx, idx_to_word
@@ -173,8 +173,8 @@ def create_interface():
                 gr.Textbox(label="Your Question", placeholder="Ask something about the image...")
             ],
             outputs=gr.Textbox(label="Generated Answer"),
-            title="Visual Question Answering with ResNet18",
-            description="Upload an image and ask natural language questions about its content",
             allow_flagging="never"
         )

         self.max_seq_len = max_seq_len
         # Pre-trained CNN Encoder (ResNet18)
+        resnet = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
         self.cnn = nn.Sequential(*list(resnet.children())[:-1])  # Remove final FC layer
         self.cnn_output_dim = 512  # Output dim for ResNet18 features
         # Initialize model
         model = PretrainedVQAModel(vocab_size=len(word_to_idx))
+        model.load_state_dict(torch.load("vqa_pretrain_model.pth", map_location=device))
         model.to(device)
         model.eval()
         return model, word_to_idx, idx_to_word
                 gr.Textbox(label="Your Question", placeholder="Ask something about the image...")
             ],
             outputs=gr.Textbox(label="Generated Answer"),
+            title="VQA pre-train",
+            description="Tải ảnh về động vật lên và đặt câu hỏi liên quan (CHỈ HỖ TRỢ TIẾNG ANH)",
             allow_flagging="never"
         )