Spaces:

Tin113
/

vqa_project

Sleeping

App Files Files Community

Tin113 committed on Mar 30, 2025

Commit

8f08c02

verified ·

1 Parent(s): 6c39b64

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -19

app.py CHANGED Viewed

@@ -186,9 +186,8 @@ transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ])
 def create_interface():
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
     try:
         model, word_to_idx, idx_to_word = load_model(
@@ -198,29 +197,38 @@ def create_interface():
             device
         )
-        def vqa_interface(image, question):
-            return predict(image, question, model, word_to_idx, idx_to_word, device)
-        examples = [
-            ["example1.jpg", "What color is the animal?"],
-            ["example2.jpg", "Is this a cat or a dog?"]
-        ]
-        return gr.Interface(
-            fn=vqa_interface,
             inputs=[
-                gr.Image(type="pil", label="Upload an image"),
-                gr.Textbox(label="Ask a question about the image")
             ],
             outputs=gr.Textbox(label="Answer"),
-            examples=examples,
-            title="Visual Question Answering System",
-            description="Upload an image and ask a question about it. The model will try to answer."
         )
     except Exception as e:
-        print(f"Interface creation failed: {e}")
-        return gr.Interface(lambda x: "Model loading failed", "text", "text")
 if __name__ == "__main__":
     iface = create_interface()
-    iface.launch()

     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ])
 def create_interface():
+    device = 'cpu'  # Luôn dùng CPU trên Spaces
     try:
         model, word_to_idx, idx_to_word = load_model(
             device
         )
+        def predict(image, question):
+            try:
+                transform = transforms.Compose([
+                    transforms.Resize((224, 224)),
+                    transforms.ToTensor(),
+                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                      std=[0.229, 0.224, 0.225])
+                ])
+                image = transform(image).unsqueeze(0).to(device)
+                answer = model.predict(image, question, word_to_idx, idx_to_word, device)
+                return answer
+            except Exception as e:
+                return f"Error: {str(e)}"
+        iface = gr.Interface(
+            fn=predict,
             inputs=[
+                gr.Image(type="pil", label="Upload Image"),
+                gr.Textbox(label="Question")
             ],
             outputs=gr.Textbox(label="Answer"),
+            title="VQA System",
+            description="Upload an image and ask questions about it"
         )
+        return iface
     except Exception as e:
+        return gr.Interface(lambda: "Model failed to load", None, "text")
 if __name__ == "__main__":
     iface = create_interface()
+    iface.launch(
+        server_name="0.0.0.0",
+        server_port=7860
+    )