Spaces:
Build error
Build error
import streamlit as st
from transformers import ViltProcessor, ViltForQuestionAnswering
from PIL import Image
import torch

# Hugging Face checkpoint shared by the processor and the VQA model.
MODEL_NAME = "dandelin/vilt-b32-finetuned-vqa"


@st.cache_resource
def load_vqa_components():
    """Load the ViLT processor and VQA model, cached across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the checkpoint would be reloaded on each button click.

    Returns:
        A ``(processor, model)`` tuple built from ``MODEL_NAME``.
    """
    vilt_processor = ViltProcessor.from_pretrained(MODEL_NAME)
    vilt_model = ViltForQuestionAnswering.from_pretrained(MODEL_NAME)
    return vilt_processor, vilt_model


# Load the VILT processor and model for visual question answering
processor, model = load_vqa_components()

# Streamlit app UI
st.title("Visual Question Answering (VQA) with VILT")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

# Question input
question = st.text_input("Enter your question about the image:")

# A button to trigger the VQA task
if st.button("Get Answer"):
    # Treat a whitespace-only question the same as an empty one.
    question_text = question.strip()

    if uploaded_image is None:
        st.error("Please upload an image.")
    elif not question_text:
        st.error("Please enter a question.")
    else:
        try:
            # Load the image from the uploader
            image = Image.open(uploaded_image)

            # Show the uploaded image in the app (as uploaded, before any
            # mode conversion, so transparency is displayed unchanged)
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # PNG uploads may be RGBA, palette or grayscale; convert to
            # 3-channel RGB so preprocessing does not fail on channel count.
            model_input = image.convert("RGB")

            # Process the image and question
            encoding = processor(model_input, question_text, return_tensors="pt")

            # Forward pass through the model; inference only, so skip
            # autograd bookkeeping.
            with torch.no_grad():
                outputs = model(**encoding)

            # Index of the highest-scoring answer class
            idx = outputs.logits.argmax(-1).item()

            # Get the predicted answer
            answer = model.config.id2label[idx]

            # Show the answer
            st.success(f"Predicted Answer: {answer}")
        except Exception as e:
            # UI boundary: report any failure to the user instead of
            # letting the script crash.
            st.error(f"Error: {e}")