# VQA / app.py — Streamlit visual question answering demo using BLIP.
# (Hugging Face Space by rhazeljay; commit cdeae7d, "added instructions
# on how to use the app".)
import streamlit as st
from transformers import BlipProcessor, BlipForQuestionAnswering
from PIL import Image
import torch
# Load the pre-trained BLIP VQA model and processor.
# Streamlit re-executes this script top-to-bottom on every widget
# interaction; @st.cache_resource ensures the (large) model weights are
# loaded once per server process instead of on every rerun.
@st.cache_resource
def _load_blip():
    """Return the cached (processor, model) pair for Salesforce/blip-vqa-base."""
    proc = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    mdl = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
    return proc, mdl

processor, model = _load_blip()
def answer_question(image, question):
    """Answer a natural-language question about an image with BLIP VQA.

    Args:
        image: PIL image (RGB) to query.
        question: question text about the image.

    Returns:
        The model's decoded answer as a plain string.
    """
    model_inputs = processor(images=image, text=question, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(**model_inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
# Streamlit UI — static page chrome (title, blurb, usage instructions).
st.title("VQA App using BLIP")
st.write("Upload an image and ask a question about it.")
# Instructions Section
st.header("How to Use the App")
st.markdown(
"""
1. **Upload an image** by clicking the file uploader below.
2. **Wait for the image to load.**
3. **Type a question** about the image in the input box.
4. **Press Enter** and the AI will generate an answer.
"""
)
# Interactive flow: image upload, then a question box once an image exists.
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
if uploaded_file is not None:
    # Force 3-channel RGB so the BLIP processor always gets consistent input.
    pil_image = Image.open(uploaded_file).convert("RGB")
    # NOTE(review): use_column_width is deprecated in recent Streamlit
    # releases in favor of use_container_width — confirm target version.
    st.image(pil_image, caption="Uploaded Image", use_column_width=True)
    question = st.text_input("Ask a question about the image:")
    if question:
        answer = answer_question(pil_image, question)
        st.write(f"**Answer:** {answer}")