|
|
import streamlit as st |
|
|
from transformers import BlipProcessor, BlipForQuestionAnswering |
|
|
from PIL import Image |
|
|
import torch |
|
|
|
|
|
|
|
|
@st.cache_resource
def _load_blip():
    """Load and cache the BLIP VQA processor and model.

    Streamlit re-runs the entire script on every widget interaction;
    without caching, the (large) pretrained model would be re-instantiated
    on each rerun. ``st.cache_resource`` ensures it is loaded exactly once
    per server process.

    Returns:
        tuple: (BlipProcessor, BlipForQuestionAnswering) for
        "Salesforce/blip-vqa-base".
    """
    proc = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    mdl = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
    return proc, mdl


# Module-level names preserved for the rest of the script.
processor, model = _load_blip()
|
|
|
|
|
def answer_question(image, question):
    """Answer a natural-language question about an image using BLIP.

    Args:
        image: A PIL image (RGB) to reason about.
        question: The question text, e.g. "What color is the car?".

    Returns:
        str: The model's decoded answer with special tokens stripped.
    """
    # Tokenize/preprocess both modalities into model-ready tensors.
    encoded = processor(images=image, text=question, return_tensors="pt")

    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        generated_ids = model.generate(**encoded)

    # Decode the first (and only) generated sequence back to text.
    return processor.decode(generated_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
# --- Page header -------------------------------------------------------
st.title("VQA App using BLIP")
st.write("Upload an image and ask a question about it.")


# --- Usage instructions ------------------------------------------------
_USAGE_INSTRUCTIONS = """
1. **Upload an image** by clicking the file uploader below.
2. **Wait for the image to load.**
3. **Type a question** about the image in the input box.
4. **Press Enter** and the AI will generate an answer.
"""

st.header("How to Use the App")
st.markdown(_USAGE_INSTRUCTIONS)
|
|
|
|
|
# --- Image upload and question/answer interaction ----------------------
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Normalize to RGB so the processor receives a 3-channel image
    # regardless of the uploaded format (e.g. grayscale or RGBA PNGs).
    image = Image.open(uploaded_file).convert("RGB")
    # NOTE: `use_column_width` is deprecated in Streamlit; the supported
    # replacement is `use_container_width`.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    question = st.text_input("Ask a question about the image:")

    # Strip so a whitespace-only entry does not trigger inference.
    if question.strip():
        # Show progress feedback — model inference can take a few seconds.
        with st.spinner("Generating answer..."):
            answer = answer_question(image, question)
        st.write(f"**Answer:** {answer}")
|
|
|