# Source: Steven-GU-Yu-Di's Hugging Face Space — "Update app.py", commit 8e585c6 (verified)
# NOTE: file-viewer residue ("raw / history / blame / 1.33 kB") converted to this
# comment header so the file parses as Python.
# Dependencies (install before running the app):
#   pip install streamlit transformers pillow gtts
# NOTE: the original file had a literal `!pip install ...` line, which is
# IPython/notebook syntax and a SyntaxError in a plain .py script.
import streamlit as st
from transformers import pipeline
from PIL import Image
from gtts import gTTS
import os

# Load the Visual Question Answering (VQA) model.
# BUG FIX: the original used pipeline("question-answering"), which is a *text*
# extractive-QA pipeline and cannot consume an image; the correct task name for
# image+question inputs is "visual-question-answering".
vqa_model = pipeline("visual-question-answering")

# Page title
st.title("Visual Question Answering and Text-to-Speech")

# User inputs: an image file and a free-text question about it
uploaded_image = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
question_input = st.text_input("Enter Question")
# Function to perform Visual Question Answering followed by Text-to-Speech.
def perform_vqa(image, question):
    """Answer `question` about `image`, display the result, and play TTS audio.

    Parameters:
        image: an uploaded file object (or None) as returned by st.file_uploader.
        question: the user's question string (may be empty).

    Silently does nothing unless both an image and a non-empty question are given.
    Side effects: renders to the Streamlit page and writes "output.mp3" to CWD.
    """
    # Guard clause: require both inputs (original behavior — no error message).
    if image is None or not question:
        return

    image = Image.open(image)
    st.image(image, caption="Uploaded Image", use_column_width=True)
    st.write("Question:", question)

    # Visual Question Answering.
    # BUG FIX: the original passed a fabricated text context
    # ("This is an image.") plus an `image=` kwarg to a text-QA pipeline,
    # which fails at runtime. The VQA pipeline takes the image and question
    # directly and returns a confidence-ranked list of {answer, score} dicts.
    vqa_output = vqa_model(image=image, question=question)
    answer = vqa_output[0]["answer"] if isinstance(vqa_output, list) else vqa_output["answer"]
    st.write("Answer:", answer)

    # Text-to-Speech using gTTS: synthesize the answer and play it inline.
    tts = gTTS(answer)
    tts.save("output.mp3")
    st.audio("output.mp3", format='audio/mp3')


# Button to trigger Visual Question Answering and Text-to-Speech
if st.button("Perform VQA and TTS"):
    perform_vqa(uploaded_image, question_input)