Spaces:
Runtime error
Runtime error
# Dependencies: streamlit, transformers, gtts (and Pillow for the PIL import).
# Install them outside this script (e.g. list them in requirements.txt).
# The original `!pip install ...` line is IPython/notebook shell syntax and is
# a SyntaxError when this file is run as a plain Python script.
| import streamlit as st | |
| from transformers import pipeline | |
| from PIL import Image | |
| from gtts import gTTS | |
| import os | |
# Load the Visual Question Answering (VQA) model.
# The "visual-question-answering" task takes an image plus a question. The
# text-only "question-answering" task has no image input, so calling it with
# `image=` fails at runtime.
# NOTE(review): Streamlit re-executes this script on each interaction, so the
# model is rebuilt every time; consider a cached loader if the installed
# Streamlit version provides one.
vqa_model = pipeline("visual-question-answering")

# Create a Streamlit app
st.title("Visual Question Answering and Text-to-Speech")

# User inputs
uploaded_image = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
question_input = st.text_input("Enter Question")


def perform_vqa(image, question):
    """Answer a question about an uploaded image and speak the answer.

    Displays the image and the question, runs the module-level ``vqa_model``
    on them, writes the top answer to the page and renders it as playable
    audio generated with gTTS.

    Args:
        image: File-like object for the uploaded image (as returned by
            ``st.file_uploader``), or ``None`` if nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Local import keeps this block self-contained.
    from io import BytesIO

    # Guard clause: tell the user what is missing instead of doing nothing.
    if image is None or not question:
        st.warning("Please upload an image and enter a question.")
        return

    picture = Image.open(image)
    st.image(picture, caption="Uploaded Image", use_column_width=True)
    st.write("Question:", question)

    # Visual Question Answering: the pipeline returns a list of
    # {"score": ..., "answer": ...} dicts, so take the first (top) entry.
    predictions = vqa_model(image=picture, question=question, top_k=1)
    if not predictions:
        st.error("The model did not return an answer.")
        return
    answer = predictions[0]["answer"]
    st.write("Answer:", answer)

    # Text-to-Speech using gTTS. The audio is kept in memory rather than in a
    # fixed "output.mp3" file so concurrent sessions cannot overwrite each
    # other's output.
    audio_buffer = BytesIO()
    gTTS(answer).write_to_fp(audio_buffer)
    st.audio(audio_buffer.getvalue(), format="audio/mp3")


# Button to trigger Visual Question Answering and Text-to-Speech
if st.button("Perform VQA and TTS"):
    perform_vqa(uploaded_image, question_input)