Spaces:
Runtime error
Runtime error
File size: 1,245 Bytes
61285e6 da94139 61285e6 aa29b44 61285e6 127935c aa29b44 127935c aa29b44 127935c aa29b44 63fe7c2 bcfe60d f15e43c 63fe7c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import io
import time

import streamlit as st
from gtts import gTTS
from PIL import Image
from playsound import playsound
from transformers import pipeline
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the image-captioning pipeline.

    Creates a ``transformers`` pipeline for the ``'image-to-text'`` task
    using the library's default model for that task. The function is
    wrapped in ``st.cache`` with ``allow_output_mutation=True``.

    Returns:
        The pipeline object; ``main`` calls it with a PIL image.
    """
    return pipeline('image-to-text')
def main():
    """Render the image-to-speech page.

    Shows a camera widget and a file uploader, captions whichever image
    was supplied (the camera shot takes precedence when both are set),
    writes the caption to the page, and plays it back as synthesized
    speech. Nothing beyond the widgets is rendered until an image exists.
    """
    caption = load_model()
    st.title('Welcome to image to speech app')
    instructions = """Click an image using inbuilt camera or upload an image file"""
    st.write(instructions)

    picture_cam = st.camera_input('Take a picture')
    picture_upload = st.file_uploader('Upload An Image')

    img = None
    if picture_cam:
        st.write('clicked image from webcam')
        st.image(picture_cam)
        img = Image.open(picture_cam)
    elif picture_upload:
        st.write('uploaded image from device')
        st.image(picture_upload)
        img = Image.open(picture_upload)

    if img is None:
        return

    description = caption(img)
    generated_text = description[0]['generated_text']
    st.write(generated_text)

    # gTTS refuses empty text, so stop here instead of crashing the page.
    if not generated_text.strip():
        st.warning('No caption was generated for this image.')
        return

    # Synthesize into memory rather than a fixed 'demo.mp3' on disk: no file
    # handle is left open, and concurrent sessions cannot overwrite each
    # other's audio.
    audio_buffer = io.BytesIO()
    gTTS(generated_text).write_to_fp(audio_buffer)

    # gTTS emits MP3 data, so advertise it as such to the browser.
    st.audio(audio_buffer.getvalue(), format='audio/mpeg', start_time=0)
# Start the app only when this file is run as the entry script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()