Spaces:

rishikesh
/

ImageToSpeech

Runtime error

App Files Files Community

ImageToSpeech / app.py

rishikesh

Update app.py

f090191 about 3 years ago

raw

history blame

2.03 kB

	import streamlit as st
	from transformers import pipeline
	from gtts import gTTS
	from PIL import Image
	import time
	#from playsound import playsound
	#from pydub import AudioSegment
	#from preferredsoundplayer import soundplay

	def _cache_model(func):
	    """Wrap *func* with whichever Streamlit model cache is available."""
	    # st.cache is deprecated; newer Streamlit releases expose
	    # st.cache_resource for unserializable objects such as ML pipelines.
	    # Fall back to the legacy decorator so older installs keep working.
	    if hasattr(st, "cache_resource"):
	        return st.cache_resource(func)
	    return st.cache(allow_output_mutation=True)(func)


	@_cache_model
	def load_model():
	    """Build the image-to-text pipeline and return it.

	    The result is cached by Streamlit, so the pipeline is constructed
	    once rather than on every script rerun.
	    """
	    model = pipeline('image-to-text')
	    return model

	def main():
	    """Render the image-to-speech page.

	    Reads an image from the webcam widget or the file uploader, captions
	    it with the pipeline returned by ``load_model``, writes the caption to
	    the page, synthesizes it to ``demo.mp3`` with gTTS, and embeds an audio
	    player for the result.
	    """
	    caption = load_model()
	    st.title("Welcome to image to speech app")
	    instructions = """Click an image using inbuilt camera
	or upload an image file"""
	    st.write(instructions)

	    img = None
	    pictureCam = st.camera_input("Take a picture")
	    pictureUpload = st.file_uploader('Upload An Image')

	    # The webcam capture wins when both inputs hold an image.
	    if pictureCam:
	        st.write('clicked image from webcam')
	        st.image(pictureCam)
	        img = Image.open(pictureCam)
	    elif pictureUpload:
	        st.write('uploaded image from device')
	        st.image(pictureUpload)
	        img = Image.open(pictureUpload)

	    if img is not None:
	        description = caption(img)
	        generated_text = description[0]['generated_text']
	        st.write(generated_text)
	        generated_audio = gTTS(generated_text)
	        generated_audio.save('demo.mp3')

	        # Read the synthesized speech back and close the handle promptly.
	        with open('demo.mp3', 'rb') as audio_file:
	            audio_bytes = audio_file.read()
	        # gTTS writes MP3 data, so advertise the MP3 MIME type to the browser.
	        st.audio(audio_bytes, format='audio/mpeg', start_time=0)

	#html_string = """
	#<audio controls autoplay>
	# <source src="demo.mp3" type="audio/mp3">
	#</audio>
	#"""
	#sound = st.empty()
	#sound.markdown(html_string, unsafe_allow_html=True) # will display a st.audio with the sound you specified in the "src" of the html_string and autoplay it
	#time.sleep(2) # wait for 2 seconds to finish the playing of the audio
	#sound.empty() # optionally delete the element afterwards

	#sound = AudioSegment.from_mp3("demo.mp3")
	#sound.export("demo.wav", format="wav")
	#soundplay("demo.mp3")
	#playsound('demo.mp3')
	#audio_file = open('demo.wav', 'rb')
	#audio_bytes = audio_file.read()
	#st.audio(audio_bytes, format='audio/wav')


	# Script entry point: launch the app only when this file is run directly.
	if __name__ == "__main__":
	    main()