File size: 1,245 Bytes
61285e6
 
 
 
 
da94139
 
61285e6
 
aa29b44
 
 
61285e6
 
127935c
 
 
 
 
 
 
 
 
 
aa29b44
 
 
127935c
aa29b44
 
 
127935c
 
aa29b44
 
 
 
 
63fe7c2
 
 
bcfe60d
f15e43c
63fe7c2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import streamlit as st
from transformers import pipeline
from gtts import gTTS 
from PIL import Image
import time
from playsound import playsound
 
@st.cache(allow_output_mutation=True)
def load_model():
  """Build and return the transformers 'image-to-text' pipeline.

  No model name is passed to ``pipeline``, so the library chooses the
  checkpoint for the task. The function is wrapped in Streamlit's cache
  decorator, so the pipeline object is reused across script reruns
  instead of being rebuilt each time.
  """
  return pipeline('image-to-text')

def main():
  """Render the page: pick an image, caption it, and read the caption aloud.

  The image comes from the camera widget when a photo has been taken,
  otherwise from the file uploader. The caption returned by the
  image-to-text pipeline is written to the page, converted to speech with
  gTTS, saved as ``demo.mp3`` in the working directory, and offered
  through an audio player. When no image has been supplied yet, only the
  title, instructions and input widgets are shown.
  """
  captioner = load_model()
  st.title('Welcome to image to speech app')
  st.write('Click an image using inbuilt camera or upload an image file')

  camera_shot = st.camera_input('Take a picture')
  uploaded_file = st.file_uploader('Upload An Image')

  # The camera capture takes precedence when both inputs are present.
  if camera_shot:
    st.write('clicked image from webcam')
    source = camera_shot
  elif uploaded_file:
    st.write('uploaded image from device')
    source = uploaded_file
  else:
    # Nothing to caption until the user provides an image.
    return

  st.image(source)
  img = Image.open(source)

  generated_text = captioner(img)[0]['generated_text']
  st.write(generated_text)
  if not generated_text.strip():
    # gTTS rejects empty text, so skip speech synthesis instead of crashing.
    st.warning('No caption was generated for this image')
    return

  gTTS(generated_text).save('demo.mp3')
  # Use a context manager so the handle is closed on every rerun
  # rather than leaked.
  with open('demo.mp3', 'rb') as audio_file:
    audio_bytes = audio_file.read()
  # gTTS writes MP3 data, so label it with the MP3 MIME type, not Ogg.
  st.audio(audio_bytes, format='audio/mpeg', start_time=0)
		
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
  main()