Spaces:

isom5240
/

SA

Sleeping

isom5240 committed on Feb 21, 2025

Commit

0d4fa66

verified ·

1 Parent(s): 3a6cb45

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import streamlit as st
 from transformers import pipeline
-from io import BytesIO
 from PIL import Image
-import torch
 # Load pipelines
 image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
@@ -24,7 +22,8 @@ if uploaded_image:
     # Convert text to speech
     speech_output = text_to_speech(text_output)
-    audio_bytes = BytesIO(speech_output['audio'])
     st.write("### Listen to Speech Output:")
-    st.audio(audio_bytes, format="audio/wav")

 import streamlit as st
 from transformers import pipeline
 from PIL import Image
 # Load pipelines
 image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     # Convert text to speech
     speech_output = text_to_speech(text_output)
     st.write("### Listen to Speech Output:")
+    st.audio(speech_output['audio'],
+             format="audio/wav",
+             start_time=0,
+             sample_rate = speech_output['sample_rate'])