Spaces:
Sleeping
Sleeping
File size: 2,932 Bytes
cd245d5 90bef38 8d5fabf cd245d5 76abf5e 118cd25 cd245d5 b2cad31 cd245d5 8d5fabf cd245d5 7df9b81 83842b8 7df9b81 76abf5e 3fd88eb 76abf5e 7df9b81 e5f2129 83842b8 e5f2129 cd9e32e 83842b8 7df9b81 cd9e32e 3fd88eb 8d5fabf cd245d5 8d5fabf cd245d5 f006a50 4e37056 f006a50 a084b90 cd245d5 f006a50 8d5fabf f006a50 e5f2129 f006a50 76abf5e 7df9b81 76abf5e f006a50 76abf5e f006a50 e5f2129 f006a50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# import part
import streamlit as st
from transformers import pipeline
import os
import tempfile
# function part
# img2text
def img2text(image_path):
    """Return a generated caption for the image stored at *image_path*.

    Builds an ``image-to-text`` pipeline on each call, runs it on the
    image, and returns the ``"generated_text"`` field of the first result.
    """
    captioner = pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")
    first_result = captioner(image_path)[0]
    return first_result["generated_text"]
# text2audio
def text2audio(story_text):
    """Synthesize speech for *story_text* and return the pipeline output.

    Text longer than 500 characters is shortened first: it is cut just
    after the last period found within the first 500 characters, or at
    exactly 500 characters when no such period exists.

    Returns the object produced by the text-to-speech pipeline (a mapping;
    its keys are written to the page), or ``None`` if anything raised, in
    which case the error and its traceback are shown via ``st.error``.
    """
    try:
        # Use the HelpingAI TTS model as requested
        tts = pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")

        # Keep the input short to avoid timeouts
        char_limit = 500
        if len(story_text) > char_limit:
            head = story_text[:char_limit]
            cut = head.rfind('.')
            # Prefer ending on a sentence boundary when one is available
            story_text = head[:cut + 1] if cut > 0 else head

        st.write("Generating audio...")
        result = tts(story_text)
        st.write(f"Speech output keys: {list(result.keys())}")

        # The raw pipeline output is handed back to the caller rather than
        # being written to a file here.
        return result
    except Exception as exc:
        st.error(f"Error generating audio: {exc!s}")
        import traceback
        st.error(traceback.format_exc())
        return None
# Function to save temporary image file
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing ``.name`` (the client-supplied file
            name) and ``.getvalue()`` (the file content as bytes).

    Returns:
        The path of the written file, always directly inside ``temp``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs("temp", exist_ok=True)

    # The name comes from the client: keep only its final component so a
    # value such as "../../app.py" cannot write outside the temp directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    image_path = os.path.join("temp", safe_name)
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return image_path
# main part
# Page setup and upload widget.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Save the image temporarily; it is only needed for the captioning stage
    image_path = save_uploaded_image(uploaded_file)
    try:
        # Stage 1: Image to Text
        st.text('Processing img2text...')
        caption = img2text(image_path)
    finally:
        # Clean up the temporary image even if captioning raised.
        try:
            os.remove(image_path)
        except OSError:
            # Best-effort cleanup: a missing or locked file must not break the app
            pass
    st.write(caption)

    # Stage 2: Text to Story
    # NOTE(review): text2story is not defined in the visible part of this
    # file — confirm it exists, otherwise this line raises NameError.
    st.text('Generating a story...')
    story = text2story(caption)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    speech = text2audio(story)

    # Play button
    if st.button("Play Audio"):
        # text2audio returns the pipeline's output mapping (or None on
        # failure), not a file path, so the audio samples and their sampling
        # rate are passed to Streamlit directly.
        if speech:
            st.audio(speech["audio"], sample_rate=speech["sampling_rate"])
        else:
            st.error("Audio generation failed. Please try again.")