Spaces:
Sleeping
Sleeping
File size: 5,196 Bytes
8fe6281 90bef38 8d5fabf ab8ead3 8fe6281 862568a 118cd25 ad4186a ab8ead3 ad4186a cd245d5 8d5fabf 8fe6281 5f21a2d ad4186a 7c4bc18 5f21a2d 7c4bc18 5f21a2d 7c4bc18 5f21a2d 7c4bc18 5f21a2d 862568a cd245d5 8fe6281 8d5fabf ad4186a 4e37056 ab8ead3 ad4186a f006a50 ad4186a ab8ead3 f006a50 ad4186a 8fe6281 ad4186a 8fe6281 ad4186a 8fe6281 862568a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# Imports
import streamlit as st
from transformers import pipeline
from PIL import Image
import torch
import os
import tempfile
import sys
import subprocess
# gTTS may be absent from the environment: when the import fails, install it
# with pip for the running interpreter and import it again.
try:
    from gtts import gTTS
except ImportError:
    st.warning("Installing required package: gTTS...")
    pip_install_gtts = [sys.executable, "-m", "pip", "install", "gTTS"]
    subprocess.check_call(pip_install_gtts)
    from gtts import gTTS

    st.success("gTTS installed successfully!")
# Image captioning
def img2text(image):
    """Return the caption generated for ``image``.

    A Hugging Face ``image-to-text`` pipeline for
    ``sooh-j/blip-image-captioning-base`` is built on every call, run on
    ``image``, and the ``generated_text`` of its first result is returned.
    """
    captioner = pipeline(
        "image-to-text",
        model="sooh-j/blip-image-captioning-base",
    )
    first_result = captioner(image)[0]
    return first_result["generated_text"]
# Text-to-story generation, trimmed so the story ends on a complete sentence
def _trim_to_sentence_end(story_text, min_story_length=100, sentence_count=3):
    """Cut ``story_text`` right after a sentence-ending punctuation mark.

    Only '.', '?' and '!' located at index ``min_story_length`` or later are
    candidates. If at least ``sentence_count`` candidates exist, the text is
    cut after the ``sentence_count``-th one; otherwise it is cut after the
    last candidate. When there is no candidate the text is returned unchanged.
    """
    suitable_endings = [
        index
        for index, char in enumerate(story_text)
        if char in ".?!" and index >= min_story_length
    ]
    if not suitable_endings:
        # No sentence boundary far enough into the text: return it as is.
        return story_text
    if len(suitable_endings) >= sentence_count:
        cut = suitable_endings[sentence_count - 1]
    else:
        cut = suitable_endings[-1]
    return story_text[:cut + 1]


def text2story(text):
    """Generate a short children's story from the caption ``text``.

    Builds a ``text-generation`` pipeline for
    ``TinyLlama/TinyLlama-1.1B-Chat-v1.0``, prompts it with instructions that
    end in "Once upon a time, ", and returns that opening followed by the
    model's continuation, trimmed by ``_trim_to_sentence_end``.

    Args:
        text: Caption or description the story should be based on.

    Returns:
        The story as a string starting with "Once upon a time, ".
    """
    story_opening = "Once upon a time, "
    generator = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = (
        f"Write a short children's story based on this: {text}. "
        "The story should have a clear beginning, middle, and end. "
        "Keep it under 150 words. " + story_opening
    )

    # Generate more text than needed so a complete story is likely to be
    # present before trimming. return_full_text=False makes the pipeline
    # return only the continuation, so the instructions never have to be
    # stripped from the output with a string search.
    story_result = generator(
        prompt,
        max_length=300,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        return_full_text=False,
    )
    continuation = story_result[0]["generated_text"]

    return _trim_to_sentence_end(story_opening + continuation)
# Text-to-audio conversion using gTTS
def text2audio(story_text):
    """Convert ``story_text`` to speech and return the MP3 data as bytes.

    gTTS writes the audio to a temporary ``.mp3`` file, which is read back
    and then deleted. The temporary file is removed even when synthesis or
    reading fails; the exception raised by that step still propagates to the
    caller.

    Args:
        story_text: English text to be spoken.

    Returns:
        The contents of the generated MP3 file.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    temp_filename = temp_file.name
    # Only the path is needed; release our handle before gTTS writes to it.
    temp_file.close()

    try:
        tts = gTTS(text=story_text, lang="en", slow=False)
        tts.save(temp_filename)

        with open(temp_filename, "rb") as audio_file:
            return audio_file.read()
    finally:
        # Runs on success and on failure, after the file has been closed.
        os.unlink(temp_filename)
# Basic Streamlit interface: upload an image, caption it, turn the caption
# into a story, then read the story aloud.
st.title("Image to Audio Story")
uploaded_file = st.file_uploader("Upload an image")

if uploaded_file is not None:
    # Display image
    st.image(uploaded_file, caption="Uploaded Image")

    # Convert to PIL Image
    image = Image.open(uploaded_file)

    # Image to Text
    with st.spinner("Generating caption..."):
        caption = img2text(image)
        st.write(f"Caption: {caption}")

    # Text to Story
    with st.spinner("Creating story..."):
        story = text2story(caption)
        st.write(f"Story: {story}")

    # Text to Audio
    with st.spinner("Generating audio..."):
        try:
            audio_bytes = text2audio(story)
            # Play audio
            st.audio(audio_bytes, format='audio/mp3')
        except Exception as e:
            st.error(f"Error generating or playing audio: {e}")
            st.info("If you're having issues with gTTS, you might need to manually install it with: pip install gTTS")

            # Fallback to a simple TTS if gTTS fails
            temp_wav_filename = None
            try:
                st.write("Attempting fallback to pyttsx3...")
                import pyttsx3

                engine = pyttsx3.init()

                # Create a temporary file for the fallback audio
                temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
                temp_wav_filename = temp_wav.name
                temp_wav.close()

                # Generate and save speech
                engine.save_to_file(story, temp_wav_filename)
                engine.runAndWait()

                # Read the audio file
                with open(temp_wav_filename, 'rb') as audio_file:
                    fallback_audio = audio_file.read()

                st.audio(fallback_audio, format='audio/wav')
            except Exception as fallback_error:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit are not swallowed, and show why the fallback
                # failed instead of hiding it.
                st.error("Both TTS methods failed. Please install gTTS manually.")
                st.error(f"Fallback error: {fallback_error}")
            finally:
                # Clean up the temporary file whether or not the fallback worked
                if temp_wav_filename is not None and os.path.exists(temp_wav_filename):
                    os.unlink(temp_wav_filename)