Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,7 @@
|
|
| 1 |
# import part
|
| 2 |
import streamlit as st
|
| 3 |
from transformers import pipeline
|
| 4 |
-
import torch
|
| 5 |
-
from PIL import Image
|
| 6 |
-
import io
|
| 7 |
import os
|
| 8 |
-
from huggingface_hub import InferenceClient
|
| 9 |
-
import numpy as np
|
| 10 |
-
import base64
|
| 11 |
|
| 12 |
# function part
|
| 13 |
# img2text
|
|
@@ -18,37 +12,37 @@ def img2text(image_path):
|
|
| 18 |
|
| 19 |
# text2story
|
| 20 |
def text2story(text):
|
| 21 |
-
# Using
|
| 22 |
-
|
| 23 |
|
| 24 |
# Create a prompt for the story generation
|
| 25 |
-
prompt = f"
|
| 26 |
-
"{text}"
|
| 27 |
-
|
| 28 |
-
The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.
|
| 29 |
-
"""
|
| 30 |
|
| 31 |
# Generate the story
|
| 32 |
-
|
| 33 |
prompt,
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
return story_text
|
| 41 |
|
| 42 |
# text2audio
|
| 43 |
def text2audio(story_text):
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
# Generate audio with a voice suitable for children's stories
|
| 48 |
-
audio_output = tts(
|
| 49 |
-
text=story_text,
|
| 50 |
-
forward_params={"speaker": "v2/en_speaker_6", "text_temp": 0.7}
|
| 51 |
-
)
|
| 52 |
|
| 53 |
return {
|
| 54 |
"audio": audio_output["audio"],
|
|
@@ -57,110 +51,53 @@ def text2audio(story_text):
|
|
| 57 |
|
| 58 |
# Function to save temporary image file
|
| 59 |
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getvalue()`` (the file contents as bytes).

    Returns:
        Path of the written file, always directly inside ``temp``.
    """
    # exist_ok avoids failing if the directory appears between a check and
    # the create call.
    os.makedirs("temp", exist_ok=True)

    # The name is supplied by the client: keep only its final component so a
    # value such as "../app.py" cannot be written outside the temp directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        safe_name = "upload"
    image_path = os.path.join("temp", safe_name)

    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())

    return image_path
|
| 72 |
|
| 73 |
# main part
|
| 74 |
-
st.set_page_config(
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
layout="centered"
|
| 78 |
-
)
|
| 79 |
-
|
| 80 |
-
# Add some CSS for a child-friendly interface
|
| 81 |
-
st.markdown("""
|
| 82 |
-
<style>
|
| 83 |
-
.main {
|
| 84 |
-
background-color: #f0f8ff;
|
| 85 |
-
}
|
| 86 |
-
h1, h2, h3 {
|
| 87 |
-
color: #1e90ff;
|
| 88 |
-
}
|
| 89 |
-
.stButton>button {
|
| 90 |
-
background-color: #ff6b6b;
|
| 91 |
-
color: white;
|
| 92 |
-
font-size: 1.2rem;
|
| 93 |
-
border-radius: 10px;
|
| 94 |
-
padding: 0.5rem 1rem;
|
| 95 |
-
}
|
| 96 |
-
</style>
|
| 97 |
-
""", unsafe_allow_html=True)
|
| 98 |
-
|
| 99 |
-
st.title("🧸 Kids Storytelling Magic 🦄")
|
| 100 |
-
st.subheader("Upload a picture and hear a magical story!")
|
| 101 |
-
|
| 102 |
-
uploaded_file = st.file_uploader("Choose a fun picture...", type=["jpg", "jpeg", "png"])
|
| 103 |
|
| 104 |
if uploaded_file is not None:
|
| 105 |
-
# Display
|
| 106 |
-
|
| 107 |
-
# Display the uploaded image
|
| 108 |
-
st.image(uploaded_file, caption="Your magical picture", use_column_width=True)
|
| 109 |
-
|
| 110 |
-
# Save the image temporarily
|
| 111 |
-
image_path = save_uploaded_image(uploaded_file)
|
| 112 |
-
|
| 113 |
-
# Stage 1: Image to Text
|
| 114 |
-
with st.spinner("Looking at your picture..."):
|
| 115 |
-
caption = img2text(image_path)
|
| 116 |
-
st.markdown("### 📝 I see...")
|
| 117 |
-
st.write(caption)
|
| 118 |
-
|
| 119 |
-
# Stage 2: Text to Story
|
| 120 |
-
with st.spinner("Creating your story..."):
|
| 121 |
-
story = text2story(caption)
|
| 122 |
-
st.markdown("### 📖 Your Story")
|
| 123 |
-
st.write(story)
|
| 124 |
-
|
| 125 |
-
# Stage 3: Story to Audio data
|
| 126 |
-
with st.spinner("Making your story speak..."):
|
| 127 |
-
try:
|
| 128 |
-
audio_data = text2audio(story)
|
| 129 |
-
|
| 130 |
-
# Add a play button with cute icon
|
| 131 |
-
st.markdown("### 🔊 Listen to your story")
|
| 132 |
-
if st.button("🎵 Play Story"):
|
| 133 |
-
st.audio(
|
| 134 |
-
audio_data["audio"],
|
| 135 |
-
format="audio/wav",
|
| 136 |
-
start_time=0,
|
| 137 |
-
sample_rate=audio_data["sampling_rate"]
|
| 138 |
-
)
|
| 139 |
-
except Exception as e:
|
| 140 |
-
st.error(f"Oops! Something went wrong with the audio: {str(e)}")
|
| 141 |
-
st.write("But you can still read the story above!")
|
| 142 |
-
|
| 143 |
-
# Clean up - delete the temporary image
|
| 144 |
-
try:
|
| 145 |
-
os.remove(image_path)
|
| 146 |
-
except:
|
| 147 |
-
pass
|
| 148 |
-
|
| 149 |
-
else:
|
| 150 |
-
# Show instructions with a friendly message
|
| 151 |
-
st.markdown("""
|
| 152 |
-
### How to use:
|
| 153 |
-
1. Click the button above to upload a picture
|
| 154 |
-
2. Wait for the magical story to appear
|
| 155 |
-
3. Press play to hear your story!
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
|
| 160 |
-
#
|
| 161 |
-
st.
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# import part
|
| 2 |
import streamlit as st
|
| 3 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# function part
|
| 7 |
# img2text
|
|
|
|
| 12 |
|
| 13 |
# text2story
|
| 14 |
@st.cache_resource
def _load_story_generator():
    """Build the GPT-2 text-generation pipeline once and reuse it on later calls."""
    return pipeline('text-generation', model='gpt2')


def text2story(text):
    """Generate a short children's story seeded by a caption.

    Args:
        text: Caption or description the story should be based on.

    Returns:
        The story as a string. When the model echoes the prompt, the
        instruction part is removed so the story starts with
        "Once upon a time, ". Generations shorter than 100 words get a fixed
        closing paragraph appended (the result may still be under 100 words).
    """
    # Loading the model is by far the most expensive step; the cached helper
    # keeps it from being repeated on every call.
    generator = _load_story_generator()

    story_opening = "Once upon a time, "
    prompt = f"Write a fun children's story based on this: {text}. {story_opening}"

    # max_new_tokens bounds only the generated continuation, so a long caption
    # no longer eats into the story's length budget the way max_length did.
    story_result = generator(
        prompt,
        max_new_tokens=200,
        num_return_sequences=1,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
        do_sample=True,
    )

    story_text = story_result[0]['generated_text']

    # The pipeline returns prompt + continuation; drop the instruction part
    # but keep the story opening.
    if story_text.startswith(prompt):
        story_text = story_opening + story_text[len(prompt):]

    # Give short generations a proper ending instead of stopping abruptly.
    min_story_words = 100
    if len(story_text.split()) < min_story_words:
        story_text += " The children had a wonderful adventure and learned that imagination can take you anywhere. They returned home with smiles, eager to share their magical story with family and friends. And they lived happily ever after."

    return story_text
|
| 41 |
|
| 42 |
# text2audio
|
| 43 |
def text2audio(story_text):
|
| 44 |
+
tts = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
|
| 45 |
+
audio_output = tts(story_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
return {
|
| 48 |
"audio": audio_output["audio"],
|
|
|
|
| 51 |
|
| 52 |
# Function to save temporary image file
|
| 53 |
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing ``name`` (the client-supplied file
            name) and ``getvalue()`` (the file contents as bytes).

    Returns:
        Path of the written file, always directly inside ``temp``.
    """
    # exist_ok avoids failing if the directory appears between a check and
    # the create call.
    os.makedirs("temp", exist_ok=True)

    # The name is supplied by the client: keep only its final component so a
    # value such as "../app.py" cannot be written outside the temp directory.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        safe_name = "upload"
    image_path = os.path.join("temp", safe_name)

    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())

    return image_path
|
| 63 |
|
| 64 |
# main part
|
| 65 |
+
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
# Only accept image formats: the upload is handed straight to img2text below.
uploaded_file = st.file_uploader("Select an Image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Stage 1: Image to Text
    # The temporary file is only passed to img2text, so remove it as soon as
    # that stage finishes, even when it raises.
    image_path = save_uploaded_image(uploaded_file)
    try:
        st.text('Processing img2text...')
        caption = img2text(image_path)
    finally:
        try:
            os.remove(image_path)
        except OSError:
            # Best-effort cleanup: a missing or locked temp file must not
            # hide the real result or error.
            pass
    st.write(caption)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(caption)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_data = text2audio(story)

    # Render the player directly. Wrapping it in st.button made every click
    # rerun the whole script, regenerating the caption, story and audio
    # before the player could appear; the player has its own play control.
    st.audio(
        audio_data["audio"],
        format="audio/wav",
        start_time=0,
        sample_rate=audio_data["sampling_rate"],
    )
|