# NOTE: this copy of the file was extracted from a Hugging Face Spaces file view.
# The page header, blame hashes and line-number gutter that preceded the code
# were extraction residue (not part of the program) and have been dropped.
# import part
import streamlit as st
from transformers import pipeline
import torch
from PIL import Image
import io
import os
from huggingface_hub import InferenceClient
import numpy as np
import base64
# function part
# img2text
def img2text(image_path):
    """Caption the image at ``image_path`` and return the generated text.

    A ``transformers`` image-to-text pipeline for the
    ``noamrot/FuseCap_Image_Captioning`` model is built on every call; the
    ``generated_text`` field of the first result is returned.
    """
    captioner = pipeline(
        "image-to-text",
        model="noamrot/FuseCap_Image_Captioning",
    )
    first_result = captioner(image_path)[0]
    return first_result["generated_text"]
# text2story
def text2story(text):
    """Turn an image caption into a short children's story.

    The caption is embedded in a fixed prompt and sent to the
    ``MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF`` model through
    ``InferenceClient.text_generation``; whatever that call returns is
    returned unchanged.
    """
    # Prompt asking for a ~100-word story built around the caption.
    prompt = f"""Write a fun, engaging children's story of about 100 words based on this caption:
"{text}"
The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.
"""

    # The model is reached through the hosted API rather than loaded locally,
    # which sidesteps handling the GGUF weights inside the Streamlit process.
    client = InferenceClient(model="MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF")

    # Sampling settings for the generation request.
    generation_options = {
        "max_new_tokens": 250,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
    }
    return client.text_generation(prompt, **generation_options)
# text2audio
def text2audio(story_text):
    """Synthesize speech for ``story_text`` with the ``suno/bark`` model.

    Returns a dict with exactly two keys copied from the pipeline output:
    ``"audio"`` and ``"sampling_rate"``.
    """
    # A fresh text-to-speech pipeline is created on every call.
    synthesizer = pipeline("text-to-speech", model="suno/bark")

    # NOTE(review): whether Bark's generation step accepts the ``speaker`` and
    # ``text_temp`` keys cannot be confirmed from this file - verify against
    # the transformers Bark documentation.
    voice_settings = {"speaker": "v2/en_speaker_6", "text_temp": 0.7}
    result = synthesizer(text=story_text, forward_params=voice_settings)

    # Expose only the two fields the caller needs.
    return {key: result[key] for key in ("audio", "sampling_rate")}
# Function to save temporary image file
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getvalue()`` method that returns the file content as bytes
            (this is how the value from ``st.file_uploader`` is used here).

    Returns:
        The relative path of the saved file, i.e. ``temp/<file name>``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("temp", exist_ok=True)

    # The file name is supplied by the client. Keep only its final path
    # component so separators or ".." cannot place the file outside "temp".
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    image_path = os.path.join("temp", safe_name)
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
    return image_path
# main part
# Page setup for the app.
# NOTE(review): the emoji in the UI strings below (page icon, title, section
# headings) appear mis-encoded in this copy of the file; they are kept as-is
# here - restore the intended characters from the original source.
st.set_page_config(
    page_title="Kids Storytelling Magic",
    page_icon="π",
    layout="centered"
)

# Add some CSS for a child-friendly interface
st.markdown("""
<style>
.main {
background-color: #f0f8ff;
}
h1, h2, h3 {
color: #1e90ff;
}
.stButton>button {
background-color: #ff6b6b;
color: white;
font-size: 1.2rem;
border-radius: 10px;
padding: 0.5rem 1rem;
}
</style>
""", unsafe_allow_html=True)

st.title("π§Έ Kids Storytelling Magic π¦")
st.subheader("Upload a picture and hear a magical story!")

uploaded_file = st.file_uploader("Choose a fun picture...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    with st.spinner("Working on your magical story..."):
        # Display the uploaded image
        # NOTE(review): recent Streamlit releases favour use_container_width
        # over use_column_width - confirm against the deployed version.
        st.image(uploaded_file, caption="Your magical picture", use_column_width=True)

        # The captioning pipeline is given a file path, so the upload is
        # written to disk first.
        image_path = save_uploaded_image(uploaded_file)
        try:
            # Stage 1: Image to Text
            with st.spinner("Looking at your picture..."):
                caption = img2text(image_path)
            st.markdown("### π I see...")
            st.write(caption)

            # Stage 2: Text to Story
            with st.spinner("Creating your story..."):
                story = text2story(caption)
            st.markdown("### π Your Story")
            st.write(story)

            # Stage 3: Story to Audio data
            with st.spinner("Making your story speak..."):
                try:
                    audio_data = text2audio(story)
                    st.markdown("### π Listen to your story")
                    # The player is rendered directly instead of behind an
                    # st.button: pressing a button reruns the whole script,
                    # which would repeat captioning, story generation and
                    # speech synthesis before any audio could be shown. The
                    # audio widget already has its own play control.
                    st.audio(
                        audio_data["audio"],
                        format="audio/wav",
                        start_time=0,
                        sample_rate=audio_data["sampling_rate"]
                    )
                except Exception as e:
                    # Audio is optional: report the problem and keep the
                    # written story on screen.
                    st.error(f"Oops! Something went wrong with the audio: {str(e)}")
                    st.write("But you can still read the story above!")
        finally:
            # Clean up the temporary image even when a stage above failed.
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup: a file that is already gone or cannot
                # be removed must not break the page.
                pass
else:
    # Show instructions with a friendly message
    st.markdown("""
### How to use:
1. Click the button above to upload a picture
2. Wait for the magical story to appear
3. Press play to hear your story!
Try pictures of animals, nature, toys, or anything fun!
""")
    # Show a placeholder image
    st.image("https://placehold.co/600x400/9370db/ffffff?text=Upload+an+image+to+start+the+magic!",
             caption="Ready for your picture!", use_column_width=True)

# Add a footer
st.markdown("---")
st.markdown("Made for kids to enjoy the stories")