Spaces:

CR7CAD
/

Assignment1

Sleeping

File size: 5,023 Bytes

cd245d5
90bef38
8d5fabf
cd245d5
90bef38
8d5fabf
cd245d5
 
 
 
118cd25
cd245d5
 
 
 
 
 
8d5fabf
cd245d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
 
 
 
 
90bef38
cd245d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
90bef38
cd245d5
4e37056
 
cd245d5
 
8d5fabf
cd245d5
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
 
 
8d5fabf
cd245d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5fabf
cd245d5
 
 
8d5fabf
cd245d5
8d5fabf
cd245d5

# import part
import streamlit as st
from transformers import pipeline
import torch
from PIL import Image
import io
import os
from huggingface_hub import InferenceClient
import numpy as np
import base64

# function part
# img2text
def img2text(image_path):
    """Return a caption for the image stored at ``image_path``.

    Builds an "image-to-text" pipeline for the
    ``noamrot/FuseCap_Image_Captioning`` model, runs it on the image and
    returns the ``generated_text`` of the first result.
    """
    captioner = pipeline("image-to-text", model="noamrot/FuseCap_Image_Captioning")
    predictions = captioner(image_path)
    first_prediction = predictions[0]
    return first_prediction["generated_text"]

# text2story
def text2story(text):
    """Turn an image caption into a short children's story.

    The caption is embedded in a fixed prompt that is sent to the hosted
    ``MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF`` model through
    ``InferenceClient.text_generation``; whatever that call returns is
    handed back to the caller unchanged.
    """
    # Remote inference keeps GGUF loading out of the Streamlit process.
    llama_client = InferenceClient(model="MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF")

    # Prompt pieces are spelled out with explicit newlines so the exact
    # whitespace sent to the model is visible in the source.
    story_prompt = (
        "Write a fun, engaging children's story of about 100 words based on this caption: \n"
        f'    "{text}"\n'
        "    \n"
        "    The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.\n"
        "    "
    )

    # Sampling settings forwarded verbatim to the text-generation call.
    sampling_options = {
        "max_new_tokens": 250,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.2,
    }

    return llama_client.text_generation(story_prompt, **sampling_options)

# text2audio
def text2audio(story_text):
    """Synthesize speech for a story with the Bark text-to-speech model.

    Args:
        story_text: The story text to read aloud.

    Returns:
        A dict with two keys taken from the pipeline output: ``"audio"``
        and ``"sampling_rate"``.
    """
    # Using Bark text-to-speech model
    tts = pipeline("text-to-speech", model="suno/bark")

    # The story is passed positionally. The text-to-speech pipeline's input
    # parameter is not called ``text``; a ``text=...`` keyword is collected
    # with the extra forward parameters and the required input stays
    # missing, so the call fails before any audio is generated.
    # NOTE(review): confirm that Bark accepts "speaker" and "text_temp" as
    # forward parameters in the installed transformers version.
    audio_output = tts(
        story_text,
        forward_params={"speaker": "v2/en_speaker_6", "text_temp": 0.7},
    )

    return {
        "audio": audio_output["audio"],
        "sampling_rate": audio_output["sampling_rate"],
    }

# Function to save temporary image file
def save_uploaded_image(uploaded_file):
    """Write an uploaded file into the local ``temp`` directory.

    Args:
        uploaded_file: Object exposing ``name`` (the file name supplied by
            the uploader) and ``getvalue()`` (the file content as bytes).

    Returns:
        The path of the written file, ``temp/<file name>``.
    """
    # exist_ok avoids the check-then-create race when two sessions upload
    # at the same time.
    os.makedirs("temp", exist_ok=True)

    # The name is chosen by the uploader, so keep only its last component:
    # directory parts such as "../" must not steer the write outside temp.
    # Backslashes are normalised first so Windows-style paths are handled
    # on every platform.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    # Define the path to save the image
    image_path = os.path.join("temp", safe_name)

    # Save the image
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())

    return image_path

# main part
# Page chrome first: Streamlit needs the page config before other output.
st.set_page_config(page_title="Kids Storytelling Magic", page_icon="📚", layout="centered")

# Child-friendly colours and a large rounded button, injected as raw CSS.
KID_FRIENDLY_CSS = """
    <style>
    .main {
        background-color: #f0f8ff;
    }
    h1, h2, h3 {
        color: #1e90ff;
    }
    .stButton>button {
        background-color: #ff6b6b;
        color: white;
        font-size: 1.2rem;
        border-radius: 10px;
        padding: 0.5rem 1rem;
    }
    </style>
    """
st.markdown(KID_FRIENDLY_CSS, unsafe_allow_html=True)

# Headline and tagline shown above the uploader.
st.title("🧸 Kids Storytelling Magic 🦄")
st.subheader("Upload a picture and hear a magical story!")

# Only common raster image formats are offered in the file picker.
ACCEPTED_IMAGE_TYPES = ["jpg", "jpeg", "png"]
uploaded_file = st.file_uploader("Choose a fun picture...", type=ACCEPTED_IMAGE_TYPES)

# Main flow: caption the uploaded picture, turn the caption into a story and
# offer the story as audio. Without an upload, show usage instructions.
if uploaded_file is not None:
    # Display a loading spinner
    with st.spinner("Working on your magical story..."):
        # Display the uploaded image
        st.image(uploaded_file, caption="Your magical picture", use_column_width=True)

        # Streamlit re-executes this whole script on every widget
        # interaction, including a click on the play button below. Each
        # stage's result is therefore kept in session state, keyed by the
        # upload, so a click plays the audio that already exists instead of
        # running all three models again.
        upload_key = (
            getattr(uploaded_file, "file_id", None),
            uploaded_file.name,
            uploaded_file.size,
        )
        if (
            "story_upload_key" not in st.session_state
            or st.session_state["story_upload_key"] != upload_key
        ):
            st.session_state["story_upload_key"] = upload_key
            st.session_state["story_results"] = {}
        results = st.session_state["story_results"]

        # Stage 1: Image to Text
        if "caption" not in results:
            # Save the image temporarily so the captioner can read it by path
            image_path = save_uploaded_image(uploaded_file)
            try:
                with st.spinner("Looking at your picture..."):
                    results["caption"] = img2text(image_path)
            finally:
                # Clean up even when captioning fails. Removal is best
                # effort: a file that is already gone is not an error.
                try:
                    os.remove(image_path)
                except OSError:
                    pass
        st.markdown("### 📝 I see...")
        st.write(results["caption"])

        # Stage 2: Text to Story
        if "story" not in results:
            with st.spinner("Creating your story..."):
                results["story"] = text2story(results["caption"])
        st.markdown("### 📖 Your Story")
        st.write(results["story"])

        # Stage 3: Story to Audio data
        try:
            # A failed attempt stores nothing, so the next rerun retries.
            if "audio" not in results:
                with st.spinner("Making your story speak..."):
                    results["audio"] = text2audio(results["story"])
            audio_data = results["audio"]

            # Add a play button with cute icon
            st.markdown("### 🔊 Listen to your story")
            if st.button("🎵 Play Story"):
                st.audio(
                    audio_data["audio"],
                    format="audio/wav",
                    start_time=0,
                    sample_rate=audio_data["sampling_rate"]
                )
        except Exception as e:
            # Audio is optional: report the problem but keep the story visible.
            st.error(f"Oops! Something went wrong with the audio: {str(e)}")
            st.write("But you can still read the story above!")

else:
    # Show instructions with a friendly message
    st.markdown("""
    ### How to use:
    1. Click the button above to upload a picture
    2. Wait for the magical story to appear
    3. Press play to hear your story!
    
    Try pictures of animals, nature, toys, or anything fun!
    """)
    
    # Show a placeholder image
    st.image("https://placehold.co/600x400/9370db/ffffff?text=Upload+an+image+to+start+the+magic!", 
             caption="Ready for your picture!", use_column_width=True)

# Footer: a horizontal rule followed by the credit line.
for footer_markdown in ("---", "Made for kids to enjoy the stories"):
    st.markdown(footer_markdown)