Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Community

CR7CAD committed on Mar 8

Commit

f006a50

verified ·

1 Parent(s): c62c780

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -118

app.py CHANGED Viewed

@@ -1,13 +1,7 @@
 # import part
 import streamlit as st
 from transformers import pipeline
-import torch
-from PIL import Image
-import io
 import os
-from huggingface_hub import InferenceClient
-import numpy as np
-import base64
 # function part
 # img2text
@@ -18,37 +12,37 @@ def img2text(image_path):
 # text2story
 def text2story(text):
-    # Using Llama model through API to avoid GGUF format complexities in Streamlit
-    client = InferenceClient(model="MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF")
     # Create a prompt for the story generation
-    prompt = f"""Write a fun, engaging children's story of about 100 words based on this caption:
-    "{text}"
-    The story should be suitable for kids aged 3-10 years old, with simple language, positive themes, and a clear beginning, middle, and end.
-    """
     # Generate the story
-    story_text = client.text_generation(
         prompt,
-        max_new_tokens=250,
-        temperature=0.7,
-        top_p=0.9,
-        repetition_penalty=1.2
     )
     return story_text
 # text2audio
 def text2audio(story_text):
-    # Using Bark text-to-speech model
-    tts = pipeline("text-to-speech", model="suno/bark")
-    # Generate audio with a voice suitable for children's stories
-    audio_output = tts(
-        text=story_text,
-        forward_params={"speaker": "v2/en_speaker_6", "text_temp": 0.7}
-    )
     return {
         "audio": audio_output["audio"],
@@ -57,110 +51,53 @@ def text2audio(story_text):
 # Function to save temporary image file
 def save_uploaded_image(uploaded_file):
-    # Create a temp directory if it doesn't exist
     if not os.path.exists("temp"):
         os.makedirs("temp")
-    # Define the path to save the image
     image_path = os.path.join("temp", uploaded_file.name)
-    # Save the image
     with open(image_path, "wb") as f:
         f.write(uploaded_file.getvalue())
     return image_path
 # main part
-st.set_page_config(
-    page_title="Kids Storytelling Magic",
-    page_icon="📚",
-    layout="centered"
-)
-# Add some CSS for a child-friendly interface
-st.markdown("""
-    <style>
-    .main {
-        background-color: #f0f8ff;
-    }
-    h1, h2, h3 {
-        color: #1e90ff;
-    }
-    .stButton>button {
-        background-color: #ff6b6b;
-        color: white;
-        font-size: 1.2rem;
-        border-radius: 10px;
-        padding: 0.5rem 1rem;
-    }
-    </style>
-    """, unsafe_allow_html=True)
-st.title("🧸 Kids Storytelling Magic 🦄")
-st.subheader("Upload a picture and hear a magical story!")
-uploaded_file = st.file_uploader("Choose a fun picture...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
-    # Display a loading spinner
-    with st.spinner("Working on your magical story..."):
-        # Display the uploaded image
-        st.image(uploaded_file, caption="Your magical picture", use_column_width=True)
-        # Save the image temporarily
-        image_path = save_uploaded_image(uploaded_file)
-        # Stage 1: Image to Text
-        with st.spinner("Looking at your picture..."):
-            caption = img2text(image_path)
-            st.markdown("### 📝 I see...")
-            st.write(caption)
-        # Stage 2: Text to Story
-        with st.spinner("Creating your story..."):
-            story = text2story(caption)
-            st.markdown("### 📖 Your Story")
-            st.write(story)
-        # Stage 3: Story to Audio data
-        with st.spinner("Making your story speak..."):
-            try:
-                audio_data = text2audio(story)
-                # Add a play button with cute icon
-                st.markdown("### 🔊 Listen to your story")
-                if st.button("🎵 Play Story"):
-                    st.audio(
-                        audio_data["audio"],
-                        format="audio/wav",
-                        start_time=0,
-                        sample_rate=audio_data["sampling_rate"]
-                    )
-            except Exception as e:
-                st.error(f"Oops! Something went wrong with the audio: {str(e)}")
-                st.write("But you can still read the story above!")
-        # Clean up - delete the temporary image
-        try:
-            os.remove(image_path)
-        except:
-            pass
-else:
-    # Show instructions with a friendly message
-    st.markdown("""
-    ### How to use:
-    1. Click the button above to upload a picture
-    2. Wait for the magical story to appear
-    3. Press play to hear your story!
-    Try pictures of animals, nature, toys, or anything fun!
-    """)
-    # Show a placeholder image
-    st.image("https://placehold.co/600x400/9370db/ffffff?text=Upload+an+image+to+start+the+magic!",
-             caption="Ready for your picture!", use_column_width=True)
-# Add a footer
-st.markdown("---")
-st.markdown("Made for kids to enjoy the stories")

 # import part
 import streamlit as st
 from transformers import pipeline
 import os
 # function part
 # img2text
 # text2story
 def text2story(text):
+    # Using a smaller text generation model
+    generator = pipeline('text-generation', model='gpt2')
     # Create a prompt for the story generation
+    prompt = f"Write a fun children's story based on this: {text}. Once upon a time, "
     # Generate the story
+    story_result = generator(
         prompt,
+        max_length=200,
+        num_return_sequences=1,
+        temperature=0.8,
+        top_k=50,
+        top_p=0.95,
+        do_sample=True
     )
+    # Extract the generated text
+    story_text = story_result[0]['generated_text']
+    story_text = story_text.replace(prompt, "Once upon a time, ")
+    # Make sure the story is at least 100 words
+    if len(story_text.split()) < 100:
+        story_text += " The children had a wonderful adventure and learned that imagination can take you anywhere. They returned home with smiles, eager to share their magical story with family and friends. And they lived happily ever after."
     return story_text
 # text2audio
 def text2audio(story_text):
+    tts = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
+    audio_output = tts(story_text)
     return {
         "audio": audio_output["audio"],
 # Function to save temporary image file
 def save_uploaded_image(uploaded_file):
     if not os.path.exists("temp"):
         os.makedirs("temp")
     image_path = os.path.join("temp", uploaded_file.name)
     with open(image_path, "wb") as f:
         f.write(uploaded_file.getvalue())
     return image_path
 # main part
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
+st.header("Turn Your Image to Audio Story")
+uploaded_file = st.file_uploader("Select an Image...")
 if uploaded_file is not None:
+    # Display the uploaded image
+    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
+    # Save the image temporarily
+    image_path = save_uploaded_image(uploaded_file)
+    # Stage 1: Image to Text
+    st.text('Processing img2text...')
+    caption = img2text(image_path)
+    st.write(caption)
+    # Stage 2: Text to Story
+    st.text('Generating a story...')
+    story = text2story(caption)
+    st.write(story)
+    # Stage 3: Story to Audio data
+    st.text('Generating audio data...')
+    audio_data = text2audio(story)
+    # Play button
+    if st.button("Play Audio"):
+        st.audio(
+            audio_data["audio"],
+            format="audio/wav",
+            start_time=0,
+            sample_rate=audio_data["sampling_rate"]
+        )
+    # Clean up the temporary file
+    try:
+        os.remove(image_path)
+    except:
+        pass