""" Magic Story Generator App for Hugging Face Spaces Creates custom children's stories from uploaded images """ import streamlit as st from PIL import Image import tempfile import numpy as np from transformers import pipeline import torch import os # ====================== # UI Configuration # ====================== def configure_ui(): """Sets up child-friendly interface with custom styling""" st.set_page_config( page_title="✨ Magic Story Generator", page_icon="🧚", layout="wide" ) # Custom CSS for child-friendly design st.markdown(""" """, unsafe_allow_html=True) # ====================== # Stage 1: Image Captioning # ====================== @st.cache_resource def load_image_captioner(): """Loads BLIP image captioning model with GPU support if available""" return pipeline( "image-to-text", model="Salesforce/blip-image-captioning-base", device="cuda" if torch.cuda.is_available() else "cpu" ) def generate_caption(_pipeline, image): """Generates English description of uploaded image""" try: # Generate caption with 20-50 words result = _pipeline(image, max_new_tokens=50) return result[0]['generated_text'] except Exception as e: st.error(f"Caption generation failed: {str(e)}") return None # ====================== # Stage 2: Story Generation # ====================== @st.cache_resource def load_story_generator(): """Loads fine-tuned GPT-2 story generator""" return pipeline( "text-generation", model="pranavpsv/gpt2-genre-story-generator", device="cuda" if torch.cuda.is_available() else "cpu" ) def generate_story(_pipeline, keywords): """Creates a children's story (60-100 words) based on image caption""" prompt = f"""Generate a children's story (60-100 words) based on: {keywords} Requirements: - Use simple English (Grade 2 level) - Include magical/fantasy elements - Have positive moral lesson - Happy ending - Exactly 3 paragraphs Story:""" try: story = _pipeline( prompt, max_length=250, # Controls token count (~100 words) temperature=0.7, # Balance creativity vs coherence do_sample=True, top_k=50 )[0]['generated_text'] # Clean up output by removing prompt return story.replace(prompt, "").strip() except Exception as e: st.error(f"Story generation failed: {str(e)}") return None # ====================== # Stage 3: Text-to-Speech # ====================== @st.cache_resource def load_tts(): """Loads multilingual TTS model""" return pipeline( "text-to-speech", model="facebook/mms-tts-eng", device="cuda" if torch.cuda.is_available() else "cpu" ) def text_to_speech(_pipeline, text): """Converts generated story to speech audio""" try: audio = _pipeline(text) with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f: import soundfile as sf sf.write(f.name, audio["audio"].squeeze().numpy(), audio["sampling_rate"]) return f.name except Exception as e: st.error(f"Audio generation failed: {str(e)}") return None # ====================== # Main Application # ====================== def main(): # Configure UI first configure_ui() # App header st.title("🧚 Magic Story Generator") st.markdown("""

Upload a child's photo and AI will create a custom fairy tale with audio!

""", unsafe_allow_html=True) # File upload section uploaded_file = st.file_uploader( "Choose a photo of children's activity", type=["jpg", "jpeg", "png"], help="Examples: playing, reading, drawing etc." ) if not uploaded_file: st.info("👆 Please upload an image to begin") return # Display uploaded image image = Image.open(uploaded_file) st.image(image, caption="Your uploaded photo", use_column_width=True) # Load all models (shows loading animation) with st.spinner("🪄 Preparing magic tools..."): caption_pipe = load_image_captioner() story_pipe = load_story_generator() tts_pipe = load_tts() # --- Stage 1: Image Captioning --- with st.spinner("🔍 Analyzing the image..."): caption = generate_caption(caption_pipe, image) if caption: st.success(f"📝 AI sees: {caption}") # --- Stage 2: Story Generation --- if caption: with st.spinner("✍️ Writing your story..."): story = generate_story(story_pipe, caption) if story: st.subheader("📖 Your Custom Story") st.markdown(f"""

{story}

""", unsafe_allow_html=True) # --- Stage 3: Text-to-Speech --- with st.spinner("🔊 Creating audio version..."): audio_path = text_to_speech(tts_pipe, story) if audio_path: st.audio(audio_path, format="audio/wav") st.success("Audio ready! Click play above to listen") st.balloons() # Celebration animation if __name__ == "__main__": # Set Hugging Face cache location os.environ["HF_HUB_CACHE"] = "/tmp/huggingface" main()