"""Streamlit app: turn an uploaded picture into a short spoken kids' story.

Pipeline: image -> caption (image-to-text model) -> story (GPT-2) -> MP3
audio (gTTS), all rendered on a single Streamlit page.
"""

import io
import time

import streamlit as st
from gtts import gTTS, gTTSError
from PIL import Image
from transformers import pipeline

# Generation budget passed to the text-generation pipeline as ``max_length``.
# NOTE: this is counted in model *tokens* and includes the prompt, not
# characters, so the generated text is usually shorter than 100 words.
MAX_STORY_TOKENS = 100

# Hard cap on the number of whitespace-separated words shown to the user.
MAX_STORY_WORDS = 100

# Set page title
st.set_page_config(page_title="Kids Story Generator")

# Title and introduction
st.title("Kids Story Generator")
st.write("Upload a picture and let's create a magical story!")


# Initialize models
@st.cache_resource
def load_models():
    """Build the captioning and story-generation pipelines.

    Decorated with ``st.cache_resource`` so the pipelines are created once
    and reused instead of being rebuilt on every script rerun.

    Returns:
        A ``(image_to_text, story_generator)`` tuple of pipelines.
    """
    image_to_text = pipeline("image-to-text", model="microsoft/git-base-coco")
    story_generator = pipeline("text-generation", model="gpt2")
    return image_to_text, story_generator


image_to_text, story_generator = load_models()


def generate_caption(image):
    """Return the caption text the image-to-text pipeline produces for *image*.

    Args:
        image: The image to describe (the app passes a PIL image).

    Returns:
        The ``generated_text`` of the pipeline's first result.
    """
    return image_to_text(image)[0]['generated_text']


def generate_story(caption):
    """Generate a story of at most ``MAX_STORY_WORDS`` words from *caption*.

    The caption is embedded in a "Once upon a time, ..." prompt; the returned
    story includes that prompt followed by the sampled continuation.

    Args:
        caption: Short description of the picture.

    Returns:
        The story text, capped at ``MAX_STORY_WORDS`` words and always ending
        with ``.``, ``!`` or ``?``.
    """
    prompt = f"Once upon a time, {caption} "
    story = story_generator(
        prompt,
        max_length=MAX_STORY_TOKENS,
        do_sample=True,
    )[0]['generated_text']

    # Ensure story doesn't exceed the word cap.
    words = story.split()
    if len(words) > MAX_STORY_WORDS:
        story = " ".join(words[:MAX_STORY_WORDS])

    # Drop trailing whitespace/newlines first; otherwise text that already
    # ends a sentence (e.g. "... the end.\n") would get a stray extra period.
    story = story.rstrip()
    if not story.endswith(('.', '!', '?')):
        story += '.'
    return story


def text_to_speech(text):
    """Convert *text* to spoken English and return the MP3 data as bytes.

    The audio is rendered into an in-memory buffer rather than a fixed file
    name on disk, so concurrent sessions cannot overwrite each other's audio
    and no files are left behind.

    Args:
        text: The text to read aloud.

    Returns:
        The encoded MP3 audio as ``bytes``.

    Raises:
        gTTSError: If the speech synthesis request fails.
    """
    tts = gTTS(text=text, lang='en', slow=False)
    buffer = io.BytesIO()
    tts.write_to_fp(buffer)
    return buffer.getvalue()


# File uploader
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_container_width=True)

    # Generate button
    if st.button("Generate Story"):
        with st.spinner("Generating your story..."):
            # Generate caption
            caption = generate_caption(image)
            st.write("Image caption:", caption)

            # Generate story
            story = generate_story(caption)
            word_count = len(story.split())
            st.write(f"### Your Story ({word_count} words)")
            st.write(story)

            # Generate audio. Speech synthesis needs a network round trip, so
            # a failure here should not discard the story already shown.
            try:
                audio_bytes = text_to_speech(story)
            except gTTSError as err:
                st.warning(f"Could not generate audio for the story: {err}")
            else:
                # Display audio
                st.write("### Listen to your story")
                st.audio(audio_bytes, format="audio/mpeg")

st.markdown("---")
st.write("Created for ISOM5240 Assignment")