import streamlit as st
from transformers import pipeline
from PIL import Image
import io
from gtts import gTTS
import time

# Set the page title.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other Streamlit command — confirm for the version in use before
# moving this call.
st.set_page_config(page_title="Kids Story Generator")

# Page heading followed by a one-line instruction for the user
st.title("Kids Story Generator")
st.write("Upload a picture and let's create a magical story!")

# Initialize models
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines the app relies on.

    Returns:
        A 2-tuple ``(captioner, generator)``: an "image-to-text" pipeline
        backed by "microsoft/git-base-coco", then a "text-generation"
        pipeline backed by "gpt2", in that order.
    """
    # Wrapped in st.cache_resource, presumably so the models are built once
    # and reused across Streamlit reruns instead of being reloaded each time.
    return (
        pipeline("image-to-text", model="microsoft/git-base-coco"),
        pipeline("text-generation", model="gpt2"),
    )

# Module-level pipelines: generate_caption reads image_to_text and
# generate_story reads story_generator, so both must exist before those
# helpers are called.
image_to_text, story_generator = load_models()

# Function to generate caption from image
def generate_caption(image):
    """Return the caption text the image-to-text pipeline produces for *image*.

    Only the first result returned by the pipeline is used; the value of its
    'generated_text' entry is the caption.
    """
    first_result = image_to_text(image)[0]
    return first_result['generated_text']

# Function to generate story from caption (less than 100 words)
def generate_story(caption):
    """Generate a short story, capped at 100 words, seeded by an image caption.

    Args:
        caption: Caption text; it is placed right after "Once upon a time, "
            to form the generation prompt.

    Returns:
        The pipeline's 'generated_text' for that prompt, limited to its first
        100 whitespace-separated words, with trailing whitespace and dangling
        separators (',', ';', ':') removed, and ending in '.', '!' or '?'
        whenever the text is non-empty.
    """
    prompt = f"Once upon a time, {caption} "

    # max_length is a token budget (prompt tokens count towards it), not a
    # character count, so it only loosely bounds the number of words.
    story = story_generator(prompt, max_length=100, do_sample=True)[0]['generated_text']

    # Hard cap on the word count, independent of the token budget above.
    words = story.split()
    if len(words) > 100:
        story = " ".join(words[:100])

    # Generation can stop mid-sentence, so tidy the ending whether or not the
    # text was truncated. Previously this ran only inside the truncation
    # branch, which left shorter stories without closing punctuation.
    story = story.rstrip()
    # Drop dangling separators so the result reads "ran." rather than "ran,."
    story = story.rstrip(',;: ')
    if story and not story.endswith(('.', '!', '?')):
        story += '.'

    return story

# Function to convert text to speech
def text_to_speech(text):
    """Synthesize *text* as English speech and save it as an MP3 file.

    The output path is the fixed relative name "story_audio.mp3", so every
    call targets the same file.

    Returns:
        The path of the saved audio file.
    """
    output_path = "story_audio.mp3"
    gTTS(text=text, lang='en', slow=False).save(output_path)
    return output_path

# File uploader, restricted to jpg / jpeg / png files
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Nothing below is rendered until a file has been uploaded
if uploaded_file is not None:
    # Open the upload with PIL and display it back to the user
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_container_width=True)
    
    # The caption -> story -> audio steps run only when the button is pressed
    if st.button("Generate Story"):
        with st.spinner("Generating your story..."):
            # Step 1: describe the image; the caption is shown so the user
            # can see what the story is based on
            caption = generate_caption(image)
            st.write("Image caption:", caption)
            
            # Step 2: turn the caption into a story and report its word count
            # (counted as whitespace-separated words)
            story = generate_story(caption)
            word_count = len(story.split())
            st.write(f"### Your Story ({word_count} words)")
            st.write(story)
            
            # Step 3: synthesize the story to an audio file on disk
            audio_file = text_to_speech(story)
            
            # Step 4: embed an audio player pointing at that file
            st.write("### Listen to your story")
            st.audio(audio_file)

# Footer: rendered on every run, whether or not a file was uploaded
st.markdown("---")
st.write("Created for ISOM5240 Assignment")