import streamlit as st
from transformers import pipeline
from gtts import gTTS
import tempfile
import os


# --- Cached model loaders ----------------------------------------------------
# Without caching, Streamlit re-runs the whole script on every widget
# interaction and both models would be reloaded from disk each time.

@st.cache_resource
def _get_captioner():
    """Load (once per process) and cache the image-captioning pipeline."""
    return pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",  # relatively fast and accurate
    )


@st.cache_resource
def _get_generator():
    """Load (once per process) and cache the text-generation pipeline."""
    return pipeline(
        "text-generation",
        model="gpt2",  # relatively small but fast
    )


# --- Pipeline stages ---------------------------------------------------------

def img2text(img_path):
    """Return a caption (str) describing the image stored at *img_path*."""
    captioner = _get_captioner()
    result = captioner(img_path)
    return result[0]["generated_text"]


def text2story(scenario):
    """Generate a short children's story (str) from a scene description.

    The instruction prompt is stripped from the model output so callers
    receive only the story text.
    """
    generator = _get_generator()
    prompt = f"Create a children's story based on: {scenario}"
    # max_new_tokens bounds only the *generated* continuation; the older
    # max_length also counted the prompt tokens, silently shortening stories.
    generated = generator(
        prompt,
        max_new_tokens=200,     # maximum story length
        num_return_sequences=1, # number of variants to generate
    )[0]["generated_text"]
    # GPT-2 echoes the prompt at the start of generated_text — remove it.
    return generated.removeprefix(prompt).strip()


def text2audio(story_text):
    """Synthesize *story_text* to speech; return the path of a new MP3 file.

    The caller is responsible for deleting the returned file.
    """
    tts = gTTS(text=story_text, lang="en")
    # Reserve a unique path, then close the handle before gTTS writes to it:
    # on Windows a file cannot be reopened by name while the handle is open.
    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    audio_file.close()
    tts.save(audio_file.name)
    return audio_file.name


# --- UI ----------------------------------------------------------------------

def main():
    """Streamlit app: uploaded image -> caption -> story -> narrated audio."""
    st.set_page_config(
        page_title="Image to Story",
        page_icon="📖"
    )
    st.header("Upload Your Image")
    uploaded_file = st.file_uploader(
        "Choose Image",
        type=["jpg", "png", "jpeg"]
    )
    if not uploaded_file:
        return

    # Persist the upload so the captioning pipeline can read it by path.
    # basename() guards against a client-supplied name containing separators.
    temp_img = os.path.join(
        tempfile.gettempdir(), os.path.basename(uploaded_file.name)
    )
    with open(temp_img, "wb") as f:
        f.write(uploaded_file.getvalue())
    st.image(uploaded_file)

    try:
        # Stage 1: Image to Text
        with st.status("🖼️ Processing image..."):
            scenario = img2text(temp_img)
        st.write("Image Caption:", scenario)

        # Stage 2: Text to Story
        with st.status("📖 Generating story..."):
            story = text2story(scenario)
        st.subheader("Story")
        st.write(story)

        # Stage 3: Story to Audio data. Read the bytes immediately so the
        # temp MP3 can be removed here rather than lingering if a rerun
        # skips the cleanup path.
        with st.status("🔊 Converting audio..."):
            audio_path = text2audio(story)
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
            os.unlink(audio_path)
    finally:
        # Always remove the uploaded-image copy, even if a stage raised.
        if os.path.exists(temp_img):
            os.unlink(temp_img)

    # Play button
    if st.button("▶️ Play Audio Story"):
        st.audio(audio_bytes, format="audio/mp3")


if __name__ == "__main__":
    main()