# AI Storyteller for Kids — Streamlit app: image → caption → story → audio.
# NOTE(review): this file was scraped from a Hugging Face Spaces page;
# "Spaces: Sleeping" was page chrome, not part of the program.
import tempfile
import wave
from functools import lru_cache

import numpy as np
import streamlit as st
from PIL import Image
from transformers import pipeline
# --- Stage 1: Image → Caption ---
@lru_cache(maxsize=1)
def _get_caption_pipeline():
    """Build the image-captioning pipeline once and reuse it across calls.

    The original code constructed the pipeline inside every call to
    ``generate_caption``, reloading the model from disk each time.
    """
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def generate_caption(image):
    """Return a short English caption describing *image*.

    Parameters
    ----------
    image : PIL.Image.Image
        The uploaded picture to describe.

    Returns
    -------
    str
        Caption text produced by the vision-language model.
    """
    # The pipeline returns a list of dicts, e.g. [{"generated_text": "..."}].
    return _get_caption_pipeline()(image)[0]["generated_text"]
# --- Stage 2: Caption → Story ---
@lru_cache(maxsize=1)
def _get_story_pipeline():
    """Build the GPT-2 text-generation pipeline once and reuse it."""
    return pipeline("text-generation", model="gpt2")


def generate_story(caption):
    """Turn an image *caption* into a short, child-friendly story.

    Parameters
    ----------
    caption : str
        One-sentence description of the uploaded image.

    Returns
    -------
    str
        The generated story text, with the instruction prompt removed.
    """
    prompt = f"Write a fun, short story (50-100 words) for a child based on: {caption}"
    # max_new_tokens bounds the *story* length; the original max_length=100
    # counted the prompt tokens as well, leaving almost no room for output.
    generated = _get_story_pipeline()(prompt, max_new_tokens=120, do_sample=True)[0][
        "generated_text"
    ]
    # GPT-2 echoes the prompt at the start of generated_text; strip it so the
    # caller receives only the story.
    if generated.startswith(prompt):
        return generated[len(prompt):].strip()
    return generated
# --- Stage 3: Story → Audio ---
@lru_cache(maxsize=1)
def _get_tts_pipeline():
    """Build the text-to-speech pipeline once and reuse it."""
    return pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")


def generate_audio(story_text):
    """Synthesize *story_text* to speech and return the path of a WAV file.

    Parameters
    ----------
    story_text : str
        The story to read aloud.

    Returns
    -------
    str
        Path to a temporary ``.wav`` file containing 16-bit mono PCM audio.
    """
    speech = _get_tts_pipeline()(story_text)
    # HF text-to-speech pipelines return {"audio": float ndarray,
    # "sampling_rate": int}.  The original code wrote the raw ndarray bytes to
    # a .wav file, producing a headerless, unplayable file — wrap the samples
    # in a real WAV container instead.  (assumes float samples in [-1, 1] —
    # the standard pipeline contract; TODO confirm for this model)
    samples = np.ravel(np.asarray(speech["audio"]))
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        path = f.name
    with wave.open(path, "wb") as wav:
        wav.setnchannels(1)  # mono
        wav.setsampwidth(2)  # 16-bit PCM
        wav.setframerate(speech["sampling_rate"])
        wav.writeframes(pcm.tobytes())
    return path
# --- Streamlit UI ---
def main():
    """Render the three-stage storyteller UI: caption, story, then narration."""
    st.title("π AI Storyteller for Kids (3 Stages)")
    st.write("Upload a child-friendly image and let the app create a story and read it out loud!")

    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_image:
        # Nothing uploaded yet — leave the page showing just the uploader.
        return

    image = Image.open(uploaded_image)
    st.image(image, caption="Your uploaded image", use_column_width=True)

    with st.spinner("π Generating caption..."):
        caption = generate_caption(image)
    st.success(f"πΌοΈ Caption: {caption}")

    with st.spinner("π Generating story..."):
        story = generate_story(caption)
    st.markdown("### π Generated Story:")
    st.write(story)

    with st.spinner("π Generating audio..."):
        audio_path = generate_audio(story)
    st.audio(audio_path, format="audio/wav")


if __name__ == "__main__":
    main()