|
|
import tempfile

import streamlit as st
from PIL import Image
from scipy.io import wavfile
from transformers import pipeline
|
|
|
|
|
|
|
|
@st.cache_resource
def _get_caption_pipeline():
    """Load the image-captioning pipeline once and cache it across reruns.

    Streamlit re-executes the script on every interaction; without caching,
    the multi-hundred-MB model would be downloaded/instantiated per rerun.
    """
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def generate_caption(image):
    """Return a one-sentence English caption describing *image*.

    Parameters
    ----------
    image : PIL.Image.Image
        The uploaded picture to describe.

    Returns
    -------
    str
        The caption text produced by the ViT-GPT2 captioning model.
    """
    # The pipeline returns a list of dicts, one per input image; we pass a
    # single image, so take the first result.
    return _get_caption_pipeline()(image)[0]["generated_text"]
|
|
|
|
|
|
|
|
@st.cache_resource
def _get_story_pipeline():
    """Load the GPT-2 text-generation pipeline once and cache it across reruns."""
    return pipeline("text-generation", model="gpt2")


def generate_story(caption):
    """Generate a short, child-friendly story seeded by an image caption.

    Parameters
    ----------
    caption : str
        A description of the uploaded image (output of generate_caption).

    Returns
    -------
    str
        The generated story text (prompt removed).
    """
    prompt = f"Write a fun, short story (50-100 words) for a child based on: {caption}"
    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=100 counted the prompt's own tokens too, leaving almost no
    # room for the story itself.
    generated = _get_story_pipeline()(prompt, max_new_tokens=100, do_sample=True)[0]["generated_text"]
    # GPT-2 echoes the prompt at the start of generated_text; strip it so the
    # user sees only the story, not the instruction.
    if generated.startswith(prompt):
        return generated[len(prompt):].strip()
    return generated
|
|
|
|
|
|
|
|
@st.cache_resource
def _get_tts_pipeline():
    """Load the text-to-speech pipeline once and cache it across reruns."""
    return pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")


def generate_audio(story_text):
    """Synthesize *story_text* to speech and return the path to a WAV file.

    Parameters
    ----------
    story_text : str
        The story to read aloud.

    Returns
    -------
    str
        Filesystem path of a temporary .wav file containing the speech.
        NOTE(review): the file is created with delete=False and is never
        cleaned up; acceptable for a demo, but it accumulates temp files.
    """
    speech = _get_tts_pipeline()(story_text)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        # speech["audio"] is a raw waveform array, not encoded WAV bytes;
        # writing it directly (as the previous code did) produces a file with
        # no WAV header that players cannot open. Encode it properly.
        # NOTE(review): assumes speech["audio"] is a 1-D (or (n, channels))
        # float array and speech["sampling_rate"] is present — standard for
        # HF text-to-speech pipelines; confirm for this model.
        wavfile.write(f.name, speech["sampling_rate"], speech["audio"])
        return f.name
|
|
|
|
|
|
|
|
def main():
    """Streamlit entry point: upload an image, then caption -> story -> audio."""
    st.title("π AI Storyteller for Kids (3 Stages)")
    st.write("Upload a child-friendly image and let the app create a story and read it out loud!")

    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    # Guard clause: nothing to do until the user has provided a picture.
    if not uploaded_image:
        return

    image = Image.open(uploaded_image)
    st.image(image, caption="Your uploaded image", use_column_width=True)

    # Stage 1: describe the picture.
    with st.spinner("π Generating caption..."):
        caption = generate_caption(image)
    st.success(f"πΌοΈ Caption: {caption}")

    # Stage 2: turn the description into a story.
    with st.spinner("π Generating story..."):
        story = generate_story(caption)
    st.markdown("### π Generated Story:")
    st.write(story)

    # Stage 3: read the story aloud.
    with st.spinner("π Generating audio..."):
        audio_path = generate_audio(story)
    st.audio(audio_path, format="audio/wav")
|
|
|
|
|
# Run the app only when this file is executed directly
# (e.g. `streamlit run <file>`), not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|