"""Streamlit app: turn an uploaded image into a short, narrated children's story.

Pipeline: image -> caption (BLIP) -> story (TinyLlama) -> speech (TTS model).
"""

import streamlit as st
from PIL import Image
from transformers import pipeline


@st.cache_resource
def _load_pipeline(task, model):
    """Load a transformers pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every user interaction; without
    caching, each heavy model would be re-initialized on every rerun.
    """
    return pipeline(task, model=model)


def img2text(image):
    """Return a caption string for a PIL image using the BLIP captioning model."""
    image_to_text = _load_pipeline("image-to-text", "sooh-j/blip-image-captioning-base")
    return image_to_text(image)[0]["generated_text"]


def text2story(text):
    """Generate a short children's story (str) from a caption string."""
    generator = _load_pipeline("text-generation", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = f"Write a short children's story based on this: {text}. Once upon a time, "
    story_result = generator(
        prompt,
        # max_new_tokens bounds only the *generated* continuation. The previous
        # max_length=150 counted the prompt tokens too, which could leave almost
        # no budget for the story when the caption was long.
        max_new_tokens=150,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
    )
    story_text = story_result[0]["generated_text"]
    # The model echoes the prompt at the start of its output; strip it but keep
    # the "Once upon a time, " opener so the story reads naturally.
    if story_text.startswith(prompt):
        story_text = "Once upon a time, " + story_text[len(prompt):]
    else:
        # Fallback: original behavior for outputs that don't echo verbatim.
        story_text = story_text.replace(prompt, "Once upon a time, ")
    return story_text


def text2audio(story_text):
    """Synthesize speech for the story text; returns the raw TTS pipeline output.

    NOTE(review): the output dict's key layout varies across TTS models/versions
    ('audio' vs 'audio_array'), which is why the caller probes both.
    """
    synthesizer = _load_pipeline("text-to-speech", "HelpingAI/HelpingAI-TTS-v1")
    return synthesizer(story_text)


def main():
    """Render the Streamlit UI and run the image -> story -> audio flow."""
    st.title("Image to Audio Story")

    uploaded_file = st.file_uploader("Upload an image")
    if uploaded_file is None:
        return  # nothing to do until the user uploads a file

    st.image(uploaded_file, caption="Uploaded Image")
    image = Image.open(uploaded_file)

    st.write("Generating caption...")
    caption = img2text(image)
    st.write(f"Caption: {caption}")

    st.write("Creating story...")
    story = text2story(caption)
    st.write(f"Story: {story}")

    st.write("Generating audio...")
    speech_output = text2audio(story)

    # Play audio — probe both known output shapes of the TTS pipeline.
    try:
        if "audio" in speech_output and "sampling_rate" in speech_output:
            st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])
        elif "audio_array" in speech_output and "sampling_rate" in speech_output:
            st.audio(speech_output["audio_array"], sample_rate=speech_output["sampling_rate"])
        else:
            st.write("Audio generated but could not be played.")
    except Exception as e:
        st.error(f"Error playing audio: {e}")


main()