"""Streamlit app: turn an uploaded image into a short, narrated children's story.

Pipeline: image -> caption (BLIP) -> story (TinyLlama) -> speech (TTS model).
"""

import streamlit as st
from PIL import Image
from transformers import pipeline


@st.cache_resource
def _load_pipeline(task, model):
    """Load a transformers pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every user interaction; without
    caching, each heavy model would be re-initialized on every rerun.
    """
    return pipeline(task, model=model)


def img2text(image):
    """Return a caption string for a PIL image using the BLIP captioning model."""
    image_to_text = _load_pipeline("image-to-text", "sooh-j/blip-image-captioning-base")
    return image_to_text(image)[0]["generated_text"]


def text2story(text):
    """Generate a short children's story (str) from a caption string."""
    generator = _load_pipeline("text-generation", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    prompt = f"Write a short children's story based on this: {text}. Once upon a time, "
    story_result = generator(
        prompt,
        # max_new_tokens bounds only the *generated* continuation. The previous
        # max_length=150 counted the prompt tokens too, which could leave almost
        # no budget for the story when the caption was long.
        max_new_tokens=150,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
    )
    story_text = story_result[0]["generated_text"]
    # The model echoes the prompt at the start of its output; strip it but keep
    # the "Once upon a time, " opener so the story reads naturally.
    if story_text.startswith(prompt):
        story_text = "Once upon a time, " + story_text[len(prompt):]
    else:
        # Fallback: original behavior for outputs that don't echo verbatim.
        story_text = story_text.replace(prompt, "Once upon a time, ")
    return story_text


def text2audio(story_text):
    """Synthesize speech for the story text; returns the raw TTS pipeline output.

    NOTE(review): the output dict's key layout varies across TTS models/versions
    ('audio' vs 'audio_array'), which is why the caller probes both.
    """
    synthesizer = _load_pipeline("text-to-speech", "HelpingAI/HelpingAI-TTS-v1")
    return synthesizer(story_text)


def main():
    """Render the Streamlit UI and run the image -> story -> audio flow."""
    st.title("Image to Audio Story")

    uploaded_file = st.file_uploader("Upload an image")
    if uploaded_file is None:
        return  # nothing to do until the user uploads a file

    st.image(uploaded_file, caption="Uploaded Image")
    image = Image.open(uploaded_file)

    st.write("Generating caption...")
    caption = img2text(image)
    st.write(f"Caption: {caption}")

    st.write("Creating story...")
    story = text2story(caption)
    st.write(f"Story: {story}")

    st.write("Generating audio...")
    speech_output = text2audio(story)

    # Play audio — probe both known output shapes of the TTS pipeline.
    try:
        if "audio" in speech_output and "sampling_rate" in speech_output:
            st.audio(speech_output["audio"], sample_rate=speech_output["sampling_rate"])
        elif "audio_array" in speech_output and "sampling_rate" in speech_output:
            st.audio(speech_output["audio_array"], sample_rate=speech_output["sampling_rate"])
        else:
            st.write("Audio generated but could not be played.")
    except Exception as e:
        st.error(f"Error playing audio: {e}")


main()