Spaces:
Sleeping
Sleeping
| # Imports: Streamlit for the UI, the transformers pipeline factory for the models, PIL for image loading | |
| import streamlit as st | |
| from transformers import pipeline | |
| from PIL import Image | |
@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and reuse it on later calls."""
    return pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")


def img2text(image):
    """Return a generated caption for ``image``.

    Args:
        image: The image to caption, in any form the image-to-text pipeline
            accepts.

    Returns:
        The ``generated_text`` field of the first result produced by the
        pipeline.
    """
    # The pipeline comes from a cached loader so the model is not rebuilt
    # every time a caption is requested.
    captioner = _load_captioner()
    return captioner(image)[0]["generated_text"]
@st.cache_resource
def _load_story_generator():
    """Build the text-generation pipeline once and reuse it on later calls."""
    return pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")


def text2story(text):
    """Generate a short children's story seeded by ``text``.

    Args:
        text: The description (here, an image caption) the story is based on.

    Returns:
        The story as a string that begins with "Once upon a time, " followed
        by the model's continuation of the prompt.
    """
    story_opening = "Once upon a time, "
    prompt = f"Write a short children's story based on this: {text}. {story_opening}"

    generator = _load_story_generator()
    results = generator(
        prompt,
        max_length=150,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        # Ask the pipeline for the continuation only, so the prompt does not
        # have to be removed from the output with string replacement (which
        # would also rewrite any later repetition of the prompt in the story).
        return_full_text=False,
    )

    # The prompt already ends with the story opening; put it back in front of
    # the continuation so the returned story reads from its first words.
    return story_opening + results[0]["generated_text"]
@st.cache_resource
def _load_synthesizer():
    """Build the text-to-speech pipeline once and reuse it on later calls."""
    return pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")


def text2audio(story_text):
    """Synthesize speech for ``story_text``.

    Args:
        story_text: The text to be spoken.

    Returns:
        The text-to-speech pipeline's output, passed through unchanged. The
        calling script looks for an ``audio`` (or ``audio_array``) entry and
        a ``sampling_rate`` entry in it.
    """
    # The pipeline comes from a cached loader so the model is not rebuilt
    # every time audio is requested.
    synthesizer = _load_synthesizer()
    return synthesizer(story_text)
# Streamlit interface: upload an image, caption it, turn the caption into a
# story, then synthesize and play the story as audio.
st.title("Image to Audio Story")
uploaded_file = st.file_uploader("Upload an image")

if uploaded_file is not None:
    # The uploader accepts any file, so decode it before doing anything else
    # and report an unreadable upload instead of letting the script crash.
    # PIL's UnidentifiedImageError is a subclass of OSError.
    try:
        image = Image.open(uploaded_file)
    except OSError as e:
        image = None
        st.error(f"Could not read the uploaded file as an image: {e}")

    if image is not None:
        # Display image
        st.image(image, caption="Uploaded Image")

        # Image to Text
        st.write("Generating caption...")
        caption = img2text(image)
        st.write(f"Caption: {caption}")

        # Text to Story
        st.write("Creating story...")
        story = text2story(caption)
        st.write(f"Story: {story}")

        # Text to Audio
        st.write("Generating audio...")
        speech_output = text2audio(story)

        # Play audio. The samples may be stored under either "audio" or
        # "audio_array"; "audio" wins when both are present. Playback also
        # needs the sampling rate, otherwise only a notice is shown.
        try:
            audio_key = next(
                (key for key in ("audio", "audio_array") if key in speech_output),
                None,
            )
            if audio_key is not None and "sampling_rate" in speech_output:
                st.audio(
                    speech_output[audio_key],
                    sample_rate=speech_output["sampling_rate"],
                )
            else:
                st.write("Audio generated but could not be played.")
        except Exception as e:
            # Best-effort playback: surface the problem in the UI rather than
            # aborting the run after the story has already been shown.
            st.error(f"Error playing audio: {e}")