# --- Scrape residue from the Hugging Face Spaces file viewer (not program code) ---
# Spaces:
# Sleeping
# Sleeping
# File size: 2,223 Bytes
# ad4186a 90bef38 8d5fabf ab8ead3 118cd25 ad4186a ab8ead3 ad4186a cd245d5 8d5fabf ad4186a 5f21a2d ad4186a 5f21a2d ad4186a 5f21a2d ad4186a cd245d5 ad4186a 8d5fabf ad4186a 4e37056 ab8ead3 ad4186a f006a50 ad4186a ab8ead3 f006a50 ad4186a 7df9b81 ad4186a (git blame hashes)
# 1 2 3 4 5 6 7 8 9 10 ... 69 (line-number gutter)
# Only the two imports you requested
import streamlit as st
from transformers import pipeline
from PIL import Image
# Simple image-to-text function
@st.cache_resource
def _load_captioner():
    """Load the BLIP captioning pipeline once and reuse it across reruns."""
    return pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")

def img2text(image):
    """Return a caption string for *image* (a PIL Image).

    The pipeline is cached with st.cache_resource so the model is not
    re-downloaded and re-loaded on every Streamlit rerun (the original
    rebuilt it on each call).
    """
    captioner = _load_captioner()
    # The pipeline returns a list of candidate dicts; take the first caption.
    return captioner(image)[0]["generated_text"]
# Simple text-to-story function
@st.cache_resource
def _load_story_generator():
    """Load the TinyLlama text-generation pipeline once per session."""
    return pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

def text2story(text):
    """Generate a short children's story seeded by *text* (the image caption).

    Returns the generated story with the instruction prompt removed, so the
    result the reader sees starts at "Once upon a time, ".
    """
    generator = _load_story_generator()
    prompt = f"Write a short children's story based on this: {text}. Once upon a time, "
    story_result = generator(
        prompt,
        # max_new_tokens bounds only the generated continuation; the original
        # max_length=150 counted prompt tokens too, so a long caption could
        # leave no room for the story (or trigger a length error).
        max_new_tokens=150,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
    )
    story_text = story_result[0]["generated_text"]
    # text-generation pipelines echo the prompt; strip it but keep the opener.
    if story_text.startswith(prompt):
        story_text = "Once upon a time, " + story_text[len(prompt):]
    else:
        # Fallback to the original replace() in case the model altered
        # leading whitespace/casing of the echoed prompt.
        story_text = story_text.replace(prompt, "Once upon a time, ")
    return story_text
# Simple text-to-audio function
@st.cache_resource
def _load_tts():
    """Load the text-to-speech pipeline once per session."""
    return pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")

def text2audio(story_text):
    """Synthesize speech for *story_text*.

    Returns the raw pipeline output dict unchanged (the caller checks for
    'audio'/'audio_array' and 'sampling_rate' keys before playback).
    """
    synthesizer = _load_tts()
    return synthesizer(story_text)
# Basic Streamlit interface: upload an image, caption it, spin the caption
# into a short story, then read the story aloud.
st.title("Image to Audio Story")

uploaded_file = st.file_uploader("Upload an image")

if uploaded_file is not None:
    # Show the upload, then hand a decoded PIL image to the caption model.
    st.image(uploaded_file, caption="Uploaded Image")
    image = Image.open(uploaded_file)

    # Stage 1: image -> caption
    st.write("Generating caption...")
    caption = img2text(image)
    st.write(f"Caption: {caption}")

    # Stage 2: caption -> story
    st.write("Creating story...")
    story = text2story(caption)
    st.write(f"Story: {story}")

    # Stage 3: story -> speech
    st.write("Generating audio...")
    speech_output = text2audio(story)

    # Playback: TTS pipelines differ in the key naming for the waveform,
    # so accept either 'audio' or 'audio_array' alongside 'sampling_rate'.
    try:
        has_rate = "sampling_rate" in speech_output
        if has_rate and "audio" in speech_output:
            waveform_key = "audio"
        elif has_rate and "audio_array" in speech_output:
            waveform_key = "audio_array"
        else:
            waveform_key = None

        if waveform_key is None:
            st.write("Audio generated but could not be played.")
        else:
            st.audio(
                speech_output[waveform_key],
                sample_rate=speech_output["sampling_rate"],
            )
    except Exception as e:
        st.error(f"Error playing audio: {e}")