# Assignment1 / app.py — uploaded by CR7CAD (commit ad4186a, 2.22 kB)
# Only the two imports you requested
import streamlit as st
from transformers import pipeline
from PIL import Image
@st.cache_resource
def _load_captioner():
    """Load the BLIP image-captioning pipeline once and cache it.

    Streamlit reruns the whole script on every interaction; without
    caching, the model would be re-downloaded/re-loaded on each upload.
    """
    return pipeline("image-to-text", model="sooh-j/blip-image-captioning-base")


# Simple image-to-text function
def img2text(image):
    """Return a text caption for *image* (a PIL Image).

    Uses the cached captioning pipeline and returns the first
    generated caption string.
    """
    captioner = _load_captioner()
    return captioner(image)[0]["generated_text"]
@st.cache_resource
def _load_story_generator():
    """Load the TinyLlama text-generation pipeline once and cache it.

    Cached so Streamlit reruns do not reload the (large) model.
    """
    return pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")


# Simple text-to-story function
def text2story(text):
    """Generate a short children's story seeded by *text* (the caption).

    Returns the generated story beginning with "Once upon a time, ",
    with the instruction prompt stripped from the model output.
    """
    generator = _load_story_generator()
    opening = "Once upon a time, "
    prompt = f"Write a short children's story based on this: {text}. {opening}"
    story_result = generator(
        prompt,
        # max_new_tokens counts only generated tokens; the original
        # max_length=150 included the (already long) prompt, leaving
        # little room for the story itself.
        max_new_tokens=120,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
    )
    story_text = story_result[0]["generated_text"]
    # Causal LMs echo the prompt; strip it so the story starts at the
    # opening phrase. startswith/slicing is the common case; fall back
    # to replace() in case the model altered whitespace mid-prompt.
    if story_text.startswith(prompt):
        story_text = opening + story_text[len(prompt):]
    else:
        story_text = story_text.replace(prompt, opening)
    return story_text
@st.cache_resource
def _load_tts():
    """Load the text-to-speech pipeline once and cache it.

    Cached so Streamlit reruns do not reload the model on every
    interaction.
    """
    return pipeline("text-to-speech", model="HelpingAI/HelpingAI-TTS-v1")


# Simple text-to-audio function
def text2audio(story_text):
    """Synthesize speech for *story_text*.

    Returns the raw pipeline output (a dict; the caller inspects it
    for 'audio'/'audio_array' and 'sampling_rate' keys).
    """
    synthesizer = _load_tts()
    return synthesizer(story_text)
# Basic Streamlit interface: upload an image, caption it, expand the
# caption into a story, then synthesize and play the story as audio.
st.title("Image to Audio Story")

uploaded_file = st.file_uploader("Upload an image")
if uploaded_file is not None:
    # Show the raw upload, then decode it into a PIL image for the model.
    st.image(uploaded_file, caption="Uploaded Image")
    pil_image = Image.open(uploaded_file)

    # Stage 1: image -> caption
    st.write("Generating caption...")
    caption = img2text(pil_image)
    st.write(f"Caption: {caption}")

    # Stage 2: caption -> story
    st.write("Creating story...")
    story = text2story(caption)
    st.write(f"Story: {story}")

    # Stage 3: story -> speech
    st.write("Generating audio...")
    speech_output = text2audio(story)

    # Play audio: TTS pipelines differ in which key holds the waveform,
    # so probe the known keys in order of preference.
    try:
        waveform_key = next(
            (key for key in ("audio", "audio_array") if key in speech_output),
            None,
        )
        if waveform_key is not None and "sampling_rate" in speech_output:
            st.audio(
                speech_output[waveform_key],
                sample_rate=speech_output["sampling_rate"],
            )
        else:
            st.write("Audio generated but could not be played.")
    except Exception as e:
        st.error(f"Error playing audio: {e}")