# testStreamDemo / app.py
# sshenai's picture
# Update app.py
# 1f9f0c5 verified
import streamlit as st
from PIL import Image
import time
from transformers import pipeline
import tempfile
import os
# Function to generate image caption
@st.cache_resource
def _load_caption_pipeline():
    """Build the image-captioning pipeline once and reuse it afterwards."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_image_caption(image_path):
    """Generate a caption for the given image using a pre-trained model.

    Args:
        image_path: Location of the image; handed to the pipeline unchanged.

    Returns:
        The ``generated_text`` field of the first result the pipeline returns.
    """
    # Loading the model is by far the most expensive step, so it goes through
    # a cached loader instead of being rebuilt on every call.
    img2caption = _load_caption_pipeline()
    result = img2caption(image_path)
    return result[0]['generated_text']
# Function to generate story from text
@st.cache_resource
def _load_story_pipeline():
    """Build the story text-generation pipeline once and reuse it afterwards."""
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


def text2story(text):
    """Generate a story from the input text.

    Args:
        text: Prompt passed to the text-generation pipeline.

    Returns:
        The ``generated_text`` field of the first generated sequence. The
        generation call is made with ``max_length=200``.
    """
    # Reuse the cached pipeline rather than re-instantiating the model on
    # every call.
    pipe = _load_story_pipeline()
    story_text = pipe(text, max_length=200)[0]['generated_text']
    return story_text
# Function to convert text to speech
@st.cache_resource
def _load_tts_pipeline():
    """Build the text-to-audio pipeline once and reuse it afterwards."""
    return pipeline("text-to-audio", model="facebook/mms-tts-eng")


def text_to_speech(text):
    """Convert text to speech audio.

    Args:
        text: Text to narrate. Only the first 1000 characters are synthesised.

    Returns:
        A ``(audio, sampling_rate)`` tuple taken from the pipeline output, or
        ``(None, None)`` when anything fails. Failures are reported to the
        user through ``st.error`` instead of being raised.
    """
    try:
        # The loader call stays inside the try so that a model-loading failure
        # is reported the same way as a synthesis failure.
        tts_pipe = _load_tts_pipeline()
        # Generate audio (dict with 'audio' array and 'sampling_rate');
        # the input is truncated to limit the text length.
        audio_output = tts_pipe(text[:1000])
        return audio_output['audio'], audio_output['sampling_rate']
    except Exception as e:
        st.error(f"Speech generation failed: {e}")
        return None, None
# Main application
def main():
    """Run the Streamlit page: upload an image, then caption, story and audio.

    Each stage (caption, story, narration) is rendered as soon as it is
    available. Any exception raised along the way is shown with ``st.error``,
    and the temporary image file is always removed afterwards.
    """
    st.title("Image to Story with Speech")
    st.write("Upload an image to generate a caption, story, and audio narration")
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    # Bound before the try block so the cleanup in `finally` can always
    # inspect it, even when the failure happens before a path is reserved.
    image_path = None
    try:
        # Process image
        with st.spinner("Processing image..."):
            image = Image.open(uploaded_image)
            st.image(image, caption="Uploaded Image", use_column_width=True)
            # Reserve a temporary path and close the handle before writing:
            # reopening a still-open NamedTemporaryFile by name fails on
            # Windows. The path is recorded first so a failed save cannot
            # leak the file.
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                image_path = temp_file.name
            # JPEG cannot store alpha or palette modes (e.g. RGBA/P PNG
            # uploads), so normalise to RGB before saving.
            image.convert("RGB").save(image_path)

        # Generate caption
        with st.spinner("Generating caption..."):
            caption = generate_image_caption(image_path)
        st.subheader("Generated Caption")
        st.write(caption)

        # Generate story
        with st.spinner("Generating story..."):
            story = text2story(caption)
        st.subheader("Generated Story")
        st.write(story)

        # Generate speech
        with st.spinner("Generating audio..."):
            audio_array, sample_rate = text_to_speech(story)
        # text_to_speech signals failure with (None, None) and has already
        # shown its own error message in that case.
        if audio_array is not None:
            st.subheader("Audio Narration")
            st.audio(audio_array, sample_rate=sample_rate)
    except Exception as e:
        st.error(f"An error occurred: {e}")
    finally:
        # Clean up temporary file
        if image_path is not None and os.path.exists(image_path):
            os.remove(image_path)
# Entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    main()