Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from PIL import Image | |
| import time | |
| from transformers import pipeline | |
| import tempfile | |
| import os | |
@st.cache_resource(show_spinner=False)
def _load_captioning_pipeline():
    """Build the BLIP image-captioning pipeline once and reuse it afterwards."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


# Function to generate image caption
def generate_image_caption(image_path):
    """Generate a caption for the given image using a pre-trained model.

    Args:
        image_path: Path of the image file; handed to the pipeline as-is.

    Returns:
        The ``generated_text`` field of the first result the pipeline returns.
    """
    # Streamlit re-executes the script on every interaction, so the pipeline
    # is fetched from a cached loader instead of being rebuilt on each call.
    img2caption = _load_captioning_pipeline()
    result = img2caption(image_path)
    return result[0]['generated_text']
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the story text-generation pipeline once and reuse it afterwards."""
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


# Function to generate story from text
def text2story(text):
    """Generate a story from input text.

    Args:
        text: Prompt passed to the text-generation pipeline.

    Returns:
        The ``generated_text`` field of the first generation, requested with
        ``max_length=200``.
    """
    # Streamlit re-executes the script on every interaction, so the pipeline
    # is fetched from a cached loader instead of being rebuilt on each call.
    pipe = _load_story_pipeline()
    story_text = pipe(text, max_length=200)[0]['generated_text']
    return story_text
@st.cache_resource(show_spinner=False)
def _load_tts_pipeline():
    """Build the text-to-audio pipeline once and reuse it afterwards."""
    return pipeline("text-to-audio", model="facebook/mms-tts-eng")


# Function to convert text to speech
def text_to_speech(text):
    """Convert text to speech audio.

    Args:
        text: Text to narrate; only the first 1000 characters are synthesized.

    Returns:
        A ``(audio, sampling_rate)`` tuple taken from the pipeline output, or
        ``(None, None)`` if synthesis fails (the error is shown via
        ``st.error`` rather than raised).
    """
    try:
        # Loading stays inside the try block so a failed model load is
        # reported the same way as a failed synthesis. The loader is cached
        # because Streamlit re-executes the script on every interaction.
        tts_pipe = _load_tts_pipeline()
        # Generate audio (dict with 'audio' array and 'sampling_rate')
        audio_output = tts_pipe(text[:1000])  # Limit text length
        return audio_output['audio'], audio_output['sampling_rate']
    except Exception as e:
        st.error(f"Speech generation failed: {e}")
        return None, None
# Main application
def main():
    """Run the Streamlit page: upload an image, then caption, story and audio.

    The uploaded image is written to a temporary JPEG file, captioned, the
    caption is expanded into a story, and the story is narrated. Any failure
    is reported with ``st.error``; the temporary file is always removed.
    """
    st.title("Image to Story with Speech")
    st.write("Upload an image to generate a caption, story, and audio narration")

    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    # Known before the try block so cleanup works even if saving fails.
    image_path = None
    try:
        # Process image
        with st.spinner("Processing image..."):
            image = Image.open(uploaded_image)
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # Save temporary file
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                # Record the path first: the file already exists on disk and
                # must be cleaned up even when the save below raises.
                image_path = temp_file.name
                # JPEG cannot store alpha/palette modes (e.g. RGBA or P PNGs),
                # so normalise to RGB before encoding.
                rgb_image = image if image.mode == "RGB" else image.convert("RGB")
                # Write through the open handle instead of reopening by name,
                # which fails on platforms that lock open temporary files.
                rgb_image.save(temp_file, format="JPEG")

        # Generate caption
        with st.spinner("Generating caption..."):
            caption = generate_image_caption(image_path)
        st.subheader("Generated Caption")
        st.write(caption)

        # Generate story
        with st.spinner("Generating story..."):
            story = text2story(caption)
        st.subheader("Generated Story")
        st.write(story)

        # Generate speech
        with st.spinner("Generating audio..."):
            audio_array, sample_rate = text_to_speech(story)
        if audio_array is not None:
            st.subheader("Audio Narration")
            st.audio(audio_array, sample_rate=sample_rate)
    except Exception as e:
        st.error(f"An error occurred: {e}")
    finally:
        # Clean up temporary file
        if image_path is not None and os.path.exists(image_path):
            os.remove(image_path)
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()