Spaces:

sshenai
/

testStreamDemo

Sleeping

App Files Files Community

sshenai committed on May 1, 2025

Commit

ea5ce1f

verified ·

1 Parent(s): 4e66561

Upload app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import streamlit as st
+from PIL import Image
+import time
+from transformers import pipeline
+import tempfile
+import os
# Image captioning
@st.cache_resource(show_spinner=False)
def _load_caption_pipeline():
    """Build the image-captioning pipeline once and share it across reruns."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_image_caption(image_path):
    """Generate a caption for an image using a pre-trained captioning model.

    Args:
        image_path: Image input handed unchanged to the ``image-to-text``
            pipeline (the app passes the path of a temporary image file).

    Returns:
        The ``generated_text`` field of the first result the pipeline returns.
    """
    # Streamlit re-executes the script on every interaction; fetching the
    # pipeline from the resource cache avoids reloading the model each time.
    img2caption = _load_caption_pipeline()
    result = img2caption(image_path)
    return result[0]['generated_text']
# Story generation
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the story text-generation pipeline once and share it across reruns."""
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


def text2story(text):
    """Generate a story that continues the given text.

    Args:
        text: Prompt passed to the ``text-generation`` pipeline (the app
            passes the image caption).

    Returns:
        The ``generated_text`` field of the first generation, produced with
        ``max_length=200``.
    """
    # Streamlit re-executes the script on every interaction; fetching the
    # pipeline from the resource cache avoids reloading the model each time.
    pipe = _load_story_pipeline()
    story_text = pipe(text, max_length=200)[0]['generated_text']
    return story_text
# Text-to-speech
@st.cache_resource(show_spinner=False)
def _load_tts_pipeline():
    """Build the text-to-audio pipeline once and share it across reruns."""
    return pipeline("text-to-audio", model="facebook/mms-tts-eng")


def text_to_speech(text):
    """Convert text to speech audio.

    Args:
        text: Text to narrate; only the first 1000 characters are synthesized.

    Returns:
        A ``(audio, sampling_rate)`` pair taken from the pipeline output, or
        ``(None, None)`` if loading the model or synthesizing fails. Failures
        are reported to the user through ``st.error`` instead of being raised.
    """
    try:
        # The loader call stays inside the try block so that a failure to
        # load the model is reported the same way as a synthesis failure.
        # The cached loader avoids reloading the model on every Streamlit rerun.
        tts_pipe = _load_tts_pipeline()
        # The output is indexed as a dict with 'audio' and 'sampling_rate' keys.
        audio_output = tts_pipe(text[:1000])  # Limit text length
        return audio_output['audio'], audio_output['sampling_rate']
    except Exception as e:
        st.error(f"Speech generation failed: {e}")
        return None, None
# Main application
def main():
    """Run the Streamlit page: upload an image, then caption, story, and audio.

    The uploaded image is displayed, written to a temporary JPEG file,
    captioned, expanded into a story, and narrated. Any exception raised
    during these steps is shown with ``st.error``; the temporary file is
    removed in all cases.
    """
    st.title("Image to Story with Speech")
    st.write("Upload an image to generate a caption, story, and audio narration")
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    # Set before the try block so the cleanup in ``finally`` can always tell
    # whether a temporary file was created.
    image_path = None
    try:
        # Process image
        with st.spinner("Processing image..."):
            image = Image.open(uploaded_image)
            # NOTE(review): newer Streamlit releases appear to deprecate
            # use_column_width in favour of use_container_width; confirm
            # against the deployed Streamlit version before changing.
            st.image(image, caption="Uploaded Image", use_column_width=True)
            # Save temporary file
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                # Record the path first: if saving fails, the file created
                # with delete=False must still be removed in ``finally``.
                image_path = temp_file.name
                # JPEG cannot store alpha or palette modes (e.g. RGBA/P PNGs),
                # so convert those to RGB before writing.
                if image.mode in ("RGB", "L", "CMYK"):
                    jpeg_image = image
                else:
                    jpeg_image = image.convert("RGB")
                # Write through the already-open handle instead of reopening
                # the file by name.
                jpeg_image.save(temp_file, format="JPEG")

        # Generate caption
        with st.spinner("Generating caption..."):
            caption = generate_image_caption(image_path)
            st.subheader("Generated Caption")
            st.write(caption)

        # Generate story
        with st.spinner("Generating story..."):
            story = text2story(caption)
            st.subheader("Generated Story")
            st.write(story)

        # Generate speech
        with st.spinner("Generating audio..."):
            audio_array, sample_rate = text_to_speech(story)
            # text_to_speech returns (None, None) on failure; skip the player.
            if audio_array is not None:
                st.subheader("Audio Narration")
                st.audio(audio_array, sample_rate=sample_rate)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user.
        st.error(f"An error occurred: {e}")
    finally:
        # Clean up temporary file
        if image_path is not None and os.path.exists(image_path):
            os.remove(image_path)


if __name__ == "__main__":
    main()