sshenai committed on
Commit
ea5ce1f
·
verified ·
1 Parent(s): 4e66561

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import time
4
+ from transformers import pipeline
5
+ import tempfile
6
+ import os
7
+
8
# Function to generate image caption
@st.cache_resource(show_spinner=False)
def _load_caption_pipeline():
    """Build the image-captioning pipeline once and reuse it on later calls."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_image_caption(image_path):
    """Generate a caption for the given image using a pre-trained model.

    Args:
        image_path: Path of the image file handed to the captioning pipeline.

    Returns:
        The ``generated_text`` field of the first result the pipeline returns.
    """
    # The loader is cached so the model is not re-instantiated on every call;
    # the caller already shows its own spinner, so the cache spinner is off.
    img2caption = _load_caption_pipeline()
    result = img2caption(image_path)
    return result[0]['generated_text']
14
+
15
# Function to generate story from text
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the story text-generation pipeline once and reuse it on later calls."""
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


def text2story(text):
    """Generate a story from input text.

    Args:
        text: Prompt passed to the text-generation pipeline (the app passes
            the image caption).

    Returns:
        The ``generated_text`` field of the first result; the pipeline is
        called with ``max_length=200``.
    """
    # Cached loader: avoids rebuilding the generation model on each call.
    pipe = _load_story_pipeline()
    story_text = pipe(text, max_length=200)[0]['generated_text']
    return story_text
21
+
22
# Function to convert text to speech
@st.cache_resource(show_spinner=False)
def _load_tts_pipeline():
    """Build the text-to-audio pipeline once and reuse it on later calls."""
    return pipeline("text-to-audio", model="facebook/mms-tts-eng")


def text_to_speech(text):
    """Convert text to speech audio.

    Only the first 1000 characters of ``text`` are synthesized.

    Args:
        text: The text to narrate.

    Returns:
        A ``(audio, sampling_rate)`` tuple taken from the pipeline output, or
        ``(None, None)`` when loading or synthesis fails; in that case the
        error is reported to the page with ``st.error``.
    """
    try:
        # Loading stays inside the try so a model-load failure is reported
        # the same way as a synthesis failure.
        tts_pipe = _load_tts_pipeline()

        # Generate audio (dict with 'audio' and 'sampling_rate' entries)
        audio_output = tts_pipe(text[:1000])  # Limit text length

        return audio_output['audio'], audio_output['sampling_rate']
    except Exception as e:
        # UI boundary: surface the failure instead of crashing the page.
        st.error(f"Speech generation failed: {str(e)}")
        return None, None
37
+
38
# Main application
def main():
    """Render the page: upload an image, then caption it, write a story, narrate it.

    The uploaded image is written to a temporary JPEG file for the captioning
    step; that file is removed again whether or not the pipeline succeeds.
    Any exception raised while processing is shown on the page via ``st.error``.
    """
    st.title("Image to Story with Speech")
    st.write("Upload an image to generate a caption, story, and audio narration")

    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    # Defined before the try so the cleanup in `finally` can test it directly.
    image_path = None
    try:
        # Process image
        with st.spinner("Processing image..."):
            image = Image.open(uploaded_image)
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # PNG uploads may carry an alpha channel or a palette, which the
            # JPEG writer rejects; normalise to RGB before saving.
            rgb_image = image if image.mode == "RGB" else image.convert("RGB")

            # Save temporary file. Write through the open handle (with an
            # explicit format) rather than re-opening the path by name, which
            # fails on platforms that lock open temporary files.
            with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                image_path = temp_file.name
                rgb_image.save(temp_file, format="JPEG")

        # Generate caption
        with st.spinner("Generating caption..."):
            caption = generate_image_caption(image_path)
            st.subheader("Generated Caption")
            st.write(caption)

        # Generate story
        with st.spinner("Generating story..."):
            story = text2story(caption)
            st.subheader("Generated Story")
            st.write(story)

        # Generate speech
        with st.spinner("Generating audio..."):
            audio_array, sample_rate = text_to_speech(story)
            if audio_array is not None:
                st.subheader("Audio Narration")
                st.audio(audio_array, sample_rate=sample_rate)

    except Exception as e:
        # UI boundary: report the failure on the page instead of a traceback.
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Clean up temporary file
        if image_path is not None and os.path.exists(image_path):
            os.remove(image_path)
82
+
83
# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
    main()