DWD1211 committed on
Commit
a526533
·
verified ·
1 Parent(s): a71e738

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hashlib
import tempfile
from pathlib import Path

import streamlit as st
from transformers import pipeline
3
+
4
+ # --- Load models once at the beginning ---
5
# Streamlit re-executes this script from the top on every user interaction,
# so bare module-level pipeline(...) calls would rebuild all three models on
# each rerun. Routing construction through st.cache_resource keeps a single
# instance per (task, model) pair alive across reruns and sessions.
@st.cache_resource(show_spinner=False)
def _load_pipeline(task, model):
    """Build the transformers pipeline for *task* backed by checkpoint *model*.

    show_spinner is disabled so that no Streamlit element is rendered while
    the models load, which happens before st.set_page_config is called.
    """
    return pipeline(task, model=model)


image_to_text_model = _load_pipeline("image-to-text", "Salesforce/blip-image-captioning-base")
text_to_story_model = _load_pipeline("text-generation", "pranavpsv/genre-story-generator-v2")
story_to_audio_model = _load_pipeline("text-to-speech", "facebook/mms-tts-eng")
8
+
9
+ # --- Define Functions ---
10
+ # img2text
11
def img2text(image_path):
    """Return the caption the image-to-text pipeline generates for *image_path*.

    Only the first result produced by the pipeline is used.
    """
    captions = image_to_text_model(image_path)
    first_caption = captions[0]
    return first_caption["generated_text"]
14
+
15
+ # text2story
16
def text2story(text, max_words=100, max_new_tokens=150):
    """Generate a short story from *text* with the text-generation pipeline.

    Args:
        text: Prompt handed to the pipeline (the image caption in this app).
        max_words: Maximum number of whitespace-separated words to keep from
            the generated text.
        max_new_tokens: Generation budget forwarded to the pipeline.

    Returns:
        The generated text unchanged when it has at most ``max_words`` words;
        otherwise its first ``max_words`` words joined by single spaces and
        terminated with sentence-ending punctuation.
    """
    story_text = text_to_story_model(text, max_new_tokens=max_new_tokens)[0]['generated_text']
    words = story_text.split()
    if len(words) <= max_words:
        return story_text

    # The cut usually lands mid-sentence. Drop a dangling separator and only
    # add a period when the last kept word does not already close a sentence,
    # so the result never ends in ".." or ",.".
    truncated = ' '.join(words[:max_words]).rstrip(',;:')
    if not truncated.endswith(('.', '!', '?')):
        truncated += '.'
    return truncated
22
+
23
+ # text2audio
24
def text2audio(story_text):
    """Run the text-to-speech pipeline on *story_text* and return its raw output."""
    return story_to_audio_model(story_text)
27
+
28
+ # play_audio
29
def play_audio(audio_data):
    """Render an audio player for synthesized speech.

    Args:
        audio_data: Mapping with an ``'audio'`` entry (the waveform samples)
            and a ``'sampling_rate'`` entry (samples per second).
            NOTE(review): presumably the dict returned by the text-to-speech
            pipeline, with the samples as a NumPy array - confirm.
    """
    # The previous implementation encoded a WAV through io.BytesIO and
    # sf.write, but neither `io` nor `sf` is imported anywhere in this file,
    # so the first click raised NameError. st.audio accepts raw sample arrays
    # directly when the sample rate is supplied, so no manual encoding (and no
    # extra dependency) is needed.
    st.audio(audio_data['audio'], sample_rate=audio_data['sampling_rate'])
34
+
35
+ # --- Streamlit App ---
36
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")

uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()

    # Results are cached in session state so reruns (e.g. a button click) do
    # not recompute them. Key the cache on the upload's content so that
    # choosing a different image discards the previous caption/story/audio
    # instead of showing stale results.
    upload_digest = hashlib.sha256(bytes_data).hexdigest()
    if st.session_state.get("upload_digest") != upload_digest:
        for stale_key in ("scenario", "story", "audio_data"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state.upload_digest = upload_digest

    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    # Stage 1: Image to Text
    if "scenario" not in st.session_state:
        with st.spinner('Processing image to text...'):
            # Never write to a path built from the client-supplied filename:
            # it could overwrite files in the working directory (including
            # this script). Use a private temp file, keeping only the
            # extension, and remove it once the caption is produced.
            suffix = Path(uploaded_file.name).suffix
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_image:
                tmp_image.write(bytes_data)
            try:
                st.session_state.scenario = img2text(tmp_image.name)
            finally:
                Path(tmp_image.name).unlink(missing_ok=True)
    st.subheader("Image Description:")
    st.write(st.session_state.scenario)

    # Stage 2: Text to Story
    if "story" not in st.session_state:
        with st.spinner('Generating a story...'):
            st.session_state.story = text2story(st.session_state.scenario)
    st.subheader("Generated Story:")
    st.write(st.session_state.story)

    # Stage 3: Story to Audio
    if "audio_data" not in st.session_state:
        with st.spinner('Generating audio narration...'):
            st.session_state.audio_data = text2audio(st.session_state.story)

# Play Audio Button
# Kept outside the upload branch so the warning below is reachable when no
# audio has been generated yet.
if st.button("Play Audio"):
    if "audio_data" in st.session_state:
        play_audio(st.session_state.audio_data)
    else:
        st.warning("Please generate the audio first.")