shingguy1 committed on
Commit
04e49b0
·
verified ·
1 Parent(s): 6543b0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -143
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import io
3
  import wave
4
  import streamlit as st
@@ -9,170 +8,170 @@ import numpy as np
9
  # ——— 1) MODEL LOADING (cached) ————————————————
10
  @st.cache_resource
11
  def get_image_captioner(model_name="Salesforce/blip-image-captioning-base"):
12
- return pipeline("image-to-text", model=model_name, device="cpu")
13
 
14
  @st.cache_resource
15
  def get_story_pipe(model_name="google/flan-t5-base"):
16
- return pipeline("text2text-generation", model=model_name, device="cpu")
17
 
18
  @st.cache_resource
19
  def get_tts_pipe(model_name="facebook/mms-tts-eng"):
20
- return pipeline("text-to-speech", model=model_name, device="cpu")
21
 
22
  # ——— 2) TRANSFORM FUNCTIONS ————————————————
23
  def part1_image_to_text(pil_img, captioner):
24
- results = captioner(pil_img)
25
- return results[0].get("generated_text", "") if results else ""
26
 
27
  def part2_text_to_story(
28
- caption: str,
29
- story_pipe,
30
- target_words: int = 100,
31
- max_length: int = 100,
32
- min_length: int = 80,
33
- do_sample: bool = True,
34
- top_k: int = 100,
35
- top_p: float= 0.9,
36
- temperature: float= 0.7,
37
- repetition_penalty: float = 1.1,
38
- no_repeat_ngram_size: int = 4
39
  ) -> str:
40
- prompt = (
41
- f"Write a vivid, imaginative short story of about {target_words} words "
42
- f"describing this scene: {caption}"
43
- )
44
- out = story_pipe(
45
- prompt,
46
- max_length=max_length,
47
- min_length=min_length,
48
- do_sample=do_sample,
49
- top_k=top_k,
50
- top_p=top_p,
51
- temperature=temperature,
52
- repetition_penalty=repetition_penalty,
53
- no_repeat_ngram_size=no_repeat_ngram_size,
54
- early_stopping=False
55
- )
56
- raw = out[0].get("generated_text", "").strip()
57
- if not raw:
58
- return ""
59
- # strip echo of prompt
60
- if raw.lower().startswith(prompt.lower()):
61
- story = raw[len(prompt):].strip()
62
- else:
63
- story = raw
64
- # cut at last full stop
65
- idx = story.rfind(".")
66
- if idx != -1:
67
- story = story[:idx+1]
68
- return story
69
 
70
  def part3_text_to_speech_bytes(text: str, tts_pipe) -> bytes:
71
- out = tts_pipe(text)
72
- if isinstance(out, list):
73
- out = out[0]
74
- audio_array = out["audio"] # np.ndarray (channels, samples)
75
- rate = out["sampling_rate"] # int
76
- data = audio_array.T if audio_array.ndim == 2 else audio_array
77
- pcm = (data * 32767).astype(np.int16)
78
-
79
- buffer = io.BytesIO()
80
- wf = wave.open(buffer, "wb")
81
- channels = 1 if data.ndim == 1 else data.shape[1]
82
- wf.setnchannels(channels)
83
- wf.setsampwidth(2)
84
- wf.setframerate(rate)
85
- wf.writeframes(pcm.tobytes())
86
- wf.close()
87
- buffer.seek(0)
88
- return buffer.read()
89
 
90
  # ——— 3) STREAMLIT UI ————————————————————————————
91
  # Set page config as the first Streamlit command
92
  st.set_page_config(
93
- page_title="Picture to Story Magic",
94
- page_icon="✨",
95
- layout="centered"
96
  )
97
 
98
  # Custom CSS for kid-friendly styling with improved readability
99
  st.markdown("""
100
  <style>
101
  .main {
102
- background-color: #e6f3ff;
103
- padding: 20px;
104
- border-radius: 15px;
105
  }
106
  .stButton>button {
107
- background-color: #ffcccb;
108
- color: #000000; /* Black text */
109
- border-radius: 10px;
110
- border: 2px solid #ff9999;
111
- font-size: 18px;
112
- font-weight: bold;
113
- padding: 10px 20px;
114
- transition: all 0.3s;
115
  }
116
  .stButton>button:hover {
117
- background-color: #ff9999;
118
- color: #ffffff; /* White text on hover for contrast */
119
- transform: scale(1.05);
120
  }
121
  .stFileUploader {
122
- background-color: #ffb300; /* Darker yellow for better contrast with white label text */
123
- border: 2px dashed #ff8c00; /* Darker orange border to match */
124
- border-radius: 10px;
125
- padding: 10px;
126
  }
127
  /* Style for the file uploader's inner text */
128
  .stFileUploader div[role="button"] {
129
- background-color: #f0f0f0; /* Very light gray background for contrast with black text */
130
- border-radius: 10px;
131
- padding: 10px;
132
  }
133
  .stFileUploader div[role="button"] > div {
134
- color: #000000 !important; /* Black text */
135
- font-size: 16px;
136
  }
137
  /* Style for the "Browse files" button inside the file uploader */
138
  .stFileUploader button {
139
- background-color: #ffca28 !important; /* Yellow button background */
140
- color: #000000 !important; /* Black text */
141
- border-radius: 8px !important;
142
- border: 2px solid #ffb300 !important; /* Match the container background */
143
- padding: 5px 15px !important;
144
- font-weight: bold !important;
145
- box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important; /* Subtle shadow to make button stand out */
146
  }
147
  .stFileUploader button:hover {
148
- background-color: #ff8c00 !important; /* Slightly darker yellow on hover */
149
- color: #000000 !important; /* Keep black text */
150
  }
151
  .stImage {
152
- border: 3px solid #81c784;
153
- border-radius: 10px;
154
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
155
  }
156
  .section-header {
157
- background-color: #b3e5fc;
158
- padding: 10px;
159
- border-radius: 10px;
160
- text-align: center;
161
- font-size: 24px;
162
- font-weight: bold;
163
- color: #000000; /* Black text */
164
- margin-bottom: 10px;
165
  }
166
  .caption-box, .story-box {
167
- background-color: #f0f4c3;
168
- padding: 15px;
169
- border-radius: 10px;
170
- border: 2px solid #d4e157;
171
- margin-bottom: 20px;
172
- color: #000000; /* Black text */
173
  }
174
  .caption-box b, .story-box b {
175
- color: #000000; /* Black text for bold headers */
176
  }
177
  </style>
178
  """, unsafe_allow_html=True)
@@ -182,38 +181,35 @@ st.markdown("<div class='section-header'>Picture to Story Magic! ✨</div>", uns
182
 
183
  # Image upload section
184
  with st.container():
185
- st.markdown("<div class='section-header'>1️⃣ Pick a Fun Picture! πŸ–ΌοΈ</div>", unsafe_allow_html=True)
186
- uploaded = st.file_uploader("Choose a picture to start the magic! 😊", type=["jpg","jpeg","png"])
187
- if not uploaded:
188
- st.info("Upload a picture, and let's make a story! πŸŽ‰")
189
- st.stop()
190
 
191
  # Show image
192
  with st.spinner("Looking at your picture..."):
193
- pil_img = Image.open(uploaded)
194
- st.image(pil_img, use_container_width=True)
195
 
196
  # Caption section
197
  with st.container():
198
- captioner = get_image_captioner()
199
- with st.spinner("Figuring out what's in your picture..."):
200
- caption = part1_image_to_text(pil_img, captioner)
201
- st.markdown(f"<div class='caption-box'><b>What's in the Picture? 🧐</b><br>{caption}</div>", unsafe_allow_html=True)
202
 
203
  # Story and audio section
204
  with st.container():
205
- st.markdown("<div class='section-header'>2️⃣ Make a Story and Hear It! 🎡</div>", unsafe_allow_html=True)
206
- if st.button("Create My Story! πŸŽ‰"):
207
- # Story
208
- story_pipe = get_story_pipe()
209
- with st.spinner("Writing a super cool story..."):
210
- story = part2_text_to_story(caption, story_pipe)
211
- st.markdown(f"<div class='story-box'><b>Your Cool Story! πŸ“š</b><br>{story}</div>", unsafe_allow_html=True)
212
-
213
- # TTS
214
- tts_pipe = get_tts_pipe()
215
- with st.spinner("Turning your story into sound..."):
216
- audio_bytes = part3_text_to_speech_bytes(story, tts_pipe)
217
- st.audio(audio_bytes, format="audio/wav")
218
- st.success("Yay! Your story is ready! 🎈")
219
- st.balloons() # Fun animation
 
 
1
  import io
2
  import wave
3
  import streamlit as st
 
8
  # ——— 1) MODEL LOADING (cached) ————————————————
9
  @st.cache_resource
10
  def get_image_captioner(model_name="Salesforce/blip-image-captioning-base"):
11
+ return pipeline("image-to-text", model=model_name, device="cpu")
12
 
13
  @st.cache_resource
14
  def get_story_pipe(model_name="google/flan-t5-base"):
15
+ return pipeline("text2text-generation", model=model_name, device="cpu")
16
 
17
  @st.cache_resource
18
  def get_tts_pipe(model_name="facebook/mms-tts-eng"):
19
+ return pipeline("text-to-speech", model=model_name, device="cpu")
20
 
21
  # ——— 2) TRANSFORM FUNCTIONS ————————————————
22
  def part1_image_to_text(pil_img, captioner):
23
+ results = captioner(pil_img)
24
+ return results[0].get("generated_text", "") if results else ""
25
 
26
  def part2_text_to_story(
27
+ caption: str,
28
+ story_pipe,
29
+ target_words: int = 100,
30
+ max_length: int = 100,
31
+ min_length: int = 80,
32
+ do_sample: bool = True,
33
+ top_k: int = 100,
34
+ top_p: float= 0.9,
35
+ temperature: float= 0.7,
36
+ repetition_penalty: float = 1.1,
37
+ no_repeat_ngram_size: int = 4
38
  ) -> str:
39
+ prompt = (
40
+ f"Write a vivid, imaginative short story of about {target_words} words "
41
+ f"describing this scene: {caption}"
42
+ )
43
+ out = story_pipe(
44
+ prompt,
45
+ max_length=max_length,
46
+ min_length=min_length,
47
+ do_sample=do_sample,
48
+ top_k=top_k,
49
+ top_p=top_p,
50
+ temperature=temperature,
51
+ repetition_penalty=repetition_penalty,
52
+ no_repeat_ngram_size=no_repeat_ngram_size,
53
+ early_stopping=False
54
+ )
55
+ raw = out[0].get("generated_text", "").strip()
56
+ if not raw:
57
+ return ""
58
+ # strip echo of prompt
59
+ if raw.lower().startswith(prompt.lower()):
60
+ story = raw[len(prompt):].strip()
61
+ else:
62
+ story = raw
63
+ # cut at last full stop
64
+ idx = story.rfind(".")
65
+ if idx != -1:
66
+ story = story[:idx+1]
67
+ return story
68
 
69
  def part3_text_to_speech_bytes(text: str, tts_pipe) -> bytes:
70
+ out = tts_pipe(text)
71
+ if isinstance(out, list):
72
+ out = out[0]
73
+ audio_array = out["audio"] # np.ndarray (channels, samples)
74
+ rate = out["sampling_rate"] # int
75
+ data = audio_array.T if audio_array.ndim == 2 else audio_array
76
+ pcm = (data * 32767).astype(np.int16)
77
+
78
+ buffer = io.BytesIO()
79
+ wf = wave.open(buffer, "wb")
80
+ channels = 1 if data.ndim == 1 else data.shape[1]
81
+ wf.setnchannels(channels)
82
+ wf.setsampwidth(2)
83
+ wf.setframerate(rate)
84
+ wf.writeframes(pcm.tobytes())
85
+ wf.close()
86
+ buffer.seek(0)
87
+ return buffer.read()
88
 
89
  # ——— 3) STREAMLIT UI ————————————————————————————
90
  # Set page config as the first Streamlit command
91
  st.set_page_config(
92
+ page_title="Picture to Story Magic",
93
+ page_icon="✨",
94
+ layout="centered"
95
  )
96
 
97
  # Custom CSS for kid-friendly styling with improved readability
98
  st.markdown("""
99
  <style>
100
  .main {
101
+ background-color: #e6f3ff;
102
+ padding: 20px;
103
+ border-radius: 15px;
104
  }
105
  .stButton>button {
106
+ background-color: #ffcccb;
107
+ color: #000000; /* Black text */
108
+ border-radius: 10px;
109
+ border: 2px solid #ff9999;
110
+ font-size: 18px;
111
+ font-weight: bold;
112
+ padding: 10px 20px;
113
+ transition: all 0.3s;
114
  }
115
  .stButton>button:hover {
116
+ background-color: #ff9999;
117
+ color: #ffffff; /* White text on hover for contrast */
118
+ transform: scale(1.05);
119
  }
120
  .stFileUploader {
121
+ background-color: #ffb300; /* Darker yellow for better contrast with white label text */
122
+ border: 2px dashed #ff8c00; /* Darker orange border to match */
123
+ border-radius: 10px;
124
+ padding: 10px;
125
  }
126
  /* Style for the file uploader's inner text */
127
  .stFileUploader div[role="button"] {
128
+ background-color: #f0f0f0; /* Very light gray background for contrast with black text */
129
+ border-radius: 10px;
130
+ padding: 10px;
131
  }
132
  .stFileUploader div[role="button"] > div {
133
+ color: #000000 !important; /* Black text */
134
+ font-size: 16px;
135
  }
136
  /* Style for the "Browse files" button inside the file uploader */
137
  .stFileUploader button {
138
+ background-color: #ffca28 !important; /* Yellow button background */
139
+ color: #000000 !important; /* Black text */
140
+ border-radius: 8px !important;
141
+ border: 2px solid #ffb300 !important; /* Match the container background */
142
+ padding: 5px 15px !important;
143
+ font-weight: bold !important;
144
+ box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important; /* Subtle shadow to make button stand out */
145
  }
146
  .stFileUploader button:hover {
147
+ background-color: #ff8c00 !important; /* Slightly darker yellow on hover */
148
+ color: #000000 !important; /* Keep black text */
149
  }
150
  .stImage {
151
+ border: 3px solid #81c784;
152
+ border-radius: 10px;
153
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
154
  }
155
  .section-header {
156
+ background-color: #b3e5fc;
157
+ padding: 10px;
158
+ border-radius: 10px;
159
+ text-align: center;
160
+ font-size: 24px;
161
+ font-weight: bold;
162
+ color: #000000; /* Black text */
163
+ margin-bottom: 10px;
164
  }
165
  .caption-box, .story-box {
166
+ background-color: #f0f4c3;
167
+ padding: 15px;
168
+ border-radius: 10px;
169
+ border: 2px solid #d4e157;
170
+ margin-bottom: 20px;
171
+ color: #000000; /* Black text */
172
  }
173
  .caption-box b, .story-box b {
174
+ color: #000000; /* Black text for bold headers */
175
  }
176
  </style>
177
  """, unsafe_allow_html=True)
 
181
 
182
  # Image upload section
183
  with st.container():
184
+ st.markdown("<div class='section-header'>1️⃣ Pick a Fun Picture! πŸ–ΌοΈ</div>", unsafe_allow_html=True)
185
+ uploaded = st.file_uploader("Choose a picture to start the magic! 😊", type=["jpg","jpeg","png"])
186
+ if not uploaded:
187
+ st.info("Upload a picture, and let's make a story! πŸŽ‰")
188
+ st.stop()
189
 
190
  # Show image
191
  with st.spinner("Looking at your picture..."):
192
+ pil_img = Image.open(uploaded)
193
+ st.image(pil_img, use_container_width=True)
194
 
195
  # Caption section
196
  with st.container():
197
+ captioner = get_image_captioner()
198
+ with st.spinner("Figuring out what's in your picture..."):
199
+ caption = part1_image_to_text(pil_img, captioner)
200
+ st.markdown(f"<div class='caption-box'><b>What's in the Picture? 🧐</b><br>{caption}</div>", unsafe_allow_html=True)
201
 
202
  # Story and audio section
203
  with st.container():
204
+ st.markdown("<div class='section-header'>2️⃣ Make a Story and Hear It! 🎡</div>", unsafe_allow_html=True)
205
+ if st.button("Create My Story! πŸŽ‰"):
206
+ # Story
207
+ story_pipe = get_story_pipe()
208
+ with st.spinner("Writing a super cool story..."):
209
+ story = part2_text_to_story(caption, story_pipe)
210
+ st.markdown(f"<div class='story-box'><b>Your Cool Story! πŸ“š</b><br>{story}</div>", unsafe_allow_html=True)
211
+
212
+ # TTS
213
+ tts_pipe = get_tts_pipe()
214
+ with st.spinner("Turning your story into sound..."):
215
+ audio