Assignment1

Sleeping

App Files Files Community

sshenai committed on May 1, 2025

Commit

7d08859

verified ·

1 Parent(s): cef64d8

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -23

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 # Import Part
 import streamlit as st
 from PIL import Image
-import gtts
-import io
 import time
 import tensorflow as tf
 from transformers import pipeline
@@ -22,11 +24,10 @@ def caption_to_story(text):
 # Generates an audio for the story
 def story_to_audio(text):
-    audio = io.BytesIO()
-    tts = gTTS(text=text, lang='en', slow=False)
-    tts.write_to_fp(audio)
-    audio.seek(0)
-    return audio
 # Child-Friendly Interface Design
@@ -37,31 +38,45 @@ st.markdown("Upload an image and generate your exclusive fairy tale!")
 # File Upload
 uploaded_image = st.file_uploader("Choose a picture", type=["jpg", "jpeg", "png"], key="image_uploader")
-# Main Part
 if uploaded_image is not None:
-    # Display the uploaded image
     st.image(uploaded_image, caption='Uploaded Image', use_column_width=True)
-    # Save the uploaded image as a temporary file since the pipeline requires a file path as input
-    import tempfile
     with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
         tmp_file.write(uploaded_image.getvalue())
         tmp_file_path = tmp_file.name
-    # Generate an image caption
-    caption = image_to_caption(tmp_file_path)
-    st.write(f"Caption: {caption}")
-    # Generate a story based on the caption
-    story = caption_to_story(caption)
-    st.write(f"Story: {story}")
-    # Convert the story to audio
-    audio = story_to_audio(story)
-    if audio:
-        st.audio(audio, format='audio/mp3')
-    # Delete the temporary file
-    import os
     os.remove(tmp_file_path)

 # Import Part
 import streamlit as st
 from PIL import Image
 import time
+import numpy as np
 import tensorflow as tf
+from transformers import pipeline
+import tempfile
+import os
 from transformers import pipeline
 # Generates audio narration for the story
 def story_to_audio(text):
+    """Synthesize speech for a story with the facebook/mms-tts-eng model.
+
+    Args:
+        text: Story text to narrate. Only the first 1000 characters are
+            passed to the text-to-audio pipeline.
+
+    Returns:
+        An ``(audio, sampling_rate)`` tuple read from the pipeline output,
+        or ``(None, None)`` when ``text`` is empty or only whitespace.
+    """
+    # Nothing to narrate: report "no audio" so callers that check for None
+    # can skip playback instead of running the model on empty input.
+    if not text or not text.strip():
+        return None, None
+    # NOTE(review): the pipeline is constructed on every call; caching it
+    # (e.g. with Streamlit's st.cache_resource) would avoid repeated model
+    # loads -- confirm the deployed Streamlit version provides it.
+    storytoaudio = pipeline("text-to-audio", model="facebook/mms-tts-eng")
+    audio_output = storytoaudio(text[:1000])
+    return audio_output['audio'], audio_output['sampling_rate']
 # Child-Friendly Interface Design
 # File Upload
 uploaded_image = st.file_uploader("Choose a picture", type=["jpg", "jpeg", "png"], key="image_uploader")
 if uploaded_image is not None:
+    # Display the uploaded image
     st.image(uploaded_image, caption='Uploaded Image', use_column_width=True)
+    # Save the upload to a temporary file because the captioning step takes a
+    # file path; delete=False keeps the file on disk after the handle closes.
     with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
         tmp_file.write(uploaded_image.getvalue())
         tmp_file_path = tmp_file.name
+    try:
+        # Generate the image caption
+        with st.spinner("正在生成图像描述..."):
+            caption = image_to_caption(tmp_file_path)
+            st.write(f"Caption: {caption}")
+        # Generate the story from the caption
+        with st.spinner("正在创作故事..."):
+            story = caption_to_story(caption)
+            st.write(f"Story: {story}")
+        # Generate the narration audio
+        with st.spinner("正在为故事配音..."):
+            audio_array, sampling_rate = story_to_audio(story)
+            if audio_array is not None:
+                try:
+                    # Pass the waveform to Streamlit as a NumPy array together
+                    # with its sample rate so Streamlit builds the WAV itself.
+                    # Raw PCM bytes labelled 'audio/wav' have no WAV header and
+                    # sample_rate is only honoured for NumPy input.
+                    # Assumes the waveform is (num_samples,) or
+                    # (num_channels, num_samples) -- TODO confirm for this model.
+                    waveform = np.asarray(audio_array)
+                    st.audio(waveform, sample_rate=sampling_rate)
+                except Exception as e:
+                    # UI boundary: show the failure to the user instead of crashing the app
+                    st.error(f"音频播放失败: {e}")
+    finally:
+        # Always delete the temporary file, even when a step above raised
+        os.remove(tmp_file_path)