sshenai committed on
Commit
7d08859
·
verified ·
1 Parent(s): cef64d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -23
app.py CHANGED
@@ -1,10 +1,12 @@
1
  # Import Part
2
  import streamlit as st
3
  from PIL import Image
4
- import gtts
5
- import io
6
  import time
 
7
  import tensorflow as tf
 
 
 
8
 
9
  from transformers import pipeline
10
 
@@ -22,11 +24,10 @@ def caption_to_story(text):
22
 
23
  # Generates an audio for the story
24
  def story_to_audio(text):
25
- audio = io.BytesIO()
26
- tts = gTTS(text=text, lang='en', slow=False)
27
- tts.write_to_fp(audio)
28
- audio.seek(0)
29
- return audio
30
 
31
 
32
  # Child-Friendly Interface Design
@@ -37,31 +38,45 @@ st.markdown("Upload an image and generate your exclusive fairy tale!")
37
  # File Upload
38
  uploaded_image = st.file_uploader("Choose a picture", type=["jpg", "jpeg", "png"], key="image_uploader")
39
 
40
- # Main Part
41
  if uploaded_image is not None:
42
- # Display the uploaded image
43
  st.image(uploaded_image, caption='Uploaded Image', use_column_width=True)
44
- # Save the uploaded image as a temporary file since the pipeline requires a file path as input
45
- import tempfile
46
  with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
47
  tmp_file.write(uploaded_image.getvalue())
48
  tmp_file_path = tmp_file.name
49
 
50
- # Generate an image caption
51
- caption = image_to_caption(tmp_file_path)
52
- st.write(f"Caption: {caption}")
 
53
 
54
- # Generate a story based on the caption
55
- story = caption_to_story(caption)
56
- st.write(f"Story: {story}")
 
57
 
58
- # Convert the story to audio
59
- audio = story_to_audio(story)
60
- if audio:
61
- st.audio(audio, format='audio/mp3')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Delete the temporary file
64
- import os
65
  os.remove(tmp_file_path)
66
 
67
 
 
1
  # Import Part
2
  import streamlit as st
3
  from PIL import Image
 
 
4
  import time
5
+ import numpy as np
6
  import tensorflow as tf
7
+ from transformers import pipeline
8
+ import tempfile
9
+ import os
10
 
11
  from transformers import pipeline
12
 
 
24
 
25
  # Generates an audio for the story
26
  def story_to_audio(text):
27
+ """Converts story to audio"""
28
+ storytoaudio = pipeline("text-to-audio", model="facebook/mms-tts-eng")
29
+ audio_output = storytoaudio(text[:1000])
30
+ return audio_output['audio'], audio_output['sampling_rate']
 
31
 
32
 
33
  # Child-Friendly Interface Design
 
38
  # File Upload
39
  uploaded_image = st.file_uploader("Choose a picture", type=["jpg", "jpeg", "png"], key="image_uploader")
40
 
 
41
  if uploaded_image is not None:
42
+ # 显示上传的图像
43
  st.image(uploaded_image, caption='Uploaded Image', use_column_width=True)
44
+
45
+ # 保存临时文件
46
  with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as tmp_file:
47
  tmp_file.write(uploaded_image.getvalue())
48
  tmp_file_path = tmp_file.name
49
 
50
+ # 生成图像描述
51
+ with st.spinner("正在生成图像描述..."):
52
+ caption = image_to_caption(tmp_file_path)
53
+ st.write(f"Caption: {caption}")
54
 
55
+ # 生成故事
56
+ with st.spinner("正在创作故事..."):
57
+ story = caption_to_story(caption)
58
+ st.write(f"Story: {story}")
59
 
60
+ # 生成音频
61
+ with st.spinner("正在为故事配音..."):
62
+ audio_array, sampling_rate = story_to_audio(story)
63
+ if audio_array is not None:
64
+ # 将音频数据转换为正确格式
65
+ try:
66
+ # 确保音频数据是numpy数组
67
+ if not isinstance(audio_array, np.ndarray):
68
+ audio_array = np.array(audio_array)
69
+
70
+ # 转换为16位整数(如果需要)
71
+ if audio_array.dtype != np.int16:
72
+ audio_array = (audio_array * 32767).astype(np.int16)
73
+
74
+ # 播放音频
75
+ st.audio(audio_array.tobytes(), format='audio/wav', sample_rate=sampling_rate)
76
+ except Exception as e:
77
+ st.error(f"音频播放失败: {e}")
78
 
79
+ # 删除临时文件
 
80
  os.remove(tmp_file_path)
81
 
82