tbdavid2019 committed on
Commit
fd892b5
·
1 Parent(s): 4bd94d3

加上 log console

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +65 -9
__pycache__/app.cpython-311.pyc ADDED
Binary file (18.7 kB). View file
 
app.py CHANGED
@@ -34,6 +34,7 @@ STANDARD_VOICES = [
34
 
35
  # 優化腳本處理 - 合並相同說話者連續文本
36
  def optimize_script(script):
 
37
  lines = [line.strip() for line in script.splitlines() if line.strip()]
38
  optimized = []
39
  current_speaker = None
@@ -67,10 +68,13 @@ def optimize_script(script):
67
  if current_text:
68
  optimized.append((current_speaker, current_text))
69
 
 
70
  return optimized
71
 
72
  def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str, instructions: str = None) -> bytes:
73
  """使用 OpenAI TTS API 生成音頻"""
 
 
74
  # 檢查文本長度,OpenAI TTS API 有 4096 個標記的限制
75
  # 大約 1000 個漢字約等於 2000-3000 個標記,為安全起見,我們將限制設為 1000 個字符
76
  MAX_TEXT_LENGTH = 1000
@@ -79,15 +83,18 @@ def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str, instruc
79
 
80
  # 如果文本長度超過限制,分割文本
81
  if len(text) > MAX_TEXT_LENGTH:
82
- print(f"Text too long ({len(text)} chars), splitting into chunks")
83
  # 將文本分割成更小的塊
84
  text_chunks = []
85
  for i in range(0, len(text), MAX_TEXT_LENGTH):
86
  text_chunks.append(text[i:i + MAX_TEXT_LENGTH])
87
 
 
 
88
  # 為每個塊生成音頻並合並
89
  combined_audio = b""
90
- for chunk in text_chunks:
 
91
  try:
92
  # 構建 API 參數
93
  api_params = {
@@ -97,16 +104,21 @@ def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str, instruc
97
  }
98
  if instructions:
99
  api_params["instructions"] = instructions
 
100
 
 
101
  with client.audio.speech.with_streaming_response.create(**api_params) as response:
102
  with io.BytesIO() as file:
103
  for audio_chunk in response.iter_bytes():
104
  file.write(audio_chunk)
105
- combined_audio += file.getvalue()
 
 
106
  except Exception as e:
107
- print(f"Error generating audio for chunk: {e}")
108
  raise
109
 
 
110
  return combined_audio
111
  else:
112
  # 原始邏輯,處理短文本
@@ -119,14 +131,18 @@ def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str, instruc
119
  }
120
  if instructions:
121
  api_params["instructions"] = instructions
 
122
 
 
123
  with client.audio.speech.with_streaming_response.create(**api_params) as response:
124
  with io.BytesIO() as file:
125
- for chunk in response.iter_bytes():
126
- file.write(chunk)
127
- return file.getvalue()
 
 
128
  except Exception as e:
129
- print(f"Error generating audio: {e}")
130
  raise
131
 
132
  def generate_audio_from_script(
@@ -140,22 +156,35 @@ def generate_audio_from_script(
140
  speaker2_instructions: str = "保持活潑愉快的語氣",
141
  ) -> tuple[bytes, str]:
142
  """從腳本生成音頻,支持兩個說話者,並優化 API 調用"""
 
 
 
 
 
143
  status_log = []
144
 
145
  # 優化腳本處理
 
146
  optimized_script = optimize_script(script)
 
147
 
148
  # 使用 pydub 處理音頻合並
149
  combined_segment = None
150
 
151
  # 處理每一段
152
- for speaker, text in optimized_script:
 
 
 
153
  voice_to_use = speaker1_voice if speaker == "speaker-1" else speaker2_voice
154
  instructions_to_use = speaker1_instructions if speaker == "speaker-1" else speaker2_instructions
 
 
155
  status_log.append(f"[{speaker}] {text}")
156
 
157
  try:
158
  # 生成這一段的音頻
 
159
  audio_chunk = get_mp3(
160
  text,
161
  voice_to_use,
@@ -164,6 +193,8 @@ def generate_audio_from_script(
164
  instructions_to_use
165
  )
166
 
 
 
167
  # 將二進制數據轉換為 AudioSegment
168
  with NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
169
  temp_file.write(audio_chunk)
@@ -178,40 +209,63 @@ def generate_audio_from_script(
178
  # 合並音頻段
179
  if combined_segment is None:
180
  combined_segment = chunk_segment
 
181
  else:
182
  combined_segment += chunk_segment
 
 
183
  except Exception as e:
 
 
184
  status_log.append(f"[錯誤] 無法生成音頻: {str(e)}")
 
185
 
186
  # 如果沒有生成任何音頻段
187
  if combined_segment is None:
 
 
188
  status_log.append("[錯誤] 沒有生成任何音頻")
189
  return b"", "\n".join(status_log)
190
 
191
  # 如果需要調整音量
192
  if volume_boost > 0:
193
  try:
 
194
  # 調整音量
195
  combined_segment = combined_segment + volume_boost # 增加音量 (dB)
196
  status_log.append(f"[音量] 已增加 {volume_boost} dB")
 
197
  except Exception as e:
 
 
198
  status_log.append(f"[警告] 音量調整失敗: {str(e)}")
199
 
200
  # 將 AudioSegment 轉換為二進制數據
 
201
  output = io.BytesIO()
202
  combined_segment.export(output, format="mp3")
203
  combined_audio = output.getvalue()
204
 
 
205
  return combined_audio, "\n".join(status_log)
206
 
207
  def save_audio_file(audio_data: bytes) -> str:
208
  """將音頻數據保存為臨時文件"""
 
 
209
  temp_dir = Path("./temp_audio")
210
  temp_dir.mkdir(exist_ok=True)
 
211
  # 清理舊文件
 
212
  for old_file in temp_dir.glob("*.mp3"):
213
  if old_file.stat().st_mtime < (time.time() - 24*60*60): # 24小時前的文件
214
  old_file.unlink()
 
 
 
 
 
215
  # 創建新的臨時文件
216
  temp_file = NamedTemporaryFile(
217
  dir=temp_dir,
@@ -220,6 +274,8 @@ def save_audio_file(audio_data: bytes) -> str:
220
  )
221
  temp_file.write(audio_data)
222
  temp_file.close()
 
 
223
  return temp_file.name
224
 
225
  def process_and_save_audio(script, api_key, model, voice1, voice2, volume_boost, instr1, instr2):
 
34
 
35
  # 優化腳本處理 - 合並相同說話者連續文本
36
  def optimize_script(script):
37
+ print("🔄 開始優化腳本處理...")
38
  lines = [line.strip() for line in script.splitlines() if line.strip()]
39
  optimized = []
40
  current_speaker = None
 
68
  if current_text:
69
  optimized.append((current_speaker, current_text))
70
 
71
+ print(f"✅ 腳本優化完成,共 {len(optimized)} 段對話")
72
  return optimized
73
 
74
  def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str, instructions: str = None) -> bytes:
75
  """使用 OpenAI TTS API 生成音頻"""
76
+ print(f"🎤 開始生成音頻: 長度 {len(text)} 字符, 聲音: {voice}, 模型: {audio_model}")
77
+
78
  # 檢查文本長度,OpenAI TTS API 有 4096 個標記的限制
79
  # 大約 1000 個漢字約等於 2000-3000 個標記,為安全起見,我們將限制設為 1000 個字符
80
  MAX_TEXT_LENGTH = 1000
 
83
 
84
  # 如果文本長度超過限制,分割文本
85
  if len(text) > MAX_TEXT_LENGTH:
86
+ print(f"📝 文本過長 ({len(text)} 字符),分割成多個區塊")
87
  # 將文本分割成更小的塊
88
  text_chunks = []
89
  for i in range(0, len(text), MAX_TEXT_LENGTH):
90
  text_chunks.append(text[i:i + MAX_TEXT_LENGTH])
91
 
92
+ print(f"📦 共分割成 {len(text_chunks)} 個區塊")
93
+
94
  # 為每個塊生成音頻並合並
95
  combined_audio = b""
96
+ for i, chunk in enumerate(text_chunks, 1):
97
+ print(f"🔄 處理區塊 {i}/{len(text_chunks)}: {len(chunk)} 字符")
98
  try:
99
  # 構建 API 參數
100
  api_params = {
 
104
  }
105
  if instructions:
106
  api_params["instructions"] = instructions
107
+ print(f"💬 使用語氣指示: {instructions}")
108
 
109
+ print(f"📡 調用 OpenAI TTS API...")
110
  with client.audio.speech.with_streaming_response.create(**api_params) as response:
111
  with io.BytesIO() as file:
112
  for audio_chunk in response.iter_bytes():
113
  file.write(audio_chunk)
114
+ chunk_audio = file.getvalue()
115
+ combined_audio += chunk_audio
116
+ print(f"✅ 區塊 {i} 生成完成: {len(chunk_audio)} bytes")
117
  except Exception as e:
118
+ print(f" 區塊 {i} 生成失敗: {e}")
119
  raise
120
 
121
+ print(f"🎵 所有區塊合並完成,總大小: {len(combined_audio)} bytes")
122
  return combined_audio
123
  else:
124
  # 原始邏輯,處理短文本
 
131
  }
132
  if instructions:
133
  api_params["instructions"] = instructions
134
+ print(f"💬 使用語氣指示: {instructions}")
135
 
136
+ print(f"📡 調用 OpenAI TTS API...")
137
  with client.audio.speech.with_streaming_response.create(**api_params) as response:
138
  with io.BytesIO() as file:
139
+ for audio_chunk in response.iter_bytes():
140
+ file.write(audio_chunk)
141
+ audio_data = file.getvalue()
142
+ print(f"✅ 音頻生成完成: {len(audio_data)} bytes")
143
+ return audio_data
144
  except Exception as e:
145
+ print(f" 音頻生成失敗: {e}")
146
  raise
147
 
148
  def generate_audio_from_script(
 
156
  speaker2_instructions: str = "保持活潑愉快的語氣",
157
  ) -> tuple[bytes, str]:
158
  """從腳本生成音頻,支持兩個說話者,並優化 API 調用"""
159
+ print("🎬 開始從腳本生成音頻")
160
+ print(f"📜 腳本總長度: {len(script)} 字符")
161
+ print(f"🎤 說話者聲音: 說話者1={speaker1_voice}, 說話者2={speaker2_voice}")
162
+ print(f"🔊 音量增強: {volume_boost} dB")
163
+
164
  status_log = []
165
 
166
  # 優化腳本處理
167
+ print("🔍 優化腳本內容...")
168
  optimized_script = optimize_script(script)
169
+ print(f"✅ 腳本優化完成,共 {len(optimized_script)} 個片段")
170
 
171
  # 使用 pydub 處理音頻合並
172
  combined_segment = None
173
 
174
  # 處理每一段
175
+ total_segments = len(optimized_script)
176
+ print(f"🎵 開始處理 {total_segments} 個音頻片段")
177
+
178
+ for i, (speaker, text) in enumerate(optimized_script, 1):
179
  voice_to_use = speaker1_voice if speaker == "speaker-1" else speaker2_voice
180
  instructions_to_use = speaker1_instructions if speaker == "speaker-1" else speaker2_instructions
181
+
182
+ print(f"🎭 處理片段 {i}/{total_segments}: {speaker} ({len(text)} 字符)")
183
  status_log.append(f"[{speaker}] {text}")
184
 
185
  try:
186
  # 生成這一段的音頻
187
+ print(f"📡 生成 {speaker} 的音頻...")
188
  audio_chunk = get_mp3(
189
  text,
190
  voice_to_use,
 
193
  instructions_to_use
194
  )
195
 
196
+ print(f"✅ {speaker} 音頻生成完成: {len(audio_chunk)} bytes")
197
+
198
  # 將二進制數據轉換為 AudioSegment
199
  with NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
200
  temp_file.write(audio_chunk)
 
209
  # 合並音頻段
210
  if combined_segment is None:
211
  combined_segment = chunk_segment
212
+ print("🔗 創建第一個音頻片段")
213
  else:
214
  combined_segment += chunk_segment
215
+ print(f"🔗 已合並片段 {i}/{total_segments}")
216
+
217
  except Exception as e:
218
+ error_msg = f"❌ 片段 {i} ({speaker}) 生成失敗: {str(e)}"
219
+ print(error_msg)
220
  status_log.append(f"[錯誤] 無法生成音頻: {str(e)}")
221
+ raise
222
 
223
  # 如果沒有生成任何音頻段
224
  if combined_segment is None:
225
+ error_msg = "❌ 沒有生成任何音頻"
226
+ print(error_msg)
227
  status_log.append("[錯誤] 沒有生成任何音頻")
228
  return b"", "\n".join(status_log)
229
 
230
  # 如果需要調整音量
231
  if volume_boost > 0:
232
  try:
233
+ print(f"🔊 調整音量 +{volume_boost} dB...")
234
  # 調整音量
235
  combined_segment = combined_segment + volume_boost # 增加音量 (dB)
236
  status_log.append(f"[音量] 已增加 {volume_boost} dB")
237
+ print("✅ 音量調整完成")
238
  except Exception as e:
239
+ warning_msg = f"⚠️ 音量調整失敗: {str(e)}"
240
+ print(warning_msg)
241
  status_log.append(f"[警告] 音量調整失敗: {str(e)}")
242
 
243
  # 將 AudioSegment 轉換為二進制數據
244
+ print("💾 導出最終音頻文件...")
245
  output = io.BytesIO()
246
  combined_segment.export(output, format="mp3")
247
  combined_audio = output.getvalue()
248
 
249
+ print(f"🎉 腳本音頻生成完成!最終大小: {len(combined_audio)} bytes")
250
  return combined_audio, "\n".join(status_log)
251
 
252
  def save_audio_file(audio_data: bytes) -> str:
253
  """將音頻數據保存為臨時文件"""
254
+ print("💾 開始保存音頻文件...")
255
+
256
  temp_dir = Path("./temp_audio")
257
  temp_dir.mkdir(exist_ok=True)
258
+
259
  # 清理舊文件
260
+ old_files_count = 0
261
  for old_file in temp_dir.glob("*.mp3"):
262
  if old_file.stat().st_mtime < (time.time() - 24*60*60): # 24小時前的文件
263
  old_file.unlink()
264
+ old_files_count += 1
265
+
266
+ if old_files_count > 0:
267
+ print(f"🧹 清理了 {old_files_count} 個舊的臨時文件")
268
+
269
  # 創建新的臨時文件
270
  temp_file = NamedTemporaryFile(
271
  dir=temp_dir,
 
274
  )
275
  temp_file.write(audio_data)
276
  temp_file.close()
277
+
278
+ print(f"✅ 音頻文件已保存: {temp_file.name} ({len(audio_data)} bytes)")
279
  return temp_file.name
280
 
281
  def process_and_save_audio(script, api_key, model, voice1, voice2, volume_boost, instr1, instr2):