Ryanus committed on
Commit
7d2c1dc
·
verified ·
1 Parent(s): 1982214

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -105
app.py CHANGED
@@ -9,10 +9,10 @@ import re
9
  from tqdm import tqdm
10
  import time
11
 
12
- # --- Coqui TTS 授權同意 ---
13
  os.environ["COQUI_TOS_AGREED"] = "1"
14
 
15
- # --- 解決 PyTorch 2.6+ WeightsUnpickler 錯誤 ---
16
  try:
17
  import torch.serialization
18
  from TTS.tts.configs.xtts_config import XttsConfig
@@ -22,152 +22,166 @@ try:
22
  torch.serialization.add_safe_globals([
23
  XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs
24
  ])
25
- print("已將 XTTS 相關配置類加入 PyTorch 安全全局變數白名單。")
26
  except Exception as e:
27
- print(f"警告:無法將安全全局變數加入 PyTorch 白名單: {e}")
28
- print("如果遇到模型載入錯誤,請檢查 PyTorch 和 TTS 庫版本。")
29
 
 
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
- print(f"使用設備: {device}")
32
 
 
 
 
 
 
 
 
33
  tts = None
34
  model_load_error = None
35
-
36
  SUPPORTED_LANGUAGES = [
37
  "en", "zh-cn", "es", "fr", "de", "it", "pt", "pl", "ru", "ja", "ko", "ar", "hi", "tr",
38
  "nl", "sv", "da", "fi", "no", "cs", "hu", "el", "uk", "vi", "th", "id", "ms", "ro",
39
  "sk", "hr", "bg", "ca", "fa", "he", "ur", "bn", "gu", "kn", "ml", "mr", "pa", "ta", "te",
40
  ]
41
-
42
  DEFAULT_SPEAKER_WAV = "speaker.wav"
43
- SAVE_GENERATED_AUDIO_DIR = "generated_audio"
44
- SAVE_UPLOADED_REFERENCES_DIR = "uploaded_references"
45
-
46
- os.makedirs(SAVE_GENERATED_AUDIO_DIR, exist_ok=True)
47
- os.makedirs(SAVE_UPLOADED_REFERENCES_DIR, exist_ok=True)
48
 
49
  def sanitize_filename(text: str, max_len: int = 50) -> str:
 
50
  safe_text = re.sub(r'[^\w\s-]', '', text).strip()
51
  safe_text = re.sub(r'\s+', '_', safe_text)
52
  if len(safe_text) > max_len:
53
  safe_text = safe_text[:max_len]
54
  return safe_text
55
 
56
- # --- 載入模型 ---
57
  try:
 
58
  tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True).to(device)
59
- print("Coqui TTS XTTS-v2 模型已成功載入。")
60
  except Exception as e:
61
- model_load_error = f"載入 Coqui TTS XTTS-v2 模型時發生錯誤: {e}"
 
62
 
63
- def generate_speech(text, language, uploaded_speaker_audio_path):
 
64
  if model_load_error:
65
- return None, f"應用程式啟動錯誤:{model_load_error}"
66
-
67
- steps = [
68
- "檢查模型狀態",
69
- "檢查輸入",
70
- "處理語音參考檔案",
71
- "生成語音",
72
- "儲存語音檔案",
73
- "完成"
74
- ]
75
-
76
- for i, step in enumerate(tqdm(steps, desc="語音生成流程", ncols=80)):
77
- if step == "檢查模型狀態":
78
- if tts is None:
79
- return None, "TTS 模型未成功載入,無法生成語音。"
80
- time.sleep(0.1)
81
- elif step == "檢查輸入":
82
- if not text:
83
- return None, "請輸入一些文字!"
84
- if not language:
85
- return None, "請選擇一個語言!"
86
- time.sleep(0.1)
87
- elif step == "處理語音參考檔案":
88
- global speaker_wav_to_use
89
- speaker_wav_to_use = None
90
- global status_message
91
- status_message = ""
92
- if uploaded_speaker_audio_path:
93
- speaker_wav_to_use = uploaded_speaker_audio_path
94
- try:
95
- timestamp_ref = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
96
- original_ext = os.path.splitext(uploaded_speaker_audio_path)[1]
97
- saved_ref_file_name = f"{timestamp_ref}_uploaded_ref{original_ext}"
98
- saved_ref_file_path = os.path.join(SAVE_UPLOADED_REFERENCES_DIR, saved_ref_file_name)
99
- shutil.copy(uploaded_speaker_audio_path, saved_ref_file_path)
100
- status_message += f"參考語音已儲存到:{saved_ref_file_path}\n"
101
- except Exception as e:
102
- status_message += f"警告:儲存參考語音失敗: {e}\n"
103
- else:
104
- speaker_wav_to_use = DEFAULT_SPEAKER_WAV
105
- if not os.path.exists(speaker_wav_to_use):
106
- return None, f"錯誤:預設語音參考檔案 ({DEFAULT_SPEAKER_WAV}) 未找到。請上傳一個檔案或確保預設檔案存在。"
107
- time.sleep(0.1)
108
- elif step == "生成語音":
109
- global output_file
110
- output_file = None
111
  try:
112
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
113
- output_file = fp.name
114
- tts.tts_to_file(text=text, language=language, speaker_wav=speaker_wav_to_use, file_path=output_file)
 
 
 
115
  except Exception as e:
116
- if output_file and os.path.exists(output_file):
117
- os.remove(output_file)
118
- return None, f"生成語音失敗: {e}"
119
- elif step == "儲存語音檔案":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  try:
121
- timestamp_gen = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
122
- sanitized_text = sanitize_filename(text)
123
- saved_file_name = f"{timestamp_gen}_{language}_{sanitized_text}.wav"
124
- saved_file_path = os.path.join(SAVE_GENERATED_AUDIO_DIR, saved_file_name)
125
- shutil.copy(output_file, saved_file_path)
126
- status_message += f"語音生成成功!已儲存為:{saved_file_path}"
127
- except Exception as e:
128
- return None, f"儲存語音檔案失敗: {e}"
129
- elif step == "完成":
130
- pass
131
- time.sleep(0.1)
132
- return output_file, status_message
133
 
134
  def list_saved_audio_files():
 
135
  audio_files = []
136
  if os.path.exists(SAVE_GENERATED_AUDIO_DIR):
137
  for filename in os.listdir(SAVE_GENERATED_AUDIO_DIR):
138
- if filename.lower().endswith(".wav"):
139
  audio_files.append(os.path.join(SAVE_GENERATED_AUDIO_DIR, filename))
140
  audio_files.sort(key=os.path.getmtime, reverse=True)
141
  return audio_files
142
 
143
  def list_uploaded_reference_files():
 
144
  ref_files = []
145
  if os.path.exists(SAVE_UPLOADED_REFERENCES_DIR):
146
  for filename in os.listdir(SAVE_UPLOADED_REFERENCES_DIR):
147
- if filename.lower().endswith(".wav"):
148
  ref_files.append(os.path.join(SAVE_UPLOADED_REFERENCES_DIR, filename))
149
  ref_files.sort(key=os.path.getmtime, reverse=True)
150
  return ref_files
151
 
152
- with gr.Blocks(title="Coqui TTS XTTS-v2 語音生成") as demo:
153
- gr.Markdown("# Coqui TTS XTTS-v2 語音生成 (CPU)")
154
- gr.Markdown("此演示使用 CPU 運行,請注意 XTTS-v2 在 CPU 上運行會非常慢。您可以上傳自己的語音,或使用預設語音。**生成的語音和上傳的參考語音都將自動儲存到 Space 專案中。**")
155
- gr.Markdown("**重要提示:** 每次儲存檔案都會觸發 Hugging Face Space 的自動重建,導致應用程式暫時不可用並重新載入模型。")
156
-
157
- with gr.Tab("語音生成"):
 
 
 
 
158
  with gr.Row():
159
  with gr.Column():
160
- text_input = gr.Textbox(lines=5, label="輸入文字", placeholder="請在這裡輸入你想要轉換成語音的文字...")
161
- language_dropdown = gr.Dropdown(choices=SUPPORTED_LANGUAGES, label="選擇語言", value="en")
162
  speaker_audio_upload = gr.Audio(
163
  type="filepath",
164
- label="上傳語音參考檔案 (WAV) (可選)",
165
  sources=["microphone", "upload"],
166
  )
167
- generate_button = gr.Button("生成語音")
168
  with gr.Column():
169
- output_audio = gr.Audio(label="生成的語音", type="filepath")
170
- status_textbox = gr.Textbox(label="狀態")
171
 
172
  generate_button.click(
173
  fn=generate_speech,
@@ -175,26 +189,27 @@ with gr.Blocks(title="Coqui TTS XTTS-v2 語音生成") as demo:
175
  outputs=[output_audio, status_textbox]
176
  )
177
 
178
- with gr.Tab("查看已儲存語音"):
179
- gr.Markdown("### 已儲存的生成語音檔案")
180
  saved_generated_files_output = gr.File(
181
- label="生成的語音檔案",
182
  file_count="multiple",
183
  interactive=False
184
  )
185
- refresh_generated_button = gr.Button("刷新生成語音列表")
186
  demo.load(list_saved_audio_files, outputs=[saved_generated_files_output])
187
  refresh_generated_button.click(list_saved_audio_files, outputs=[saved_generated_files_output])
188
 
189
- with gr.Tab("查看已上傳參考語音"):
190
- gr.Markdown("### 已儲存的上傳參考語音檔案")
191
  saved_uploaded_ref_files_output = gr.File(
192
- label="上傳的參考語音檔案",
193
  file_count="multiple",
194
  interactive=False
195
  )
196
- refresh_uploaded_ref_button = gr.Button("刷新參考語音列表")
197
  demo.load(list_uploaded_reference_files, outputs=[saved_uploaded_ref_files_output])
198
  refresh_uploaded_ref_button.click(list_uploaded_reference_files, outputs=[saved_uploaded_ref_files_output])
199
 
200
- demo.launch()
 
 
9
  from tqdm import tqdm
10
  import time
11
 
12
+ # --- Coqui TTS 授权同意 ---
13
  os.environ["COQUI_TOS_AGREED"] = "1"
14
 
15
+ # --- 解决 PyTorch 2.6+ WeightsUnpickler 错误 ---
16
  try:
17
  import torch.serialization
18
  from TTS.tts.configs.xtts_config import XttsConfig
 
22
  torch.serialization.add_safe_globals([
23
  XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs
24
  ])
25
+ print("已将 XTTS 相关配置类加入 PyTorch 安全全局变量白名单。")
26
  except Exception as e:
27
+ print(f"警告:无法将安全全局变量加入 PyTorch 白名单: {e}")
28
+ print("如果遇到模型载入错误,请检查 PyTorch 和 TTS 库版本。")
29
 
30
+ # 设备配置
31
  device = "cuda" if torch.cuda.is_available() else "cpu"
32
+ print(f"使用设备: {device}")
33
 
34
+ # 目录配置
35
+ SAVE_GENERATED_AUDIO_DIR = "generated_audio"
36
+ SAVE_UPLOADED_REFERENCES_DIR = "uploaded_references"
37
+ os.makedirs(SAVE_GENERATED_AUDIO_DIR, exist_ok=True)
38
+ os.makedirs(SAVE_UPLOADED_REFERENCES_DIR, exist_ok=True)
39
+
40
+ # 全局变量
41
  tts = None
42
  model_load_error = None
 
43
  SUPPORTED_LANGUAGES = [
44
  "en", "zh-cn", "es", "fr", "de", "it", "pt", "pl", "ru", "ja", "ko", "ar", "hi", "tr",
45
  "nl", "sv", "da", "fi", "no", "cs", "hu", "el", "uk", "vi", "th", "id", "ms", "ro",
46
  "sk", "hr", "bg", "ca", "fa", "he", "ur", "bn", "gu", "kn", "ml", "mr", "pa", "ta", "te",
47
  ]
 
48
  DEFAULT_SPEAKER_WAV = "speaker.wav"
 
 
 
 
 
49
 
50
  def sanitize_filename(text: str, max_len: int = 50) -> str:
51
+ """清理文本以用作安全的文件名"""
52
  safe_text = re.sub(r'[^\w\s-]', '', text).strip()
53
  safe_text = re.sub(r'\s+', '_', safe_text)
54
  if len(safe_text) > max_len:
55
  safe_text = safe_text[:max_len]
56
  return safe_text
57
 
58
+ # --- 载入模型 ---
59
  try:
60
+ print("正在载入 Coqui TTS XTTS-v2 模型...")
61
  tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True).to(device)
62
+ print("Coqui TTS XTTS-v2 模型已成功载入。")
63
  except Exception as e:
64
+ model_load_error = f"载入 Coqui TTS XTTS-v2 模型时发生错误: {e}"
65
+ print(model_load_error)
66
 
67
+ def generate_speech(text, language, uploaded_speaker_audio_path, progress=gr.Progress()):
68
+ """生成语音并保存文件"""
69
  if model_load_error:
70
+ return None, f"应用程序启动错误:{model_load_error}"
71
+
72
+ # 检查输入
73
+ if not text:
74
+ return None, "请输入一些文字!"
75
+ if not language:
76
+ return None, "请选择一个语言!"
77
+
78
+ if tts is None:
79
+ return None, "TTS 模型未成功载入,无法生成语音。"
80
+
81
+ status_message = ""
82
+ output_file = None
83
+
84
+ try:
85
+ # 处理语音参考文件
86
+ progress(0.2, desc="处理语音参考文件")
87
+
88
+ if uploaded_speaker_audio_path:
89
+ speaker_wav_to_use = uploaded_speaker_audio_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  try:
91
+ timestamp_ref = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
92
+ original_ext = os.path.splitext(uploaded_speaker_audio_path)[1]
93
+ saved_ref_file_name = f"{timestamp_ref}_uploaded_ref{original_ext}"
94
+ saved_ref_file_path = os.path.join(SAVE_UPLOADED_REFERENCES_DIR, saved_ref_file_name)
95
+ shutil.copy(uploaded_speaker_audio_path, saved_ref_file_path)
96
+ status_message += f"参考语音已保存到:{saved_ref_file_path}\n"
97
  except Exception as e:
98
+ status_message += f"警告:保存参考语音失败: {e}\n"
99
+ else:
100
+ speaker_wav_to_use = DEFAULT_SPEAKER_WAV
101
+ if not os.path.exists(speaker_wav_to_use):
102
+ return None, f"错误:默认语音参考文件 ({DEFAULT_SPEAKER_WAV}) 未找到。请上传一个文件或确保默认文件存在。"
103
+
104
+ # 生成语音
105
+ progress(0.5, desc="生成语音中...")
106
+
107
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
108
+ output_file = fp.name
109
+
110
+ try:
111
+ tts.tts_to_file(text=text, language=language, speaker_wav=speaker_wav_to_use, file_path=output_file)
112
+ except Exception as e:
113
+ if output_file and os.path.exists(output_file):
114
+ os.remove(output_file)
115
+ return None, f"生成语音失败: {e}"
116
+
117
+ # 保存语音文件
118
+ progress(0.8, desc="保存语音文件")
119
+
120
+ try:
121
+ timestamp_gen = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
122
+ sanitized_text = sanitize_filename(text)
123
+ saved_file_name = f"{timestamp_gen}_{language}_{sanitized_text}.wav"
124
+ saved_file_path = os.path.join(SAVE_GENERATED_AUDIO_DIR, saved_file_name)
125
+ shutil.copy(output_file, saved_file_path)
126
+ status_message += f"语音生成成功!已保存为:{saved_file_path}"
127
+ except Exception as e:
128
+ return None, f"保存语音文件失败: {e}"
129
+
130
+ progress(1.0, desc="完成")
131
+ return output_file, status_message
132
+
133
+ except Exception as e:
134
+ # 清理临时文件
135
+ if output_file and os.path.exists(output_file):
136
  try:
137
+ os.remove(output_file)
138
+ except:
139
+ pass
140
+ return None, f"处理过程中发生错误: {str(e)}"
 
 
 
 
 
 
 
 
141
 
142
  def list_saved_audio_files():
143
+ """列出已保存的音频文件"""
144
  audio_files = []
145
  if os.path.exists(SAVE_GENERATED_AUDIO_DIR):
146
  for filename in os.listdir(SAVE_GENERATED_AUDIO_DIR):
147
+ if filename.lower().endswith((".wav", ".mp3")):
148
  audio_files.append(os.path.join(SAVE_GENERATED_AUDIO_DIR, filename))
149
  audio_files.sort(key=os.path.getmtime, reverse=True)
150
  return audio_files
151
 
152
  def list_uploaded_reference_files():
153
+ """列出已上传的参考语音文件"""
154
  ref_files = []
155
  if os.path.exists(SAVE_UPLOADED_REFERENCES_DIR):
156
  for filename in os.listdir(SAVE_UPLOADED_REFERENCES_DIR):
157
+ if filename.lower().endswith((".wav", ".mp3")):
158
  ref_files.append(os.path.join(SAVE_UPLOADED_REFERENCES_DIR, filename))
159
  ref_files.sort(key=os.path.getmtime, reverse=True)
160
  return ref_files
161
 
162
+ # 创建Gradio界面
163
+ with gr.Blocks(title="Coqui TTS XTTS-v2 语音生成") as demo:
164
+ gr.Markdown("# Coqui TTS XTTS-v2 语音生成")
165
+ gr.Markdown(f"此演示使用 {'GPU' if device == 'cuda' else 'CPU'} 运行。您可以上传自己的语音,或使用默认语音。")
166
+ gr.Markdown("**生成的语音和上传的参考语音都将自动保存到服务器中。**")
167
+
168
+ if device == "cpu":
169
+ gr.Markdown("⚠️ **注意:** 当前使用CPU运行,XTTS-v2在CPU上运行会较慢。")
170
+
171
+ with gr.Tab("语音生成"):
172
  with gr.Row():
173
  with gr.Column():
174
+ text_input = gr.Textbox(lines=5, label="输入文字", placeholder="请在这里输入你想要转换成语音的文字...")
175
+ language_dropdown = gr.Dropdown(choices=SUPPORTED_LANGUAGES, label="选择语言", value="en")
176
  speaker_audio_upload = gr.Audio(
177
  type="filepath",
178
+ label="上传语音参考文件 (WAV/MP3) (可选)",
179
  sources=["microphone", "upload"],
180
  )
181
+ generate_button = gr.Button("生成语音")
182
  with gr.Column():
183
+ output_audio = gr.Audio(label="生成的语音", type="filepath")
184
+ status_textbox = gr.Textbox(label="状态")
185
 
186
  generate_button.click(
187
  fn=generate_speech,
 
189
  outputs=[output_audio, status_textbox]
190
  )
191
 
192
+ with gr.Tab("查看已保存语音"):
193
+ gr.Markdown("### 已保存的生成语音文件")
194
  saved_generated_files_output = gr.File(
195
+ label="生成的语音文件",
196
  file_count="multiple",
197
  interactive=False
198
  )
199
+ refresh_generated_button = gr.Button("刷新生成语音列表")
200
  demo.load(list_saved_audio_files, outputs=[saved_generated_files_output])
201
  refresh_generated_button.click(list_saved_audio_files, outputs=[saved_generated_files_output])
202
 
203
+ with gr.Tab("查看已上传参考语音"):
204
+ gr.Markdown("### 已保存的上传参考语音文件")
205
  saved_uploaded_ref_files_output = gr.File(
206
+ label="上传的参考语音文件",
207
  file_count="multiple",
208
  interactive=False
209
  )
210
+ refresh_uploaded_ref_button = gr.Button("刷新参考语音列表")
211
  demo.load(list_uploaded_reference_files, outputs=[saved_uploaded_ref_files_output])
212
  refresh_uploaded_ref_button.click(list_uploaded_reference_files, outputs=[saved_uploaded_ref_files_output])
213
 
214
+ if __name__ == "__main__":
215
+ demo.launch()