txh17 committed on
Commit
5b10940
·
verified ·
1 Parent(s): e699b9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -30
app.py CHANGED
@@ -5,58 +5,91 @@ import pandas as pd # Still useful for example dataframes if needed for report t
5
 
6
  # --- TTS Model Functions (CPU Friendly) ---
7
 
8
- def synthesize_espeak(text, lang="en-us"):
9
  """
10
  Synthesizes speech using espeak-ng.
11
  Requires espeak-ng to be installed in the Space environment (via Dockerfile).
12
  """
13
  output_file = "espeak_output.wav"
 
 
 
 
 
14
  try:
15
- # -v: voice, -w: output to WAV file, --stdout: write to stdout (less common for WAV)
16
- # Using a temporary file for output
17
  command = ["espeak-ng", f"-v{lang}", "--stdout", text]
18
 
19
- # Capture stdout as bytes and save to file
20
- process = subprocess.run(command, capture_output=True, check=True)
 
 
 
 
 
 
 
 
21
  with open(output_file, "wb") as f:
22
  f.write(process.stdout)
23
 
24
  print(f"eSpeak-ng synthesis successful: {output_file}")
25
- return output_file
 
26
  except FileNotFoundError:
27
- error_msg = "Error: espeak-ng not found. Please ensure it's installed in your Space's Dockerfile."
28
  print(error_msg)
29
- return None # Return None or raise an error for Gradio to handle
 
30
  except subprocess.CalledProcessError as e:
31
- error_msg = f"Error during espeak-ng synthesis: {e.stderr.decode()}"
 
 
 
 
 
32
  print(error_msg)
 
33
  return None
34
  except Exception as e:
35
  error_msg = f"An unexpected error occurred during espeak-ng synthesis: {e}"
36
  print(error_msg)
 
37
  return None
38
 
39
- def synthesize_api_tts(text):
40
  """
41
  Placeholder for an API-based Text-to-Speech service (e.g., Azure TTS, Google TTS).
42
  In a real application, you would make an HTTP request to the API here.
43
  For this demo, it returns a placeholder audio file.
44
  """
45
  print(f"Simulating API TTS for: '{text}'")
46
- # Replace this with actual API call using 'requests' library
47
- # Example:
 
48
  # import requests
49
- # url = "YOUR_TTS_API_ENDPOINT"
50
- # headers = {"Authorization": "Bearer YOUR_API_KEY", "Content-Type": "application/json"}
51
- # data = {"text": text, "voice": "some_api_voice"}
52
- # response = requests.post(url, json=data)
53
- # if response.status_code == 200:
54
- # with open("api_output.mp3", "wb") as f:
 
 
 
55
  # f.write(response.content)
56
- # return "api_output.mp3"
57
- # else:
58
- # print(f"API Error: {response.status_code} - {response.text}")
59
- # return None # Return None for error
 
 
 
 
 
 
 
 
60
 
61
  # Placeholder: Return a generic audio for demonstration
62
  # In a real scenario, you'd fetch an actual audio file from the API.
@@ -138,7 +171,7 @@ with gr.Blocks(css="""
138
 
139
  1. **eSpeak-ng:**
140
  * **类型与背景:** eSpeak-ng 是一个开源的语音合成器,基于**音素拼接**技术。它通过将预先录制或合成的音素拼接起来生成语音。其核心优势在于极低的资源消耗和极快的合成速度,可以在没有 GPU 的情况下高效运行。它支持多种语言,但声音通常听起来比较机械和“机器人化”。
141
- * **用途对比:** 适用于对语音质量要求不高,但对资源限制严格、需要快速生成语音的场景,例如嵌入式设备、辅助技术或批量文本预览。
142
  * **异同点分析:** * **异:** 采用传统拼接技术,非深度学习模型。
143
  * **同:** 均能实现文本到语音的转换。
144
 
@@ -215,13 +248,13 @@ with gr.Blocks(css="""
215
 
216
  ### 多条统一输入样例输出结果表格
217
 
218
- | 输入文本 (中文) | 输入文本 (英文) | eSpeak-ng 输出特点 | API TTS 示例输出特点 |
219
- | :---------------------------------- | :-------------------------------- | :------------------------------------- | :----------------------------------- |
220
- | 你好,这是一个文本转音频的测试。 | Hello, this is a text-to-speech test. | 机械、音调平直,有明显的合成感。 | 流畅、自然,有起伏和韵律,接近人声。 |
221
- | 请问今天天气怎么样? | What's the weather like today? | 语调缺乏疑问语气,略显生硬。 | 能体现疑问语气,更自然。 |
222
- | 天气真好啊! | What a beautiful day! | 无法表达情感,音量和速度变化不大。 | 能体现积极情感,语调更富表现力。 |
223
- | 复杂一点的句子,比如人工智能的未来发展。 | The future development of artificial intelligence. | 容易在长句中出现不自然的停顿或节奏问题。 | 较好地处理复杂长句,保持流畅性。 |
224
- | (在此处添加更多您的测试样例) | (在此处添加更多您的测试样例) | (描述 eSpeak-ng 的输出) | (描述 API TTS 示例的输出) |
225
 
226
  ### 雷达图或柱状图展示维度评分
227
 
@@ -260,4 +293,5 @@ with gr.Blocks(css="""
260
  )
261
 
262
  # --- Launch Gradio Demo ---
 
263
  demo.queue().launch()
 
5
 
6
  # --- TTS Model Functions (CPU Friendly) ---
7
 
8
def synthesize_espeak(text: str, lang: str = "en-us") -> str | None:
    """
    Synthesizes speech using espeak-ng.
    Requires espeak-ng to be installed in the Space environment (via Dockerfile).

    Args:
        text: The text to speak. Passed to espeak-ng as a single positional
            argument (never through a shell).
        lang: espeak-ng voice name, forwarded as ``-v<lang>``.

    Returns:
        The path of the WAV file that was written ("espeak_output.wav"), or
        None when there is nothing to play: empty input, espeak-ng emitted no
        audio, or the command timed out (a Gradio warning is shown in those
        cases).

    Raises:
        gr.Error: If espeak-ng is missing, exits with a non-zero status, or an
            unexpected failure occurs. The error is *raised* because Gradio
            only shows ``gr.Error`` to the user when it propagates out of the
            event handler; merely constructing it has no effect.
    """
    output_file = "espeak_output.wav"

    # Drop any stale output first so a failed run can never leave the previous
    # audio behind. EAFP: a missing file is the normal case, not an error.
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass

    # Nothing to synthesize: avoid spawning a process for empty input.
    if not text or not text.strip():
        gr.Warning("Please enter some text to synthesize.")
        print("eSpeak-ng synthesis skipped: empty input text")
        return None

    # --stdout makes espeak-ng write the WAV data to stdout, which we capture.
    # "--" ends option parsing so user text that starts with "-" is spoken as
    # text instead of being interpreted as an espeak-ng command-line option.
    command = ["espeak-ng", f"-v{lang}", "--stdout", "--", text]

    try:
        # The timeout prevents a stuck espeak-ng process from hanging the request.
        process = subprocess.run(command, capture_output=True, check=True, timeout=10)
    except FileNotFoundError as e:
        error_msg = "Error: espeak-ng not found. Please ensure it's installed in your Space's Dockerfile and the Space is rebuilt."
        print(error_msg)
        raise gr.Error(error_msg) from e
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes; decode leniently so reporting the failure can
        # never itself fail on undecodable output.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        error_msg = f"Error during espeak-ng synthesis. Command exited with code {e.returncode}. Stderr: {stderr_text}"
        print(error_msg)
        raise gr.Error(error_msg) from e
    except subprocess.TimeoutExpired:
        error_msg = "eSpeak-ng command timed out. The text might be too long or complex."
        print(error_msg)
        gr.Warning(error_msg)
        return None
    except Exception as e:
        error_msg = f"An unexpected error occurred during espeak-ng synthesis: {e}"
        print(error_msg)
        raise gr.Error(error_msg) from e

    # Check if espeak-ng actually produced audio output.
    if not process.stdout:
        gr.Warning("eSpeak-ng produced no audio output for the given text. Try different text.")
        print(f"eSpeak-ng produced no output for text: '{text}'")
        return None  # Return None to clear the audio component

    # Write the captured stdout (audio data) to a WAV file. Handled separately
    # from the subprocess call so a write failure is not misreported as
    # "espeak-ng not found".
    try:
        with open(output_file, "wb") as f:
            f.write(process.stdout)
    except OSError as e:
        error_msg = f"An unexpected error occurred during espeak-ng synthesis: {e}"
        print(error_msg)
        raise gr.Error(error_msg) from e

    print(f"eSpeak-ng synthesis successful: {output_file}")
    return output_file  # Path to the generated audio file
59
 
60
+ def synthesize_api_tts(text: str) -> str | None:
61
  """
62
  Placeholder for an API-based Text-to-Speech service (e.g., Azure TTS, Google TTS).
63
  In a real application, you would make an HTTP request to the API here.
64
  For this demo, it returns a placeholder audio file.
65
  """
66
  print(f"Simulating API TTS for: '{text}'")
67
+
68
+ # --- IMPORTANT: Replace this with your actual API call ---
69
+ # Example using requests (requires 'requests' in requirements.txt):
70
  # import requests
71
+ # try:
72
+ # url = "YOUR_TTS_API_ENDPOINT" # Replace with your actual API endpoint
73
+ # headers = {"Authorization": "Bearer YOUR_API_KEY", "Content-Type": "application/json"} # Replace with your auth
74
+ # data = {"text": text, "voice": "some_api_voice"} # Adjust payload as per API documentation
75
+ # response = requests.post(url, json=data, timeout=15)
76
+ # response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
77
+ #
78
+ # api_output_file = "api_output.mp3" # Or .wav, depending on API
79
+ # with open(api_output_file, "wb") as f:
80
  # f.write(response.content)
81
+ # return api_output_file
82
+ # except requests.exceptions.RequestException as e:
83
+ # error_msg = f"API TTS request failed: {e}"
84
+ # print(error_msg)
85
+ # gr.Error(error_msg)
86
+ # return None
87
+ # except Exception as e:
88
+ # error_msg = f"An unexpected error occurred with API TTS: {e}"
89
+ # print(error_msg)
90
+ # gr.Error(error_msg)
91
+ # return None
92
+ # --------------------------------------------------------
93
 
94
  # Placeholder: Return a generic audio for demonstration
95
  # In a real scenario, you'd fetch an actual audio file from the API.
 
171
 
172
  1. **eSpeak-ng:**
173
  * **类型与背景:** eSpeak-ng 是一个开源的语音合成器,基于**音素拼接**技术。它通过将预先录制或合成的音素拼接起来生成语音。其核心优势在于极低的资源消耗和极快的合成速度,可以在没有 GPU 的情况下高效运行。它支持多种语言,但声音通常听起来比较机械和“机器人化”。
174
+ * **用途对比:** 适用于对语音质量要求不高,但对资源限制严格、需要快速生成语音的场景,例如嵌入式设备、辅助技术或批量文本预览。
175
  * **异同点分析:** * **异:** 采用传统拼接技术,非深度学习模型。
176
  * **同:** 均能实现文本到语音的转换。
177
 
 
248
 
249
  ### 多条统一输入样例输出结果表格
250
 
251
+ | 输入文本 (中文) | 输入文本 (英文) | eSpeak-ng 输出特点 | API TTS 示例输出特点 |
252
+ | :--- | :--- | :--- | :--- |
253
+ | 你好,这是一个文本转音频的测试。 | Hello, this is a text-to-speech test. | 机械、音调平直,有明显的合成感。 | 流畅、自然,有起伏和韵律,接近人声。 |
254
+ | 请问今天天气怎么样? | What's the weather like today? | 语调缺乏疑问语气,略显生硬。 | 能体现疑问语气,更自然。 |
255
+ | 天气真好啊! | What a beautiful day! | 无法表达情感,音量和速度变化不大。 | 能体现积极情感,语调更富表现力。 |
256
+ | 复杂一点的句子,比如人工智能的未来发展。 | The future development of artificial intelligence. | 容易在长句中出现不自然的停顿或节奏问题。 | 较好地处理复杂长句,保持流畅性。 |
257
+ | (在此处添加更多您的测试样例) | (在此处添加更多您的测试样例) | (描述 eSpeak-ng 的输出) | (描述 API TTS 示例的输出) |
258
 
259
  ### 雷达图或柱状图展示维度评分
260
 
 
293
  )
294
 
295
  # --- Launch Gradio Demo ---
296
+ # Using queue() is good practice for Spaces
297
  demo.queue().launch()