Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -275,6 +275,32 @@ STRICT DIRECTIVES:
|
|
| 275 |
# ----------------------------------------------------
|
| 276 |
# TTS DIRECT API ENDPOINT
|
| 277 |
# ----------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
@app.route('/api/tts', methods=['POST'])
|
| 279 |
def generate_tts():
|
| 280 |
if tts is None:
|
|
@@ -287,10 +313,8 @@ def generate_tts():
|
|
| 287 |
if not text.strip():
|
| 288 |
return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
|
| 289 |
|
| 290 |
-
#
|
| 291 |
-
|
| 292 |
-
# Only strip control characters that may cause issues.
|
| 293 |
-
clean_text = ''.join(c for c in text if ord(c) >= 32 or c in '\n\r\t')
|
| 294 |
|
| 295 |
if not clean_text.strip():
|
| 296 |
return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
|
|
@@ -320,7 +344,7 @@ def generate_tts():
|
|
| 320 |
channels=1
|
| 321 |
)
|
| 322 |
|
| 323 |
-
#
|
| 324 |
wav_buffer = io.BytesIO()
|
| 325 |
audio_segment.export(wav_buffer, format="wav")
|
| 326 |
wav_buffer.seek(0)
|
|
|
|
| 275 |
# ----------------------------------------------------
|
| 276 |
# TTS DIRECT API ENDPOINT
|
| 277 |
# ----------------------------------------------------
|
| 278 |
+
def clean_tts_text(text, max_length=1500):
    """Strip markup and symbols from *text* so it can be spoken by a TTS engine.

    The following are removed or unwrapped, in this order: fenced code
    blocks, inline code, HTML tags, Markdown links (the link text is kept),
    Markdown emphasis markers, Markdown header prefixes, bare URLs, and a
    set of emoji/pictograph code points. All whitespace runs are then
    collapsed to a single space and the result is trimmed.

    Args:
        text: The raw string to clean.
        max_length: Maximum number of characters kept before ``'...'`` is
            appended. Pass ``None`` to disable truncation.

    Returns:
        The cleaned string; may be empty if nothing speakable remains.
    """
    # Fenced code blocks go first so their contents never reach later rules.
    text = re.sub(r'```[\s\S]*?```', '', text)
    # Inline code (`...`).
    text = re.sub(r'`[^`]*`', '', text)
    # HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Markdown links [text](url): keep only the visible text.
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)

    # Markdown emphasis. Underscore markers are only honoured when they are
    # not inside a word, so identifiers such as my_var_name stay intact.
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
    text = re.sub(r'(?<!\w)__(.*?)__(?!\w)', r'\1', text)
    text = re.sub(r'\*(.*?)\*', r'\1', text)
    text = re.sub(r'(?<!\w)_(.*?)_(?!\w)', r'\1', text)

    # Markdown header prefixes (#, ##, ...) at the start of a line.
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Bare URLs.
    text = re.sub(r'https?://[^\s]+', '', text)

    # Emoji and pictographs. Python's re needs \UXXXXXXXX for code points
    # above U+FFFF; the JavaScript-style \u{...} form raises re.error.
    # U+FE0F (variation selector-16) is dropped too, since it is otherwise
    # left dangling once its base emoji has been removed.
    text = re.sub(
        r'[\U0001F600-\U0001F64F'
        r'\U0001F300-\U0001F5FF'
        r'\U0001F680-\U0001F6FF'
        r'\U0001F900-\U0001F9FF'
        r'\u2600-\u26FF'
        r'\u2700-\u27BF'
        r'\uFE0F]',
        '',
        text,
    )

    # Collapse newlines and repeated spaces into single spaces.
    text = re.sub(r'\s+', ' ', text).strip()

    # Cap the length so very long inputs do not overflow the TTS engine.
    if max_length is not None and len(text) > max_length:
        text = text[:max_length] + '...'
    return text
|
| 303 |
+
|
| 304 |
@app.route('/api/tts', methods=['POST'])
|
| 305 |
def generate_tts():
|
| 306 |
if tts is None:
|
|
|
|
| 313 |
if not text.strip():
|
| 314 |
return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
|
| 315 |
|
| 316 |
+
# Clean the text to remove Markdown, HTML, emojis, etc.
|
| 317 |
+
clean_text = clean_tts_text(text)
|
|
|
|
|
|
|
| 318 |
|
| 319 |
if not clean_text.strip():
|
| 320 |
return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
|
|
|
|
| 344 |
channels=1
|
| 345 |
)
|
| 346 |
|
| 347 |
+
# Export as WAV to maintain original quality and avoid ffmpeg dependency
|
| 348 |
wav_buffer = io.BytesIO()
|
| 349 |
audio_segment.export(wav_buffer, format="wav")
|
| 350 |
wav_buffer.seek(0)
|