Vedika66 committed on
Commit
bf99c34
·
verified ·
1 Parent(s): abe5aad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -275,6 +275,32 @@ STRICT DIRECTIVES:
275
  # ----------------------------------------------------
276
  # TTS DIRECT API ENDPOINT
277
  # ----------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  @app.route('/api/tts', methods=['POST'])
279
  def generate_tts():
280
  if tts is None:
@@ -287,10 +313,8 @@ def generate_tts():
287
  if not text.strip():
288
  return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
289
 
290
- # FIX: REMOVED AGGRESSIVE REGEX FILTERING - PRESERVE ALL UNICODE CHARACTERS
291
- # The TTS engine supports multiple languages, including non-Latin scripts.
292
- # Only strip control characters that may cause issues.
293
- clean_text = ''.join(c for c in text if ord(c) >= 32 or c in '\n\r\t')
294
 
295
  if not clean_text.strip():
296
  return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
@@ -320,7 +344,7 @@ def generate_tts():
320
  channels=1
321
  )
322
 
323
- # FIX: Export as WAV instead of MP3 to avoid ffmpeg dependency
324
  wav_buffer = io.BytesIO()
325
  audio_segment.export(wav_buffer, format="wav")
326
  wav_buffer.seek(0)
 
275
  # ----------------------------------------------------
276
  # TTS DIRECT API ENDPOINT
277
  # ----------------------------------------------------
278
def clean_tts_text(text):
    """Strip markup and symbols that should not be read aloud by the TTS engine.

    Removes fenced and inline code, HTML tags, Markdown links (keeping the
    link label), Markdown emphasis markers and headers, bare URLs, and a set
    of emoji/pictograph ranges; then collapses all whitespace runs to single
    spaces and caps the length.

    Args:
        text: Raw input string. ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned single-line string. Inputs longer than 1500 characters
        after cleaning are cut to 1500 characters with ``'...'`` appended.
    """
    if not text:
        return ''

    # Remove code blocks (```...```) first so their contents are never
    # reinterpreted as inline code or emphasis by the later steps.
    text = re.sub(r'```[\s\S]*?```', '', text)
    # Remove inline code (`...`)
    text = re.sub(r'`[^`]*`', '', text)
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)
    # Replace Markdown links [label](url) with just the label
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
    # Remove Markdown emphasis: doubled markers (**bold**, __underline__)
    # must be handled before single ones (*italic*, _italic_).
    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)
    # Remove Markdown headers (#, ##, etc.) at the start of any line
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Remove bare URLs
    text = re.sub(r'https?://\S+', '', text)
    # Remove emojis and pictographic symbols. Python's ``re`` does not accept
    # the brace form ``\u{1F600}`` (it raises ``re.error``); code points above
    # U+FFFF must be written as ``\UXXXXXXXX`` and BMP ones as ``\uXXXX``.
    text = re.sub(
        r'[\U0001F600-\U0001F64F'   # emoticons
        r'\U0001F300-\U0001F5FF'    # miscellaneous symbols and pictographs
        r'\U0001F680-\U0001F6FF'    # transport and map symbols
        r'\u2600-\u26FF'            # miscellaneous symbols
        r'\u2700-\u27BF]',          # dingbats
        '',
        text,
    )
    # Replace multiple newlines and spaces with a single space
    text = re.sub(r'\s+', ' ', text).strip()
    # Limit length to avoid TTS overflow
    if len(text) > 1500:
        text = text[:1500] + '...'
    return text
303
+
304
  @app.route('/api/tts', methods=['POST'])
305
  def generate_tts():
306
  if tts is None:
 
313
  if not text.strip():
314
  return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
315
 
316
+ # Clean the text to remove markdown, HTML, emojis etc.
317
+ clean_text = clean_tts_text(text)
 
 
318
 
319
  if not clean_text.strip():
320
  return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
 
344
  channels=1
345
  )
346
 
347
+ # Export as WAV to maintain original quality and avoid ffmpeg dependency
348
  wav_buffer = io.BytesIO()
349
  audio_segment.export(wav_buffer, format="wav")
350
  wav_buffer.seek(0)