CodeVed

Running

App Files Files Community

Vedika66 committed 4 days ago

Commit

bf99c34

verified ·

1 Parent(s): abe5aad

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -5

app.py CHANGED Viewed

@@ -275,6 +275,32 @@ STRICT DIRECTIVES:
 # ----------------------------------------------------
 # TTS DIRECT API ENDPOINT
 # ----------------------------------------------------
 @app.route('/api/tts', methods=['POST'])
 def generate_tts():
     if tts is None:
@@ -287,10 +313,8 @@ def generate_tts():
     if not text.strip():
         return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
-    # FIX: REMOVED AGGRESSIVE REGEX FILTERING - PRESERVE ALL UNICODE CHARACTERS
-    # The TTS engine supports multiple languages, including non-Latin scripts.
-    # Only strip control characters that may cause issues.
-    clean_text = ''.join(c for c in text if ord(c) >= 32 or c in '\n\r\t')
     if not clean_text.strip():
          return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
@@ -320,7 +344,7 @@ def generate_tts():
             channels=1
         )
-        # FIX: Export as WAV instead of MP3 to avoid ffmpeg dependency
         wav_buffer = io.BytesIO()
         audio_segment.export(wav_buffer, format="wav")
         wav_buffer.seek(0)

 # ----------------------------------------------------
 # TTS DIRECT API ENDPOINT
 # ----------------------------------------------------
def clean_tts_text(text: str) -> str:
    """Strip markup and symbols from *text* so it can be spoken by a TTS engine.

    The following are removed or unwrapped, in this order: fenced code
    blocks, inline code spans, HTML tags, Markdown links (the link text is
    kept), Markdown emphasis markers, Markdown header prefixes, bare
    http(s) URLs, and emoji / pictographic symbols. All runs of whitespace
    are then collapsed to a single space and the result is trimmed.

    Args:
        text: Raw text, possibly containing Markdown, HTML or emoji.

    Returns:
        The cleaned text. If it is longer than 1500 characters it is cut to
        the first 1500 characters followed by ``'...'``.
    """
    # Remove fenced code blocks (```...```), including multi-line ones.
    text = re.sub(r'```[\s\S]*?```', '', text)
    # Remove inline code (`...`).
    text = re.sub(r'`[^`]*`', '', text)
    # Remove HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Replace Markdown links [text](url) with just the link text.
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
    # Unwrap Markdown emphasis. Double markers (**bold**, __underline__) are
    # handled first so the single-marker pass does not split them.
    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)
    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)
    # Remove Markdown header prefixes (#, ##, ...) at the start of any line.
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
    # Remove URLs.
    text = re.sub(r'https?://[^\s]+', '', text)
    # Remove emoji and pictographic symbols. Python's `re` does not accept
    # the JavaScript-style \u{XXXXX} escape (it raises re.error); code points
    # above U+FFFF must be written as \UXXXXXXXX and BMP ones as \uXXXX.
    text = re.sub(
        r'['
        r'\U0001F600-\U0001F64F'  # emoticons
        r'\U0001F300-\U0001F5FF'  # miscellaneous symbols and pictographs
        r'\U0001F680-\U0001F6FF'  # transport and map symbols
        r'\u2600-\u26FF'          # miscellaneous symbols
        r'\u2700-\u27BF'          # dingbats
        r']',
        '',
        text,
    )
    # Collapse every run of whitespace (including newlines) to one space.
    text = re.sub(r'\s+', ' ', text).strip()
    # Limit length to avoid TTS overflow.
    if len(text) > 1500:
        text = text[:1500] + '...'
    return text
 @app.route('/api/tts', methods=['POST'])
 def generate_tts():
     if tts is None:
     if not text.strip():
         return Response(json.dumps({"error": "Text is empty."}), status=400, mimetype='application/json')
+    # Clean the text to remove markdown, HTML, emojis etc.
+    clean_text = clean_tts_text(text)
     if not clean_text.strip():
          return Response(json.dumps({"error": "No valid text left to process."}), status=400, mimetype='application/json')
             channels=1
         )
+        # Export as WAV to maintain original quality and avoid ffmpeg dependency
         wav_buffer = io.BytesIO()
         audio_segment.export(wav_buffer, format="wav")
         wav_buffer.seek(0)