Edge-TTS-WebUI-Long-Text

Running

App Files Files Community

cs2764 committed on Nov 19, 2025

Commit

d67a3fb

verified ·

1 Parent(s): 72ed4f2

Upload 2 files

Browse files

Files changed (2) hide show

app.py +29 -14
text_cleaning.py +12 -0

app.py CHANGED Viewed

@@ -74,17 +74,17 @@ def estimate_text_duration(text):
     return duration
-def split_text_by_paragraphs(text, max_duration_minutes=5):
     """Split text into segments that won't exceed limit with safety margin"""
     max_duration = max_duration_minutes
     estimated_duration = estimate_text_duration(text)
-    logger.info(f"Checking segmentation: Duration={estimated_duration:.2f}m, Limit={max_duration}m")
-    if estimated_duration <= max_duration:
         return [text]
-    logger.info(f"Text duration ({estimated_duration:.2f}m) exceeds limit ({max_duration}m). Splitting...")
     # Split by paragraphs first
     paragraphs = text.split('\n\n')
@@ -95,20 +95,31 @@ def split_text_by_paragraphs(text, max_duration_minutes=5):
         paragraph_duration = estimate_text_duration(paragraph)
         # If single paragraph is too long, split by sentences
-        if paragraph_duration > max_duration:
-            sentences = re.split(r'[.!?]+', paragraph)
-            for sentence in sentences:
                 sentence = sentence.strip()
                 if not sentence:
                     continue
-                if estimate_text_duration(current_segment + sentence) > max_duration and current_segment:
                     segments.append(current_segment.strip())
-                    current_segment = sentence + ". "
                 else:
-                    current_segment += sentence + ". "
         else:
-            if estimate_text_duration(current_segment + paragraph) > max_duration and current_segment:
                 segments.append(current_segment.strip())
                 current_segment = paragraph + "\n\n"
             else:
@@ -128,9 +139,13 @@ async def generate_audio_segment(text_segment, voice_short_name, rate_str, volum
     communicate = edge_tts.Communicate(text_segment, voice_short_name, rate=rate_str, volume=volume_str, pitch=pitch_str)
     audio_data = io.BytesIO()
-    async for chunk in communicate.stream():
-        if chunk["type"] == "audio":
-            audio_data.write(chunk["data"])
     audio_data.seek(0)

     return duration
+def split_text_by_paragraphs(text, max_duration_minutes=5, max_chars=1500):
     """Split text into segments that won't exceed limit with safety margin"""
     max_duration = max_duration_minutes
     estimated_duration = estimate_text_duration(text)
+    logger.info(f"Checking segmentation: Duration={estimated_duration:.2f}m, Chars={len(text)}, Limit={max_duration}m/{max_chars}chars")
+    if estimated_duration <= max_duration and len(text) <= max_chars:
         return [text]
+    logger.info(f"Text exceeds limits. Splitting...")
     # Split by paragraphs first
     paragraphs = text.split('\n\n')
         paragraph_duration = estimate_text_duration(paragraph)
         # If single paragraph is too long, split by sentences
+        # Improved regex to include Chinese punctuation
+        if paragraph_duration > max_duration or len(paragraph) > max_chars:
+            sentences = re.split(r'([.!?。！？]+)', paragraph)
+            # Re-attach delimiters to sentences
+            real_sentences = []
+            for i in range(0, len(sentences) - 1, 2):
+                real_sentences.append(sentences[i] + sentences[i+1])
+            if len(sentences) % 2 == 1 and sentences[-1]:
+                real_sentences.append(sentences[-1])
+            for sentence in real_sentences:
                 sentence = sentence.strip()
                 if not sentence:
                     continue
+                # Check both duration and char count
+                if (estimate_text_duration(current_segment + sentence) > max_duration or
+                    len(current_segment + sentence) > max_chars) and current_segment:
                     segments.append(current_segment.strip())
+                    current_segment = sentence
                 else:
+                    current_segment += sentence
         else:
+            if (estimate_text_duration(current_segment + paragraph) > max_duration or
+                len(current_segment + paragraph) > max_chars) and current_segment:
                 segments.append(current_segment.strip())
                 current_segment = paragraph + "\n\n"
             else:
     communicate = edge_tts.Communicate(text_segment, voice_short_name, rate=rate_str, volume=volume_str, pitch=pitch_str)
     audio_data = io.BytesIO()
+    try:
+        async for chunk in communicate.stream():
+            if chunk["type"] == "audio":
+                audio_data.write(chunk["data"])
+    except Exception as e:
+        logger.error(f"Error generating segment {segment_index} (Length: {len(text_segment)} chars): {e}")
+        raise gr.Error(f"Error generating segment {segment_index}: {e}")
     audio_data.seek(0)

text_cleaning.py CHANGED Viewed

@@ -49,8 +49,20 @@ class TextCleaner:
         """Normalize whitespace"""
         # Replace multiple spaces with single space
         text = re.sub(r' +', ' ', text)
         # Replace multiple newlines with double newline (paragraph break)
         text = re.sub(r'\n\s*\n', '\n\n', text)
         return text.strip()
     @staticmethod

         """Normalize whitespace"""
         # Replace multiple spaces with single space
         text = re.sub(r' +', ' ', text)
         # Replace multiple newlines with double newline (paragraph break)
         text = re.sub(r'\n\s*\n', '\n\n', text)
+        # Merge lines for CJK text (remove single newlines between CJK characters)
+        # Lookbehind for CJK/Punctuation, match newline, Lookahead for CJK/Punctuation
+        # Ranges:
+        # \u4e00-\u9fa5 (Common CJK)
+        # \u3000-\u303f (CJK Symbols and Punctuation)
+        # \uff00-\uffef (Fullwidth forms)
+        cjk_range = r'[\u4e00-\u9fa5\u3000-\u303f\uff00-\uffef]'
+        pattern = f'(?<={cjk_range})\\s*\\n\\s*(?={cjk_range})'
+        text = re.sub(pattern, '', text)
         return text.strip()
     @staticmethod