Spaces:

ArtSpace
/

Cu

Sleeping

App Files Community

ArtSpace committed on Jan 18

Commit

d0c1255

verified ·

1 Parent(s): 7d6ef8c

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -50

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import edge_tts
 import asyncio
 import re
 import os
-import uuid  # [ACEE] Added for unique filename generation
 from pydub import AudioSegment
 # ---------------------------------------------------------
@@ -21,22 +21,25 @@ EMOTION_PRESETS = {
 # ---------------------------------------------------------
 # 2. وظائف المساعدة والتنظيف
 # ---------------------------------------------------------
-def sanitize_text(text):
     """
-    [ACEE] ENHANCED SANITIZATION
-    تنظيف النص من الرموز غير المرغوبة (النجوم، الهاشتاج، الإيموجي)
-    مع الحفاظ على العلامات الخاصة بالتوقفات.
     """
     if not text:
         return ""
-    # 1. إزالة الرموز الخاصة المحددة
-    # [ACEE] Added removal of '#' as requested
-    for char in ["*", "#"]:
-        text = text.replace(char, "")
     # 2. إزالة الإيموجي (نطاق Unicode واسع)
-    # [ACEE] Comprehensive Emoji Regex
     emoji_pattern = re.compile("["
         u"\U0001F600-\U0001F64F"  # Emoticons
         u"\U0001F300-\U0001F5FF"  # Symbols & Pictographs
@@ -65,14 +68,14 @@ async def generate_segment(text, voice, rate, pitch, filename):
 # ---------------------------------------------------------
 # 3. المنطق الرئيسي
 # ---------------------------------------------------------
-async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch):
-    # [ACEE] 1. Sanitize Input
-    text = sanitize_text(text)
     if not text or text.strip() == "":
         return None
-    # [ACEE] 2. Unique Session ID to prevent race conditions
     session_id = str(uuid.uuid4())
     # إعداد الصوت
@@ -83,8 +86,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
         "رجل (سعودي)": "ar-SA-HamedNeural",
         "سيدة (سعودية)": "ar-SA-ZariyahNeural"
     }
-    # [ACEE] Fixed default fallback to match the UI default (Saudi Male) logic if needed,
-    # but kept flexible. Defaulting to Salma as safe fallback.
     voice = voice_map.get(character, "ar-EG-SalmaNeural")
     # إعداد Rate & Pitch
@@ -107,17 +108,12 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
     parts = re.split(pattern, text)
-    # [ACEE] Optimization: Prepare lists for parallel processing
     segments_to_generate = []
     temp_files_cleanup = []
-    # Structure to hold the sequence of events (Audio or Silence)
-    # timeline = [ {'type': 'silence', 'duration': 1.0}, {'type': 'audio', 'file': 'path.mp3'}, ... ]
     timeline = []
     print(f"Session {session_id}: Detected {len(parts)} parts")
-    # Phase 1: Analyze and schedule tasks
     for i, part in enumerate(parts):
         part = part.strip()
         if not part:
@@ -140,7 +136,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
         # الحالة 2: <break time="4s"/>
         elif part.startswith("<break") and "time=" in part:
             try:
-                # [ACEE] FIXED REGEX: Removed double backslashes
                 match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
                 if match:
                     pause_duration = float(match.group(1))
@@ -158,29 +153,22 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
             if pause_duration > 0:
                 timeline.append({'type': 'silence', 'duration': pause_duration})
         else:
-            # Generate unique filename for this segment
             temp_filename = f"temp_{session_id}_{i}.mp3"
             temp_files_cleanup.append(temp_filename)
-            # Add to timeline placeholder
             timeline.append({'type': 'audio', 'file': temp_filename})
-            # Add to generation queue
             segments_to_generate.append(
                 generate_segment(part, voice, rate, pitch, temp_filename)
             )
-    # Phase 2: Parallel Execution (Optimization)
     if segments_to_generate:
-        print(f"Generating {len(segments_to_generate)} segments in parallel...")
         await asyncio.gather(*segments_to_generate)
-    # Phase 3: Stitching (Assembly)
     final_audio = AudioSegment.empty()
     for item in timeline:
         if item['type'] == 'silence':
-            print(f"Adding Silence: {item['duration']}s")
             final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
         elif item['type'] == 'audio':
@@ -189,19 +177,17 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
                     segment = AudioSegment.from_mp3(item['file'])
                     final_audio += segment
                 else:
-                    print(f"Warning: Audio file missing {item['file']}")
             except Exception as e:
-                print(f"Error merging segment {item['file']}: {e}")
-    # الحفظ النهائي
     output_file = f"output_{session_id}.mp3"
     try:
         final_audio.export(output_file, format="mp3")
-    except Exception as e:
-        print(f"Export Error: {e}")
         return None
-    # [ACEE] Cleanup: Delete unique temp files
     for f in temp_files_cleanup:
         if os.path.exists(f):
             try: os.remove(f)
@@ -212,12 +198,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
 # ---------------------------------------------------------
 # 4. الواجهة
 # ---------------------------------------------------------
-EXAMPLES = [
-    ["مرحباً [pause:2] هذا توقف مؤقت.", "رجل (مصري)", "محايد"],
-    ['تجربة نمط SSML <break time="3s"/> ونعود.', "سيدة (مصرية)", "هادئ 😌"],
-    ["واحد ... اثنين ... ثلاثة ... انطلاق!", "رجل (سعودي)", "متحمس 🎉"]
-]
 css = """
 footer {visibility: hidden}
 .gradio-container {direction: rtl}
@@ -231,7 +211,15 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
             text_input = gr.Textbox(
                 label="📝 النص",
                 lines=5,
-                placeholder="جرب: [pause:2] أو <break time=\"4s\"/> أو ... (سيتم تجاهل * و #)",
                 text_align="right"
             )
@@ -245,7 +233,6 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
                     label="الشعور", value="محايد"
                 )
-            # ✅ [ACEE] Default Activated Settings
             with gr.Accordion("⚙️ إعدادات", open=True):
                 use_advanced = gr.Checkbox(label="يدوي", value=True)
                 manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
@@ -256,13 +243,16 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
         with gr.Column(scale=1):
             audio_out = gr.Audio(label="النتيجة")
             gr.Markdown("""
-            ### ℹ️ مميزات النظام (ACEE Optimized):
-            - **تنظيف ذكي**: حذف تلقائي للرموز `*` و `#` والإيموجي.
-            - **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`
-            - **أداء عالي**: معالجة متوازية للنصوص الطويلة.
             """)
-    btn.click(text_to_speech_edge, inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch], outputs=audio_out)
 if __name__ == "__main__":
     demo.launch()

 import asyncio
 import re
 import os
+import uuid
 from pydub import AudioSegment
 # ---------------------------------------------------------
 # ---------------------------------------------------------
 # 2. وظائف المساعدة والتنظيف
 # ---------------------------------------------------------
+def sanitize_text(text, ignored_chars_str):
     """
+    [ACEE] ENHANCED DYNAMIC SANITIZATION
+    تنظيف النص من الرموز التي يحددها المستخدم بالإضافة إلى الإيموجي.
     """
     if not text:
         return ""
+    # 1. تحليل قائمة الرموز المتجاهلة من المدخلات
+    # مثال المدخلات: "*, #, $" -> ["*", "#", "$"]
+    if ignored_chars_str:
+        # تقسيم النص بالفواصل أو المسافات
+        chars_to_remove = [char.strip() for char in ignored_chars_str.split(',') if char.strip()]
+        for char in chars_to_remove:
+            # استخدام replace العادي للأحرف البسيطة لتجنب مشاكل Regex الخاصة
+            text = text.replace(char, "")
     # 2. إزالة الإيموجي (نطاق Unicode واسع)
     emoji_pattern = re.compile("["
         u"\U0001F600-\U0001F64F"  # Emoticons
         u"\U0001F300-\U0001F5FF"  # Symbols & Pictographs
 # ---------------------------------------------------------
 # 3. المنطق الرئيسي
 # ---------------------------------------------------------
+async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch, ignored_chars):
+    # [ACEE] 1. Sanitize Input using the dynamic list
+    text = sanitize_text(text, ignored_chars)
     if not text or text.strip() == "":
         return None
+    # [ACEE] 2. Unique Session ID
     session_id = str(uuid.uuid4())
     # إعداد الصوت
         "رجل (سعودي)": "ar-SA-HamedNeural",
         "سيدة (سعودية)": "ar-SA-ZariyahNeural"
     }
     voice = voice_map.get(character, "ar-EG-SalmaNeural")
     # إعداد Rate & Pitch
     parts = re.split(pattern, text)
     segments_to_generate = []
     temp_files_cleanup = []
     timeline = []
     print(f"Session {session_id}: Detected {len(parts)} parts")
     for i, part in enumerate(parts):
         part = part.strip()
         if not part:
         # الحالة 2: <break time="4s"/>
         elif part.startswith("<break") and "time=" in part:
             try:
                 match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
                 if match:
                     pause_duration = float(match.group(1))
             if pause_duration > 0:
                 timeline.append({'type': 'silence', 'duration': pause_duration})
         else:
             temp_filename = f"temp_{session_id}_{i}.mp3"
             temp_files_cleanup.append(temp_filename)
             timeline.append({'type': 'audio', 'file': temp_filename})
             segments_to_generate.append(
                 generate_segment(part, voice, rate, pitch, temp_filename)
             )
+    # معالجة متوازية
     if segments_to_generate:
         await asyncio.gather(*segments_to_generate)
+    # الدمج
     final_audio = AudioSegment.empty()
     for item in timeline:
         if item['type'] == 'silence':
             final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
         elif item['type'] == 'audio':
                     segment = AudioSegment.from_mp3(item['file'])
                     final_audio += segment
                 else:
+                    print(f"Warning: Missing file {item['file']}")
             except Exception as e:
+                print(f"Merge Error: {e}")
     output_file = f"output_{session_id}.mp3"
     try:
         final_audio.export(output_file, format="mp3")
+    except:
         return None
+    # تنظيف
     for f in temp_files_cleanup:
         if os.path.exists(f):
             try: os.remove(f)
 # ---------------------------------------------------------
 # 4. الواجهة
 # ---------------------------------------------------------
 css = """
 footer {visibility: hidden}
 .gradio-container {direction: rtl}
             text_input = gr.Textbox(
                 label="📝 النص",
                 lines=5,
+                placeholder="اكتب نصك هنا... استخدم [pause:N] للتوقف",
+                text_align="right"
+            )
+            # [ACEE] New Field for Ignored Characters
+            ignored_chars_input = gr.Textbox(
+                label="🚫 أحرف للتجاهل (افصل بينها بفاصلة)",
+                value="*, #, _",
+                placeholder="مثال: *, #, $",
                 text_align="right"
             )
                     label="الشعور", value="محايد"
                 )
             with gr.Accordion("⚙️ إعدادات", open=True):
                 use_advanced = gr.Checkbox(label="يدوي", value=True)
                 manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
         with gr.Column(scale=1):
             audio_out = gr.Audio(label="النتيجة")
             gr.Markdown("""
+            ### ℹ️ مميزات النظام:
+            - **قائمة التجاهل**: يمكنك الآن تحديد الرموز التي تريد حذفها (مثل `*`, `#`) من الحقل الجديد.
+            - **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`.
             """)
+    btn.click(
+        text_to_speech_edge,
+        inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch, ignored_chars_input],
+        outputs=audio_out
+    )
 if __name__ == "__main__":
     demo.launch()