"""Gradio front-end for Arabic text-to-speech built on edge-tts.

The input text may contain inline pause markers. It is split into spoken
segments and silences, the spoken segments are synthesized concurrently with
edge-tts, and everything is stitched into a single MP3 with pydub.
"""

import asyncio
import math
import os
import re
import uuid

import edge_tts
import gradio as gr
from pydub import AudioSegment

# ---------------------------------------------------------
# 1. Emotion presets
# ---------------------------------------------------------
# Each preset maps a UI label to the edge-tts prosody strings it stands for.
EMOTION_PRESETS = {
    "محايد": {"rate": "+0%", "pitch": "+0Hz"},
    "سعيد 😊": {"rate": "+15%", "pitch": "+10Hz"},
    "حزين 😢": {"rate": "-15%", "pitch": "-10Hz"},
    "غاضب 😠": {"rate": "+20%", "pitch": "+5Hz"},
    "هادئ 😌": {"rate": "-10%", "pitch": "+0Hz"},
    "متحمس 🎉": {"rate": "+30%", "pitch": "+15Hz"},
}

# UI character label -> edge-tts voice name.
VOICE_MAP = {
    "رجل (مصري)": "ar-EG-ShakirNeural",
    "سيدة (مصرية)": "ar-EG-SalmaNeural",
    "طفل (محاكاة)": "ar-EG-SalmaNeural",
    "رجل (سعودي)": "ar-SA-HamedNeural",
    "سيدة (سعودية)": "ar-SA-ZariyahNeural",
}
DEFAULT_VOICE = "ar-EG-SalmaNeural"

# The "child" character reuses a female voice with a strongly raised pitch.
CHILD_CHARACTER = "طفل (محاكاة)"
CHILD_PITCH = "+50Hz"

# Inline pause markers. The capture group makes re.split() keep the markers in
# its result. Every alternative must consume at least one character: an empty
# alternative would make re.split() break the text at every position.
# NOTE(review): the source this was restored from had lost a third, tag-style
# marker (its text was stripped as markup). Only the two forms below could be
# recovered with certainty; re-add the tag form from the original if needed.
PAUSE_PATTERN = re.compile(r"(\[pause:[\d\.]+\]|\.\.\.)")

# Silence inserted for a literal "...".
# NOTE(review): the original value was lost together with the branch that
# handled it; 0.5 s is a placeholder to be confirmed.
ELLIPSIS_PAUSE_SECONDS = 0.5

# Pitch syntax accepted by edge-tts: a signed integer followed by "Hz".
_PITCH_PATTERN = re.compile(r"[+-]\d+Hz")

# Wide Unicode ranges treated as emoji / pictographs and removed before TTS.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Symbols & pictographs
    "\U0001F680-\U0001F6FF"  # Transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # Regional indicators (flags)
    "\U00002700-\U000027BF"  # Dingbats
    "\u2600-\u26FF"  # Miscellaneous symbols
    "]+",
    flags=re.UNICODE,
)


# ---------------------------------------------------------
# 2. Helpers and sanitization
# ---------------------------------------------------------
def sanitize_text(text, ignored_chars_str):
    """Remove user-selected symbols and emoji from *text*.

    Args:
        text: Raw input text; may be empty or None.
        ignored_chars_str: Comma-separated list of substrings to delete,
            e.g. ``"*, #, $"``. Empty or None disables this step.

    Returns:
        The cleaned text with surrounding whitespace stripped, or ``""`` when
        *text* is empty.
    """
    if not text:
        return ""

    if ignored_chars_str:
        for token in ignored_chars_str.split(","):
            token = token.strip()
            if token:
                # Plain str.replace: the tokens are literals, not regexes.
                text = text.replace(token, "")

    return _EMOJI_PATTERN.sub("", text).strip()


async def generate_segment(text, voice, rate, pitch, filename):
    """Synthesize one spoken segment to *filename*.

    Returns:
        True when the file was written; False when *text* is blank or the
        synthesis failed (the error is printed, not raised, so one bad
        segment does not abort the whole request).
    """
    if not text or not text.strip():
        return False
    try:
        communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
        await communicate.save(filename)
        return True
    except Exception as e:
        print(f"Error generating segment '{text}': {e}")
        return False


def _resolve_prosody(character, emotion, use_advanced, manual_rate, manual_pitch):
    """Return the ``(rate, pitch)`` strings to pass to edge-tts."""
    if use_advanced:
        # round(), not int(): (0.9 - 1.0) * 100 is -9.999... in floating point
        # and would otherwise be truncated to -9.
        rate = f"{round((manual_rate - 1.0) * 100):+d}%"
        pitch = (manual_pitch or "").strip()
        if not _PITCH_PATTERN.fullmatch(pitch):
            # edge-tts rejects malformed pitch strings, which would make every
            # segment fail; fall back to neutral instead.
            print(f"Warning: Invalid pitch '{manual_pitch}', using +0Hz")
            pitch = "+0Hz"
    else:
        preset = EMOTION_PRESETS.get(emotion, EMOTION_PRESETS["محايد"])
        rate = preset["rate"]
        pitch = preset["pitch"]

    # NOTE(review): applied in both modes; the original indentation was lost,
    # so confirm this override was not meant for preset mode only.
    if character == CHILD_CHARACTER:
        pitch = CHILD_PITCH

    return rate, pitch


def _parse_pause(part):
    """Return the pause length in seconds if *part* is a pause marker, else None."""
    if part == "...":
        return ELLIPSIS_PAUSE_SECONDS
    if part.startswith("[pause:") and part.endswith("]"):
        try:
            seconds = float(part[len("[pause:"):-1])
        except ValueError:
            # e.g. "[pause:1.2.3]" - not a number, so treat it as spoken text.
            return None
        return seconds if math.isfinite(seconds) else None
    return None


def _merge_timeline(timeline):
    """Concatenate the timeline's silences and audio files into one segment."""
    merged = AudioSegment.empty()
    for item in timeline:
        if item["type"] == "silence":
            merged += AudioSegment.silent(duration=item["duration"] * 1000)
        elif item["type"] == "audio":
            # A segment whose synthesis failed leaves no file behind; skip it.
            if not os.path.exists(item["file"]):
                print(f"Warning: Missing file {item['file']}")
                continue
            try:
                merged += AudioSegment.from_mp3(item["file"])
            except Exception as e:
                print(f"Merge Error: {e}")
    return merged


def _remove_files(paths):
    """Best-effort deletion of temporary files."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Cleanup Error: {e}")


# ---------------------------------------------------------
# 3. Main logic
# ---------------------------------------------------------
async def text_to_speech_edge(text, character, emotion, use_advanced,
                              manual_rate, manual_pitch, ignored_chars):
    """Convert *text* to a single MP3 file, honouring inline pause markers.

    Args:
        text: Input text; may contain ``[pause:N]`` (N seconds) and ``...``.
        character: Voice label (a key of ``VOICE_MAP``); unknown labels fall
            back to ``DEFAULT_VOICE``.
        emotion: Key of ``EMOTION_PRESETS``; used when *use_advanced* is false.
        use_advanced: When true, *manual_rate* / *manual_pitch* are used
            instead of the emotion preset.
        manual_rate: Speed multiplier (1.0 = normal speed).
        manual_pitch: Pitch string such as ``"+10Hz"``.
        ignored_chars: Comma-separated symbols to strip from the text.

    Returns:
        Path of the generated MP3, or None when there is nothing to say, no
        audio could be produced, or the export failed.
    """
    text = sanitize_text(text, ignored_chars)
    if not text:
        return None

    # Unique per-request id so concurrent requests never share file names.
    session_id = str(uuid.uuid4())

    voice = VOICE_MAP.get(character, DEFAULT_VOICE)
    rate, pitch = _resolve_prosody(
        character, emotion, use_advanced, manual_rate, manual_pitch
    )

    parts = PAUSE_PATTERN.split(text)
    print(f"Session {session_id}: Detected {len(parts)} parts")

    segments_to_generate = []
    temp_files_cleanup = []
    timeline = []

    for i, part in enumerate(parts):
        part = part.strip()
        if not part:
            continue

        pause_duration = _parse_pause(part)
        if pause_duration is not None:
            # Zero-length pauses are dropped rather than read aloud.
            if pause_duration > 0:
                timeline.append({'type': 'silence', 'duration': pause_duration})
            continue

        temp_filename = f"temp_{session_id}_{i}.mp3"
        temp_files_cleanup.append(temp_filename)
        timeline.append({'type': 'audio', 'file': temp_filename})
        segments_to_generate.append(
            generate_segment(part, voice, rate, pitch, temp_filename)
        )

    try:
        # Synthesize all spoken segments concurrently.
        if segments_to_generate:
            await asyncio.gather(*segments_to_generate)

        final_audio = _merge_timeline(timeline)
        if len(final_audio) == 0:
            # Every segment failed; do not hand back an empty file.
            return None

        output_file = f"output_{session_id}.mp3"
        try:
            # export() returns an open file object; close it to avoid a leak.
            final_audio.export(output_file, format="mp3").close()
        except Exception as e:
            print(f"Export Error: {e}")
            return None
        return output_file
    finally:
        # Runs on every exit path so temp files never pile up.
        _remove_files(temp_files_cleanup)


# ---------------------------------------------------------
# 4. User interface
# ---------------------------------------------------------
css = """
footer {visibility: hidden}
.gradio-container {direction: rtl}
"""

with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("# 🎙️ TTS Pro: دعم كامل للتوقفات")

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="📝 النص",
                lines=5,
                placeholder="اكتب نصك هنا... استخدم [pause:N] للتوقف",
                text_align="right",
            )

            # Comma-separated symbols the user wants removed from the text.
            ignored_chars_input = gr.Textbox(
                label="🚫 أحرف للتجاهل (افصل بينها بفاصلة)",
                value="*, #, _",
                placeholder="مثال: *, #, $",
                text_align="right",
            )

            with gr.Row():
                voice_selector = gr.Radio(
                    ["رجل (مصري)", "سيدة (مصرية)", "طفل (محاكاة)",
                     "رجل (سعودي)", "سيدة (سعودية)"],
                    label="الصوت",
                    value="رجل (سعودي)",
                )
                emotion_selector = gr.Radio(
                    list(EMOTION_PRESETS.keys()),
                    label="الشعور",
                    value="محايد",
                )

            with gr.Accordion("⚙️ إعدادات", open=True):
                use_advanced = gr.Checkbox(label="يدوي", value=True)
                manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
                manual_pitch = gr.Textbox("+10Hz", label="Pitch")

            btn = gr.Button("🎬 توليد", variant="primary")

        with gr.Column(scale=1):
            audio_out = gr.Audio(label="النتيجة")
            gr.Markdown("""
### ℹ️ مميزات النظام:
- **قائمة التجاهل**: يمكنك الآن تحديد الرموز التي تريد حذفها (مثل `*`, `#`) من الحقل الجديد.
- **توقفات دقيقة**: دعم `[pause:N]` و `...`.
""")

    btn.click(
        text_to_speech_edge,
        inputs=[text_input, voice_selector, emotion_selector, use_advanced,
                manual_rate, manual_pitch, ignored_chars_input],
        outputs=audio_out,
    )

if __name__ == "__main__":
    demo.launch()