import gradio as gr
import edge_tts
import asyncio
import re
import os
import uuid
from pydub import AudioSegment
# ---------------------------------------------------------
# 1. إعدادات المشاعر
# ---------------------------------------------------------
# Emotion presets: each UI label maps to the speaking-rate and pitch offsets
# applied when manual settings are switched off. Insertion order matters
# because the emotion selector lists the keys in this order.
EMOTION_PRESETS = {
    label: {"rate": rate, "pitch": pitch}
    for label, rate, pitch in (
        ("محايد", "+0%", "+0Hz"),
        ("سعيد 😊", "+15%", "+10Hz"),
        ("حزين 😢", "-15%", "-10Hz"),
        ("غاضب 😠", "+20%", "+5Hz"),
        ("هادئ 😌", "-10%", "+0Hz"),
        ("متحمس 🎉", "+30%", "+15Hz"),
    )
}
# ---------------------------------------------------------
# 2. وظائف المساعدة والتنظيف
# ---------------------------------------------------------
# One or more emoji code points, each optionally followed by the variation
# selectors / zero-width joiners that glue emoji sequences together.
# Joiners are only consumed right after an emoji, so ordinary text that
# contains them is left alone. Compiled once at import time.
_EMOJI_PATTERN = re.compile(
    "(?:["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Symbols & pictographs (incl. skin-tone modifiers)
    "\U0001F680-\U0001F6FF"  # Transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # Regional indicators (flags)
    "\U0001F900-\U0001F9FF"  # Supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # Symbols & pictographs extended-A
    "\u2600-\u26FF"  # Miscellaneous symbols
    "\u2700-\u27BF"  # Dingbats
    "]"
    "[\uFE0F\u200D]*"
    ")+"
)


def sanitize_text(text, ignored_chars_str):
    """Strip user-selected symbols and emoji from *text*.

    Args:
        text: The raw input text. ``None`` or an empty string yields ``""``.
        ignored_chars_str: Comma-separated list of substrings to delete,
            e.g. ``"*, #, $"``. Whitespace around each entry is ignored and
            empty entries are skipped. May be ``None`` or empty.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    if not text:
        return ""

    if ignored_chars_str:
        # Entries are separated by commas only; plain str.replace is used so
        # regex metacharacters such as "*" or "$" need no escaping.
        for entry in ignored_chars_str.split(","):
            entry = entry.strip()
            if entry:
                text = text.replace(entry, "")

    return _EMOJI_PATTERN.sub("", text).strip()
async def generate_segment(text, voice, rate, pitch, filename):
    """Synthesize one chunk of text into an audio file.

    Args:
        text: The text to speak; empty or whitespace-only text is skipped.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Rate value passed to ``edge_tts.Communicate``.
        pitch: Pitch value passed to ``edge_tts.Communicate``.
        filename: Path the synthesized audio is saved to.

    Returns:
        ``True`` when the audio was saved, ``False`` when the text was blank
        or synthesis raised (the error is printed, not propagated).
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return False

    succeeded = False
    try:
        await edge_tts.Communicate(text, voice, rate=rate, pitch=pitch).save(filename)
        succeeded = True
    except Exception as e:
        # Best-effort: one failed segment must not abort the whole request.
        print(f"Error generating segment '{text}': {e}")
    return succeeded
# ---------------------------------------------------------
# 3. المنطق الرئيسي
# ---------------------------------------------------------
# Pause markers recognised inside the text: "[pause:N]" and "...".
# NOTE(review): the original pattern had a third alternative (an
# angle-bracket marker) that was lost from this file, leaving an empty
# alternative that matched everywhere. Restore it here and in
# _parse_pause_seconds once the intended syntax is confirmed.
_PAUSE_TOKEN_RE = re.compile(r"(\[pause:[\d.]+\]|\.\.\.)")

# Silence inserted for a bare "..." marker, in seconds.
# NOTE(review): the original value is not recoverable from this file;
# 0.5 s is an assumed default - confirm.
_ELLIPSIS_PAUSE_SECONDS = 0.5


def _parse_pause_seconds(token):
    """Return the pause length in seconds for a marker, or None for plain text."""
    if token == "...":
        return _ELLIPSIS_PAUSE_SECONDS
    if token.startswith("[pause:") and token.endswith("]"):
        try:
            return float(token[len("[pause:"):-1])
        except ValueError:
            # Malformed number such as "[pause:1.2.3]": treat as a zero-length
            # pause so the marker is dropped instead of being read aloud.
            return 0.0
    return None


async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch, ignored_chars):
    """Convert text with optional pause markers into a single MP3 file.

    The text is sanitized, split on pause markers, each spoken chunk is
    synthesized concurrently, and the chunks are stitched together with
    silence where pauses were requested.

    Args:
        text: Raw input text; may contain "[pause:N]" (N seconds) and "...".
        character: UI label of the voice; unknown labels fall back to
            "ar-EG-SalmaNeural".
        emotion: Key into EMOTION_PRESETS, used when use_advanced is false;
            unknown keys fall back to the neutral preset.
        use_advanced: When true, manual_rate and manual_pitch are used
            instead of the emotion preset.
        manual_rate: Speed multiplier (1.0 = normal), converted to a signed
            percentage string.
        manual_pitch: Pitch string such as "+10Hz"; blank falls back to "+0Hz".
        ignored_chars: Comma-separated symbols passed to sanitize_text.

    Returns:
        Path of the generated "output_<uuid>.mp3" in the working directory,
        or None when there is nothing to speak or the export fails.
    """
    text = sanitize_text(text, ignored_chars)
    if not text:
        return None

    # A unique id per request keeps concurrent requests from sharing files.
    session_id = str(uuid.uuid4())

    voice_map = {
        "رجل (مصري)": "ar-EG-ShakirNeural",
        "سيدة (مصرية)": "ar-EG-SalmaNeural",
        "طفل (محاكاة)": "ar-EG-SalmaNeural",
        "رجل (سعودي)": "ar-SA-HamedNeural",
        "سيدة (سعودية)": "ar-SA-ZariyahNeural",
    }
    voice = voice_map.get(character, "ar-EG-SalmaNeural")

    if use_advanced:
        # round() rather than int(): (0.9 - 1.0) * 100 is -9.999... in
        # floating point and would otherwise truncate to -9%.
        rate = f"{round((manual_rate - 1.0) * 100):+d}%"
        pitch = (manual_pitch or "").strip() or "+0Hz"
    else:
        preset = EMOTION_PRESETS.get(emotion, EMOTION_PRESETS["محايد"])
        rate = preset["rate"]
        pitch = preset["pitch"]

    # The simulated child voice reuses a female voice with a raised pitch.
    # NOTE(review): assumed to apply in both manual and preset mode; the
    # original nesting of this check is not recoverable - confirm.
    if character == "طفل (محاكاة)":
        pitch = "+50Hz"

    parts = _PAUSE_TOKEN_RE.split(text)
    print(f"Session {session_id}: Detected {len(parts)} parts")

    pending_segments = []
    temp_files_cleanup = []
    timeline = []

    for index, raw_part in enumerate(parts):
        part = raw_part.strip()
        if not part:
            continue

        pause_seconds = _parse_pause_seconds(part)
        if pause_seconds is not None:
            # Zero or malformed pauses contribute nothing to the timeline.
            if pause_seconds > 0:
                timeline.append({'type': 'silence', 'duration': pause_seconds})
            continue

        temp_filename = f"temp_{session_id}_{index}.mp3"
        temp_files_cleanup.append(temp_filename)
        timeline.append({'type': 'audio', 'file': temp_filename})
        pending_segments.append(
            generate_segment(part, voice, rate, pitch, temp_filename)
        )

    output_file = f"output_{session_id}.mp3"
    try:
        # Synthesize all spoken chunks concurrently; generate_segment handles
        # its own errors, so a failed chunk only shows up as a missing file.
        if pending_segments:
            await asyncio.gather(*pending_segments)

        final_audio = AudioSegment.empty()
        for item in timeline:
            if item['type'] == 'silence':
                final_audio += AudioSegment.silent(duration=int(item['duration'] * 1000))
                continue
            segment_path = item['file']
            if not os.path.exists(segment_path):
                print(f"Warning: Missing file {segment_path}")
                continue
            try:
                final_audio += AudioSegment.from_mp3(segment_path)
            except Exception as e:
                # Skip an unreadable chunk rather than failing the request.
                print(f"Merge Error: {e}")

        if len(final_audio) == 0:
            # Every chunk failed and no pause was requested: nothing to export.
            return None

        try:
            final_audio.export(output_file, format="mp3")
        except Exception as e:
            print(f"Export Error: {e}")
            return None
        return output_file
    finally:
        # Always remove per-chunk files, including when merge or export fails.
        for temp_path in temp_files_cleanup:
            try:
                os.remove(temp_path)
            except OSError:
                # Already gone or not removable; cleanup is best-effort.
                pass
# ---------------------------------------------------------
# 4. الواجهة
# ---------------------------------------------------------
# Page-level CSS: hide the Gradio footer and lay the page out right-to-left.
css = """
footer {visibility: hidden}
.gradio-container {direction: rtl}
"""

# NOTE(review): the nesting of the layout below is reconstructed from the
# order of the `with` statements; the file's indentation was lost - confirm
# against the intended layout.
with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown("# 🎙️ TTS Pro: دعم كامل للتوقفات")
    with gr.Row():
        # Left column: text input and all synthesis controls.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="📝 النص",
                lines=5,
                placeholder="اكتب نصك هنا... استخدم [pause:N] للتوقف",
                text_align="right"
            )
            # Comma-separated symbols that are removed before synthesis.
            ignored_chars_input = gr.Textbox(
                label="🚫 أحرف للتجاهل (افصل بينها بفاصلة)",
                value="*, #, _",
                placeholder="مثال: *, #, $",
                text_align="right"
            )
            with gr.Row():
                voice_selector = gr.Radio(
                    ["رجل (مصري)", "سيدة (مصرية)", "طفل (محاكاة)", "رجل (سعودي)", "سيدة (سعودية)"],
                    label="الصوت", value="رجل (سعودي)"
                )
                emotion_selector = gr.Radio(
                    list(EMOTION_PRESETS.keys()),
                    label="الشعور", value="محايد"
                )
            # Manual rate/pitch; when the checkbox is on these override the
            # emotion preset.
            with gr.Accordion("⚙️ إعدادات", open=True):
                use_advanced = gr.Checkbox(label="يدوي", value=True)
                manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
                manual_pitch = gr.Textbox("+10Hz", label="Pitch")
            btn = gr.Button("🎬 توليد", variant="primary")
        # Right column: generated audio and a short feature summary.
        with gr.Column(scale=1):
            audio_out = gr.Audio(label="النتيجة")
            # The Markdown body stays at column 0 so it is not rendered as an
            # indented code block. The empty inline-code span that used to sit
            # between the two pause syntaxes has been removed.
            gr.Markdown("""
### ℹ️ مميزات النظام:
- **قائمة التجاهل**: يمكنك الآن تحديد الرموز التي تريد حذفها (مثل `*`, `#`) من الحقل الجديد.
- **توقفات دقيقة**: دعم `[pause:N]` و `...`.
""")
    # Input order must match the parameter order of text_to_speech_edge.
    btn.click(
        text_to_speech_edge,
        inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch, ignored_chars_input],
        outputs=audio_out
    )

if __name__ == "__main__":
    demo.launch()