Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import edge_tts
|
|
| 3 |
import asyncio
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
-
import uuid
|
| 7 |
from pydub import AudioSegment
|
| 8 |
|
| 9 |
# ---------------------------------------------------------
|
|
@@ -21,22 +21,25 @@ EMOTION_PRESETS = {
|
|
| 21 |
# ---------------------------------------------------------
|
| 22 |
# 2. وظائف المساعدة والتنظيف
|
| 23 |
# ---------------------------------------------------------
|
| 24 |
-
def sanitize_text(text):
|
| 25 |
"""
|
| 26 |
-
[ACEE] ENHANCED SANITIZATION
|
| 27 |
-
تنظيف النص من الرموز
|
| 28 |
-
مع الحفاظ على العلامات الخاصة بالتوقفات.
|
| 29 |
"""
|
| 30 |
if not text:
|
| 31 |
return ""
|
| 32 |
|
| 33 |
-
# 1.
|
| 34 |
-
#
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# 2. إزالة الإيموجي (نطاق Unicode واسع)
|
| 39 |
-
# [ACEE] Comprehensive Emoji Regex
|
| 40 |
emoji_pattern = re.compile("["
|
| 41 |
u"\U0001F600-\U0001F64F" # Emoticons
|
| 42 |
u"\U0001F300-\U0001F5FF" # Symbols & Pictographs
|
|
@@ -65,14 +68,14 @@ async def generate_segment(text, voice, rate, pitch, filename):
|
|
| 65 |
# ---------------------------------------------------------
|
| 66 |
# 3. المنطق الرئيسي
|
| 67 |
# ---------------------------------------------------------
|
| 68 |
-
async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch):
|
| 69 |
-
# [ACEE] 1. Sanitize Input
|
| 70 |
-
text = sanitize_text(text)
|
| 71 |
|
| 72 |
if not text or text.strip() == "":
|
| 73 |
return None
|
| 74 |
|
| 75 |
-
# [ACEE] 2. Unique Session ID
|
| 76 |
session_id = str(uuid.uuid4())
|
| 77 |
|
| 78 |
# إعداد الصوت
|
|
@@ -83,8 +86,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 83 |
"رجل (سعودي)": "ar-SA-HamedNeural",
|
| 84 |
"سيدة (سعودية)": "ar-SA-ZariyahNeural"
|
| 85 |
}
|
| 86 |
-
# [ACEE] Fixed default fallback to match the UI default (Saudi Male) logic if needed,
|
| 87 |
-
# but kept flexible. Defaulting to Salma as safe fallback.
|
| 88 |
voice = voice_map.get(character, "ar-EG-SalmaNeural")
|
| 89 |
|
| 90 |
# إعداد Rate & Pitch
|
|
@@ -107,17 +108,12 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 107 |
|
| 108 |
parts = re.split(pattern, text)
|
| 109 |
|
| 110 |
-
# [ACEE] Optimization: Prepare lists for parallel processing
|
| 111 |
segments_to_generate = []
|
| 112 |
temp_files_cleanup = []
|
| 113 |
-
|
| 114 |
-
# Structure to hold the sequence of events (Audio or Silence)
|
| 115 |
-
# timeline = [ {'type': 'silence', 'duration': 1.0}, {'type': 'audio', 'file': 'path.mp3'}, ... ]
|
| 116 |
timeline = []
|
| 117 |
|
| 118 |
print(f"Session {session_id}: Detected {len(parts)} parts")
|
| 119 |
|
| 120 |
-
# Phase 1: Analyze and schedule tasks
|
| 121 |
for i, part in enumerate(parts):
|
| 122 |
part = part.strip()
|
| 123 |
if not part:
|
|
@@ -140,7 +136,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 140 |
# الحالة 2: <break time="4s"/>
|
| 141 |
elif part.startswith("<break") and "time=" in part:
|
| 142 |
try:
|
| 143 |
-
# [ACEE] FIXED REGEX: Removed double backslashes
|
| 144 |
match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
|
| 145 |
if match:
|
| 146 |
pause_duration = float(match.group(1))
|
|
@@ -158,29 +153,22 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 158 |
if pause_duration > 0:
|
| 159 |
timeline.append({'type': 'silence', 'duration': pause_duration})
|
| 160 |
else:
|
| 161 |
-
# Generate unique filename for this segment
|
| 162 |
temp_filename = f"temp_{session_id}_{i}.mp3"
|
| 163 |
temp_files_cleanup.append(temp_filename)
|
| 164 |
-
|
| 165 |
-
# Add to timeline placeholder
|
| 166 |
timeline.append({'type': 'audio', 'file': temp_filename})
|
| 167 |
-
|
| 168 |
-
# Add to generation queue
|
| 169 |
segments_to_generate.append(
|
| 170 |
generate_segment(part, voice, rate, pitch, temp_filename)
|
| 171 |
)
|
| 172 |
|
| 173 |
-
#
|
| 174 |
if segments_to_generate:
|
| 175 |
-
print(f"Generating {len(segments_to_generate)} segments in parallel...")
|
| 176 |
await asyncio.gather(*segments_to_generate)
|
| 177 |
|
| 178 |
-
#
|
| 179 |
final_audio = AudioSegment.empty()
|
| 180 |
|
| 181 |
for item in timeline:
|
| 182 |
if item['type'] == 'silence':
|
| 183 |
-
print(f"Adding Silence: {item['duration']}s")
|
| 184 |
final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
|
| 185 |
|
| 186 |
elif item['type'] == 'audio':
|
|
@@ -189,19 +177,17 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 189 |
segment = AudioSegment.from_mp3(item['file'])
|
| 190 |
final_audio += segment
|
| 191 |
else:
|
| 192 |
-
print(f"Warning:
|
| 193 |
except Exception as e:
|
| 194 |
-
print(f"
|
| 195 |
|
| 196 |
-
# الحفظ النهائي
|
| 197 |
output_file = f"output_{session_id}.mp3"
|
| 198 |
try:
|
| 199 |
final_audio.export(output_file, format="mp3")
|
| 200 |
-
except
|
| 201 |
-
print(f"Export Error: {e}")
|
| 202 |
return None
|
| 203 |
|
| 204 |
-
#
|
| 205 |
for f in temp_files_cleanup:
|
| 206 |
if os.path.exists(f):
|
| 207 |
try: os.remove(f)
|
|
@@ -212,12 +198,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
|
|
| 212 |
# ---------------------------------------------------------
|
| 213 |
# 4. الواجهة
|
| 214 |
# ---------------------------------------------------------
|
| 215 |
-
EXAMPLES = [
|
| 216 |
-
["مرحباً [pause:2] هذا توقف مؤقت.", "رجل (مصري)", "محايد"],
|
| 217 |
-
['تجربة نمط SSML <break time="3s"/> ونعود.', "سيدة (مصرية)", "هادئ 😌"],
|
| 218 |
-
["واحد ... اثنين ... ثلاثة ... انطلاق!", "رجل (سعودي)", "متحمس 🎉"]
|
| 219 |
-
]
|
| 220 |
-
|
| 221 |
css = """
|
| 222 |
footer {visibility: hidden}
|
| 223 |
.gradio-container {direction: rtl}
|
|
@@ -231,7 +211,15 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
|
|
| 231 |
text_input = gr.Textbox(
|
| 232 |
label="📝 النص",
|
| 233 |
lines=5,
|
| 234 |
-
placeholder="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
text_align="right"
|
| 236 |
)
|
| 237 |
|
|
@@ -245,7 +233,6 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
|
|
| 245 |
label="الشعور", value="محايد"
|
| 246 |
)
|
| 247 |
|
| 248 |
-
# ✅ [ACEE] Default Activated Settings
|
| 249 |
with gr.Accordion("⚙️ إعدادات", open=True):
|
| 250 |
use_advanced = gr.Checkbox(label="يدوي", value=True)
|
| 251 |
manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
|
|
@@ -256,13 +243,16 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
|
|
| 256 |
with gr.Column(scale=1):
|
| 257 |
audio_out = gr.Audio(label="النتيجة")
|
| 258 |
gr.Markdown("""
|
| 259 |
-
### ℹ️ مميزات النظام
|
| 260 |
-
- **
|
| 261 |
-
- **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`
|
| 262 |
-
- **أداء عالي**: معالجة متوازية للنصوص الطويلة.
|
| 263 |
""")
|
| 264 |
|
| 265 |
-
btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
if __name__ == "__main__":
|
| 268 |
demo.launch()
|
|
|
|
| 3 |
import asyncio
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
+
import uuid
|
| 7 |
from pydub import AudioSegment
|
| 8 |
|
| 9 |
# ---------------------------------------------------------
|
|
|
|
| 21 |
# ---------------------------------------------------------
|
| 22 |
# 2. وظائف المساعدة والتنظيف
|
| 23 |
# ---------------------------------------------------------
|
| 24 |
+
def sanitize_text(text, ignored_chars_str):
|
| 25 |
"""
|
| 26 |
+
[ACEE] ENHANCED DYNAMIC SANITIZATION
|
| 27 |
+
تنظيف النص من الرموز التي يحددها المستخدم بالإضافة إلى الإيموجي.
|
|
|
|
| 28 |
"""
|
| 29 |
if not text:
|
| 30 |
return ""
|
| 31 |
|
| 32 |
+
# 1. تحليل قائمة الرموز المتجاهلة من المدخلات
|
| 33 |
+
# مثال المدخلات: "*, #, $" -> ["*", "#", "$"]
|
| 34 |
+
if ignored_chars_str:
|
| 35 |
+
# Split on commas only; whitespace around each token is stripped (spaces alone do NOT separate symbols)
|
| 36 |
+
chars_to_remove = [char.strip() for char in ignored_chars_str.split(',') if char.strip()]
|
| 37 |
+
|
| 38 |
+
for char in chars_to_remove:
|
| 39 |
+
# استخدام replace العادي للأحرف البسيطة لتجنب مشاكل Regex الخاصة
|
| 40 |
+
text = text.replace(char, "")
|
| 41 |
|
| 42 |
# 2. إزالة الإيموجي (نطاق Unicode واسع)
|
|
|
|
| 43 |
emoji_pattern = re.compile("["
|
| 44 |
u"\U0001F600-\U0001F64F" # Emoticons
|
| 45 |
u"\U0001F300-\U0001F5FF" # Symbols & Pictographs
|
|
|
|
| 68 |
# ---------------------------------------------------------
|
| 69 |
# 3. المنطق الرئيسي
|
| 70 |
# ---------------------------------------------------------
|
| 71 |
+
async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch, ignored_chars):
|
| 72 |
+
# [ACEE] 1. Sanitize Input using the dynamic list
|
| 73 |
+
text = sanitize_text(text, ignored_chars)
|
| 74 |
|
| 75 |
if not text or text.strip() == "":
|
| 76 |
return None
|
| 77 |
|
| 78 |
+
# [ACEE] 2. Unique Session ID
|
| 79 |
session_id = str(uuid.uuid4())
|
| 80 |
|
| 81 |
# إعداد الصوت
|
|
|
|
| 86 |
"رجل (سعودي)": "ar-SA-HamedNeural",
|
| 87 |
"سيدة (سعودية)": "ar-SA-ZariyahNeural"
|
| 88 |
}
|
|
|
|
|
|
|
| 89 |
voice = voice_map.get(character, "ar-EG-SalmaNeural")
|
| 90 |
|
| 91 |
# إعداد Rate & Pitch
|
|
|
|
| 108 |
|
| 109 |
parts = re.split(pattern, text)
|
| 110 |
|
|
|
|
| 111 |
segments_to_generate = []
|
| 112 |
temp_files_cleanup = []
|
|
|
|
|
|
|
|
|
|
| 113 |
timeline = []
|
| 114 |
|
| 115 |
print(f"Session {session_id}: Detected {len(parts)} parts")
|
| 116 |
|
|
|
|
| 117 |
for i, part in enumerate(parts):
|
| 118 |
part = part.strip()
|
| 119 |
if not part:
|
|
|
|
| 136 |
# الحالة 2: <break time="4s"/>
|
| 137 |
elif part.startswith("<break") and "time=" in part:
|
| 138 |
try:
|
|
|
|
| 139 |
match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
|
| 140 |
if match:
|
| 141 |
pause_duration = float(match.group(1))
|
|
|
|
| 153 |
if pause_duration > 0:
|
| 154 |
timeline.append({'type': 'silence', 'duration': pause_duration})
|
| 155 |
else:
|
|
|
|
| 156 |
temp_filename = f"temp_{session_id}_{i}.mp3"
|
| 157 |
temp_files_cleanup.append(temp_filename)
|
|
|
|
|
|
|
| 158 |
timeline.append({'type': 'audio', 'file': temp_filename})
|
|
|
|
|
|
|
| 159 |
segments_to_generate.append(
|
| 160 |
generate_segment(part, voice, rate, pitch, temp_filename)
|
| 161 |
)
|
| 162 |
|
| 163 |
+
# معالجة متوازية
|
| 164 |
if segments_to_generate:
|
|
|
|
| 165 |
await asyncio.gather(*segments_to_generate)
|
| 166 |
|
| 167 |
+
# الدمج
|
| 168 |
final_audio = AudioSegment.empty()
|
| 169 |
|
| 170 |
for item in timeline:
|
| 171 |
if item['type'] == 'silence':
|
|
|
|
| 172 |
final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
|
| 173 |
|
| 174 |
elif item['type'] == 'audio':
|
|
|
|
| 177 |
segment = AudioSegment.from_mp3(item['file'])
|
| 178 |
final_audio += segment
|
| 179 |
else:
|
| 180 |
+
print(f"Warning: Missing file {item['file']}")
|
| 181 |
except Exception as e:
|
| 182 |
+
print(f"Merge Error: {e}")
|
| 183 |
|
|
|
|
| 184 |
output_file = f"output_{session_id}.mp3"
|
| 185 |
try:
|
| 186 |
final_audio.export(output_file, format="mp3")
|
| 187 |
+
except:
|
|
|
|
| 188 |
return None
|
| 189 |
|
| 190 |
+
# تنظيف
|
| 191 |
for f in temp_files_cleanup:
|
| 192 |
if os.path.exists(f):
|
| 193 |
try: os.remove(f)
|
|
|
|
| 198 |
# ---------------------------------------------------------
|
| 199 |
# 4. الواجهة
|
| 200 |
# ---------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
css = """
|
| 202 |
footer {visibility: hidden}
|
| 203 |
.gradio-container {direction: rtl}
|
|
|
|
| 211 |
text_input = gr.Textbox(
|
| 212 |
label="📝 النص",
|
| 213 |
lines=5,
|
| 214 |
+
placeholder="اكتب نصك هنا... استخدم [pause:N] للتوقف",
|
| 215 |
+
text_align="right"
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
# [ACEE] New Field for Ignored Characters
|
| 219 |
+
ignored_chars_input = gr.Textbox(
|
| 220 |
+
label="🚫 أحرف للتجاهل (افصل بينها بفاصلة)",
|
| 221 |
+
value="*, #, _",
|
| 222 |
+
placeholder="مثال: *, #, $",
|
| 223 |
text_align="right"
|
| 224 |
)
|
| 225 |
|
|
|
|
| 233 |
label="الشعور", value="محايد"
|
| 234 |
)
|
| 235 |
|
|
|
|
| 236 |
with gr.Accordion("⚙️ إعدادات", open=True):
|
| 237 |
use_advanced = gr.Checkbox(label="يدوي", value=True)
|
| 238 |
manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
|
|
|
|
| 243 |
with gr.Column(scale=1):
|
| 244 |
audio_out = gr.Audio(label="النتيجة")
|
| 245 |
gr.Markdown("""
|
| 246 |
+
### ℹ️ مميزات النظام:
|
| 247 |
+
- **قائمة التجاهل**: يمكنك الآن تحديد الرموز التي تريد حذفها (مثل `*`, `#`) من الحقل الجديد.
|
| 248 |
+
- **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`.
|
|
|
|
| 249 |
""")
|
| 250 |
|
| 251 |
+
btn.click(
|
| 252 |
+
text_to_speech_edge,
|
| 253 |
+
inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch, ignored_chars_input],
|
| 254 |
+
outputs=audio_out
|
| 255 |
+
)
|
| 256 |
|
| 257 |
if __name__ == "__main__":
|
| 258 |
demo.launch()
|