ArtSpace committed on
Commit
d0c1255
·
verified ·
1 Parent(s): 7d6ef8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -50
app.py CHANGED
@@ -3,7 +3,7 @@ import edge_tts
3
  import asyncio
4
  import re
5
  import os
6
- import uuid # [ACEE] Added for unique filename generation
7
  from pydub import AudioSegment
8
 
9
  # ---------------------------------------------------------
@@ -21,22 +21,25 @@ EMOTION_PRESETS = {
21
  # ---------------------------------------------------------
22
  # 2. وظائف المساعدة والتنظيف
23
  # ---------------------------------------------------------
24
- def sanitize_text(text):
25
  """
26
- [ACEE] ENHANCED SANITIZATION
27
- تنظيف النص من الرموز غير المرغوبة (النجوم، الهاشتاج، الإيموجي)
28
- مع الحفاظ على العلامات الخاصة بالتوقفات.
29
  """
30
  if not text:
31
  return ""
32
 
33
- # 1. إزالة الرموز الخاصة المحددة
34
- # [ACEE] Added removal of '#' as requested
35
- for char in ["*", "#"]:
36
- text = text.replace(char, "")
 
 
 
 
 
37
 
38
  # 2. إزالة الإيموجي (نطاق Unicode واسع)
39
- # [ACEE] Comprehensive Emoji Regex
40
  emoji_pattern = re.compile("["
41
  u"\U0001F600-\U0001F64F" # Emoticons
42
  u"\U0001F300-\U0001F5FF" # Symbols & Pictographs
@@ -65,14 +68,14 @@ async def generate_segment(text, voice, rate, pitch, filename):
65
  # ---------------------------------------------------------
66
  # 3. المنطق الرئيسي
67
  # ---------------------------------------------------------
68
- async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch):
69
- # [ACEE] 1. Sanitize Input
70
- text = sanitize_text(text)
71
 
72
  if not text or text.strip() == "":
73
  return None
74
 
75
- # [ACEE] 2. Unique Session ID to prevent race conditions
76
  session_id = str(uuid.uuid4())
77
 
78
  # إعداد الصوت
@@ -83,8 +86,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
83
  "رجل (سعودي)": "ar-SA-HamedNeural",
84
  "سيدة (سعودية)": "ar-SA-ZariyahNeural"
85
  }
86
- # [ACEE] Fixed default fallback to match the UI default (Saudi Male) logic if needed,
87
- # but kept flexible. Defaulting to Salma as safe fallback.
88
  voice = voice_map.get(character, "ar-EG-SalmaNeural")
89
 
90
  # إعداد Rate & Pitch
@@ -107,17 +108,12 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
107
 
108
  parts = re.split(pattern, text)
109
 
110
- # [ACEE] Optimization: Prepare lists for parallel processing
111
  segments_to_generate = []
112
  temp_files_cleanup = []
113
-
114
- # Structure to hold the sequence of events (Audio or Silence)
115
- # timeline = [ {'type': 'silence', 'duration': 1.0}, {'type': 'audio', 'file': 'path.mp3'}, ... ]
116
  timeline = []
117
 
118
  print(f"Session {session_id}: Detected {len(parts)} parts")
119
 
120
- # Phase 1: Analyze and schedule tasks
121
  for i, part in enumerate(parts):
122
  part = part.strip()
123
  if not part:
@@ -140,7 +136,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
140
  # الحالة 2: <break time="4s"/>
141
  elif part.startswith("<break") and "time=" in part:
142
  try:
143
- # [ACEE] FIXED REGEX: Removed double backslashes
144
  match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
145
  if match:
146
  pause_duration = float(match.group(1))
@@ -158,29 +153,22 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
158
  if pause_duration > 0:
159
  timeline.append({'type': 'silence', 'duration': pause_duration})
160
  else:
161
- # Generate unique filename for this segment
162
  temp_filename = f"temp_{session_id}_{i}.mp3"
163
  temp_files_cleanup.append(temp_filename)
164
-
165
- # Add to timeline placeholder
166
  timeline.append({'type': 'audio', 'file': temp_filename})
167
-
168
- # Add to generation queue
169
  segments_to_generate.append(
170
  generate_segment(part, voice, rate, pitch, temp_filename)
171
  )
172
 
173
- # Phase 2: Parallel Execution (Optimization)
174
  if segments_to_generate:
175
- print(f"Generating {len(segments_to_generate)} segments in parallel...")
176
  await asyncio.gather(*segments_to_generate)
177
 
178
- # Phase 3: Stitching (Assembly)
179
  final_audio = AudioSegment.empty()
180
 
181
  for item in timeline:
182
  if item['type'] == 'silence':
183
- print(f"Adding Silence: {item['duration']}s")
184
  final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
185
 
186
  elif item['type'] == 'audio':
@@ -189,19 +177,17 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
189
  segment = AudioSegment.from_mp3(item['file'])
190
  final_audio += segment
191
  else:
192
- print(f"Warning: Audio file missing {item['file']}")
193
  except Exception as e:
194
- print(f"Error merging segment {item['file']}: {e}")
195
 
196
- # الحفظ النهائي
197
  output_file = f"output_{session_id}.mp3"
198
  try:
199
  final_audio.export(output_file, format="mp3")
200
- except Exception as e:
201
- print(f"Export Error: {e}")
202
  return None
203
 
204
- # [ACEE] Cleanup: Delete unique temp files
205
  for f in temp_files_cleanup:
206
  if os.path.exists(f):
207
  try: os.remove(f)
@@ -212,12 +198,6 @@ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rat
212
  # ---------------------------------------------------------
213
  # 4. الواجهة
214
  # ---------------------------------------------------------
215
- EXAMPLES = [
216
- ["مرحباً [pause:2] هذا توقف مؤقت.", "رجل (مصري)", "محايد"],
217
- ['تجربة نمط SSML <break time="3s"/> ونعود.', "سيدة (مصرية)", "هادئ 😌"],
218
- ["واحد ... اثنين ... ثلاثة ... انطلاق!", "رجل (سعودي)", "متحمس 🎉"]
219
- ]
220
-
221
  css = """
222
  footer {visibility: hidden}
223
  .gradio-container {direction: rtl}
@@ -231,7 +211,15 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
231
  text_input = gr.Textbox(
232
  label="📝 النص",
233
  lines=5,
234
- placeholder="جرب: [pause:2] أو <break time=\"4s\"/> أو ... (سيتم تجاهل * و #)",
 
 
 
 
 
 
 
 
235
  text_align="right"
236
  )
237
 
@@ -245,7 +233,6 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
245
  label="الشعور", value="محايد"
246
  )
247
 
248
- # ✅ [ACEE] Default Activated Settings
249
  with gr.Accordion("⚙️ إعدادات", open=True):
250
  use_advanced = gr.Checkbox(label="يدوي", value=True)
251
  manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
@@ -256,13 +243,16 @@ with gr.Blocks(title="🎙️ TTS Pro Max", theme=gr.themes.Soft(), css=css) as
256
  with gr.Column(scale=1):
257
  audio_out = gr.Audio(label="النتيجة")
258
  gr.Markdown("""
259
- ### ℹ️ مميزات النظام (ACEE Optimized):
260
- - **تنظيف ذكي**: حذف تلقائي للرموز `*` و `#` والإيموجي.
261
- - **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`
262
- - **أداء عالي**: معالجة متوازية للنصوص الطويلة.
263
  """)
264
 
265
- btn.click(text_to_speech_edge, inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch], outputs=audio_out)
 
 
 
 
266
 
267
  if __name__ == "__main__":
268
  demo.launch()
 
3
  import asyncio
4
  import re
5
  import os
6
+ import uuid
7
  from pydub import AudioSegment
8
 
9
  # ---------------------------------------------------------
 
21
  # ---------------------------------------------------------
22
  # 2. وظائف المساعدة والتنظيف
23
  # ---------------------------------------------------------
24
+ def sanitize_text(text, ignored_chars_str):
25
  """
26
+ [ACEE] ENHANCED DYNAMIC SANITIZATION
27
+ تنظيف النص من الرموز التي يحددها المستخدم بالإضافة إلى الإيموجي.
 
28
  """
29
  if not text:
30
  return ""
31
 
32
+ # 1. تحليل قائمة الرموز المتجاهلة من المدخلات
33
+ # مثال المدخلات: "*, #, $" -> ["*", "#", "$"]
34
+ if ignored_chars_str:
35
+ # تقسيم النص بالفواصل أو المسافات
36
+ chars_to_remove = [char.strip() for char in ignored_chars_str.split(',') if char.strip()]
37
+
38
+ for char in chars_to_remove:
39
+ # استخدام replace العادي للأحرف البسيطة لتجنب مشاكل Regex الخاصة
40
+ text = text.replace(char, "")
41
 
42
  # 2. إزالة الإيموجي (نطاق Unicode واسع)
 
43
  emoji_pattern = re.compile("["
44
  u"\U0001F600-\U0001F64F" # Emoticons
45
  u"\U0001F300-\U0001F5FF" # Symbols & Pictographs
 
68
  # ---------------------------------------------------------
69
  # 3. المنطق الرئيسي
70
  # ---------------------------------------------------------
71
+ async def text_to_speech_edge(text, character, emotion, use_advanced, manual_rate, manual_pitch, ignored_chars):
72
+ # [ACEE] 1. Sanitize Input using the dynamic list
73
+ text = sanitize_text(text, ignored_chars)
74
 
75
  if not text or text.strip() == "":
76
  return None
77
 
78
+ # [ACEE] 2. Unique Session ID
79
  session_id = str(uuid.uuid4())
80
 
81
  # إعداد الصوت
 
86
  "رجل (سعودي)": "ar-SA-HamedNeural",
87
  "سيدة (سعودية)": "ar-SA-ZariyahNeural"
88
  }
 
 
89
  voice = voice_map.get(character, "ar-EG-SalmaNeural")
90
 
91
  # إعداد Rate & Pitch
 
108
 
109
  parts = re.split(pattern, text)
110
 
 
111
  segments_to_generate = []
112
  temp_files_cleanup = []
 
 
 
113
  timeline = []
114
 
115
  print(f"Session {session_id}: Detected {len(parts)} parts")
116
 
 
117
  for i, part in enumerate(parts):
118
  part = part.strip()
119
  if not part:
 
136
  # الحالة 2: <break time="4s"/>
137
  elif part.startswith("<break") and "time=" in part:
138
  try:
 
139
  match = re.search(r"time=[\"']([\d\.]+)s[\"']", part)
140
  if match:
141
  pause_duration = float(match.group(1))
 
153
  if pause_duration > 0:
154
  timeline.append({'type': 'silence', 'duration': pause_duration})
155
  else:
 
156
  temp_filename = f"temp_{session_id}_{i}.mp3"
157
  temp_files_cleanup.append(temp_filename)
 
 
158
  timeline.append({'type': 'audio', 'file': temp_filename})
 
 
159
  segments_to_generate.append(
160
  generate_segment(part, voice, rate, pitch, temp_filename)
161
  )
162
 
163
+ # معالجة متوازية
164
  if segments_to_generate:
 
165
  await asyncio.gather(*segments_to_generate)
166
 
167
+ # الدمج
168
  final_audio = AudioSegment.empty()
169
 
170
  for item in timeline:
171
  if item['type'] == 'silence':
 
172
  final_audio += AudioSegment.silent(duration=item['duration'] * 1000)
173
 
174
  elif item['type'] == 'audio':
 
177
  segment = AudioSegment.from_mp3(item['file'])
178
  final_audio += segment
179
  else:
180
+ print(f"Warning: Missing file {item['file']}")
181
  except Exception as e:
182
+ print(f"Merge Error: {e}")
183
 
 
184
  output_file = f"output_{session_id}.mp3"
185
  try:
186
  final_audio.export(output_file, format="mp3")
187
+ except:
 
188
  return None
189
 
190
+ # تنظيف
191
  for f in temp_files_cleanup:
192
  if os.path.exists(f):
193
  try: os.remove(f)
 
198
  # ---------------------------------------------------------
199
  # 4. الواجهة
200
  # ---------------------------------------------------------
 
 
 
 
 
 
201
  css = """
202
  footer {visibility: hidden}
203
  .gradio-container {direction: rtl}
 
211
  text_input = gr.Textbox(
212
  label="📝 النص",
213
  lines=5,
214
+ placeholder="اكتب نصك هنا... استخدم [pause:N] للتوقف",
215
+ text_align="right"
216
+ )
217
+
218
+ # [ACEE] New Field for Ignored Characters
219
+ ignored_chars_input = gr.Textbox(
220
+ label="🚫 أحرف للتجاهل (افصل بينها بفاصلة)",
221
+ value="*, #, _",
222
+ placeholder="مثال: *, #, $",
223
  text_align="right"
224
  )
225
 
 
233
  label="الشعور", value="محايد"
234
  )
235
 
 
236
  with gr.Accordion("⚙️ إعدادات", open=True):
237
  use_advanced = gr.Checkbox(label="يدوي", value=True)
238
  manual_rate = gr.Slider(0.5, 2.0, 0.7, label="السرعة")
 
243
  with gr.Column(scale=1):
244
  audio_out = gr.Audio(label="النتيجة")
245
  gr.Markdown("""
246
+ ### ℹ️ مميزات النظام:
247
+ - **قائمة التجاهل**: يمكنك الآن تحديد الرموز التي تريد حذفها (مثل `*`, `#`) من الحقل الجديد.
248
+ - **توقفات دقيقة**: دعم `[pause:N]`، `<break time="Ns"/>`، و `...`.
 
249
  """)
250
 
251
+ btn.click(
252
+ text_to_speech_edge,
253
+ inputs=[text_input, voice_selector, emotion_selector, use_advanced, manual_rate, manual_pitch, ignored_chars_input],
254
+ outputs=audio_out
255
+ )
256
 
257
  if __name__ == "__main__":
258
  demo.launch()