EmadAgha commited on
Commit
4a1191e
·
verified ·
1 Parent(s): d173109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -58
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import gradio as gr
2
  import edge_tts
3
- import asyncio
4
  import tempfile
5
  import os
 
6
 
7
- # Map readable labels to Edge TTS internal IDs
8
  VOICE_MAP = {
9
  "رجل (مصري)": "ar-EG-ShakirNeural",
10
  "سيدة (مصرية)": "ar-EG-SalmaNeural",
@@ -14,76 +14,68 @@ VOICE_MAP = {
14
  "English (US) F": "en-US-AriaNeural"
15
  }
16
 
17
- async def text_to_speech_edge(text, voice, emotion, is_symbol, rate, pitch):
18
- """
19
- Generates speech using Edge TTS.
20
- Args:
21
- text (str): The text to speak.
22
- voice (str): Voice ID or Label.
23
- emotion (str): Ignored (kept for compatibility).
24
- is_symbol (bool): Ignored (kept for compatibility).
25
- rate (str): Speed change (e.g., "+0%", "+20%").
26
- pitch (str): Pitch change (e.g., "+0Hz", "+2st").
27
- """
28
-
29
- # 1. Validation
30
  if not text or not text.strip():
31
  return None
32
-
33
- # 2. Voice Resolution
34
- # Check if the input is a known Key (Label) or Value (ID)
35
- selected_voice = "ar-SA-HamedNeural" # Default
 
 
 
 
 
36
  if voice in VOICE_MAP:
37
  selected_voice = VOICE_MAP[voice]
38
  elif voice in VOICE_MAP.values():
39
  selected_voice = voice
40
 
41
- # 3. Parameter Sanitization
42
- # EdgeTTS crashes if rate/pitch are None or empty strings.
43
- # We ensure they always have a valid default value.
44
- final_rate = rate if rate and isinstance(rate, str) and len(rate.strip()) > 0 else "+0%"
45
- final_pitch = pitch if pitch and isinstance(pitch, str) and len(pitch.strip()) > 0 else "+0Hz"
46
-
47
- print(f"Generating: TextLen={len(text)}, Voice={selected_voice}, Rate={final_rate}, Pitch={final_pitch}")
48
-
49
- # 4. Temp File Creation
50
- # delete=False is required for Gradio to serve the file after the function returns
51
- output_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
52
- output_path = output_file.name
53
- output_file.close()
54
 
55
  try:
56
- # 5. Generation
 
 
 
 
 
57
  communicate = edge_tts.Communicate(text, selected_voice, rate=final_rate, pitch=final_pitch)
58
  await communicate.save(output_path)
59
- return output_path
60
 
61
- except Exception as e:
62
- print(f"CRITICAL ERROR: {str(e)}")
63
- # Return None or raise a Gradio Error to notify the frontend
64
- raise gr.Error(f"Generation Failed: {str(e)}")
65
-
66
- # Define the Interface
67
- # The order of inputs MUST match the array sent from React:
68
- # [text, voice, emotion, is_symbol, rate, pitch]
69
- inputs = [
70
- gr.Textbox(label="Text"),
71
- gr.Dropdown(label="Voice", choices=list(VOICE_MAP.keys()) + list(VOICE_MAP.values())),
72
- gr.Textbox(label="Emotion", value="neutral"),
73
- gr.Checkbox(label="Is Symbol", value=True),
74
- gr.Textbox(label="Rate", value="+0%"),
75
- gr.Textbox(label="Pitch", value="+0Hz")
76
- ]
77
 
78
- outputs = gr.Audio(label="Generated Audio", type="filepath")
 
 
 
79
 
80
- demo = gr.Interface(
81
- fn=text_to_speech_edge,
82
- inputs=inputs,
83
- outputs=outputs,
84
- title="Natiq Pro API",
85
- allow_flagging="never"
86
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  if __name__ == "__main__":
89
  demo.queue().launch()
 
1
  import gradio as gr
2
  import edge_tts
 
3
  import tempfile
4
  import os
5
+ import asyncio
6
 
7
+ # خريطة الأصوات
8
  VOICE_MAP = {
9
  "رجل (مصري)": "ar-EG-ShakirNeural",
10
  "سيدة (مصرية)": "ar-EG-SalmaNeural",
 
14
  "English (US) F": "en-US-AriaNeural"
15
  }
16
 
17
async def generate_speech(text, voice, emotion, is_symbol, rate, pitch):
    """Generate an MP3 file from *text* using Edge TTS.

    Args:
        text: Text to synthesize; empty/whitespace-only input returns None.
        voice: Either a display label present in VOICE_MAP or a raw
            Edge TTS voice ID (e.g. "ar-EG-ShakirNeural").
        emotion: Ignored; kept so the signature matches the frontend payload.
        is_symbol: Ignored; kept so the signature matches the frontend payload.
        rate: Speed adjustment string (e.g. "+0%", "+20%"); falls back to "+0%".
        pitch: Pitch adjustment string (e.g. "+0Hz"); falls back to "+0Hz".

    Returns:
        Filesystem path of the generated MP3, or None for empty input.

    Raises:
        gr.Error: If Edge TTS generation fails.
    """
    # 1. Basic input validation.
    if not text or not text.strip():
        return None

    # 2. Sanitize defaults: Edge TTS crashes if rate/pitch arrive as
    #    None or empty strings from the frontend, so fall back to neutral values.
    final_rate = rate if isinstance(rate, str) and rate.strip() else "+0%"
    final_pitch = pitch if isinstance(pitch, str) and pitch.strip() else "+0Hz"

    # 3. Resolve the voice identity: map a known label to its ID, accept a
    #    raw ID as-is, otherwise use a default Arabic voice.
    selected_voice = "ar-SA-HamedNeural"  # default voice
    if voice in VOICE_MAP:
        selected_voice = VOICE_MAP[voice]
    elif voice in VOICE_MAP.values():
        selected_voice = voice

    print(f"Processing: TextLen={len(text)}, Voice={selected_voice}, Rate={final_rate}, Pitch={final_pitch}")

    # 4. Create the temp file; delete=False is required so Gradio can still
    #    serve the file after this function returns.
    output_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    output_path = output_file.name
    output_file.close()

    try:
        # 5. Generate the audio with Edge TTS.
        communicate = edge_tts.Communicate(text, selected_voice, rate=final_rate, pitch=final_pitch)
        await communicate.save(output_path)
        return output_path
    except Exception as e:
        # Fix: the original leaked the orphaned temp file on failure —
        # remove it before surfacing the error.
        try:
            os.remove(output_path)
        except OSError:
            pass
        print(f"Error generating speech: {str(e)}")
        # Re-raise as a Gradio error so the frontend is notified.
        raise gr.Error(f"TTS Error: {str(e)}")
53
 
54
# Use Blocks instead of Interface to avoid issues with newer Gradio versions.
with gr.Blocks(title="Natiq Pro API") as demo:
    gr.Markdown("# Natiq Pro API Endpoints")

    # Define the inputs (the order must match what the React frontend sends).
    with gr.Row(visible=False):  # hidden because this app is used as an API only
        input_text = gr.Textbox(label="Text")
        input_voice = gr.Textbox(label="Voice")
        input_emotion = gr.Textbox(label="Emotion", value="neutral")
        input_symbol = gr.Checkbox(label="Is Symbol", value=True)
        input_rate = gr.Textbox(label="Rate", value="+0%")
        input_pitch = gr.Textbox(label="Pitch", value="+0Hz")

    output_audio = gr.Audio(label="Generated Audio", type="filepath")

    btn = gr.Button("Generate", visible=False)

    # Wire the handler to the button and declare the API name explicitly:
    # api_name="text_to_speech_edge" is the key the React app looks up.
    btn.click(
        fn=generate_speech,
        inputs=[input_text, input_voice, input_emotion, input_symbol, input_rate, input_pitch],
        outputs=[output_audio],
        api_name="text_to_speech_edge"
    )
79
 
80
  if __name__ == "__main__":
81
  demo.queue().launch()