EmadAgha committed on
Commit
05f09ca
·
verified ·
1 Parent(s): 967d797

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import edge_tts
3
+ import tempfile
4
+ import asyncio
5
+
6
# Frontend voice labels (Arabic) mapped to Microsoft Edge TTS voice IDs.
VOICES = dict([
    ("رجل (مصري)", "ar-EG-ShakirNeural"),
    ("سيدة (مصرية)", "ar-EG-SalmaNeural"),
    ("رجل (سعودي)", "ar-SA-HamedNeural"),
    ("سيدة (سعودية)", "ar-SA-ZariyahNeural"),
])
13
+
14
async def generate_audio(text, voice_label, emotion_ignored, advanced_ignored, rate_ignored, pitch_ignored):
    """Synthesize *text* with Edge TTS and return the path to a temp MP3 file.

    Returns None for empty or whitespace-only input. The emotion/advanced/
    rate/pitch arguments exist only so the signature matches the payload the
    frontend sends; rate and pitch are applied client-side (Web Audio API)
    per the original author's note, so they are deliberately ignored here.
    """
    # Reject empty / whitespace-only input early.
    if not text or not text.strip():
        return None

    # Unknown labels fall back to the Saudi male voice.
    selected_voice = VOICES.get(voice_label, "ar-SA-HamedNeural")

    tts = edge_tts.Communicate(text, selected_voice)

    # Reserve a temp file path; delete=False keeps the file on disk after the
    # handle closes so it can be returned to the caller as a filepath.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        output_path = handle.name

    await tts.save(output_path)
    return output_path
34
+
35
# Assemble the Gradio interface.
with gr.Blocks() as demo:
    # Widget order mirrors the request payload emitted by the frontend.
    with gr.Row():
        text_input = gr.Textbox(label="Text")
        voice_input = gr.Textbox(label="Voice")
        emotion_input = gr.Textbox(label="Emotion")
        advanced_input = gr.Checkbox(label="Is Advanced")
        rate_input = gr.Number(label="Rate")
        pitch_input = gr.Textbox(label="Pitch")

    audio_output = gr.Audio(label="Generated Audio", type="filepath")

    generate_btn = gr.Button("Generate")

    # Collect the click inputs once; order must match generate_audio's
    # parameter order.
    request_fields = [
        text_input,
        voice_input,
        emotion_input,
        advanced_input,
        rate_input,
        pitch_input,
    ]

    # api_name is a public contract: the frontend (audioService.ts) calls
    # the endpoint by this exact name.
    generate_btn.click(
        fn=generate_audio,
        inputs=request_fields,
        outputs=audio_output,
        api_name="text_to_speech_edge",
    )
57
+
58
# CRITICAL: queue() enables parallel request processing.
# The frontend fires up to 10 requests at once; default_concurrency_limit=20
# lets the backend run up to 20 events concurrently, with max_size=40
# presumably bounding how many requests may wait in the queue.
demo.queue(max_size=40, default_concurrency_limit=20).launch()