File size: 2,145 Bytes
05f09ca
 
4ebd109
4a1191e
05f09ca
a4dc04b
d173109
 
 
 
 
 
 
 
 
4a1191e
a4dc04b
4a1191e
a4dc04b
4a1191e
 
 
a4dc04b
 
 
 
4ebd109
a4dc04b
05f09ca
d173109
4a1191e
 
 
 
d173109
 
4a1191e
 
a4dc04b
4a1191e
d173109
a4dc04b
4a1191e
a4dc04b
 
 
 
 
 
 
4a1191e
a4dc04b
 
4a1191e
a4dc04b
 
05f09ca
4ebd109
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr

# Voice Mapping: UI display labels (Arabic and English) -> edge-tts voice IDs.
# Keys are the exact strings shown to / sent by the client; do not translate them.
VOICE_MAP = {
    "رجل (مصري)": "ar-EG-ShakirNeural",
    "سيدة (مصرية)": "ar-EG-SalmaNeural",
    "رجل (سعودي)": "ar-SA-HamedNeural",
    "سيدة (سعودية)": "ar-SA-ZariyahNeural",
    "English (US) M": "en-US-EricNeural",
    "English (US) F": "en-US-AriaNeural"
}

async def generate_speech(text, voice, emotion, is_symbol, rate, pitch):
    """Synthesize *text* to an MP3 file using edge-tts.

    Args:
        text: Text to speak. Empty or whitespace-only input yields None.
        voice: Either a display label (a VOICE_MAP key) or a raw edge-tts
            voice ID (a VOICE_MAP value). Anything else falls back to
            "ar-SA-HamedNeural".
        emotion: Accepted for API compatibility; not used by edge-tts here.
        is_symbol: Accepted for API compatibility; unused.
        rate: Rate string such as "+10%". Blank or non-string -> "+0%".
        pitch: Pitch string such as "+5Hz". Blank or non-string -> "+0Hz".

    Returns:
        Path to a temporary .mp3 file, or None for empty input.

    Raises:
        gr.Error: If synthesis fails (wraps the underlying exception).
    """
    # Guard clause: nothing to synthesize.
    if not text or not text.strip():
        return None

    # Fall back to neutral prosody when the caller passes blank/non-string values.
    final_rate = rate if isinstance(rate, str) and rate.strip() else "+0%"
    final_pitch = pitch if isinstance(pitch, str) and pitch.strip() else "+0Hz"

    # Voice selection: display label first, then raw voice ID, then default.
    if voice in VOICE_MAP:
        selected_voice = VOICE_MAP[voice]
    elif voice in VOICE_MAP.values():
        selected_voice = voice
    else:
        selected_voice = "ar-SA-HamedNeural"

    print(f"Generating: {len(text)} chars | {selected_voice} | {final_rate} | {final_pitch}")

    output_path = None
    try:
        # delete=False is deliberate: the caller (gradio) needs the file to
        # outlive this function; close it immediately so edge-tts can write it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as output_file:
            output_path = output_file.name

        communicate = edge_tts.Communicate(text, selected_voice, rate=final_rate, pitch=final_pitch)
        await communicate.save(output_path)
        return output_path
    except Exception as e:
        # BUGFIX: remove the orphaned temp file on failure (it was previously leaked).
        if output_path:
            try:
                os.remove(output_path)
            except OSError:
                pass
        print(f"ERROR: {str(e)}")
        raise gr.Error(f"TTS Error: {str(e)}")

# UI Definition using Blocks (Fixes TypeError)
with gr.Blocks(title="Natiq Pro API") as demo:
    # Inputs are hidden: this app is driven programmatically via the named API endpoint.
    with gr.Row(visible=False):
        text_in = gr.Textbox(label="Text")
        voice_in = gr.Textbox(label="Voice")
        emotion_in = gr.Textbox(label="Emotion", value="neutral")
        symbol_in = gr.Checkbox(label="Is Symbol", value=True)
        rate_in = gr.Textbox(label="Rate", value="+0%")
        pitch_in = gr.Textbox(label="Pitch", value="+0Hz")

    audio_out = gr.Audio(label="Output", type="filepath")
    generate_btn = gr.Button("Generate", visible=False)

    # Stable endpoint name so API clients can call /text_to_speech_edge.
    generate_btn.click(
        generate_speech,
        inputs=[text_in, voice_in, emotion_in, symbol_in, rate_in, pitch_in],
        outputs=[audio_out],
        api_name="text_to_speech_edge",
    )

if __name__ == "__main__":
    demo.queue().launch()