# File size: 1,628 Bytes
# Revisions: 83a259c, d663b53
import os
import tempfile

import gradio as gr
import requests

# Hugging Face Inference API endpoints, keyed by the language names used in the UI.
MODELS = {
    "English": "https://api-inference.huggingface.co/models/facebook/mms-tts-eng",
    "Yoruba": "https://api-inference.huggingface.co/models/facebook/mms-tts-yor",
}

# Read the access token from the environment.
hf_token = os.getenv("HF_TOKEN")
# Send an Authorization header only when a token is actually configured;
# formatting a missing token would produce the literal value "Bearer None".
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

def text_to_speech(text, language):
    """Synthesize speech for ``text`` using the hosted model for ``language``.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input is
            treated as "nothing to do".
        language: A key of ``MODELS`` (for example ``"English"`` or
            ``"Yoruba"``).

    Returns:
        The path of a file containing the audio bytes returned by the API, or
        ``None`` when there is no text to synthesize.

    Raises:
        gr.Error: If ``language`` is not a known model key, or if the API
            request fails (connection problem, timeout, or HTTP error status).
    """
    # Guard against None as well as blank text so the handler never crashes
    # on a missing value.
    if not text or not text.strip():
        return None

    api_url = MODELS.get(language)
    if api_url is None:
        # Surface a readable message instead of a raw KeyError traceback.
        raise gr.Error(f"Unsupported language: {language!r}")

    try:
        # A timeout keeps a stalled request from blocking the handler forever.
        response = requests.post(
            api_url,
            headers=headers,
            json={"inputs": text},
            timeout=60,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        # gr.Error is shown to the user as an error popup in the UI.
        raise gr.Error(f"API Error: {e}") from e

    # Write each result to its own temporary file: a single shared
    # "output.wav" would let concurrent requests overwrite each other's audio.
    # delete=False because the file must outlive this function for the caller
    # to read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(response.content)
        output_file = f.name

    # Return only the file path (single value for the single audio output).
    return output_file

# Build the UI: a text box and a language selector feeding one audio output.
with gr.Blocks() as demo:
    # The heading starts with the Nigerian flag (regional indicators N + G).
    # It is written with escapes so the emoji cannot be garbled again by an
    # encoding round-trip of this source file.
    gr.Markdown("# \U0001F1F3\U0001F1EC English & Yoruba Text-to-Speech")

    with gr.Row():
        txt_input = gr.Textbox(label="Enter Text", placeholder="Type something here...")
        # Choices come from MODELS so the dropdown always matches the
        # languages text_to_speech can look up.
        lang_dropdown = gr.Dropdown(choices=list(MODELS), value="English", label="Select Language")

    btn = gr.Button("Generate Audio")
    audio_out = gr.Audio(label="Output Audio")

    # Inputs map positionally onto text_to_speech(text, language); its single
    # return value (a file path or None) feeds the audio component.
    btn.click(fn=text_to_speech, inputs=[txt_input, lang_dropdown], outputs=audio_out)

demo.launch()