File size: 5,841 Bytes
32539d5
 
 
 
 
 
 
a254fde
32539d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import io
import os
import wave

import gradio as gr
import numpy as np
import requests

# Base URL of the TTS API (no trailing slash). Set the TTS_API_URL environment
# variable to point the app at a different server without editing this file;
# the default is the ngrok tunnel address this app was originally written
# against.
API_URL = (
    os.environ.get("TTS_API_URL") or "https://0608-44-220-51-89.ngrok-free.app"
).rstrip("/")

def check_api_health():
    """Check whether the TTS API service and its S3 storage are available.

    Sends a GET request to ``{API_URL}/health`` with a 5-second timeout and
    inspects the JSON body of the response.

    Returns:
        A ``(is_healthy, s3_available)`` tuple of booleans. ``is_healthy`` is
        True when the response status is 200 and the ``"status"`` field equals
        ``"healthy"``; ``s3_available`` is True when the ``"s3_storage"`` field
        equals ``"available"``. Any request failure, non-200 status, or
        unparsable body yields ``(False, False)``.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=5)
        if response.status_code != 200:
            return False, False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not JSON (e.g. an
        # HTML page served by a proxy in front of the API).
        print(f"Error checking API health: {str(e)}")
        return False, False

    if not isinstance(data, dict):
        # A JSON array/scalar has no .get(); treat it as an unhealthy service
        # instead of crashing the caller with AttributeError.
        print("Error checking API health: unexpected response format")
        return False, False

    return data.get("status") == "healthy", data.get("s3_storage") == "available"

def _decode_wav(wav_bytes):
    """Decode in-memory WAV bytes into ``(sample_rate, float32 samples)``.

    Samples are scaled to roughly [-1.0, 1.0]. Mono audio is returned as a
    1-D array; multi-channel audio is de-interleaved to ``(frames, channels)``.

    Raises:
        wave.Error: if the bytes are not a WAV file the ``wave`` module accepts.
        ValueError: if the sample width is not 1, 2 or 4 bytes.
    """
    with wave.open(io.BytesIO(wav_bytes), 'rb') as wav_file:
        sample_rate = wav_file.getframerate()
        n_channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        frames = wav_file.readframes(wav_file.getnframes())

    if sample_width == 2:
        samples = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32767.0
    elif sample_width == 1:
        # 8-bit PCM WAV samples are unsigned and centred on 128.
        samples = (np.frombuffer(frames, dtype=np.uint8).astype(np.float32) - 128.0) / 127.0
    elif sample_width == 4:
        samples = np.frombuffer(frames, dtype=np.int32).astype(np.float32) / 2147483647.0
    else:
        raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

    if n_channels > 1:
        # Frames are interleaved per channel; expose them as (frames, channels).
        samples = samples.reshape(-1, n_channels)

    return sample_rate, samples


def stream_text_to_speech(text, description, token):
    """Request speech generation from the TTS API and return playable audio.

    Posts ``text``, ``description`` and ``token`` as JSON to
    ``{API_URL}/tts/stream`` and decodes the response body as a WAV file.

    Args:
        text: Text to synthesise. Empty or whitespace-only text is ignored.
        description: Free-text voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        ``None`` when ``text`` is empty or blank, otherwise a
        ``(sample_rate, samples)`` tuple where ``samples`` is a float32 NumPy
        array (1-D for mono, ``(frames, channels)`` otherwise).

    Raises:
        gr.Error: if the health check fails, the API answers with an HTTP
            error status, or the response body cannot be decoded as WAV.
    """
    if not text or not text.strip():
        return None

    # Check if API is available
    is_healthy, _ = check_api_health()
    if not is_healthy:
        raise gr.Error("TTS API service is not available. Please ensure the FastAPI service is running.")

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token
    }

    try:
        response = requests.post(f"{API_URL}/tts/stream", json=payload)
        # Surface 4xx/5xx responses as HTTP errors rather than letting the
        # WAV parser fail on an error body with an unrelated message.
        response.raise_for_status()
        return _decode_wav(response.content)
    except Exception as e:
        raise gr.Error(f"Request failed: {str(e)}") from e

def store_text_to_speech(text, description, token):
    """Request speech generation and storage from the TTS API.

    Posts ``text``, ``description`` and ``token`` as JSON to
    ``{API_URL}/tts/store`` and formats the ``duration_seconds`` and ``url``
    fields of the JSON response for display.

    Args:
        text: Text to synthesise. Empty or whitespace-only text is rejected.
        description: Free-text voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        A human-readable status string: a success message with the audio
        duration and URL, or an error message. This function reports every
        failure through its return value and does not raise.
    """
    if not text or not text.strip():
        return "Error: Text cannot be empty"

    # Check if API and S3 storage are available
    is_healthy, s3_available = check_api_health()
    if not is_healthy:
        return "Error: TTS API service is not available. Please ensure the FastAPI service is running."

    if not s3_available:
        return "Error: S3 storage is not available. Please check the API server configuration."

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token
    }

    try:
        response = requests.post(f"{API_URL}/tts/store", json=payload)
        # Report 4xx/5xx responses as HTTP errors instead of failing later on
        # fields that an error body does not contain.
        response.raise_for_status()
        data = response.json()
        return f"✅ Audio generated and stored! Audio duration: {data['duration_seconds']:.2f}s\n\nURL: {data['url']}"

    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, not an object, or lacked/mistyped expected fields.
        return f"⚠️ Request failed: unexpected API response ({e!r})"
    except Exception as e:
        return f"⚠️ Request failed: {str(e)}"

# Create the Gradio interface: one tab that plays generated audio directly and
# one tab that asks the API to store the audio and returns its URL.
with gr.Blocks(title="Baamtu TTS") as demo:
    gr.Markdown("# Wolof Text-to-Speech by Baamtu")
    gr.Markdown("### Generate speech from text in Wolof. You can use the streaming or the storage option.")

    with gr.Tabs():
        with gr.TabItem("Streaming"):
            with gr.Row():
                with gr.Column(scale=2):
                    stream_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    stream_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1
                    )

                with gr.Column(scale=1):
                    stream_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female speaks in a very distant-sounding voice, with a very noisy background, and a monotone delivery, speaking slowly."
                    )
                    stream_btn = gr.Button("Generate Audio", variant="primary")

            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy",
                streaming=True,
                interactive=False
            )

            # Input order must match stream_text_to_speech(text, description, token).
            stream_btn.click(
                fn=stream_text_to_speech,
                inputs=[stream_text_input, stream_voice_desc, stream_token],
                outputs=audio_output
            )

        with gr.TabItem("Storage"):
            # Evaluated once while the UI is being built, so this warning
            # reflects S3 availability at start-up only; store_text_to_speech
            # re-checks on every request.
            _, s3_available = check_api_health()
            if not s3_available:
                gr.Markdown("⚠️ **S3 storage is not available.** Please check the API server configuration.")

            with gr.Row():
                with gr.Column(scale=2):
                    store_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    store_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1
                    )

                with gr.Column(scale=1):
                    store_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female's speech is very close-sounding and very clear. She speaks fast with an expressive and animated voice."
                    )
                    store_btn = gr.Button("Generate & Store in S3", variant="primary")

            store_result = gr.Textbox(
                label="Storage Result",
                lines=4,
                placeholder="Generated audio link will appear here...",
                interactive=False
            )

            # Input order must match store_text_to_speech(text, description, token).
            store_btn.click(
                fn=store_text_to_speech,
                inputs=[store_text_input, store_voice_desc, store_token],
                outputs=store_result
            )

# Launch the app only when run as a script, so importing this module (for
# tests or tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()