import gradio as gr
import requests
import numpy as np
import io
import wave
# API endpoint
API_URL = "https://0608-44-220-51-89.ngrok-free.app"
def check_api_health():
    """Check whether the TTS API service and its S3 storage are available.

    Sends a GET request to ``{API_URL}/health`` with a 5-second timeout.

    Returns:
        tuple[bool, bool]: ``(is_healthy, s3_available)``. ``is_healthy`` is
        True when the endpoint answers HTTP 200 with a JSON object whose
        ``"status"`` is ``"healthy"``; ``s3_available`` is True when that
        object's ``"s3_storage"`` is ``"available"``. A network error, a
        non-200 status, or a body that is not a JSON object yields
        ``(False, False)``.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=5)
        if response.status_code != 200:
            return False, False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass, so a non-JSON 200
        # body reports "unhealthy" instead of crashing the caller.
        print(f"Error checking API health: {str(e)}")
        return False, False

    if not isinstance(data, dict):
        # Valid JSON that is not an object cannot carry the status fields.
        return False, False
    return data.get("status") == "healthy", data.get("s3_storage") == "available"
def _decode_wav(wav_bytes):
    """Decode in-memory 16-bit PCM WAV bytes into ``(sample_rate, samples)``."""
    with wave.open(io.BytesIO(wav_bytes), 'rb') as wav_file:
        sample_rate = wav_file.getframerate()
        n_channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        frames = wav_file.readframes(wav_file.getnframes())

    if sample_width != 2:
        # The int16 reinterpretation below is only valid for 2-byte samples;
        # any other width would silently decode to noise.
        raise ValueError(
            f"Unsupported WAV sample width: {sample_width * 8}-bit (expected 16-bit PCM)"
        )

    # WAV PCM data is little-endian regardless of the host byte order.
    samples = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32767.0
    if n_channels > 1:
        # Frames are interleaved per channel; expose them as (samples, channels).
        samples = samples.reshape(-1, n_channels)
    return sample_rate, samples


def stream_text_to_speech(text, description, token):
    """Request speech generation from the TTS API and return playable audio.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is a no-op.
        description: Voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        ``None`` when there is no text to speak; otherwise a
        ``(sample_rate, samples)`` tuple where ``samples`` is a float32 numpy
        array obtained by dividing the 16-bit PCM samples by 32767.0
        (1-D for mono, ``(samples, channels)`` for multi-channel audio).

    Raises:
        gr.Error: If the health check fails, the API answers with an HTTP
            error status, or the response cannot be decoded as 16-bit WAV.
    """
    if not text or not text.strip():
        return None

    # Check if API is available
    is_healthy, _ = check_api_health()
    if not is_healthy:
        raise gr.Error("TTS API service is not available. Please ensure the FastAPI service is running.")

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        # NOTE(review): no timeout is set; expected generation time is not
        # visible from this file, so none is imposed here.
        response = requests.post(f"{API_URL}/tts/stream", json=payload)
        # Report HTTP failures (e.g. rejected token, server error) directly
        # instead of letting the error body fail inside the WAV parser.
        response.raise_for_status()
        return _decode_wav(response.content)
    except Exception as e:
        raise gr.Error(f"Request failed: {str(e)}") from e
def store_text_to_speech(text, description, token):
    """Request speech generation and storage from the TTS API.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected.
        description: Voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        str: A human-readable status message. On success it contains the
        audio duration and URL taken from the ``duration_seconds`` and
        ``url`` fields of the API's JSON response; on any failure it is an
        error message (this function does not raise).
    """
    if not text or not text.strip():
        return "Error: Text cannot be empty"

    # Check if API and S3 storage are available
    is_healthy, s3_available = check_api_health()
    if not is_healthy:
        return "Error: TTS API service is not available. Please ensure the FastAPI service is running."
    if not s3_available:
        return "Error: S3 storage is not available. Please check the API server configuration."

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        response = requests.post(f"{API_URL}/tts/store", json=payload)
        # Report HTTP failures (e.g. rejected token, server error) as such
        # rather than as a missing-key error on the error body.
        response.raise_for_status()
        data = response.json()
        return f"✅ Audio generated and stored! Audio duration: {data['duration_seconds']:.2f}s\n\nURL: {data['url']}"
    except KeyError as e:
        # A successful status whose body lacks an expected field.
        return f"⚠️ Request failed: unexpected API response (missing field {e})"
    except Exception as e:
        return f"⚠️ Request failed: {str(e)}"
# Create the Gradio interface: one tab that returns audio for in-page playback
# and one tab that asks the API to store the audio and returns its URL.
with gr.Blocks(title="Baamtu TTS") as demo:
    gr.Markdown("# Wolof Text-to-Speech by Baamtu")
    gr.Markdown("### Generate speech from text in Wolof. You can use the streaming or the storage option.")

    with gr.Tabs():
        with gr.TabItem("Streaming"):
            with gr.Row():
                with gr.Column(scale=2):
                    stream_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4,
                    )
                    # Mask the token so the secret is not shown in clear text.
                    stream_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password",
                    )
                with gr.Column(scale=1):
                    stream_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female speaks in a very distant-sounding voice, with a very noisy background, and a monotone delivery, speaking slowly.",
                    )
            stream_btn = gr.Button("Generate Audio", variant="primary")
            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy",
                streaming=True,
                interactive=False,
            )
            stream_btn.click(
                fn=stream_text_to_speech,
                inputs=[stream_text_input, stream_voice_desc, stream_token],
                outputs=audio_output,
            )

        with gr.TabItem("Storage"):
            # Evaluated once while the UI is being built, so this warning
            # reflects S3 availability at startup only.
            _, s3_available = check_api_health()
            if not s3_available:
                gr.Markdown("⚠️ **S3 storage is not available.** Please check the API server configuration.")
            with gr.Row():
                with gr.Column(scale=2):
                    store_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4,
                    )
                    # Mask the token so the secret is not shown in clear text.
                    store_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password",
                    )
                with gr.Column(scale=1):
                    store_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female's speech is very close-sounding and very clear. She speaks fast with an expressive and animated voice.",
                    )
            store_btn = gr.Button("Generate & Store in S3", variant="primary")
            store_result = gr.Textbox(
                label="Storage Result",
                lines=4,
                placeholder="Generated audio link will appear here...",
                interactive=False,
            )
            store_btn.click(
                fn=store_text_to_speech,
                inputs=[store_text_input, store_voice_desc, store_token],
                outputs=store_result,
            )
# Launch the app (starts the Gradio server for the `demo` Blocks defined above).
demo.launch()