# Spaces: Baamtu / baamtu-tts
# Status: Sleeping
# File size: 5,841 Bytes

import io
import os
import wave

import gradio as gr
import numpy as np
import requests

# Base URL of the TTS API. The default is an ngrok tunnel address, which
# changes whenever the tunnel is restarted, so it can be overridden with the
# TTS_API_URL environment variable instead of editing this file. A trailing
# slash is stripped because callers build paths as f"{API_URL}/...".
API_URL = os.environ.get(
    "TTS_API_URL", "https://0608-44-220-51-89.ngrok-free.app"
).rstrip("/")

def check_api_health():
    """Check whether the TTS API service and its S3 storage are available.

    Sends a GET request to ``{API_URL}/health`` with a 5-second timeout.

    Returns:
        A ``(is_healthy, s3_available)`` tuple of booleans. ``is_healthy`` is
        True when the response is HTTP 200 and its JSON ``status`` field
        equals ``"healthy"``; ``s3_available`` is True when the
        ``s3_storage`` field equals ``"available"``. A failed request, a
        non-200 status, or a body that is not a JSON object all yield
        ``(False, False)``.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=5)
        if response.status_code != 200:
            return False, False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON
        # (e.g. an HTML error page served by a proxy in front of the API).
        print(f"Error checking API health: {str(e)}")
        return False, False

    # Valid JSON that is not an object (list, string, ...) has no .get().
    if not isinstance(data, dict):
        return False, False

    return data.get("status") == "healthy", data.get("s3_storage") == "available"

def _decode_wav(wav_bytes):
    """Decode 16-bit PCM WAV bytes into ``(sample_rate, float32 samples)``.

    Multi-channel audio is returned with shape ``(frames, channels)``; mono
    audio stays one-dimensional.

    Raises:
        wave.Error: If the bytes are not a readable WAV file.
        ValueError: If the samples are not 16 bits wide.
    """
    with wave.open(io.BytesIO(wav_bytes), 'rb') as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"Unsupported WAV sample width: {sample_width * 8} bits (expected 16)"
            )
        sample_rate = wav_file.getframerate()
        n_channels = wav_file.getnchannels()
        frames = wav_file.readframes(wav_file.getnframes())

    # WAV PCM data is little-endian; scale int16 samples into roughly [-1, 1].
    samples = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32767.0
    if n_channels > 1:
        # De-interleave so each column is one channel.
        samples = samples.reshape(-1, n_channels)
    return sample_rate, samples


def stream_text_to_speech(text, description, token):
    """Request speech generation from the TTS API and return playable audio.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is ignored.
        description: Free-text description of the desired voice.
        token: Authentication token forwarded to the API.

    Returns:
        ``None`` when there is no text to speak, otherwise a
        ``(sample_rate, samples)`` tuple where ``samples`` is a float32
        NumPy array decoded from the WAV body of the response.

    Raises:
        gr.Error: If the API health check fails, the request fails or returns
            an HTTP error status, or the response is not a 16-bit WAV file.
    """
    if not text or not text.strip():
        return None

    # Check if API is available
    is_healthy, _ = check_api_health()
    if not is_healthy:
        raise gr.Error("TTS API service is not available. Please ensure the FastAPI service is running.")

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token
    }

    try:
        # (connect, read) timeouts: fail fast if the host is unreachable, but
        # leave generous time for synthesis so the call cannot hang forever.
        response = requests.post(
            f"{API_URL}/tts/stream", json=payload, timeout=(5, 300)
        )
        # Surface HTTP errors (bad token, server failure) directly instead of
        # trying to parse an error body as WAV data.
        response.raise_for_status()
        return _decode_wav(response.content)
    except Exception as e:
        raise gr.Error(f"Request failed: {str(e)}") from e

def store_text_to_speech(text, description, token):
    """Request speech generation and storage from the TTS API.

    Unlike the streaming variant, failures are reported through the returned
    string rather than by raising, because the result is shown in a textbox.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected.
        description: Free-text description of the desired voice.
        token: Authentication token forwarded to the API.

    Returns:
        A human-readable status message: on success, the audio duration and
        the URL reported by the API; otherwise an error description.
    """
    if not text or not text.strip():
        return "Error: Text cannot be empty"

    # Check if API and S3 storage are available
    is_healthy, s3_available = check_api_health()
    if not is_healthy:
        return "Error: TTS API service is not available. Please ensure the FastAPI service is running."

    if not s3_available:
        return "Error: S3 storage is not available. Please check the API server configuration."

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token
    }

    try:
        # (connect, read) timeouts: fail fast if the host is unreachable, but
        # leave generous time for synthesis so the call cannot hang forever.
        response = requests.post(
            f"{API_URL}/tts/store", json=payload, timeout=(5, 300)
        )
        # Report HTTP errors (bad token, server failure) as such, rather than
        # as a missing-key error when reading the success fields below.
        response.raise_for_status()
        data = response.json()
        return f"✅ Audio generated and stored! Audio duration: {data['duration_seconds']:.2f}s\n\nURL: {data['url']}"

    except KeyError as e:
        return f"⚠️ Request failed: API response is missing field {e}"
    except Exception as e:
        return f"⚠️ Request failed: {str(e)}"

# Create the Gradio interface: one tab that plays the generated audio
# directly, and one that asks the API to store it and shows the resulting URL.
with gr.Blocks(title="Baamtu TTS") as demo:
    gr.Markdown("# Wolof Text-to-Speech by Baamtu")
    gr.Markdown("### Generate speech from text in Wolof. You can use the streaming or the storage option.")

    with gr.Tabs():
        with gr.TabItem("Streaming"):
            with gr.Row():
                with gr.Column(scale=2):
                    stream_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    # Masked input: the token is a secret and should not be
                    # readable on screen.
                    stream_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password"
                    )

                with gr.Column(scale=1):
                    stream_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female speaks in a very distant-sounding voice, with a very noisy background, and a monotone delivery, speaking slowly."
                    )
                    stream_btn = gr.Button("Generate Audio", variant="primary")

            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy",
                streaming=True,
                interactive=False
            )

            stream_btn.click(
                fn=stream_text_to_speech,
                inputs=[stream_text_input, stream_voice_desc, stream_token],
                outputs=audio_output
            )

        with gr.TabItem("Storage"):
            # NOTE: this check runs once, while the UI is being built, so the
            # warning reflects the API state at startup only. The handler
            # re-checks availability on every click.
            _, s3_available = check_api_health()
            if not s3_available:
                gr.Markdown("⚠️ **S3 storage is not available.** Please check the API server configuration.")

            with gr.Row():
                with gr.Column(scale=2):
                    store_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    # Masked input: the token is a secret and should not be
                    # readable on screen.
                    store_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password"
                    )

                with gr.Column(scale=1):
                    store_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female's speech is very close-sounding and very clear. She speaks fast with an expressive and animated voice."
                    )
                    store_btn = gr.Button("Generate & Store in S3", variant="primary")

            store_result = gr.Textbox(
                label="Storage Result",
                lines=4,
                placeholder="Generated audio link will appear here...",
                interactive=False
            )

            store_btn.click(
                fn=store_text_to_speech,
                inputs=[store_text_input, store_voice_desc, store_token],
                outputs=store_result
            )

# Launch the app only when this file is executed as a script, so that
# importing the module (e.g. from tests or tooling) does not start a server.
if __name__ == "__main__":
    demo.launch()