import os
import gradio as gr
import librosa
import numpy as np
import ctranslate2
from faster_whisper import WhisperModel

# --- 1. MODEL CONFIGURATION AND CONVERSION ---
MODEL_NAME = "abiyo27/whisper-small-ewe-2"
CT2_MODEL_DIR = "whisper-small-ewe-2-ct2"

# One-time conversion: only runs at startup while the CTranslate2 output
# directory does not exist yet.
if not os.path.exists(CT2_MODEL_DIR):
    print(f"⏳ Conversion de {MODEL_NAME} au format CTranslate2 (int8)...")
    print("Cela prendra environ une minute au premier lancement.")
    # Load the Hugging Face checkpoint named by MODEL_NAME and write it to
    # CT2_MODEL_DIR with int8 quantization.
    converter = ctranslate2.converters.TransformersConverter(MODEL_NAME)
    converter.convert(output_dir=CT2_MODEL_DIR, quantization="int8")
    print("✅ Conversion terminée !")

# --- 2. MODEL LOADING (FASTER-WHISPER) ---
print("🚀 Chargement du modèle faster-whisper en mémoire...")
# The converted model is loaded once at import time and shared by every
# request handler below; it runs on CPU with int8 compute and two threads.
model = WhisperModel(
    CT2_MODEL_DIR,
    device="cpu",
    compute_type="int8",
    cpu_threads=2,
)

# --- 3. PROCESSING FUNCTIONS ---
def preprocess_audio(audio):
    """Convert a Gradio ``(sample_rate, samples)`` pair to 16 kHz mono float32.

    Args:
        audio: ``None``, or a ``(sample_rate, samples)`` tuple as delivered by
            a ``gr.Audio(type="numpy")`` component. ``samples`` may be 1-D
            (mono) or 2-D laid out as ``(samples, channels)``.

    Returns:
        A 1-D ``float32`` array, peak-normalised to the range [-1, 1] and
        sampled at 16 kHz, or ``None`` when there is nothing to process
        (``audio`` is ``None`` or holds zero samples).
    """
    if audio is None:
        return None
    sr, y = audio
    y = np.asarray(y, dtype=np.float32)
    # A zero-length recording would make np.max() raise; treat it the same
    # way as "no audio" so callers fall back to their default output.
    if y.size == 0:
        return None
    # Down-mix multi-channel input to mono. Without this, resampling would
    # run along the channel axis and the model would receive a 2-D array.
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Peak normalisation; pure silence (peak == 0) is left untouched to
    # avoid dividing by zero.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak
    # The transcription model is fed 16 kHz audio, so resample anything else.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)
    return y

def transcribe(audio, state=""):
    """Transcribe a complete recording (uploaded file or finished mic take).

    Args:
        audio: Value forwarded to ``preprocess_audio``.
        state: Text returned unchanged when ``preprocess_audio`` yields
            ``None``.

    Returns:
        The segment texts joined with single spaces and stripped of
        surrounding whitespace, or ``state`` when there is no audio.
    """
    samples = preprocess_audio(audio)
    if samples is None:
        return state

    # One-shot path: beam search of width 5, task pinned to "transcribe".
    segments, _ = model.transcribe(samples, beam_size=5, task="transcribe")

    # Consume the returned segments and stitch their text together.
    joined = " ".join(chunk.text for chunk in segments)
    return joined.strip()

def stream_transcribe(audio, state=""):
    """Transcribe one streamed microphone chunk, favouring speed.

    Args:
        audio: Value forwarded to ``preprocess_audio``.
        state: Text returned unchanged when ``preprocess_audio`` yields
            ``None``.

    Returns:
        The segment texts for this chunk joined with single spaces and
        stripped of surrounding whitespace, or ``state`` when there is no
        audio.
    """
    samples = preprocess_audio(audio)
    if samples is None:
        return state

    # Streaming path: beam_size=1 keeps decoding as cheap as possible.
    segments, _ = model.transcribe(samples, beam_size=1, task="transcribe")

    joined = " ".join(chunk.text for chunk in segments)
    return joined.strip()

# --- 4. GRADIO INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Ewe STT - Faster Whisper CPU")
    gr.Markdown("Transcription ultra-rapide optimisée pour processeur. Traduction automatique du français vers l'Ewe ou transcription directe.")

    with gr.Tabs():
        # Tab 1: uploaded file or regular (non-streaming) recording.
        with gr.TabItem("Upload ou Enregistrement"):
            with gr.Row():
                audio_input = gr.Audio(type="numpy", label="Audio (Fichier ou Micro)")
            with gr.Row():
                transcribe_btn = gr.Button("Transcrire", variant="primary")
            output_text = gr.Textbox(
                label="Transcription Ewe",
                placeholder="Le texte apparaîtra ici...",
            )

            # The button runs the one-shot handler; it is exposed through
            # the API under the name "predict".
            transcribe_btn.click(
                fn=transcribe,
                inputs=audio_input,
                outputs=output_text,
                api_name="predict",
            )

        # Tab 2: real-time streaming from the microphone.
        with gr.TabItem("Temps Réel (Streaming)"):
            gr.Markdown("*Note : Le streaming sur CPU gratuit reste expérimental, parlez clairement.*")
            stream_input = gr.Audio(
                type="numpy",
                sources=["microphone"],
                streaming=True,
                label="Microphone",
            )
            stream_output = gr.Textbox(label="Flux de transcription direct")

            # Each streamed chunk goes to stream_transcribe and its result
            # is written to the output box; the progress indicator is hidden.
            stream_input.stream(
                fn=stream_transcribe,
                inputs=stream_input,
                outputs=stream_output,
                show_progress="hidden",
            )

    gr.HTML("""
        <div style="text-align: center; color: #666; margin-top: 20px;">
            Modèle utilisé : <b>yawo stt-ewe-2</b> | Optimisation : <b>CTranslate2 (INT8)</b>
        </div>
    """)

if __name__ == "__main__":
    # Enable Gradio's request queue before launching. Author's note: the
    # queue matters for handling several requests without overloading the CPU.
    demo.queue().launch()