"""BubbleGuard audio safety checker.

Transcribes uploaded/recorded audio with the official openai-whisper
package (CPU), then scores the transcript with a pre-trained text
safety classifier, serving the result through a Gradio interface.
"""

import joblib
import numpy as np
import soundfile as sf
import whisper
import gradio as gr

# 1) Load your balanced text classifier.
# NOTE(review): assumed to be an sklearn-style pipeline exposing
# predict_proba over raw strings — confirm against training code.
text_clf = joblib.load("text_pipeline_balanced.joblib")

# 2) Load Whisper-Large-v2 via official OpenAI Whisper on CPU.
model = whisper.load_model("large-v2")  # or "base" for a smaller model


def classify(audio_path):
    """Transcribe an audio file and score the transcript for safety.

    Parameters
    ----------
    audio_path : str or None
        Path to the uploaded/recorded file (Gradio passes None when the
        user submits without providing audio).

    Returns
    -------
    tuple
        (transcript str, {"safe": p, "unsafe": p} dict, unsafe-probability str)
    """
    # Guard: Gradio sends None when no audio was supplied; without this
    # check whisper raises deep inside its audio loader.
    if not audio_path:
        return "", {"safe": 0.0, "unsafe": 0.0}, "n/a"

    # Beam-search transcription for higher accuracy than greedy decoding.
    result = model.transcribe(
        audio_path,
        beam_size=5,   # beam search for higher accuracy
        language="en",
    )
    txt = result["text"].strip()

    # Safety classification.
    # NOTE(review): column [1] is assumed to be the "unsafe" class —
    # verify against text_clf.classes_ ordering from training.
    proba = float(text_clf.predict_proba([txt])[0][1])
    label_probs = {"safe": 1 - proba, "unsafe": proba}
    unsafe_str = f"{proba:.2f}"

    return txt, label_probs, unsafe_str


# Gradio UI: one audio input, three outputs (transcript, label bar, raw score).
audio_input = gr.Audio(label="Upload or record audio", type="filepath")
transcript_out = gr.Textbox(label="Transcript")
probs_out = gr.Label(num_top_classes=2, label="Safety Probabilities")
unsafe_out = gr.Textbox(label="Unsafe Probability")

iface = gr.Interface(
    fn=classify,
    inputs=audio_input,
    outputs=[transcript_out, probs_out, unsafe_out],
    title="BubbleGuard Audio Safety Checker",
    description="Uses the official openai-whisper package for identical, CPU-only transcripts.",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside containers.
    iface.launch(server_name="0.0.0.0", server_port=7860)