"""BubbleGuard audio safety checker.

Transcribes uploaded/recorded audio with the official openai-whisper
package (CPU), then scores the transcript with a pre-trained text
safety classifier, serving the result through a Gradio interface.
"""

import joblib
import numpy as np
import soundfile as sf
import whisper
import gradio as gr

# 1) Load your balanced text classifier.
# NOTE(review): assumed to be an sklearn-style pipeline exposing
# predict_proba over raw strings — confirm against training code.
text_clf = joblib.load("text_pipeline_balanced.joblib")

# 2) Load Whisper-Large-v2 via official OpenAI Whisper on CPU.
model = whisper.load_model("large-v2")  # or "base" for a smaller model


def classify(audio_path):
    """Transcribe an audio file and score the transcript for safety.

    Parameters
    ----------
    audio_path : str or None
        Path to the uploaded/recorded file (Gradio passes None when the
        user submits without providing audio).

    Returns
    -------
    tuple
        (transcript str, {"safe": p, "unsafe": p} dict, unsafe-probability str)
    """
    # Guard: Gradio sends None when no audio was supplied; without this
    # check whisper raises deep inside its audio loader.
    if not audio_path:
        return "", {"safe": 0.0, "unsafe": 0.0}, "n/a"

    # Beam-search transcription for higher accuracy than greedy decoding.
    result = model.transcribe(
        audio_path,
        beam_size=5,   # beam search for higher accuracy
        language="en",
    )
    txt = result["text"].strip()

    # Safety classification.
    # NOTE(review): column [1] is assumed to be the "unsafe" class —
    # verify against text_clf.classes_ ordering from training.
    proba = float(text_clf.predict_proba([txt])[0][1])
    label_probs = {"safe": 1 - proba, "unsafe": proba}
    unsafe_str = f"{proba:.2f}"

    return txt, label_probs, unsafe_str


# Gradio UI: one audio input, three outputs (transcript, label bar, raw score).
audio_input = gr.Audio(label="Upload or record audio", type="filepath")
transcript_out = gr.Textbox(label="Transcript")
probs_out = gr.Label(num_top_classes=2, label="Safety Probabilities")
unsafe_out = gr.Textbox(label="Unsafe Probability")

iface = gr.Interface(
    fn=classify,
    inputs=audio_input,
    outputs=[transcript_out, probs_out, unsafe_out],
    title="BubbleGuard Audio Safety Checker",
    description="Uses the official openai-whisper package for identical, CPU-only transcripts.",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside containers.
    iface.launch(server_name="0.0.0.0", server_port=7860)