Spaces:
Sleeping
Sleeping
File size: 1,542 Bytes
0853d17 84897bd 10f740b 2230468 0853d17 44875df 0853d17 176ca41 0853d17 c85978f 44875df 10f740b 176ca41 10f740b 44875df 10f740b 2230468 44875df 2230468 44875df 2230468 5dc2f59 2230468 44875df 14f29a1 2230468 752ac49 44875df 0853d17 176ca41 0853d17 2230468 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import joblib
import numpy as np
import soundfile as sf
import whisper
import gradio as gr
# 1) Load the balanced text classifier used to score transcripts.
# NOTE: joblib.load unpickles the file, so this artifact must come from a
# trusted source (it is expected to ship alongside this script).
text_clf = joblib.load("text_pipeline_balanced.joblib")

# 2) Load Whisper large-v2 via the official openai-whisper package on CPU.
# The device is pinned explicitly: without it, load_model() selects CUDA
# whenever a GPU is visible, which contradicts the CPU-only behavior this app
# advertises in its UI description.
model = whisper.load_model("large-v2", device="cpu")  # or "base" for a smaller model
def classify(audio_path):
    """Transcribe an audio file and score the transcript for safety.

    Args:
        audio_path: Path (str) to the uploaded or recorded audio file, or
            None/empty when the request was submitted without any audio.

    Returns:
        A 3-tuple ``(transcript, label_probs, unsafe_str)``:
        the whitespace-stripped transcript, a dict holding the "safe" and
        "unsafe" probabilities, and the unsafe probability formatted with
        two decimals.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Submitting the form without audio hands us None; fail with a readable
    # message instead of an opaque error from inside the transcription call.
    if not audio_path:
        raise gr.Error("Please upload or record an audio clip first.")

    # Run beam-search transcription (beam width 5), forcing English.
    result = model.transcribe(
        audio_path,
        beam_size=5,
        language="en",
    )
    txt = result["text"].strip()

    # Safety classification.
    # NOTE(review): assumes column 1 of predict_proba is the "unsafe" class --
    # confirm against text_clf.classes_.
    proba = float(text_clf.predict_proba([txt])[0][1])
    label_probs = {"safe": 1 - proba, "unsafe": proba}
    unsafe_str = f"{proba:.2f}"
    return txt, label_probs, unsafe_str
# Gradio UI: a single audio input wired to the three values classify() returns.
audio_input = gr.Audio(
    type="filepath",
    label="Upload or record audio",
)
transcript_out = gr.Textbox(label="Transcript")
probs_out = gr.Label(label="Safety Probabilities", num_top_classes=2)
unsafe_out = gr.Textbox(label="Unsafe Probability")

iface = gr.Interface(
    title="BubbleGuard Audio Safety Checker",
    description="Uses the official openai-whisper package for identical, CPU-only transcripts.",
    fn=classify,
    inputs=audio_input,
    outputs=[transcript_out, probs_out, unsafe_out],
)

if __name__ == "__main__":
    # Only start the server when executed as a script, not when imported.
    iface.launch(server_port=7860, server_name="0.0.0.0")
|