File size: 1,542 Bytes
0853d17
 
84897bd
10f740b
2230468
0853d17
44875df
0853d17
 
176ca41
 
0853d17
c85978f
44875df
 
 
 
10f740b
 
 
176ca41
10f740b
44875df
10f740b
2230468
44875df
2230468
44875df
2230468
5dc2f59
2230468
 
44875df
 
 
 
 
14f29a1
2230468
 
752ac49
44875df
0853d17
176ca41
0853d17
 
 
2230468
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import joblib
import numpy as np
import soundfile as sf
import whisper
import gradio as gr

# 1) Load the balanced text classifier.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
text_clf = joblib.load("text_pipeline_balanced.joblib")

# 2) Load Whisper large-v2 via the official openai-whisper package.
# load_model() picks CUDA when it is available unless a device is given, so
# the device is pinned here to keep the CPU-only behaviour the app advertises.
model = whisper.load_model("large-v2", device="cpu")  # or "base" for a smaller model

def classify(audio_path):
    """Transcribe an audio file and score the transcript for safety.

    Args:
        audio_path: Path (str) to the uploaded or recorded audio file. May be
            ``None`` or empty when the form is submitted without any audio.

    Returns:
        A ``(transcript, label_probs, unsafe_str)`` tuple: the stripped
        transcript, a ``{"safe": p, "unsafe": q}`` dict of probabilities, and
        the unsafe probability formatted with two decimals. When no audio is
        supplied the result is ``("", {}, "")`` so every output stays empty.
    """
    # Without this guard a submission with no audio reaches model.transcribe()
    # and fails with an opaque error instead of simply showing empty outputs.
    if not audio_path:
        return "", {}, ""

    # Beam search (width 5) trades speed for accuracy; language is fixed to
    # English so Whisper skips language detection.
    result = model.transcribe(
        audio_path,
        beam_size=5,
        language="en",
    )
    txt = result["text"].strip()

    # Safety classification: column 1 of predict_proba is used as the
    # "unsafe" probability.
    # NOTE(review): assumes class index 1 is the unsafe class — confirm
    # against text_clf.classes_.
    proba = float(text_clf.predict_proba([txt])[0][1])
    label_probs = {"safe": 1 - proba, "unsafe": proba}
    unsafe_str = f"{proba:.2f}"

    return txt, label_probs, unsafe_str

# --- Gradio UI -------------------------------------------------------------
# One audio input (handed to classify() as a file path) and three outputs
# that line up with classify()'s return tuple.
audio_input = gr.Audio(
    type="filepath",
    label="Upload or record audio",
)
transcript_out = gr.Textbox(label="Transcript")
probs_out = gr.Label(
    label="Safety Probabilities",
    num_top_classes=2,
)
unsafe_out = gr.Textbox(label="Unsafe Probability")

iface = gr.Interface(
    title="BubbleGuard Audio Safety Checker",
    description="Uses the official openai-whisper package for identical, CPU-only transcripts.",
    fn=classify,
    inputs=audio_input,
    outputs=[transcript_out, probs_out, unsafe_out],
)

# Start the web server only when run as a script: all interfaces, port 7860.
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")