MetiMiester committed on
Commit
84897bd
·
verified ·
1 Parent(s): 18d8681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -2,38 +2,43 @@ import gradio as gr
2
  import joblib
3
  import torch
4
  import numpy as np
5
- from transformers import pipeline
 
6
 
7
  # 1) Load your trained text classifier
8
  text_clf = joblib.load("text_pipeline_balanced.joblib")
9
 
10
- # 2) Initialize Whisper ASR
11
- device = 0 if torch.cuda.is_available() else -1
12
- asr = pipeline(
13
- "automatic-speech-recognition",
14
- model="openai/whisper-base",
15
- chunk_length_s=30,
16
- device=device,
17
- ignore_warning=True,
18
- generate_kwargs={"language": "en", "task": "transcribe"}
19
- )
 
 
 
 
20
 
21
  def classify_audio(audio_np, sr):
22
- """
23
- audio_np: np.ndarray
24
- sr: sampling rate
25
- """
26
  if audio_np is None or sr is None:
27
  return "", "❌ Unsafe", 0.0
28
 
 
29
  audio = audio_np.astype("float32")
30
  if audio.ndim > 1:
31
  audio = audio.mean(axis=1)
32
 
33
- # Transcribe
34
- transcript = asr({"array": audio, "sampling_rate": sr})["text"].strip()
 
 
 
35
 
36
- # Classify
37
  proba = text_clf.predict_proba([transcript])[0][1]
38
  label = "❌ Unsafe" if proba > 0.5 else "✅ Safe"
39
 
 
2
  import joblib
3
  import torch
4
  import numpy as np
5
+ import whisper # openai-whisper
6
+ import soundfile as sf
7
 
8
  # 1) Load your trained text classifier
9
  text_clf = joblib.load("text_pipeline_balanced.joblib")
10
 
11
+ # 2) Load Whisper model
12
+ # (Requires `pip install openai-whisper`)
13
+ asr_model = whisper.load_model("base")
14
+
15
def transcribe_with_whisper(audio_np, sr):
    """Transcribe a mono audio signal with the preloaded Whisper model.

    Parameters
    ----------
    audio_np : np.ndarray
        1-D mono waveform samples (any float dtype; cast internally).
    sr : int
        Sampling rate of ``audio_np`` in Hz.

    Returns
    -------
    str
        The transcript text with surrounding whitespace stripped.
    """
    # Whisper models expect 16 kHz input, so resample anything else.
    if sr != 16000:
        import resampy  # local import: only needed on the resample path
        audio_np = resampy.resample(audio_np, sr, 16000)
        sr = 16000
    # Whisper's transcribe() requires float32 samples; the caller (or
    # resampy) may hand us float64, so cast defensively before inference.
    audio_np = np.ascontiguousarray(audio_np, dtype=np.float32)
    # fp16=False keeps inference safe on CPU-only machines.
    result = asr_model.transcribe(audio_np, fp16=False)
    return result["text"].strip()
25
 
26
  def classify_audio(audio_np, sr):
 
 
 
 
27
  if audio_np is None or sr is None:
28
  return "", "❌ Unsafe", 0.0
29
 
30
+ # ensure float32 & mono
31
  audio = audio_np.astype("float32")
32
  if audio.ndim > 1:
33
  audio = audio.mean(axis=1)
34
 
35
+ # 3) Transcribe via openai-whisper
36
+ try:
37
+ transcript = transcribe_with_whisper(audio, sr)
38
+ except Exception as e:
39
+ transcript = f"[Transcription error: {e}]"
40
 
41
+ # 4) Classify
42
  proba = text_clf.predict_proba([transcript])[0][1]
43
  label = "❌ Unsafe" if proba > 0.5 else "✅ Safe"
44