Spaces:

Rivalcoder
/

Lite

Sleeping

App Files Files Community

Rivalcoder committed on Dec 1, 2025

Commit

4b25dd0

1 Parent(s): 98a399f

Add Files

Browse files

Files changed (1) hide show

alm_pipeline.py +31 -3

alm_pipeline.py CHANGED Viewed

@@ -1,8 +1,28 @@
 import whisper
 import librosa
 import numpy as np
 import tensorflow_hub as hub
 # Load ASR
 asr_model = whisper.load_model("small")
@@ -21,7 +41,8 @@ def estimate_emotion(activation):
 def speech_to_text(audio):
-    result = asr_model.transcribe(audio)
     return result["text"]
@@ -31,8 +52,15 @@ def detect_sound(audio):
     waveform = waveform.astype(np.float32)
     scores, embeddings, _ = yamnet(waveform)
     mean_scores = np.mean(scores.numpy(), axis=0)
-    top_idx = np.argmax(mean_scores)
-    return class_map[top_idx].decode("utf-8"), mean_scores.max()
 def analyze_audio(audio_file):

+import os
+import warnings
 import whisper
 import librosa
 import numpy as np
 import tensorflow_hub as hub
+# Reduce TensorFlow log noise and avoid attempting GPU / oneDNN on CPU-only envs
+os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # hide INFO/WARNING logs
+os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")  # disable oneDNN custom ops
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "-1")  # don't try to use CUDA GPUs
+# Suppress specific library warnings that are expected in this setup
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message="FP16 is not supported on CPU; using FP32 instead",
+)
+warnings.filterwarnings(
+    "ignore",
+    category=FutureWarning,
+    module="librosa",
+)
 # Load ASR
 asr_model = whisper.load_model("small")
 def speech_to_text(audio):
+    # Force FP32 on CPU to avoid FP16 warnings and ensure compatibility
+    result = asr_model.transcribe(audio, fp16=False)
     return result["text"]
     waveform = waveform.astype(np.float32)
     scores, embeddings, _ = yamnet(waveform)
     mean_scores = np.mean(scores.numpy(), axis=0)
+    top_idx = int(np.argmax(mean_scores))
+    # class_map may contain integers or byte strings depending on TF Hub version;
+    # convert robustly to a human-readable label.
+    label = class_map[top_idx]
+    if isinstance(label, bytes):
+        label = label.decode("utf-8")
+    else:
+        label = str(label)
+    return label, float(mean_scores.max())
 def analyze_audio(audio_file):