Rivalcoder committed on
Commit
98a399f
·
1 Parent(s): 5a416b3
Files changed (1) hide show
  1. alm_pipeline.py +3 -2
alm_pipeline.py CHANGED
@@ -26,8 +26,9 @@ def speech_to_text(audio):
26
 
27
 
28
  def detect_sound(audio):
29
- waveform, sr = librosa.load(audio, sr=16000)
30
- waveform = waveform.reshape(1, -1)
 
31
  scores, embeddings, _ = yamnet(waveform)
32
  mean_scores = np.mean(scores.numpy(), axis=0)
33
  top_idx = np.argmax(mean_scores)
 
26
 
27
 
28
  def detect_sound(audio):
29
+ # Load mono waveform at 16 kHz as 1D float32 array, as expected by YAMNet
30
+ waveform, sr = librosa.load(audio, sr=16000, mono=True)
31
+ waveform = waveform.astype(np.float32)
32
  scores, embeddings, _ = yamnet(waveform)
33
  mean_scores = np.mean(scores.numpy(), axis=0)
34
  top_idx = np.argmax(mean_scores)