Kaworu17 committed on
Commit
11683d3
·
verified ·
1 Parent(s): 22c3745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -3,13 +3,14 @@ import tensorflow_hub as hub
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import gradio as gr
6
- import soundfile as sf # PySoundFile for broader audio format support
 
7
 
8
- # Load YAMNet model
9
  yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
10
  yamnet_model = hub.load(yamnet_model_handle)
11
 
12
- # Load class names
13
  def load_class_map():
14
  class_map_path = tf.keras.utils.get_file(
15
  'yamnet_class_map.csv',
@@ -21,33 +22,38 @@ def load_class_map():
21
 
22
  class_names = load_class_map()
23
 
24
- # Core classifier function
25
  def classify_audio(file_path):
26
  try:
27
- # Load audio file using soundfile (supports WAV, MP3, FLAC, OGG, etc.)
28
  audio_data, sample_rate = sf.read(file_path)
29
 
30
  # Convert stereo to mono if needed
31
  if len(audio_data.shape) > 1:
32
  audio_data = np.mean(audio_data, axis=1)
33
 
34
- # Normalize
35
  audio_data = audio_data / np.max(np.abs(audio_data))
36
 
37
- # Resample if needed
38
- if sample_rate != 16000:
39
- audio_data = tf.audio.resample(audio_data, sample_rate, 16000)
40
- sample_rate = 16000
 
 
41
 
42
- # Predict
43
- scores, embeddings, spectrogram = yamnet_model(audio_data)
44
- mean_scores = np.mean(scores, axis=0)
 
 
 
45
  top_5 = np.argsort(mean_scores)[::-1][:5]
46
 
47
  top_prediction = class_names[top_5[0]]
48
  top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
49
 
50
- # Plot waveform
51
  fig, ax = plt.subplots()
52
  ax.plot(audio_data)
53
  ax.set_title("Waveform")
@@ -60,7 +66,7 @@ def classify_audio(file_path):
60
  except Exception as e:
61
  return f"Error processing audio: {e}", {}, None
62
 
63
- # Gradio UI
64
  interface = gr.Interface(
65
  fn=classify_audio,
66
  inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),
@@ -70,7 +76,7 @@ interface = gr.Interface(
70
  gr.Plot(label="Waveform")
71
  ],
72
  title="Audtheia YAMNet Audio Classifier",
73
- description="Upload environmental or animal sounds (WAV/MP3). Classifies with YAMNet and shows waveform + top 5 predictions."
74
  )
75
 
76
  if __name__ == "__main__":
 
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import gradio as gr
6
+ import soundfile as sf
7
+ from scipy.signal import resample # Correct resampling method
8
 
9
+ # Load YAMNet model from TensorFlow Hub
10
  yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
11
  yamnet_model = hub.load(yamnet_model_handle)
12
 
13
+ # Load class labels
14
  def load_class_map():
15
  class_map_path = tf.keras.utils.get_file(
16
  'yamnet_class_map.csv',
 
22
 
23
  class_names = load_class_map()
24
 
25
+ # Classification function
26
  def classify_audio(file_path):
27
  try:
28
+ # Load audio file (WAV, MP3, etc.)
29
  audio_data, sample_rate = sf.read(file_path)
30
 
31
  # Convert stereo to mono if needed
32
  if len(audio_data.shape) > 1:
33
  audio_data = np.mean(audio_data, axis=1)
34
 
35
+ # Normalize audio
36
  audio_data = audio_data / np.max(np.abs(audio_data))
37
 
38
+ # Resample to 16kHz if necessary
39
+ target_rate = 16000
40
+ if sample_rate != target_rate:
41
+ duration = audio_data.shape[0] / sample_rate
42
+ new_length = int(duration * target_rate)
43
+ audio_data = resample(audio_data, new_length)
44
 
45
+ # Convert to tensor
46
+ waveform = tf.convert_to_tensor(audio_data, dtype=tf.float32)
47
+
48
+ # Run YAMNet
49
+ scores, embeddings, spectrogram = yamnet_model(waveform)
50
+ mean_scores = tf.reduce_mean(scores, axis=0).numpy()
51
  top_5 = np.argsort(mean_scores)[::-1][:5]
52
 
53
  top_prediction = class_names[top_5[0]]
54
  top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
55
 
56
+ # Create waveform plot
57
  fig, ax = plt.subplots()
58
  ax.plot(audio_data)
59
  ax.set_title("Waveform")
 
66
  except Exception as e:
67
  return f"Error processing audio: {e}", {}, None
68
 
69
+ # Gradio interface
70
  interface = gr.Interface(
71
  fn=classify_audio,
72
  inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),
 
76
  gr.Plot(label="Waveform")
77
  ],
78
  title="Audtheia YAMNet Audio Classifier",
79
+ description="Upload an environmental or animal sound to classify using the YAMNet model. Returns label predictions and waveform."
80
  )
81
 
82
  if __name__ == "__main__":