InstrumentIdentifier

Sleeping

App Files Community

MichalIwaniuk committed on Jun 4, 2025

Commit

76e83c1

1 Parent(s): 18b8d3c

zmiana

Browse files

Files changed (2) hide show

app.py +19 -21
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -4,8 +4,10 @@ import librosa
 import librosa.display
 import tensorflow as tf
 import matplotlib.pyplot as plt
-# Parametry modelu
 SR = 22050
 N_MELS = 128
 TARGET_FRAMES = 216
@@ -16,13 +18,13 @@ model = tf.keras.models.load_model("model.h5")
 def compute_melspectrogram(y, sr=SR, n_mels=N_MELS):
     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
-    S_DB = librosa.power_to_db(S, ref=np.max)
-    return S_DB
 def resize_spectrogram(S, target_frames=TARGET_FRAMES):
     if S.shape[1] < target_frames:
         pad = target_frames - S.shape[1]
-        left = pad // 2; right = pad - left
         S = np.pad(S, ((0, 0), (left, right)), mode='constant')
     elif S.shape[1] > target_frames:
         start = (S.shape[1] - target_frames) // 2
@@ -30,46 +32,42 @@ def resize_spectrogram(S, target_frames=TARGET_FRAMES):
     return S
 def predict_and_plot(audio_path):
-    # Wczytaj audio
     y, _ = librosa.load(audio_path, sr=SR)
-    # Oblicz spektrogram
     S_full = compute_melspectrogram(y)
     S = resize_spectrogram(S_full)
     # Przygotuj do predykcji
     x = S[np.newaxis, ..., np.newaxis]
-    probs = model.predict(x, verbose=0)[0]
-    # Przygotuj spektrogram jako obrazek
     fig, ax = plt.subplots(figsize=(8, 4))
     librosa.display.specshow(S_full, sr=SR, x_axis='time', y_axis='mel', cmap='magma', ax=ax)
     ax.set_title("Mel-spektrogram")
-    ax.set_xlabel("Czas")
-    ax.set_ylabel("Częstotliwość (Mel)")
     plt.tight_layout()
-    # Zapisz obrazek do zmiennej
-    import io
     buf = io.BytesIO()
-    plt.savefig(buf, format='png')
     plt.close(fig)
     buf.seek(0)
-    # Zwróć predykcje i obraz
-    predictions = {label: float(p) for label, p in zip(LABELS, probs)}
-    return predictions, buf
-# Gradio UI
 demo = gr.Interface(
     fn=predict_and_plot,
     inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
     outputs=[
-        gr.Label(num_top_classes=5, label="Predykcje instrumentu"),
         gr.Image(label="Spektrogram")
     ],
-    title="Rozpoznawanie instrumentów z dźwięku",
-    description="Model na podstawie spektrogramu melowego rozpoznaje instrument muzyczny. Obsługiwane klasy: " + ", ".join(LABELS)
 )
 if __name__ == "__main__":

 import librosa.display
 import tensorflow as tf
 import matplotlib.pyplot as plt
+from PIL import Image
+import io
+# Parametry
 SR = 22050
 N_MELS = 128
 TARGET_FRAMES = 216
 def compute_melspectrogram(y, sr=SR, n_mels=N_MELS):
     S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
+    return librosa.power_to_db(S, ref=np.max)
 def resize_spectrogram(S, target_frames=TARGET_FRAMES):
     if S.shape[1] < target_frames:
         pad = target_frames - S.shape[1]
+        left = pad // 2
+        right = pad - left
         S = np.pad(S, ((0, 0), (left, right)), mode='constant')
     elif S.shape[1] > target_frames:
         start = (S.shape[1] - target_frames) // 2
     return S
 def predict_and_plot(audio_path):
+    # Wczytaj dźwięk
     y, _ = librosa.load(audio_path, sr=SR)
+    # Oblicz i przeskaluj spektrogram
     S_full = compute_melspectrogram(y)
     S = resize_spectrogram(S_full)
     # Przygotuj do predykcji
     x = S[np.newaxis, ..., np.newaxis]
+    preds = model.predict(x, verbose=0)[0]
+    # Rysuj spektrogram i zapisz do obrazu
     fig, ax = plt.subplots(figsize=(8, 4))
     librosa.display.specshow(S_full, sr=SR, x_axis='time', y_axis='mel', cmap='magma', ax=ax)
     ax.set_title("Mel-spektrogram")
     plt.tight_layout()
     buf = io.BytesIO()
+    fig.savefig(buf, format='png')
     plt.close(fig)
     buf.seek(0)
+    image = Image.open(buf)
+    # Predykcje jako słownik
+    pred_dict = {label: float(p) for label, p in zip(LABELS, preds)}
+    return pred_dict, image
 demo = gr.Interface(
     fn=predict_and_plot,
     inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
     outputs=[
+        gr.Label(num_top_classes=5, label="Predykcja"),
         gr.Image(label="Spektrogram")
     ],
+    title="Rozpoznawanie instrumentów",
+    description="Model klasyfikuje dźwięki do jednej z klas instrumentów."
 )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ tensorflow
 librosa
 gradio
 numpy
-matplotlib

 librosa
 gradio
 numpy
+matplotlib
+Pillow