Spaces:

MichalIwaniuk
/

ModelInstrumenty

Sleeping

App Files Files Community

MichalIwaniuk committed on Jun 4

Commit

96b9863

1 Parent(s): 51fae2f

commmit

Browse files

Files changed (1) hide show

app.py +61 -27

app.py CHANGED Viewed

@@ -1,41 +1,75 @@
 import gradio as gr
-import tensorflow as tf
-import librosa
 import numpy as np
 # Wczytanie modelu
 model = tf.keras.models.load_model("model.h5")
-# Lista klas (dostosuj do swojego przypadku)
-INSTRUMENTS = ['cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio', 'voi']  # <- zmień na własne klasy
-# Parametry przetwarzania
-SR = 22050
-DURATION = 5
-N_MELS = 128
-def preprocess_audio(file):
-    y, _ = librosa.load(file.name, sr=SR, duration=DURATION)
-    if len(y) < SR * DURATION:
-        y = np.pad(y, (0, SR * DURATION - len(y)))
-    mel = librosa.feature.melspectrogram(y=y, sr=SR, n_mels=N_MELS)
-    mel_db = librosa.power_to_db(mel, ref=np.max)
-    mel_db = mel_db[..., np.newaxis]  # Dodaj kanał
-    mel_db = np.expand_dims(mel_db, axis=0)  # Dodaj batch
-    return mel_db
-def predict_instrument(audio_file):
-    mel_input = preprocess_audio(audio_file)
-    preds = model.predict(mel_input)[0]
-    result = {cls: float(score) for cls, score in zip(INSTRUMENTS, preds)}
-    return result
 demo = gr.Interface(
-    fn=predict_instrument,
     inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
-    outputs=gr.Label(num_top_classes=3),
-    title="Rozpoznawanie Instrumentów",
-    description="Model rozpoznaje instrumenty muzyczne na podstawie pliku dźwiękowego (.wav)."
 )
 if __name__ == "__main__":

 import gradio as gr
 import numpy as np
+import librosa
+import librosa.display
+import tensorflow as tf
+import matplotlib.pyplot as plt
+# Parametry modelu
+SR = 22050
+N_MELS = 128
+TARGET_FRAMES = 216
+LABELS = ['cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio', 'voi']
 # Wczytanie modelu
 model = tf.keras.models.load_model("model.h5")
+def compute_melspectrogram(y, sr=SR, n_mels=N_MELS):
+    """Return the mel spectrogram of ``y`` converted to decibels.
+    The power spectrogram is computed with ``n_mels`` mel bands at sample rate
+    ``sr`` and scaled with ``ref=np.max``, i.e. relative to its own maximum.
+    """
+    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
+    return librosa.power_to_db(mel_power, ref=np.max)
+def resize_spectrogram(S, target_frames=TARGET_FRAMES):
+    """Center-pad or center-crop ``S`` along axis 1 to ``target_frames`` columns.
+    Narrower inputs get constant (zero) columns on both sides, with the extra
+    column on the right when the deficit is odd. Wider inputs keep their middle
+    ``target_frames`` columns. Inputs that already fit are returned unchanged.
+    """
+    n_frames = S.shape[1]
+    if n_frames == target_frames:
+        return S
+    if n_frames > target_frames:
+        offset = (n_frames - target_frames) // 2
+        return S[:, offset:offset + target_frames]
+    missing = target_frames - n_frames
+    before = missing // 2
+    after = missing - before
+    return np.pad(S, ((0, 0), (before, after)), mode='constant')
+def predict_and_plot(audio_path):
+    """Score an audio file against ``LABELS`` and render its mel spectrogram.
+    Takes the path of the audio file and returns ``(predictions, image)``:
+    ``predictions`` maps each label to the model output as a float, and
+    ``image`` is the rendered plot as a ``uint8`` array, because ``gr.Image``
+    cannot display a raw ``BytesIO`` buffer.
+    """
+    import io
+    # Load the audio, resampled to SR.
+    y, _ = librosa.load(audio_path, sr=SR)
+    # Full-length spectrogram for the plot, fixed-width version for the model.
+    S_full = compute_melspectrogram(y)
+    S = resize_spectrogram(S_full)
+    # Add a leading batch axis and a trailing channel axis before predicting.
+    x = S[np.newaxis, ..., np.newaxis]
+    probs = model.predict(x, verbose=0)[0]
+    predictions = {label: float(p) for label, p in zip(LABELS, probs)}
+    # Render the spectrogram to an in-memory PNG.
+    buf = io.BytesIO()
+    fig, ax = plt.subplots(figsize=(8, 4))
+    try:
+        librosa.display.specshow(S_full, sr=SR, x_axis='time', y_axis='mel', cmap='magma', ax=ax)
+        ax.set_title("Mel-spektrogram")
+        ax.set_xlabel("Czas")
+        ax.set_ylabel("Częstotliwość (Mel)")
+        fig.tight_layout()
+        fig.savefig(buf, format='png')
+    finally:
+        # pyplot keeps every figure alive until it is closed, so close it even
+        # when rendering fails; otherwise a long-running app leaks figures.
+        plt.close(fig)
+    buf.seek(0)
+    # imread returns PNG data as floats in [0, 1]; convert to 8-bit pixels.
+    image = np.round(plt.imread(buf, format='png') * 255).astype(np.uint8)
+    return predictions, image
+# Gradio UI
 demo = gr.Interface(
+    fn=predict_and_plot,
     inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
+    outputs=[
+        gr.Label(num_top_classes=5, label="Predykcje instrumentu"),
+        gr.Image(label="Spektrogram")
+    ],
+    title="Rozpoznawanie instrumentów z dźwięku",
+    description="Model na podstawie spektrogramu melowego rozpoznaje instrument muzyczny. Obsługiwane klasy: " + ", ".join(LABELS)
 )
 if __name__ == "__main__":