File size: 1,306 Bytes
c1e8004
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os

import gradio as gr
import librosa
import numpy as np
import tensorflow as tf

# Load the trained Keras model from disk (done once, at import time)
model = tf.keras.models.load_model("model.h5")

# Class labels, paired positionally with the model's output scores
# (adjust to your own use case)
INSTRUMENTS = ['piano', 'guitar', 'violin', 'drums']  # <- replace with your own classes

# Audio preprocessing parameters
SR = 22050      # sample rate passed to librosa when loading audio
DURATION = 5    # seconds of audio loaded / padded to per clip
N_MELS = 128    # number of mel bands in the spectrogram

def preprocess_audio(file):
    """Convert an audio file into a batched log-mel spectrogram.

    The clip is loaded at ``SR`` Hz, limited to ``DURATION`` seconds,
    forced to exactly ``SR * DURATION`` samples, and converted to a mel
    spectrogram expressed in dB relative to its own peak.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``.name`` attribute
            (such as a temporary-file wrapper).

    Returns:
        ``numpy.ndarray`` of shape ``(1, N_MELS, frames, 1)``: a leading
        batch axis and a trailing channel axis around the spectrogram.

    Raises:
        ValueError: If ``file`` is ``None`` (no audio was supplied).
    """
    if file is None:
        raise ValueError("No audio file was provided.")

    # Accept plain paths as well as file-like wrappers.  The PathLike check
    # comes first because pathlib.Path also has a ``.name`` attribute, which
    # is only the final path component and not a usable location.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    target_len = int(SR * DURATION)
    samples, _ = librosa.load(path, sr=SR, duration=DURATION)

    # Force a fixed length so the spectrogram always has the same number of
    # frames: trim any excess samples (resampling may round the length up),
    # then zero-pad clips that are too short.
    samples = samples[:target_len]
    missing = target_len - len(samples)
    if missing > 0:
        samples = np.pad(samples, (0, missing))

    mel = librosa.feature.melspectrogram(y=samples, sr=SR, n_mels=N_MELS)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    mel_db = mel_db[..., np.newaxis]  # add channel axis
    mel_db = np.expand_dims(mel_db, axis=0)  # add batch axis
    return mel_db

def predict_instrument(audio_file):
    """Classify an uploaded audio clip.

    Runs ``preprocess_audio`` on the input, feeds the result to ``model``,
    and pairs the first (only) row of scores with ``INSTRUMENTS`` by
    position.

    Args:
        audio_file: The value handed over by the audio input component;
            passed straight to ``preprocess_audio``.

    Returns:
        dict mapping each instrument label to its score as a plain
        ``float``.
    """
    batch = preprocess_audio(audio_file)
    scores = model.predict(batch)[0]
    # zip stops at the shorter sequence, so extra labels or scores are
    # dropped.
    return dict(zip(INSTRUMENTS, map(float, scores)))

# Gradio UI: one audio upload in, the top three class scores out.
demo = gr.Interface(
    fn=predict_instrument,
    # NOTE(review): `type="file"` is presumably only accepted by older
    # Gradio releases (newer ones appear to offer "numpy" / "filepath") --
    # confirm against the Gradio version this app is deployed with.
    inputs=gr.Audio(type="file", label="Wgraj plik WAV"),
    # Show only the three highest-scoring classes.
    outputs=gr.Label(num_top_classes=3),
    title="Rozpoznawanie Instrumentów",
    description="Model rozpoznaje instrumenty muzyczne na podstawie pliku dźwiękowego (.wav)."
)

# Start the web app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()