MichalIwaniuk committed on
Commit
76e83c1
1 Parent(s): 18b8d3c
Files changed (2) hide show
  1. app.py +19 -21
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,8 +4,10 @@ import librosa
4
  import librosa.display
5
  import tensorflow as tf
6
  import matplotlib.pyplot as plt
 
 
7
 
8
- # Parametry modelu
9
  SR = 22050
10
  N_MELS = 128
11
  TARGET_FRAMES = 216
@@ -16,13 +18,13 @@ model = tf.keras.models.load_model("model.h5")
16
 
17
  def compute_melspectrogram(y, sr=SR, n_mels=N_MELS):
18
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
19
- S_DB = librosa.power_to_db(S, ref=np.max)
20
- return S_DB
21
 
22
  def resize_spectrogram(S, target_frames=TARGET_FRAMES):
23
  if S.shape[1] < target_frames:
24
  pad = target_frames - S.shape[1]
25
- left = pad // 2; right = pad - left
 
26
  S = np.pad(S, ((0, 0), (left, right)), mode='constant')
27
  elif S.shape[1] > target_frames:
28
  start = (S.shape[1] - target_frames) // 2
@@ -30,46 +32,42 @@ def resize_spectrogram(S, target_frames=TARGET_FRAMES):
30
  return S
31
 
32
  def predict_and_plot(audio_path):
33
- # Wczytaj audio
34
  y, _ = librosa.load(audio_path, sr=SR)
35
 
36
- # Oblicz spektrogram
37
  S_full = compute_melspectrogram(y)
38
  S = resize_spectrogram(S_full)
39
 
40
  # Przygotuj do predykcji
41
  x = S[np.newaxis, ..., np.newaxis]
42
- probs = model.predict(x, verbose=0)[0]
43
 
44
- # Przygotuj spektrogram jako obrazek
45
  fig, ax = plt.subplots(figsize=(8, 4))
46
  librosa.display.specshow(S_full, sr=SR, x_axis='time', y_axis='mel', cmap='magma', ax=ax)
47
  ax.set_title("Mel-spektrogram")
48
- ax.set_xlabel("Czas")
49
- ax.set_ylabel("Częstotliwość (Mel)")
50
  plt.tight_layout()
51
-
52
- # Zapisz obrazek do zmiennej
53
- import io
54
  buf = io.BytesIO()
55
- plt.savefig(buf, format='png')
56
  plt.close(fig)
57
  buf.seek(0)
 
58
 
59
- # Zwróć predykcje i obraz
60
- predictions = {label: float(p) for label, p in zip(LABELS, probs)}
61
- return predictions, buf
62
 
63
- # Gradio UI
64
  demo = gr.Interface(
65
  fn=predict_and_plot,
66
  inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
67
  outputs=[
68
- gr.Label(num_top_classes=5, label="Predykcje instrumentu"),
69
  gr.Image(label="Spektrogram")
70
  ],
71
- title="Rozpoznawanie instrumentów z dźwięku",
72
- description="Model na podstawie spektrogramu melowego rozpoznaje instrument muzyczny. Obsługiwane klasy: " + ", ".join(LABELS)
73
  )
74
 
75
  if __name__ == "__main__":
 
4
  import librosa.display
5
  import tensorflow as tf
6
  import matplotlib.pyplot as plt
7
+ from PIL import Image
8
+ import io
9
 
10
+ # Parametry
11
  SR = 22050
12
  N_MELS = 128
13
  TARGET_FRAMES = 216
 
18
 
19
  def compute_melspectrogram(y, sr=SR, n_mels=N_MELS):
20
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
21
+ return librosa.power_to_db(S, ref=np.max)
 
22
 
23
  def resize_spectrogram(S, target_frames=TARGET_FRAMES):
24
  if S.shape[1] < target_frames:
25
  pad = target_frames - S.shape[1]
26
+ left = pad // 2
27
+ right = pad - left
28
  S = np.pad(S, ((0, 0), (left, right)), mode='constant')
29
  elif S.shape[1] > target_frames:
30
  start = (S.shape[1] - target_frames) // 2
 
32
  return S
33
 
34
  def predict_and_plot(audio_path):
35
+ # Wczytaj dźwięk
36
  y, _ = librosa.load(audio_path, sr=SR)
37
 
38
+ # Oblicz i przeskaluj spektrogram
39
  S_full = compute_melspectrogram(y)
40
  S = resize_spectrogram(S_full)
41
 
42
  # Przygotuj do predykcji
43
  x = S[np.newaxis, ..., np.newaxis]
44
+ preds = model.predict(x, verbose=0)[0]
45
 
46
+ # Rysuj spektrogram i zapisz do obrazu
47
  fig, ax = plt.subplots(figsize=(8, 4))
48
  librosa.display.specshow(S_full, sr=SR, x_axis='time', y_axis='mel', cmap='magma', ax=ax)
49
  ax.set_title("Mel-spektrogram")
 
 
50
  plt.tight_layout()
51
+
 
 
52
  buf = io.BytesIO()
53
+ fig.savefig(buf, format='png')
54
  plt.close(fig)
55
  buf.seek(0)
56
+ image = Image.open(buf)
57
 
58
+ # Predykcje jako słownik
59
+ pred_dict = {label: float(p) for label, p in zip(LABELS, preds)}
60
+ return pred_dict, image
61
 
 
62
  demo = gr.Interface(
63
  fn=predict_and_plot,
64
  inputs=gr.Audio(type="filepath", label="Wgraj plik WAV"),
65
  outputs=[
66
+ gr.Label(num_top_classes=5, label="Predykcja"),
67
  gr.Image(label="Spektrogram")
68
  ],
69
+ title="Rozpoznawanie instrumentów",
70
+ description="Model klasyfikuje dźwięki do jednej z klas instrumentów."
71
  )
72
 
73
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -2,4 +2,5 @@ tensorflow
2
  librosa
3
  gradio
4
  numpy
5
- matplotlib
 
 
2
  librosa
3
  gradio
4
  numpy
5
+ matplotlib
6
+ Pillow