houseaudrey12 committed on
Commit
9d00eeb
·
verified ·
1 Parent(s): e171a10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -13
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import torchaudio
4
  import numpy as np
5
  from datasets import load_dataset
 
6
 
7
  # ---------------------------
8
  # Load Dataset for Label Reference
@@ -23,13 +24,19 @@ def fake_quality_score(mel_spec):
23
 
24
  # ---------------------------
25
  # Audio Preprocessing
26
- # NOTE: Gradio with type="numpy" gives (sample_rate, data)
27
  # ---------------------------
 
 
 
 
 
 
28
  mel_transform = torchaudio.transforms.MelSpectrogram(
29
- sample_rate=44100,
30
- n_fft=1024,
31
- hop_length=512,
32
- n_mels=64
 
33
  )
34
 
35
  def preprocess_audio(audio):
@@ -48,20 +55,27 @@ def preprocess_audio(audio):
48
 
49
  # If shape is (samples, channels), transpose to (channels, samples)
50
  if waveform.ndim == 2 and waveform.shape[0] < waveform.shape[1]:
51
- # shape (samples, channels) -> (channels, samples)
52
  waveform = waveform.transpose(0, 1)
53
 
54
  # Convert to mono if stereo or more channels
55
  if waveform.shape[0] > 1:
56
  waveform = waveform.mean(dim=0, keepdim=True)
57
 
58
- # Resample to 44.1 kHz if needed
59
- if sr != 44100:
60
- resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=44100)
61
  waveform = resampler(waveform)
62
- sr = 44100
 
 
 
 
 
 
 
 
63
 
64
- # Mel-spectrogram
65
  mel = mel_transform(waveform)
66
  mel_db = torchaudio.transforms.AmplitudeToDB()(mel)
67
  return mel_db
@@ -71,7 +85,7 @@ def preprocess_audio(audio):
71
  # ---------------------------
72
  def analyze_piano(audio):
73
  if audio is None:
74
- return "Please upload or record a piano audio clip."
75
 
76
  try:
77
  mel = preprocess_audio(audio)
@@ -102,7 +116,7 @@ demo = gr.Interface(
102
  ),
103
  outputs=gr.Textbox(label="AI Analysis Output"),
104
  title="AI Piano Sound Analyzer 🎹",
105
- description="Upload a short piano recording to get a predicted piano type and estimated sound-quality score."
106
  )
107
 
108
  if __name__ == "__main__":
 
3
  import torchaudio
4
  import numpy as np
5
  from datasets import load_dataset
6
+ import torch.nn.functional as F
7
 
8
  # ---------------------------
9
  # Load Dataset for Label Reference
 
24
 
25
  # ---------------------------
26
  # Audio Preprocessing
 
27
  # ---------------------------
28
+
29
+ TARGET_SR = 44100
30
+ N_FFT = 1024
31
+ HOP_LENGTH = 512
32
+ N_MELS = 64
33
+
34
  mel_transform = torchaudio.transforms.MelSpectrogram(
35
+ sample_rate=TARGET_SR,
36
+ n_fft=N_FFT,
37
+ hop_length=HOP_LENGTH,
38
+ n_mels=N_MELS,
39
+ center=False # we will handle padding manually
40
  )
41
 
42
  def preprocess_audio(audio):
 
55
 
56
  # If shape is (samples, channels), transpose to (channels, samples)
57
  if waveform.ndim == 2 and waveform.shape[0] < waveform.shape[1]:
 
58
  waveform = waveform.transpose(0, 1)
59
 
60
  # Convert to mono if stereo or more channels
61
  if waveform.shape[0] > 1:
62
  waveform = waveform.mean(dim=0, keepdim=True)
63
 
64
+ # Resample to TARGET_SR if needed
65
+ if sr != TARGET_SR:
66
+ resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=TARGET_SR)
67
  waveform = resampler(waveform)
68
+ sr = TARGET_SR
69
+
70
+ # --- NEW: Ensure minimum length for STFT / MelSpectrogram ---
71
+ min_len = N_FFT # at least one window
72
+ current_len = waveform.shape[-1]
73
+ if current_len < min_len:
74
+ pad_amount = min_len - current_len
75
+ # Pad at the end with zeros
76
+ waveform = F.pad(waveform, (0, pad_amount))
77
 
78
+ # Mel-spectrogram (no internal centering/padding)
79
  mel = mel_transform(waveform)
80
  mel_db = torchaudio.transforms.AmplitudeToDB()(mel)
81
  return mel_db
 
85
  # ---------------------------
86
  def analyze_piano(audio):
87
  if audio is None:
88
+ return "Please upload or record a piano audio clip (at least 1–2 seconds)."
89
 
90
  try:
91
  mel = preprocess_audio(audio)
 
116
  ),
117
  outputs=gr.Textbox(label="AI Analysis Output"),
118
  title="AI Piano Sound Analyzer 🎹",
119
+ description="Upload a short piano recording (around 1–3 seconds) to get a predicted piano type and estimated sound-quality score."
120
  )
121
 
122
  if __name__ == "__main__":