seayala committed on
Commit
729b487
·
verified ·
1 Parent(s): 8d5fb9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -9,31 +9,38 @@ labels = ['down', 'go', 'left', 'no', 'off', 'on', 'right', 'stop', 'up', 'yes']
9
 
10
  def extract_features(file_name):
11
  try:
12
- # Carga el audio sin cambiar el sample rate
13
- audio, sample_rate = librosa.load(file_name, sr=None)
14
 
15
- # Saca el espectrograma de magnitud
16
- spectrogram = np.abs(librosa.stft(audio, n_fft=512, hop_length=256))
 
 
 
 
 
 
17
 
18
- # Convierte a escala logarítmica (como normalmente esperan los modelos de audio)
19
- log_spectrogram = librosa.amplitude_to_db(spectrogram)
20
 
21
  # Ajusta tamaño exacto
22
- log_spectrogram = librosa.util.fix_length(log_spectrogram, size=257, axis=0)
23
- log_spectrogram = librosa.util.fix_length(log_spectrogram, size=97, axis=1)
24
 
25
  # Normaliza
26
- log_spectrogram = (log_spectrogram - np.mean(log_spectrogram)) / np.std(log_spectrogram)
27
 
28
- # Añade canal para la red convolucional
29
- log_spectrogram = log_spectrogram[..., np.newaxis]
30
 
31
  except Exception as e:
32
  print(f"Error encountered while parsing file: {file_name}")
33
  print(e)
34
  return None
35
 
36
- return log_spectrogram
 
37
 
38
  def classify_audio(audio_file):
39
  print(f"Tipo de audio_file: {type(audio_file)}")
 
9
 
10
  def extract_features(file_name):
11
  try:
12
+ # Resamplea a 16kHz
13
+ audio, sample_rate = librosa.load(file_name, sr=16000)
14
 
15
+ # Saca Mel-spectrograma
16
+ mel_spectrogram = librosa.feature.melspectrogram(
17
+ y=audio,
18
+ sr=sample_rate,
19
+ n_mels=257,
20
+ n_fft=512,
21
+ hop_length=256
22
+ )
23
 
24
+ # Convierte a escala logarítmica
25
+ log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
26
 
27
  # Ajusta tamaño exacto
28
+ log_mel_spectrogram = librosa.util.fix_length(log_mel_spectrogram, size=257, axis=0)
29
+ log_mel_spectrogram = librosa.util.fix_length(log_mel_spectrogram, size=97, axis=1)
30
 
31
  # Normaliza
32
+ log_mel_spectrogram = (log_mel_spectrogram - np.mean(log_mel_spectrogram)) / np.std(log_mel_spectrogram)
33
 
34
+ # Añade canal
35
+ log_mel_spectrogram = log_mel_spectrogram[..., np.newaxis]
36
 
37
  except Exception as e:
38
  print(f"Error encountered while parsing file: {file_name}")
39
  print(e)
40
  return None
41
 
42
+ return log_mel_spectrogram
43
+
44
 
45
  def classify_audio(audio_file):
46
  print(f"Tipo de audio_file: {type(audio_file)}")