lyimo committed on
Commit
52fb7c9
·
verified ·
1 Parent(s): 92182fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import gradio as gr
2
  from fastai.vision.all import *
3
  import librosa
4
  import numpy as np
@@ -9,20 +8,24 @@ import tempfile
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
- def audio_to_spectrogram(audio_file):
13
- if isinstance(audio_file, str):
14
- if audio_file.endswith('.mp3'):
 
15
  with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
16
- audio = AudioSegment.from_mp3(audio_file)
17
  audio.export(temp_wav.name, format='wav')
18
  y, sr = librosa.load(temp_wav.name, sr=None)
19
  else:
20
- y, sr = librosa.load(audio_file, sr=None)
21
- else:
22
- y, sr = librosa.load(audio_file, sr=None)
23
 
 
24
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
25
  S_dB = librosa.power_to_db(S, ref=np.max)
 
 
26
  fig, ax = plt.subplots()
27
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
28
  fig.colorbar(img, ax=ax, format='%+2.0f dB')
@@ -30,22 +33,24 @@ def audio_to_spectrogram(audio_file):
30
  spectrogram_file = "spectrogram.png"
31
  plt.savefig(spectrogram_file)
32
  plt.close()
 
33
  return spectrogram_file
34
 
35
  def predict(audio):
36
  spectrogram_file = audio_to_spectrogram(audio)
37
  img = PILImage.create(spectrogram_file)
38
- img = img.resize((512, 512))
39
  pred, pred_idx, probs = learn.predict(img)
40
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
41
 
42
- examples = ['example_audio.mp3']
43
-
44
  gr.Interface(
45
  fn=predict,
46
  inputs=[
47
  gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
48
  gr.Audio(sources="microphone", label="Record audio")
49
  ],
50
- outputs=gr.components.Label(num_top_classes=3)
 
51
  ).launch()
 
 
import gradio as gr
from fastai.vision.all import *
import librosa
import numpy as np
 
8
  learn = load_learner('model.pkl')
9
  labels = learn.dls.vocab
10
 
11
+ def audio_to_spectrogram(audio):
12
+ # Handle both uploaded files and recorded audio
13
+ if isinstance(audio, str): # Uploaded file
14
+ if audio.endswith('.mp3'):
15
  with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
16
+ audio = AudioSegment.from_mp3(audio)
17
  audio.export(temp_wav.name, format='wav')
18
  y, sr = librosa.load(temp_wav.name, sr=None)
19
  else:
20
+ y, sr = librosa.load(audio, sr=None)
21
+ else: # Recorded audio
22
+ y, sr = librosa.load(audio, sr=None)
23
 
24
+ # Generate mel spectrogram
25
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
26
  S_dB = librosa.power_to_db(S, ref=np.max)
27
+
28
+ # Create and save spectrogram image
29
  fig, ax = plt.subplots()
30
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
31
  fig.colorbar(img, ax=ax, format='%+2.0f dB')
 
33
  spectrogram_file = "spectrogram.png"
34
  plt.savefig(spectrogram_file)
35
  plt.close()
36
+
37
  return spectrogram_file
38
 
39
def predict(audio, mic_audio=None):
    """Classify an audio clip by converting it to a mel-spectrogram image.

    The Gradio interface registers two input components (upload + microphone),
    so Gradio calls this function with two positional arguments; the original
    one-parameter signature raised TypeError on every prediction. The upload
    input takes precedence when both are provided.

    Args:
        audio: value from the upload component (file path), or None.
        mic_audio: value from the microphone component, or None.

    Returns:
        dict mapping each class label to its predicted probability.
    """
    source = audio if audio is not None else mic_audio
    spectrogram_file = audio_to_spectrogram(source)
    img = PILImage.create(spectrogram_file)
    # NOTE(review): assumes the model was trained on 512x512 spectrogram
    # images — confirm against the training pipeline.
    img = img.resize((512, 512))
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
45
 
46
# Build and launch the Gradio UI.
# type="filepath" is set on BOTH audio components so the prediction pipeline
# always receives a filesystem path readable by librosa/pydub — the default
# microphone type delivers a (sample_rate, ndarray) tuple, which the
# spectrogram code's librosa.load(...) branch cannot open.
# With two input components, each example row must supply one value per input.
examples = [['example_audio.mp3', None]]
gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
        gr.Audio(sources="microphone", type="filepath", label="Record audio"),
    ],
    outputs=gr.components.Label(num_top_classes=3),
    examples=examples,
).launch()