lyimo committed on
Commit
52fb7c9
·
verified ·
1 Parent(s): 92182fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import gradio as gr
2
  from fastai.vision.all import *
3
  import librosa
4
  import numpy as np
@@ -9,20 +8,24 @@ import tempfile
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
- def audio_to_spectrogram(audio_file):
13
- if isinstance(audio_file, str):
14
- if audio_file.endswith('.mp3'):
 
15
  with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
16
- audio = AudioSegment.from_mp3(audio_file)
17
  audio.export(temp_wav.name, format='wav')
18
  y, sr = librosa.load(temp_wav.name, sr=None)
19
  else:
20
- y, sr = librosa.load(audio_file, sr=None)
21
- else:
22
- y, sr = librosa.load(audio_file, sr=None)
23
 
 
24
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
25
  S_dB = librosa.power_to_db(S, ref=np.max)
 
 
26
  fig, ax = plt.subplots()
27
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
28
  fig.colorbar(img, ax=ax, format='%+2.0f dB')
@@ -30,22 +33,24 @@ def audio_to_spectrogram(audio_file):
30
  spectrogram_file = "spectrogram.png"
31
  plt.savefig(spectrogram_file)
32
  plt.close()
 
33
  return spectrogram_file
34
 
35
  def predict(audio):
36
  spectrogram_file = audio_to_spectrogram(audio)
37
  img = PILImage.create(spectrogram_file)
38
- img = img.resize((512, 512))
39
  pred, pred_idx, probs = learn.predict(img)
40
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
41
 
42
- examples = ['example_audio.mp3']
43
-
44
  gr.Interface(
45
  fn=predict,
46
  inputs=[
47
  gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
48
  gr.Audio(sources="microphone", label="Record audio")
49
  ],
50
- outputs=gr.components.Label(num_top_classes=3)
 
51
  ).launch()
 
 
import gradio as gr
from fastai.vision.all import *
import librosa
import numpy as np
 
8
  learn = load_learner('model.pkl')
9
  labels = learn.dls.vocab
10
 
11
+ def audio_to_spectrogram(audio):
12
+ # Handle both uploaded files and recorded audio
13
+ if isinstance(audio, str): # Uploaded file
14
+ if audio.endswith('.mp3'):
15
  with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
16
+ audio = AudioSegment.from_mp3(audio)
17
  audio.export(temp_wav.name, format='wav')
18
  y, sr = librosa.load(temp_wav.name, sr=None)
19
  else:
20
+ y, sr = librosa.load(audio, sr=None)
21
+ else: # Recorded audio
22
+ y, sr = librosa.load(audio, sr=None)
23
 
24
+ # Generate mel spectrogram
25
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
26
  S_dB = librosa.power_to_db(S, ref=np.max)
27
+
28
+ # Create and save spectrogram image
29
  fig, ax = plt.subplots()
30
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
31
  fig.colorbar(img, ax=ax, format='%+2.0f dB')
 
33
  spectrogram_file = "spectrogram.png"
34
  plt.savefig(spectrogram_file)
35
  plt.close()
36
+
37
  return spectrogram_file
38
 
39
def predict(audio, mic_audio=None):
    """Classify an audio clip by converting it to a mel-spectrogram image.

    The Gradio interface registers two input components (upload + microphone),
    so Gradio calls this function with two positional arguments; the original
    one-parameter signature raised TypeError on every prediction. The upload
    input takes precedence when both are provided.

    Args:
        audio: value from the upload component (file path), or None.
        mic_audio: value from the microphone component, or None.

    Returns:
        dict mapping each class label to its predicted probability.
    """
    source = audio if audio is not None else mic_audio
    spectrogram_file = audio_to_spectrogram(source)
    img = PILImage.create(spectrogram_file)
    # NOTE(review): assumes the model was trained on 512x512 spectrogram
    # images — confirm against the training pipeline.
    img = img.resize((512, 512))
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
45
 
46
# Build and launch the Gradio UI.
# type="filepath" is set on BOTH audio components so the prediction pipeline
# always receives a filesystem path readable by librosa/pydub — the default
# microphone type delivers a (sample_rate, ndarray) tuple, which the
# spectrogram code's librosa.load(...) branch cannot open.
# With two input components, each example row must supply one value per input.
examples = [['example_audio.mp3', None]]
gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
        gr.Audio(sources="microphone", type="filepath", label="Record audio"),
    ],
    outputs=gr.components.Label(num_top_classes=3),
    examples=examples,
).launch()