Update app.py
Browse files
app.py
CHANGED
|
@@ -9,24 +9,17 @@ import tempfile
|
|
| 9 |
learn = load_learner('model.pkl')
|
| 10 |
labels = learn.dls.vocab
|
| 11 |
|
| 12 |
-
def audio_to_spectrogram(
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
else:
|
| 21 |
-
y, sr = librosa.load(audio, sr=None)
|
| 22 |
-
else: # Recorded audio
|
| 23 |
-
y, sr = librosa.load(audio, sr=None)
|
| 24 |
|
| 25 |
-
# Generate mel spectrogram
|
| 26 |
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
|
| 27 |
S_dB = librosa.power_to_db(S, ref=np.max)
|
| 28 |
-
|
| 29 |
-
# Create and save spectrogram image
|
| 30 |
fig, ax = plt.subplots()
|
| 31 |
img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
|
| 32 |
fig.colorbar(img, ax=ax, format='%+2.0f dB')
|
|
@@ -34,23 +27,20 @@ def audio_to_spectrogram(audio):
|
|
| 34 |
spectrogram_file = "spectrogram.png"
|
| 35 |
plt.savefig(spectrogram_file)
|
| 36 |
plt.close()
|
| 37 |
-
|
| 38 |
return spectrogram_file
|
| 39 |
|
| 40 |
def predict(audio):
|
| 41 |
spectrogram_file = audio_to_spectrogram(audio)
|
| 42 |
img = PILImage.create(spectrogram_file)
|
| 43 |
-
img = img.resize((512, 512))
|
| 44 |
pred, pred_idx, probs = learn.predict(img)
|
| 45 |
return {labels[i]: float(probs[i]) for i in range(len(labels))}
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
gr.Interface(
|
| 50 |
fn=predict,
|
| 51 |
-
inputs=
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
outputs=gr.components.Label(num_top_classes=3)
|
| 56 |
-
).launch()
|
|
|
|
| 9 |
# Load the exported fastai learner and the class vocabulary it was trained on.
# NOTE(review): assumes 'model.pkl' sits next to this script — confirm deployment layout.
learn = load_learner('model.pkl')
labels = learn.dls.vocab
|
| 12 |
+
def audio_to_spectrogram(audio_file):
    """Render an audio file as a mel-spectrogram PNG and return the image path.

    Parameters
    ----------
    audio_file : str
        Path to a WAV or MP3 file. MP3 input is transcoded to WAV first
        because librosa's default backend handles WAV most reliably.

    Returns
    -------
    str
        Path of the saved spectrogram image ("spectrogram.png").
    """
    import os  # local import: only needed for temp-file cleanup below

    # Case-insensitive extension check so '.MP3' uploads are handled too.
    if audio_file.lower().endswith('.mp3'):
        # delete=False + explicit unlink instead of a `with` block: on Windows a
        # NamedTemporaryFile cannot be reopened by name (by pydub's export or
        # librosa.load) while the original handle is still open.
        tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
        try:
            tmp.close()
            AudioSegment.from_mp3(audio_file).export(tmp.name, format='wav')
            y, sr = librosa.load(tmp.name, sr=None)
        finally:
            os.unlink(tmp.name)
    else:
        y, sr = librosa.load(audio_file, sr=None)

    # Mel power spectrogram converted to dB — presumably matches the
    # preprocessing the classifier was trained on; verify against training code.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Render the spectrogram (with colorbar) exactly as the model expects.
    fig, ax = plt.subplots()
    img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')

    spectrogram_file = "spectrogram.png"
    plt.savefig(spectrogram_file)
    # Close this specific figure (not just "the current" one) so repeated
    # requests do not leak pyplot figure state.
    plt.close(fig)

    return spectrogram_file
| 31 |
|
| 32 |
def predict(audio):
    """Classify an audio file via its spectrogram image.

    Converts *audio* to a spectrogram PNG, resizes it to the model's input
    size, and returns a mapping of class label -> probability.
    """
    image_path = audio_to_spectrogram(audio)
    image = PILImage.create(image_path)
    image = image.resize((512, 512))
    _, _, probs = learn.predict(image)
    # Pair each vocab label with its probability as a plain float.
    return dict(zip(labels, map(float, probs)))
|
| 38 |
|
| 39 |
+
# Example clip surfaced in the UI so visitors can try the model immediately.
examples = ['example_audio.mp3']

# Build the Gradio app, then launch it as a separate step for readability.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
    outputs=gr.components.Label(num_top_classes=3),
    examples=examples,
)
demo.launch()
|
|
|
|
|
|