mrblackdev committed on
Commit
c1ccd25
·
verified ·
1 Parent(s): 917827b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -47
app.py CHANGED
@@ -10,10 +10,21 @@ A440 = 440.0
10
 
11
  def hz_to_midi(f):
12
  if f is None or np.isnan(f) or f <= 0:
13
- return None
14
  return 69 + 12 * np.log2(f / A440)
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
17
  def round_to_grid(seconds, bpm, division=4):
18
  if bpm <= 0:
19
  return seconds
@@ -25,11 +36,10 @@ def round_to_grid(seconds, bpm, division=4):
25
 
26
  def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
27
  times = np.arange(len(f0)) * hop_length / sr
28
- midi_vals = np.array([hz_to_midi(x) for x in f0])
29
 
30
  if midi_smoothing_window and midi_smoothing_window > 1:
31
- from scipy.ndimage import median_filter
32
- midi_vals = median_filter(midi_vals, size=midi_smoothing_window)
33
 
34
  midi_round = np.round(midi_vals)
35
  midi_round[np.isnan(midi_vals)] = np.nan
@@ -65,25 +75,28 @@ def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smooth
65
  def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
66
  voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
67
  quantize=True, division=4, velocity=80, program=0):
68
- if isinstance(audio, tuple):
69
- sr, y = audio
70
- y = np.array(y, dtype=np.float32)
71
- else:
72
- y, sr = librosa.load(audio, sr=None, mono=True)
73
- if np.max(np.abs(y)) > 0:
74
- y = y / np.max(np.abs(y))
75
-
76
- fmin_hz = librosa.note_to_hz(fmin_note)
77
- fmax_hz = librosa.note_to_hz(fmax_note)
78
-
79
- # Eliminar 'trough_threshold', usar 'thresholds' de pyin en versiones nuevas
80
- f0, voiced_flag, _ = librosa.pyin(
81
- y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length,
82
- hop_length=hop_length, sr=sr)
83
-
84
- f0[~voiced_flag] = np.nan
 
85
 
86
  notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
 
 
87
 
88
  if quantize and bpm > 0:
89
  q_notes = []
@@ -116,6 +129,7 @@ def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_l
116
  return midi_path, summary
117
 
118
 
 
119
  CSS = """
120
  #app_title {font-size: 28px; font-weight: 800}
121
  #app_subtitle {opacity: .8}
@@ -142,7 +156,7 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
142
  with gr.Accordion("Cuantización y salida", open=True):
143
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
144
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
145
- division = gr.Dropdown([2, 4, 8], value=4, label="División por negra", info="2=Corchea, 4=Semicorchea, 8=Fusa")
146
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
147
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
148
 
@@ -153,34 +167,32 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
153
  summary_out = gr.JSON(label="Resumen")
154
  gr.Markdown("""
155
  **Tips**
156
- - Canta una melodía monofónica, sin armonías.
157
- - Ajusta el rango de notas (C2–C7) si cantas muy grave o agudo.
158
- - Usa la cuantización para encajar a tempo; si quieres naturalidad, desactívala.
159
  """)
160
 
161
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
162
- midi_path, summary = audio_to_midi(
163
- audio=audio_path,
164
- fmin_note=fmin_note,
165
- fmax_note=fmax_note,
166
- hop_length=int(hop_length),
167
- frame_length=int(frame_length),
168
- voicing_thres=float(voice_thres),
169
- min_note_ms=int(min_ms),
170
- merge_gap_ms=int(gap_join_ms),
171
- bpm=float(bpm_val),
172
- quantize=bool(do_quantize),
173
- division=int(division_val),
174
- velocity=int(velocity_val),
175
- program=int(program_val),
176
- )
177
- return midi_path, summary
178
-
179
- run_btn.click(
180
- _convert,
181
- inputs=[audio_in, fmin, fmax, hop, frame, voice_th, min_ms, gap_ms, do_quant, bpm, division, velocity, program],
182
- outputs=[midi_out, summary_out]
183
- )
184
 
185
  if __name__ == "__main__":
186
  demo.launch()
 
10
 
11
def hz_to_midi(f):
    """Map a frequency in Hz to a fractional MIDI note number (A440 -> 69).

    None, NaN, and non-positive frequencies yield NaN so that unvoiced
    frames propagate cleanly through numpy array operations downstream.
    """
    if f is not None and not np.isnan(f) and f > 0:
        return 69 + 12 * np.log2(f / A440)
    return np.nan
15
 
16
 
17
def safe_median_filter(data, size=3):
    """Median-filter *data* with window *size*, best-effort.

    The input is promoted to float64 before filtering. If SciPy is missing
    or the filter raises for any reason, the original array is returned
    unchanged (a message is printed, never an exception).
    """
    try:
        from scipy.ndimage import median_filter
        work = data if data.dtype == np.float64 else data.astype(np.float64)
        return median_filter(work, size=size)
    except Exception as e:
        print("Median filter fallback:", e)
        return data
26
+
27
+
28
  def round_to_grid(seconds, bpm, division=4):
29
  if bpm <= 0:
30
  return seconds
 
36
 
37
  def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
38
  times = np.arange(len(f0)) * hop_length / sr
39
+ midi_vals = np.array([hz_to_midi(x) for x in f0], dtype=np.float64)
40
 
41
  if midi_smoothing_window and midi_smoothing_window > 1:
42
+ midi_vals = safe_median_filter(midi_vals, size=midi_smoothing_window)
 
43
 
44
  midi_round = np.round(midi_vals)
45
  midi_round[np.isnan(midi_vals)] = np.nan
 
75
  def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
76
  voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
77
  quantize=True, division=4, velocity=80, program=0):
78
+ try:
79
+ if isinstance(audio, tuple):
80
+ sr, y = audio
81
+ y = np.array(y, dtype=np.float32)
82
+ else:
83
+ y, sr = librosa.load(audio, sr=None, mono=True)
84
+ if np.max(np.abs(y)) > 0:
85
+ y = y / np.max(np.abs(y))
86
+ except Exception as e:
87
+ raise RuntimeError(f"Error al cargar audio: {e}")
88
+
89
+ try:
90
+ fmin_hz = librosa.note_to_hz(fmin_note)
91
+ fmax_hz = librosa.note_to_hz(fmax_note)
92
+ f0, voiced_flag, _ = librosa.pyin(y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length, hop_length=hop_length, sr=sr)
93
+ f0[~voiced_flag] = np.nan
94
+ except Exception as e:
95
+ raise RuntimeError(f"Error al extraer pitch: {e}")
96
 
97
  notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
98
+ if not notes:
99
+ raise RuntimeError("No se detectaron notas. Ajusta parámetros o usa audio más claro.")
100
 
101
  if quantize and bpm > 0:
102
  q_notes = []
 
129
  return midi_path, summary
130
 
131
 
132
+ # Interfaz Gradio
133
  CSS = """
134
  #app_title {font-size: 28px; font-weight: 800}
135
  #app_subtitle {opacity: .8}
 
156
  with gr.Accordion("Cuantización y salida", open=True):
157
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
158
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
159
+ division = gr.Dropdown([2, 4, 8], value=4, label="División por negra")
160
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
161
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
162
 
 
167
  summary_out = gr.JSON(label="Resumen")
168
  gr.Markdown("""
169
  **Tips**
170
+ - Usa melodías monofónicas.
171
+ - Ajusta rango de notas.
172
+ - Si falla, prueba menos smoothing.
173
  """)
174
 
175
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
176
+ try:
177
+ return audio_to_midi(
178
+ audio=audio_path,
179
+ fmin_note=fmin_note,
180
+ fmax_note=fmax_note,
181
+ hop_length=int(hop_length),
182
+ frame_length=int(frame_length),
183
+ voicing_thres=float(voice_thres),
184
+ min_note_ms=int(min_ms),
185
+ merge_gap_ms=int(gap_join_ms),
186
+ bpm=float(bpm_val),
187
+ quantize=bool(do_quantize),
188
+ division=int(division_val),
189
+ velocity=int(velocity_val),
190
+ program=int(program_val),
191
+ )
192
+ except Exception as e:
193
+ raise gr.Error(f"Error: {e}")
194
+
195
+ run_btn.click(_convert, inputs=[audio_in, fmin, fmax, hop, frame, voice_th, min_ms, gap_ms, do_quant, bpm, division, velocity, program], outputs=[midi_out, summary_out])
 
 
196
 
197
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()