mrblackdev committed on
Commit
c1ccd25
·
verified ·
1 Parent(s): 917827b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -47
app.py CHANGED
@@ -10,10 +10,21 @@ A440 = 440.0
10
 
11
  def hz_to_midi(f):
12
  if f is None or np.isnan(f) or f <= 0:
13
- return None
14
  return 69 + 12 * np.log2(f / A440)
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
17
  def round_to_grid(seconds, bpm, division=4):
18
  if bpm <= 0:
19
  return seconds
@@ -25,11 +36,10 @@ def round_to_grid(seconds, bpm, division=4):
25
 
26
  def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
27
  times = np.arange(len(f0)) * hop_length / sr
28
- midi_vals = np.array([hz_to_midi(x) for x in f0])
29
 
30
  if midi_smoothing_window and midi_smoothing_window > 1:
31
- from scipy.ndimage import median_filter
32
- midi_vals = median_filter(midi_vals, size=midi_smoothing_window)
33
 
34
  midi_round = np.round(midi_vals)
35
  midi_round[np.isnan(midi_vals)] = np.nan
@@ -65,25 +75,28 @@ def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smooth
65
  def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
66
  voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
67
  quantize=True, division=4, velocity=80, program=0):
68
- if isinstance(audio, tuple):
69
- sr, y = audio
70
- y = np.array(y, dtype=np.float32)
71
- else:
72
- y, sr = librosa.load(audio, sr=None, mono=True)
73
- if np.max(np.abs(y)) > 0:
74
- y = y / np.max(np.abs(y))
75
-
76
- fmin_hz = librosa.note_to_hz(fmin_note)
77
- fmax_hz = librosa.note_to_hz(fmax_note)
78
-
79
- # Eliminar 'trough_threshold', usar 'thresholds' de pyin en versiones nuevas
80
- f0, voiced_flag, _ = librosa.pyin(
81
- y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length,
82
- hop_length=hop_length, sr=sr)
83
-
84
- f0[~voiced_flag] = np.nan
 
85
 
86
  notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
 
 
87
 
88
  if quantize and bpm > 0:
89
  q_notes = []
@@ -116,6 +129,7 @@ def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_l
116
  return midi_path, summary
117
 
118
 
 
119
  CSS = """
120
  #app_title {font-size: 28px; font-weight: 800}
121
  #app_subtitle {opacity: .8}
@@ -142,7 +156,7 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
142
  with gr.Accordion("Cuantización y salida", open=True):
143
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
144
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
145
- division = gr.Dropdown([2, 4, 8], value=4, label="División por negra", info="2=Corchea, 4=Semicorchea, 8=Fusa")
146
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
147
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
148
 
@@ -153,34 +167,32 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
153
  summary_out = gr.JSON(label="Resumen")
154
  gr.Markdown("""
155
  **Tips**
156
- - Canta una melodía monofónica, sin armonías.
157
- - Ajusta el rango de notas (C2–C7) si cantas muy grave o agudo.
158
- - Usa la cuantización para encajar a tempo; si quieres naturalidad, desactívala.
159
  """)
160
 
161
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
162
- midi_path, summary = audio_to_midi(
163
- audio=audio_path,
164
- fmin_note=fmin_note,
165
- fmax_note=fmax_note,
166
- hop_length=int(hop_length),
167
- frame_length=int(frame_length),
168
- voicing_thres=float(voice_thres),
169
- min_note_ms=int(min_ms),
170
- merge_gap_ms=int(gap_join_ms),
171
- bpm=float(bpm_val),
172
- quantize=bool(do_quantize),
173
- division=int(division_val),
174
- velocity=int(velocity_val),
175
- program=int(program_val),
176
- )
177
- return midi_path, summary
178
-
179
- run_btn.click(
180
- _convert,
181
- inputs=[audio_in, fmin, fmax, hop, frame, voice_th, min_ms, gap_ms, do_quant, bpm, division, velocity, program],
182
- outputs=[midi_out, summary_out]
183
- )
184
 
185
  if __name__ == "__main__":
186
  demo.launch()
 
10
 
11
def hz_to_midi(f):
    """Map a frequency in Hz to a fractional MIDI note number (A440 -> 69).

    None, NaN, and non-positive frequencies yield NaN so that unvoiced
    frames propagate cleanly through numpy array operations downstream.
    """
    if f is not None and not np.isnan(f) and f > 0:
        return 69 + 12 * np.log2(f / A440)
    return np.nan
15
 
16
 
17
def safe_median_filter(data, size=3):
    """Median-filter *data* with window *size*, best-effort.

    The input is promoted to float64 before filtering. If SciPy is missing
    or the filter raises for any reason, the original array is returned
    unchanged (a message is printed, never an exception).
    """
    try:
        from scipy.ndimage import median_filter
        work = data if data.dtype == np.float64 else data.astype(np.float64)
        return median_filter(work, size=size)
    except Exception as e:
        print("Median filter fallback:", e)
        return data
26
+
27
+
28
  def round_to_grid(seconds, bpm, division=4):
29
  if bpm <= 0:
30
  return seconds
 
36
 
37
  def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
38
  times = np.arange(len(f0)) * hop_length / sr
39
+ midi_vals = np.array([hz_to_midi(x) for x in f0], dtype=np.float64)
40
 
41
  if midi_smoothing_window and midi_smoothing_window > 1:
42
+ midi_vals = safe_median_filter(midi_vals, size=midi_smoothing_window)
 
43
 
44
  midi_round = np.round(midi_vals)
45
  midi_round[np.isnan(midi_vals)] = np.nan
 
75
  def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
76
  voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
77
  quantize=True, division=4, velocity=80, program=0):
78
+ try:
79
+ if isinstance(audio, tuple):
80
+ sr, y = audio
81
+ y = np.array(y, dtype=np.float32)
82
+ else:
83
+ y, sr = librosa.load(audio, sr=None, mono=True)
84
+ if np.max(np.abs(y)) > 0:
85
+ y = y / np.max(np.abs(y))
86
+ except Exception as e:
87
+ raise RuntimeError(f"Error al cargar audio: {e}")
88
+
89
+ try:
90
+ fmin_hz = librosa.note_to_hz(fmin_note)
91
+ fmax_hz = librosa.note_to_hz(fmax_note)
92
+ f0, voiced_flag, _ = librosa.pyin(y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length, hop_length=hop_length, sr=sr)
93
+ f0[~voiced_flag] = np.nan
94
+ except Exception as e:
95
+ raise RuntimeError(f"Error al extraer pitch: {e}")
96
 
97
  notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
98
+ if not notes:
99
+ raise RuntimeError("No se detectaron notas. Ajusta parámetros o usa audio más claro.")
100
 
101
  if quantize and bpm > 0:
102
  q_notes = []
 
129
  return midi_path, summary
130
 
131
 
132
+ # Interfaz Gradio
133
  CSS = """
134
  #app_title {font-size: 28px; font-weight: 800}
135
  #app_subtitle {opacity: .8}
 
156
  with gr.Accordion("Cuantización y salida", open=True):
157
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
158
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
159
+ division = gr.Dropdown([2, 4, 8], value=4, label="División por negra")
160
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
161
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
162
 
 
167
  summary_out = gr.JSON(label="Resumen")
168
  gr.Markdown("""
169
  **Tips**
170
+ - Usa melodías monofónicas.
171
+ - Ajusta rango de notas.
172
+ - Si falla, prueba menos smoothing.
173
  """)
174
 
175
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
176
+ try:
177
+ return audio_to_midi(
178
+ audio=audio_path,
179
+ fmin_note=fmin_note,
180
+ fmax_note=fmax_note,
181
+ hop_length=int(hop_length),
182
+ frame_length=int(frame_length),
183
+ voicing_thres=float(voice_thres),
184
+ min_note_ms=int(min_ms),
185
+ merge_gap_ms=int(gap_join_ms),
186
+ bpm=float(bpm_val),
187
+ quantize=bool(do_quantize),
188
+ division=int(division_val),
189
+ velocity=int(velocity_val),
190
+ program=int(program_val),
191
+ )
192
+ except Exception as e:
193
+ raise gr.Error(f"Error: {e}")
194
+
195
+ run_btn.click(_convert, inputs=[audio_in, fmin, fmax, hop, frame, voice_th, min_ms, gap_ms, do_quant, bpm, division, velocity, program], outputs=[midi_out, summary_out])
 
 
196
 
197
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()