mrblackdev committed on
Commit
bf213f8
·
verified ·
1 Parent(s): 55ac48d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -127
app.py CHANGED
@@ -1,29 +1,40 @@
 
 
 
 
1
  import os
2
  import tempfile
 
3
  import numpy as np
4
  import librosa
5
  import pretty_midi
6
  import gradio as gr
 
7
 
 
8
  A440 = 440.0
9
 
10
-
11
  def hz_to_midi(f):
12
- if f is None or np.isnan(f) or f <= 0:
 
 
 
 
 
13
  return np.nan
14
- return 69 + 12 * np.log2(f / A440)
15
-
16
 
17
  def safe_median_filter(data, size=3):
 
18
  try:
19
  from scipy.ndimage import median_filter
20
- if data.dtype != np.float64:
21
- data = data.astype(np.float64)
22
- return median_filter(data, size=size)
 
23
  except Exception as e:
24
- print("Median filter fallback:", e)
25
- return data
26
-
27
 
28
  def round_to_grid(seconds, bpm, division=4):
29
  if bpm <= 0:
@@ -33,166 +44,324 @@ def round_to_grid(seconds, bpm, division=4):
33
  ticks = np.round(seconds / grid)
34
  return ticks * grid
35
 
 
 
 
 
 
 
 
 
 
36
 
37
- def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
38
- times = np.arange(len(f0)) * hop_length / sr
39
- midi_vals = np.array([hz_to_midi(x) for x in f0], dtype=np.float64)
40
-
41
- if midi_smoothing_window and midi_smoothing_window > 1:
42
- midi_vals = safe_median_filter(midi_vals, size=midi_smoothing_window)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- midi_round = np.round(midi_vals)
45
- midi_round[np.isnan(midi_vals)] = np.nan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- notes, i, n = [], 0, len(midi_round)
48
- frame_ms = 1000.0 * hop_length / sr
49
- min_frames = max(1, int(np.ceil(min_note_ms / frame_ms)))
50
- merge_gap_frames = int(np.ceil(merge_gap_ms / frame_ms))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- while i < n:
53
- if np.isnan(midi_round[i]):
54
- i += 1
 
 
 
 
 
55
  continue
56
- note_val, start, j, gap = int(midi_round[i]), i, i + 1, 0
57
- while j < n:
58
- if np.isnan(midi_round[j]):
59
- gap += 1
60
- if gap > merge_gap_frames:
61
- break
 
 
 
 
 
 
 
62
  j += 1
63
- continue
64
- gap = 0
65
- if int(midi_round[j]) != note_val:
66
- break
67
- j += 1
68
- if (j - start) >= min_frames:
69
- t0, t1 = times[start], times[j - 1] + hop_length / sr
70
- notes.append((note_val, t0, t1))
71
- i = j + 1
72
  return notes
73
 
74
-
75
- def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
76
- voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
77
- quantize=True, division=4, velocity=80, program=0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  try:
 
79
  if isinstance(audio, tuple):
80
  sr, y = audio
81
  y = np.array(y, dtype=np.float32)
82
  else:
83
  y, sr = librosa.load(audio, sr=None, mono=True)
 
 
 
84
  if np.max(np.abs(y)) > 0:
85
  y = y / np.max(np.abs(y))
86
- except Exception as e:
87
- raise RuntimeError(f"Error al cargar audio: {e}")
88
 
89
- try:
90
- fmin_hz = librosa.note_to_hz(fmin_note)
91
- fmax_hz = librosa.note_to_hz(fmax_note)
92
- f0, voiced_flag, _ = librosa.pyin(y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length, hop_length=hop_length, sr=sr)
93
- f0[~voiced_flag] = np.nan
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  except Exception as e:
95
- raise RuntimeError(f"Error al extraer pitch: {e}")
96
-
97
- notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
98
- if not notes:
99
- raise RuntimeError("No se detectaron notas. Ajusta parámetros o usa audio más claro.")
100
-
101
- if quantize and bpm > 0:
102
- q_notes = []
103
- for m, t0, t1 in notes:
104
- qt0, qt1 = round_to_grid(t0, bpm, division), round_to_grid(t1, bpm, division)
105
- if qt1 <= qt0:
106
- qt1 = qt0 + (60.0 / bpm) / division
107
- q_notes.append((m, qt0, qt1))
108
- notes = q_notes
109
-
110
- pm = pretty_midi.PrettyMIDI()
111
- instrument = pretty_midi.Instrument(program=program)
112
- for m, t0, t1 in notes:
113
- v = int(np.clip(velocity, 1, 127))
114
- instrument.notes.append(pretty_midi.Note(velocity=v, pitch=int(m), start=float(t0), end=float(t1)))
115
- pm.instruments.append(instrument)
116
-
117
- tmpdir = tempfile.mkdtemp()
118
- midi_path = os.path.join(tmpdir, "output.mid")
119
- pm.write(midi_path)
120
-
121
- summary = {
122
- "duracion_audio_s": round(len(y) / sr, 3),
123
- "notas_detectadas": len(notes),
124
- "rango_midi_min": int(np.min([n[0] for n in notes])) if notes else None,
125
- "rango_midi_max": int(np.max([n[0] for n in notes])) if notes else None,
126
- "bpm": bpm,
127
- "division": division,
128
- }
129
- return midi_path, summary
130
-
131
-
132
- # Interfaz Gradio
133
  CSS = """
134
  #app_title {font-size: 28px; font-weight: 800}
135
  #app_subtitle {opacity: .8}
136
  """
137
 
138
- with gr.Blocks(css=CSS, fill_height=True) as demo:
139
- gr.Markdown("""
140
- <div id='app_title'>🎤 Audio🎹 MIDI (Pitch‑to‑MIDI)</div>
141
- <div id='app_subtitle'>Sube o graba tu voz, detecta notas y exporta un archivo MIDI listo para tu DAW.</div>
142
- """)
143
 
144
  with gr.Row():
145
  with gr.Column(scale=2):
146
- audio_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio de entrada (voz, monofónica)")
147
- with gr.Accordion("Opciones de detección", open=False):
148
- fmin = gr.Dropdown(["C1", "C2", "C3", "C4", "C5"], value="C2", label="Nota mínima")
149
- fmax = gr.Dropdown(["C4", "C5", "C6", "C7"], value="C7", label="Nota máxima")
150
- hop = gr.Slider(128, 1024, value=256, step=64, label="Hop length (muestras)")
151
- frame = gr.Slider(1024, 4096, value=2048, step=256, label="Frame length (muestras)")
152
- voice_th = gr.Slider(0.01, 0.5, value=0.1, step=0.01, label="Umbral de voicing")
153
- min_ms = gr.Slider(10, 200, value=80, step=5, label="Duración mínima de nota (ms)")
154
- gap_ms = gr.Slider(0, 200, value=60, step=5, label="Unir huecos (ms)")
155
-
156
- with gr.Accordion("Cuantización y salida", open=True):
157
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
158
- bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
159
- division = gr.Dropdown([2, 4, 8], value=4, label="División por negra")
160
- velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
161
- program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
162
 
163
  run_btn = gr.Button("🔄 Convertir a MIDI", variant="primary")
164
 
165
  with gr.Column(scale=1):
166
  midi_out = gr.File(label="Archivo MIDI generado")
167
  summary_out = gr.JSON(label="Resumen")
168
- gr.Markdown("""
169
- **Tips**
170
- - Usa melodías monofónicas.
171
- - Ajusta rango de notas.
172
- - Si falla, prueba menos smoothing.
173
- """)
174
-
175
- def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
 
176
  try:
177
- return audio_to_midi(
178
  audio=audio_path,
179
- fmin_note=fmin_note,
180
- fmax_note=fmax_note,
181
  hop_length=int(hop_length),
182
  frame_length=int(frame_length),
183
- voicing_thres=float(voice_thres),
184
- min_note_ms=int(min_ms),
185
- merge_gap_ms=int(gap_join_ms),
186
  bpm=float(bpm_val),
187
  quantize=bool(do_quantize),
188
  division=int(division_val),
189
  velocity=int(velocity_val),
190
- program=int(program_val),
 
 
191
  )
 
192
  except Exception as e:
193
- raise gr.Error(f"Error: {e}")
194
 
195
- run_btn.click(_convert, inputs=[audio_in, fmin, fmax, hop, frame, voice_th, min_ms, gap_ms, do_quant, bpm, division, velocity, program], outputs=[midi_out, summary_out])
 
 
 
 
196
 
197
  if __name__ == "__main__":
198
- demo.launch()
 
1
+ # app.py - Audio -> Multi-track MIDI (HPSS + Multi-pitch + Clustering)
2
+ # Designed for Hugging Face Spaces (Gradio).
3
+ # Author: AlexGPT (responding to your request)
4
+
5
  import os
6
  import tempfile
7
+ import traceback
8
  import numpy as np
9
  import librosa
10
  import pretty_midi
11
  import gradio as gr
12
+ from sklearn.cluster import AgglomerativeClustering
13
 
14
+ # ---------- Config ----------
15
A440 = 440.0  # reference frequency in Hz that maps to MIDI note 69


# ---------- Utilities ----------
def hz_to_midi(f):
    """Convert a frequency in Hz to a fractional MIDI note number.

    Args:
        f: Scalar frequency in Hz.

    Returns:
        ``69 + 12 * log2(f / A440)``, or ``np.nan`` when ``f`` is ``None``,
        NaN, infinite, non-positive, or not a numeric scalar.
    """
    try:
        # Infinite input is rejected as well: it would yield an infinite MIDI
        # number that cannot be converted to an integer pitch later on.
        if f is None or not np.isfinite(f) or f <= 0:
            return np.nan
    except (TypeError, ValueError):
        # TypeError: non-numeric input. ValueError: array-like input whose
        # truth value is ambiguous. Neither maps to a single MIDI number.
        return np.nan
    return 69 + 12 * np.log2(f / A440)
 
 
26
 
27
def safe_median_filter(data, size=3):
    """Apply a median filter to ``data`` after casting it to float64.

    Args:
        data: Array-like of numeric samples.
        size: Window size forwarded to ``scipy.ndimage.median_filter``.

    Returns:
        The filtered float64 array. If importing or running the filter fails,
        the error is printed and ``data`` is returned unfiltered as float64.
    """
    try:
        from scipy import ndimage

        values = np.asarray(data)
        as_float = values if values.dtype == np.float64 else values.astype(np.float64)
        return ndimage.median_filter(as_float, size=size)
    except Exception as err:
        # Best-effort smoothing: degrade to the identity instead of failing.
        print("median_filter fallback:", err)
        return np.asarray(data, dtype=np.float64)
 
38
 
39
  def round_to_grid(seconds, bpm, division=4):
40
  if bpm <= 0:
 
44
  ticks = np.round(seconds / grid)
45
  return ticks * grid
46
 
47
+ # ---------- Signal separation & percussive detection ----------
48
def separate_harmonic_percussive(y):
    """Split a signal into harmonic and percussive parts with librosa HPSS.

    Args:
        y: Audio samples passed to ``librosa.effects.hpss``.

    Returns:
        ``(harmonic, percussive)``. If separation fails for any reason, the
        error is printed and ``(y, zeros_like(y))`` is returned instead.
    """
    try:
        harmonic, percussive = librosa.effects.hpss(y)
    except Exception as err:
        # Best-effort: treat the whole signal as harmonic when HPSS fails.
        print("HPSS fallback:", err)
        return y, np.zeros_like(y)
    return harmonic, percussive
56
 
57
def detect_percussive_hits(y_p, sr, backtrack=False):
    """Detect percussive onsets and map each one to a simple drum MIDI note.

    Each onset is classified by the mean spectral centroid of the frames
    around it (2 frames before to 3 frames after): a low centroid becomes a
    kick, a medium one a snare, a high one a closed hi-hat.

    Args:
        y_p: Percussive component of the signal.
        sr: Sample rate in Hz.
        backtrack: Forwarded to ``librosa.onset.onset_detect``.

    Returns:
        List of ``(time_seconds, midi_note)`` tuples; an empty list when no
        onset is found or when detection fails (the error is printed).
    """
    kick_note, snare_note, hihat_note = 36, 38, 42
    kick_max_hz, snare_max_hz = 1500, 3500  # centroid thresholds in Hz
    try:
        onset_env = librosa.onset.onset_strength(y=y_p, sr=sr)
        onsets = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, backtrack=backtrack)
        if len(onsets) == 0:
            return []

        S = np.abs(librosa.stft(y_p, n_fft=2048))
        last_frame = S.shape[1] - 1
        # The centroid is computed independently per frame, so compute it once
        # for the whole spectrogram instead of once per onset window.
        try:
            centroids = librosa.feature.spectral_centroid(S=S, sr=sr)[0]
        except Exception:
            centroids = None  # classify every hit as a kick, as with centroid 0.0

        hits = []
        for fr in onsets:
            t = float(librosa.frames_to_time(fr, sr=sr))
            if centroids is None:
                centroid = 0.0
            else:
                # Clamp so the window is never empty, even for an onset frame
                # past the last STFT frame.
                center = min(int(fr), last_frame)
                start = max(0, center - 2)
                end = min(center + 3, last_frame)
                centroid = float(np.mean(centroids[start:end + 1]))

            if centroid < kick_max_hz:
                midi_note = kick_note
            elif centroid < snare_max_hz:
                midi_note = snare_note
            else:
                midi_note = hihat_note
            hits.append((t, midi_note))
        return hits
    except Exception as e:
        # Percussion is optional in the pipeline: report and carry on without it.
        print("Percussive detection error:", e)
        return []
92
 
93
+ # ---------- Multi-pitch extraction ----------
94
def extract_multi_pitches(y_h, sr, hop_length=256, top_n=3, min_confidence=0.08):
    """Extract up to ``top_n`` pitch candidates per frame with ``librosa.piptrack``.

    Args:
        y_h: Harmonic component of the signal.
        sr: Sample rate in Hz.
        hop_length: STFT hop in samples.
        top_n: Maximum number of candidates kept per frame; values below 1
            yield no candidates.
        min_confidence: A candidate is kept only if its magnitude is at least
            this fraction of the strongest magnitude in its frame.

    Returns:
        List of ``(time_seconds, freq_hz)`` with strictly positive, non-NaN
        frequencies; an empty list on failure (the error is printed).
    """
    try:
        top_n = int(top_n)
        if top_n < 1:
            # Guard: ``[-0:]`` would select every bin instead of none.
            return []

        S = np.abs(librosa.stft(y_h, n_fft=2048, hop_length=hop_length))
        pitches, mags = librosa.piptrack(S=S, sr=sr, hop_length=hop_length)
        times = librosa.frames_to_time(np.arange(pitches.shape[1]), sr=sr, hop_length=hop_length)

        candidates = []
        for i, t in enumerate(times):
            col_m = mags[:, i]
            peak = np.max(col_m)
            if peak <= 0:
                continue
            col_p = pitches[:, i]
            strongest = np.argsort(col_m)[-top_n:]
            strong_m = col_m[strongest]
            strong_p = col_p[strongest]
            # ``strong_p > 0`` also rejects NaN pitches (comparison is False).
            keep = (strong_m > 0) & (strong_m >= min_confidence * peak) & (strong_p > 0)
            candidates.extend((t, float(p)) for p in strong_p[keep])
        return candidates
    except Exception as e:
        print("extract_multi_pitches error:", e)
        return []
121
 
122
+ # ---------- Clustering / track formation ----------
123
def cluster_pitch_trajectories(candidates, max_voices=4):
    """Cluster ``(time, pitch)`` candidates into per-voice trajectories.

    Points are placed in a (time, MIDI pitch) plane, each axis is min-max
    normalized to [0, 1], and agglomerative clustering splits them into at
    most ``max_voices`` groups.

    Args:
        candidates: Iterable of ``(time_seconds, freq_hz)`` pairs.
        max_voices: Upper bound on the number of clusters.

    Returns:
        List of tracks; each track is a list of ``(time_seconds, freq_hz)``
        sorted by time. Empty list for empty input or on failure (the error
        is printed).
    """
    if not candidates:
        return []
    try:
        X = np.array([[t, hz_to_midi(h)] for (t, h) in candidates], dtype=np.float64)
        hz = np.array([h for (_, h) in candidates], dtype=np.float64)

        # Normalize columns. ``np.ptp`` is used because the ``ndarray.ptp``
        # method no longer exists in NumPy 2.x.
        Xn = X.copy()
        for col in range(Xn.shape[1]):
            span = np.ptp(Xn[:, col])
            if span > 1e-9:
                Xn[:, col] = (Xn[:, col] - Xn[:, col].min()) / span
            else:
                Xn[:, col] = 0.0

        # Never ask for more clusters than there are distinct points.
        n_distinct = int(np.unique(np.round(Xn, 3), axis=0).shape[0])
        n_clusters = min(max_voices, max(1, n_distinct))
        if n_clusters <= 1:
            labels = np.zeros(len(Xn), dtype=int)
        else:
            labels = AgglomerativeClustering(n_clusters=n_clusters).fit(Xn).labels_

        tracks = []
        for lab in range(int(labels.max()) + 1):
            idxs = np.where(labels == lab)[0]
            if len(idxs) == 0:
                continue
            # Stable sort keeps the input order of points sharing a timestamp.
            ordered = idxs[np.argsort(X[idxs, 0], kind="stable")]
            # Emit the original frequencies; converting MIDI back to Hz would
            # only add rounding error.
            tracks.append([(float(X[i, 0]), float(hz[i])) for i in ordered])
        return tracks
    except Exception as e:
        print("cluster_pitch_trajectories error:", e)
        return []
162
 
163
def trajectories_to_notes(tracks, hop_length, sr, min_note_ms=80):
    """Turn pitch trajectories into ``(midi_int, start_s, end_s)`` notes.

    Each trajectory's frequencies are median-smoothed, converted to rounded
    MIDI numbers, and split into runs of consecutive equal values. A run
    becomes a note when it contains at least the number of points that
    ``min_note_ms`` corresponds to at one point per hop.

    Args:
        tracks: Iterable of trajectories, each a list of ``(time, freq_hz)``.
        hop_length: Analysis hop in samples.
        sr: Sample rate in Hz.
        min_note_ms: Minimum note length in milliseconds.

    Returns:
        List of ``(midi_int, start_seconds, end_seconds)`` over all tracks.
    """
    collected = []
    for trajectory in tracks:
        if not trajectory:
            continue
        stamps = np.array([when for when, _ in trajectory])
        hz = np.array([freq for _, freq in trajectory])
        smoothed = safe_median_filter(hz.astype(np.float64), size=3)
        pitches = np.round([hz_to_midi(value) for value in smoothed])

        frame_ms = 1000.0 * hop_length / sr
        min_frames = max(1, int(np.ceil(min_note_ms / frame_ms)))

        # A new run starts wherever a value differs from its predecessor.
        # NaN never equals NaN, so every NaN forms its own run.
        # NOTE(review): run length is counted in points, which equals frames
        # only if the trajectory has one point per frame — confirm upstream.
        cuts = np.flatnonzero(pitches[1:] != pitches[:-1]) + 1
        run_starts = np.concatenate(([0], cuts))
        run_stops = np.concatenate((cuts, [len(pitches)]))

        for first, stop in zip(run_starts, run_stops):
            if (stop - first) >= min_frames and not np.isnan(pitches[first]):
                onset = float(stamps[first])
                # The note ends one hop after its last point.
                offset = float(stamps[stop - 1] + hop_length / sr)
                collected.append((int(pitches[first]), onset, offset))
    return collected
192
 
193
+ # ---------- Main multi-instrument conversion ----------
194
def audio_to_midi_multi(
    audio,
    hop_length=256,
    frame_length=2048,
    max_voices=3,
    percussive=True,
    bpm=120,
    quantize=True,
    division=4,
    velocity=100,
    program_map=None,
    top_n=4,
    min_confidence=0.10,
    min_note_ms=80,
):
    """Convert an audio recording into a multi-track MIDI file.

    Pipeline: load and peak-normalize the audio, split it with HPSS, turn
    percussive onsets into a drum track, extract multi-pitch candidates from
    the harmonic part, cluster them into voices, convert the voices to notes,
    and distribute the notes over up to ``max_voices`` instruments by pitch
    quantile. Note times are optionally snapped to a tempo grid.

    Args:
        audio: Path accepted by ``librosa.load``, or a ``(sr, samples)`` tuple.
        hop_length: Analysis hop in samples.
        frame_length: Accepted for interface compatibility; currently unused.
        max_voices: Maximum number of clusters and melodic instruments.
        percussive: Whether to add a drum track from percussive onsets.
        bpm: Tempo used for quantization.
        quantize: Snap note starts and ends to the grid when ``bpm > 0``.
        division: Grid subdivisions per beat.
        velocity: MIDI velocity for every note; clipped to 1..127.
        program_map: Optional sequence of MIDI programs indexed by pitch
            group; missing entries default to program 0.
        top_n: Pitch candidates kept per frame.
        min_confidence: Relative magnitude threshold for candidates.
        min_note_ms: Minimum note length in milliseconds.

    Returns:
        ``(midi_path, summary)``: the path of the written ``.mid`` file (in a
        fresh temporary directory) and a dict of summary statistics.

    Raises:
        ValueError: If ``audio`` is ``None`` or contains no samples.
        Exception: Any other pipeline error is re-raised after its traceback
            is printed.
    """
    if audio is None:
        # Fail with a clear message instead of an obscure loader error.
        raise ValueError("No audio provided")
    try:
        # Load audio
        # NOTE(review): tuple input is used as given; multi-channel samples
        # are not downmixed here — confirm callers pass mono data.
        if isinstance(audio, tuple):
            sr, y = audio
            y = np.array(y, dtype=np.float32)
        else:
            y, sr = librosa.load(audio, sr=None, mono=True)
        if y.size == 0:
            raise ValueError("Empty audio")

        # Peak-normalize (a silent signal is left untouched).
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak

        y_h, y_p = separate_harmonic_percussive(y)
        pm = pretty_midi.PrettyMIDI()
        # MIDI velocities outside 1..127 are invalid.
        note_velocity = int(np.clip(velocity, 1, 127))

        # Percussion track
        if percussive:
            hits = detect_percussive_hits(y_p, sr)
            if hits:
                drum_inst = pretty_midi.Instrument(program=0, is_drum=True)
                # Drum hits get a fixed, tiny duration.
                drum_inst.notes.extend(
                    pretty_midi.Note(velocity=note_velocity, pitch=int(midi_note),
                                     start=float(t), end=float(t + 0.05))
                    for t, midi_note in hits
                )
                pm.instruments.append(drum_inst)

        # Harmonic part: multi-pitch extraction -> voices -> notes
        candidates = extract_multi_pitches(y_h, sr, hop_length=hop_length, top_n=top_n,
                                           min_confidence=min_confidence)
        tracks = cluster_pitch_trajectories(candidates, max_voices=max_voices)
        notes = trajectories_to_notes(tracks, hop_length=hop_length, sr=sr, min_note_ms=min_note_ms)

        # Split notes by pitch quantile into up to max_voices instruments.
        if notes:
            midi_vals = np.array([n[0] for n in notes])
            groups = max(1, int(min(max_voices, len(np.unique(midi_vals)))))
            edges = np.quantile(midi_vals, np.linspace(0, 1, groups + 1))
            # Half-open bins: a pitch equal to an interior edge goes to the
            # upper group only, so no note lands in two instruments.
            group_of = np.searchsorted(edges[1:-1], midi_vals, side="right")
            for g in range(groups):
                members = [note for note, gi in zip(notes, group_of) if gi == g]
                if not members:
                    continue  # only keep instruments that have notes
                program = program_map[g] if (program_map and g < len(program_map)) else 0
                inst = pretty_midi.Instrument(program=int(program))
                inst.notes.extend(
                    pretty_midi.Note(velocity=note_velocity, pitch=int(m),
                                     start=float(t0), end=float(t1))
                    for m, t0, t1 in members
                )
                pm.instruments.append(inst)

        # Quantize to grid if requested
        if quantize and bpm > 0:
            grid_step = (60.0 / bpm) / division
            for instr in pm.instruments:
                for note in instr.notes:
                    note.start = float(round_to_grid(note.start, bpm, division))
                    note.end = float(round_to_grid(note.end, bpm, division))
                    if note.end <= note.start:
                        # Keep notes that collapsed onto one grid point audible.
                        note.end = note.start + grid_step

        # Save MIDI. The directory is not removed here because the returned
        # path must outlive this call.
        tmpdir = tempfile.mkdtemp()
        midi_path = os.path.join(tmpdir, "multi_output.mid")
        pm.write(midi_path)

        summary = {
            "duration_s": round(len(y) / sr, 3),
            "instruments": len(pm.instruments),
            "notes_total": sum(len(i.notes) for i in pm.instruments),
            "bpm": bpm,
            "voices_requested": max_voices,
        }
        return midi_path, summary

    except Exception:
        traceback.print_exc()
        raise
297
+
298
+ # ---------- Gradio UI ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
CSS = """
#app_title {font-size: 28px; font-weight: 800}
#app_subtitle {opacity: .8}
"""

# Gradio layout: input and settings on the left, results and hints on the right.
with gr.Blocks(css=CSS, title="Audio → Multi-MIDI (AlexGPT)") as demo:
    gr.Markdown(
        "<div id='app_title'>🎤 Audio → 🎹 MIDI (Polyphonic & Multi-instrument)</div>"
        "<div id='app_subtitle'>HPSS + Multi-pitch + Clustering multi-track MIDI</div>"
    )

    with gr.Row():
        with gr.Column(scale=2):
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Audio de entrada (mono/mix)",
            )
            with gr.Accordion("Extracción / Separación", open=False):
                hop = gr.Slider(128, 1024, value=256, step=64, label="Hop length (samples)")
                frame = gr.Slider(1024, 4096, value=2048, step=256, label="Frame length (samples)")
                max_voices = gr.Slider(1, 6, value=3, step=1, label="Máx voces (clusters)")
                percussive = gr.Checkbox(value=True, label="Detectar percusión (HPSS)")
                topn = gr.Slider(1, 8, value=4, step=1, label="Picos por frame (top N)")
                min_conf = gr.Slider(0.01, 0.5, value=0.1, step=0.01, label="Umbral relativo de confianza")
                min_note_ms = gr.Slider(10, 500, value=80, step=10, label="Duración mínima nota (ms)")

            with gr.Accordion("Salida MIDI", open=True):
                do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
                bpm = gr.Slider(40, 220, value=120, step=1, label="BPM")
                division = gr.Dropdown(
                    [1, 2, 4, 8, 16],
                    value=4,
                    label="División por negra (1=negra, 4=semicorchea)",
                )
                velocity = gr.Slider(1, 127, value=100, step=1, label="Velocidad (1-127)")
                # program_map is not exposed in the UI, to keep it simple.

            run_btn = gr.Button("🔄 Convertir a MIDI", variant="primary")

        with gr.Column(scale=1):
            midi_out = gr.File(label="Archivo MIDI generado")
            summary_out = gr.JSON(label="Resumen")
            gr.Markdown(
                "**Sugerencias**\n\n"
                "- Este método es heurístico: los mejores resultados salen de mezclas con instrumentos claros y poca reverb.\n"
                "- Para separar pistas reales (vocal, synth, bass) usa modelos de source separation (Demucs/Spleeter) antes del análisis.\n"
                "- Ajusta `Máx voces` al número aproximado de instrumentos melódicos.\n"
            )

    def _convert(audio_path, hop_length, frame_length, max_voices_val, percussive_val, topn_val,
                 do_quantize, bpm_val, division_val, velocity_val, min_conf_val, min_note_ms_val):
        """Coerce the widget values and run the conversion.

        Returns ``(midi_path, summary)`` on success. On any error the file
        output is cleared and the summary carries the message under "error".
        """
        try:
            result_path, result_summary = audio_to_midi_multi(
                audio=audio_path,
                hop_length=int(hop_length),
                frame_length=int(frame_length),
                max_voices=int(max_voices_val),
                percussive=bool(percussive_val),
                bpm=float(bpm_val),
                quantize=bool(do_quantize),
                division=int(division_val),
                velocity=int(velocity_val),
                top_n=int(topn_val),
                min_confidence=float(min_conf_val),
                min_note_ms=int(min_note_ms_val),
            )
        except Exception as err:
            return gr.update(value=None), {"error": str(err)}
        else:
            return result_path, result_summary

    # The input order must match the positional parameters of _convert.
    run_btn.click(
        _convert,
        inputs=[
            audio_in, hop, frame, max_voices, percussive, topn,
            do_quant, bpm, division, velocity, min_conf, min_note_ms,
        ],
        outputs=[midi_out, summary_out],
    )

if __name__ == "__main__":
    demo.launch()