mrblackdev committed on
Commit
7d16dd5
·
verified ·
1 Parent(s): 88ac55f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -97
app.py CHANGED
@@ -5,9 +5,6 @@ import librosa
5
  import pretty_midi
6
  import gradio as gr
7
 
8
- # =====================
9
- # Utilidades
10
- # =====================
11
  A440 = 440.0
12
 
13
 
@@ -17,12 +14,7 @@ def hz_to_midi(f):
17
  return 69 + 12 * np.log2(f / A440)
18
 
19
 
20
- def midi_to_hz(m):
21
- return A440 * (2 ** ((m - 69) / 12))
22
-
23
-
24
  def round_to_grid(seconds, bpm, division=4):
25
- """Cuantiza tiempo en segundos a la rejilla (division por negra, p.ej. 4=semicorchea)."""
26
  if bpm <= 0:
27
  return seconds
28
  beat = 60.0 / bpm
@@ -31,34 +23,18 @@ def round_to_grid(seconds, bpm, division=4):
31
  return ticks * grid
32
 
33
 
34
- def group_notes(f0, sr, hop_length,
35
- min_note_ms=80,
36
- merge_gap_ms=60,
37
- midi_smoothing_window=3):
38
- """
39
- Agrupa frames con el mismo número MIDI (tras redondeo) en notas con inicio/fin.
40
- - f0: vector de frecuencias (Hz, NaN para no sonoro)
41
- - Devuelve lista de (midi_note, t_start, t_end)
42
- """
43
  times = np.arange(len(f0)) * hop_length / sr
44
-
45
- # Convertir a MIDI y enmascarar no sonoros
46
  midi_vals = np.array([hz_to_midi(x) for x in f0])
47
 
48
- # Suavizado mediano para reducir saltos espurios
49
  if midi_smoothing_window and midi_smoothing_window > 1:
50
  from scipy.ndimage import median_filter
51
  midi_vals = median_filter(midi_vals, size=midi_smoothing_window)
52
 
53
- # Redondeo al entero más cercano (clase de nota)
54
  midi_round = np.round(midi_vals)
55
-
56
- # No sonoros -> NaN
57
  midi_round[np.isnan(midi_vals)] = np.nan
58
 
59
- notes = []
60
- i = 0
61
- n = len(midi_round)
62
  frame_ms = 1000.0 * hop_length / sr
63
  min_frames = max(1, int(np.ceil(min_note_ms / frame_ms)))
64
  merge_gap_frames = int(np.ceil(merge_gap_ms / frame_ms))
@@ -67,11 +43,7 @@ def group_notes(f0, sr, hop_length,
67
  if np.isnan(midi_round[i]):
68
  i += 1
69
  continue
70
- note_val = int(midi_round[i])
71
- start = i
72
- j = i + 1
73
- # extender mientras siga misma nota (permitimos pequeños NaN huecos cortos)
74
- gap = 0
75
  while j < n:
76
  if np.isnan(midi_round[j]):
77
  gap += 1
@@ -83,115 +55,66 @@ def group_notes(f0, sr, hop_length,
83
  if int(midi_round[j]) != note_val:
84
  break
85
  j += 1
86
- # Validar duración mínima
87
  if (j - start) >= min_frames:
88
- t0 = times[start]
89
- t1 = times[j - 1] + hop_length / sr
90
  notes.append((note_val, t0, t1))
91
  i = j + 1
92
  return notes
93
 
94
 
95
- def audio_to_midi(
96
- audio,
97
- fmin_note='C2',
98
- fmax_note='C7',
99
- hop_length=256,
100
- frame_length=2048,
101
- voicing_thres=0.1,
102
- min_note_ms=80,
103
- merge_gap_ms=60,
104
- bpm=100,
105
- quantize=True,
106
- division=4,
107
- velocity=80,
108
- program=0,
109
- ):
110
- """
111
- Convierte audio (ruta o ndarray) a un archivo MIDI temporal y retorna ruta + resumen.
112
- """
113
- # Cargar audio
114
  if isinstance(audio, tuple):
115
- # gradio mic: (sr, data)
116
  sr, y = audio
117
  y = np.array(y, dtype=np.float32)
118
  else:
119
- # gradio file: filepath str
120
  y, sr = librosa.load(audio, sr=None, mono=True)
121
-
122
- # Normalizar
123
  if np.max(np.abs(y)) > 0:
124
  y = y / np.max(np.abs(y))
125
 
126
- # Pyin para f0
127
  fmin_hz = librosa.note_to_hz(fmin_note)
128
  fmax_hz = librosa.note_to_hz(fmax_note)
129
 
130
  f0, voiced_flag, _ = librosa.pyin(
131
- y,
132
- fmin=fmin_hz,
133
- fmax=fmax_hz,
134
- frame_length=frame_length,
135
- hop_length=hop_length,
136
- center=True,
137
- sr=sr,
138
- trough_threshold=voicing_thres,
139
- )
140
 
141
- # Filtrar frames no sonoros
142
  f0[~voiced_flag] = np.nan
143
 
144
- # Agrupar en notas (midi, t0, t1)
145
- notes = group_notes(
146
- f0=f0,
147
- sr=sr,
148
- hop_length=hop_length,
149
- min_note_ms=min_note_ms,
150
- merge_gap_ms=merge_gap_ms,
151
- midi_smoothing_window=3,
152
- )
153
 
154
- # Opcional: cuantización temporal
155
  if quantize and bpm > 0:
156
  q_notes = []
157
  for m, t0, t1 in notes:
158
- qt0 = float(round_to_grid(t0, bpm, division))
159
- qt1 = float(round_to_grid(t1, bpm, division))
160
  if qt1 <= qt0:
161
- qt1 = qt0 + (60.0 / bpm) / division # mínimo 1 grid
162
  q_notes.append((m, qt0, qt1))
163
  notes = q_notes
164
 
165
- # Construir MIDI
166
  pm = pretty_midi.PrettyMIDI()
167
- instrument = pretty_midi.Instrument(program=program) # 0 = Acoustic Grand Piano
168
  for m, t0, t1 in notes:
169
  v = int(np.clip(velocity, 1, 127))
170
  instrument.notes.append(pretty_midi.Note(velocity=v, pitch=int(m), start=float(t0), end=float(t1)))
171
  pm.instruments.append(instrument)
172
 
173
- # Guardar a archivo temporal
174
  tmpdir = tempfile.mkdtemp()
175
  midi_path = os.path.join(tmpdir, "output.mid")
176
  pm.write(midi_path)
177
 
178
- # Métricas
179
- dur = len(y) / sr
180
  summary = {
181
- "duracion_audio_s": round(dur, 3),
182
  "notas_detectadas": len(notes),
183
  "rango_midi_min": int(np.min([n[0] for n in notes])) if notes else None,
184
  "rango_midi_max": int(np.max([n[0] for n in notes])) if notes else None,
185
  "bpm": bpm,
186
  "division": division,
187
  }
188
-
189
  return midi_path, summary
190
 
191
 
192
- # =====================
193
- # Interfaz Gradio
194
- # =====================
195
  CSS = """
196
  #app_title {font-size: 28px; font-weight: 800}
197
  #app_subtitle {opacity: .8}
@@ -207,8 +130,8 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
207
  with gr.Column(scale=2):
208
  audio_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio de entrada (voz, monofónica)")
209
  with gr.Accordion("Opciones de detección", open=False):
210
- fmin = gr.Dropdown(["C1","C2","C3","C4","C5"], value="C2", label="Nota mínima")
211
- fmax = gr.Dropdown(["C4","C5","C6","C7"], value="C7", label="Nota máxima")
212
  hop = gr.Slider(128, 1024, value=256, step=64, label="Hop length (muestras)")
213
  frame = gr.Slider(1024, 4096, value=2048, step=256, label="Frame length (muestras)")
214
  voice_th = gr.Slider(0.01, 0.5, value=0.1, step=0.01, label="Umbral de voicing (pyin)")
@@ -218,7 +141,7 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
218
  with gr.Accordion("Cuantización y salida", open=True):
219
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
220
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
221
- division = gr.Dropdown([(2, "Corchea"), (4, "Semicorchea"), (8, "Fusa")], value=4, label="División por negra", info="Más alto = rejilla más fina")
222
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
223
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
224
 
@@ -235,9 +158,6 @@ with gr.Blocks(css=CSS, fill_height=True) as demo:
235
  """)
236
 
237
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
238
- # division puede venir como tuple(label) o int (según Gradio). Normalizamos.
239
- if isinstance(division_val, tuple):
240
- division_val = division_val[0]
241
  midi_path, summary = audio_to_midi(
242
  audio=audio_path,
243
  fmin_note=fmin_note,
 
5
  import pretty_midi
6
  import gradio as gr
7
 
 
 
 
8
  A440 = 440.0
9
 
10
 
 
14
  return 69 + 12 * np.log2(f / A440)
15
 
16
 
 
 
 
 
17
  def round_to_grid(seconds, bpm, division=4):
 
18
  if bpm <= 0:
19
  return seconds
20
  beat = 60.0 / bpm
 
23
  return ticks * grid
24
 
25
 
26
+ def group_notes(f0, sr, hop_length, min_note_ms=80, merge_gap_ms=60, midi_smoothing_window=3):
 
 
 
 
 
 
 
 
27
  times = np.arange(len(f0)) * hop_length / sr
 
 
28
  midi_vals = np.array([hz_to_midi(x) for x in f0])
29
 
 
30
  if midi_smoothing_window and midi_smoothing_window > 1:
31
  from scipy.ndimage import median_filter
32
  midi_vals = median_filter(midi_vals, size=midi_smoothing_window)
33
 
 
34
  midi_round = np.round(midi_vals)
 
 
35
  midi_round[np.isnan(midi_vals)] = np.nan
36
 
37
+ notes, i, n = [], 0, len(midi_round)
 
 
38
  frame_ms = 1000.0 * hop_length / sr
39
  min_frames = max(1, int(np.ceil(min_note_ms / frame_ms)))
40
  merge_gap_frames = int(np.ceil(merge_gap_ms / frame_ms))
 
43
  if np.isnan(midi_round[i]):
44
  i += 1
45
  continue
46
+ note_val, start, j, gap = int(midi_round[i]), i, i + 1, 0
 
 
 
 
47
  while j < n:
48
  if np.isnan(midi_round[j]):
49
  gap += 1
 
55
  if int(midi_round[j]) != note_val:
56
  break
57
  j += 1
 
58
  if (j - start) >= min_frames:
59
+ t0, t1 = times[start], times[j - 1] + hop_length / sr
 
60
  notes.append((note_val, t0, t1))
61
  i = j + 1
62
  return notes
63
 
64
 
65
+ def audio_to_midi(audio, fmin_note='C2', fmax_note='C7', hop_length=256, frame_length=2048,
66
+ voicing_thres=0.1, min_note_ms=80, merge_gap_ms=60, bpm=100,
67
+ quantize=True, division=4, velocity=80, program=0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  if isinstance(audio, tuple):
 
69
  sr, y = audio
70
  y = np.array(y, dtype=np.float32)
71
  else:
 
72
  y, sr = librosa.load(audio, sr=None, mono=True)
 
 
73
  if np.max(np.abs(y)) > 0:
74
  y = y / np.max(np.abs(y))
75
 
 
76
  fmin_hz = librosa.note_to_hz(fmin_note)
77
  fmax_hz = librosa.note_to_hz(fmax_note)
78
 
79
  f0, voiced_flag, _ = librosa.pyin(
80
+ y, fmin=fmin_hz, fmax=fmax_hz, frame_length=frame_length,
81
+ hop_length=hop_length, center=True, sr=sr, trough_threshold=voicing_thres)
 
 
 
 
 
 
 
82
 
 
83
  f0[~voiced_flag] = np.nan
84
 
85
+ notes = group_notes(f0, sr, hop_length, min_note_ms, merge_gap_ms, 3)
 
 
 
 
 
 
 
 
86
 
 
87
  if quantize and bpm > 0:
88
  q_notes = []
89
  for m, t0, t1 in notes:
90
+ qt0, qt1 = round_to_grid(t0, bpm, division), round_to_grid(t1, bpm, division)
 
91
  if qt1 <= qt0:
92
+ qt1 = qt0 + (60.0 / bpm) / division
93
  q_notes.append((m, qt0, qt1))
94
  notes = q_notes
95
 
 
96
  pm = pretty_midi.PrettyMIDI()
97
+ instrument = pretty_midi.Instrument(program=program)
98
  for m, t0, t1 in notes:
99
  v = int(np.clip(velocity, 1, 127))
100
  instrument.notes.append(pretty_midi.Note(velocity=v, pitch=int(m), start=float(t0), end=float(t1)))
101
  pm.instruments.append(instrument)
102
 
 
103
  tmpdir = tempfile.mkdtemp()
104
  midi_path = os.path.join(tmpdir, "output.mid")
105
  pm.write(midi_path)
106
 
 
 
107
  summary = {
108
+ "duracion_audio_s": round(len(y) / sr, 3),
109
  "notas_detectadas": len(notes),
110
  "rango_midi_min": int(np.min([n[0] for n in notes])) if notes else None,
111
  "rango_midi_max": int(np.max([n[0] for n in notes])) if notes else None,
112
  "bpm": bpm,
113
  "division": division,
114
  }
 
115
  return midi_path, summary
116
 
117
 
 
 
 
118
  CSS = """
119
  #app_title {font-size: 28px; font-weight: 800}
120
  #app_subtitle {opacity: .8}
 
130
  with gr.Column(scale=2):
131
  audio_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio de entrada (voz, monofónica)")
132
  with gr.Accordion("Opciones de detección", open=False):
133
+ fmin = gr.Dropdown(["C1", "C2", "C3", "C4", "C5"], value="C2", label="Nota mínima")
134
+ fmax = gr.Dropdown(["C4", "C5", "C6", "C7"], value="C7", label="Nota máxima")
135
  hop = gr.Slider(128, 1024, value=256, step=64, label="Hop length (muestras)")
136
  frame = gr.Slider(1024, 4096, value=2048, step=256, label="Frame length (muestras)")
137
  voice_th = gr.Slider(0.01, 0.5, value=0.1, step=0.01, label="Umbral de voicing (pyin)")
 
141
  with gr.Accordion("Cuantización y salida", open=True):
142
  do_quant = gr.Checkbox(value=True, label="Cuantizar a rejilla")
143
  bpm = gr.Slider(40, 220, value=100, step=1, label="BPM")
144
+ division = gr.Dropdown([2, 4, 8], value=4, label="División por negra", info="2=Corchea, 4=Semicorchea, 8=Fusa")
145
  velocity = gr.Slider(1, 127, value=90, step=1, label="Velocidad (1-127)")
146
  program = gr.Slider(0, 127, value=0, step=1, label="Programa/MIDI Instrument (0=Piano)")
147
 
 
158
  """)
159
 
160
  def _convert(audio_path, fmin_note, fmax_note, hop_length, frame_length, voice_thres, min_ms, gap_join_ms, do_quantize, bpm_val, division_val, velocity_val, program_val):
 
 
 
161
  midi_path, summary = audio_to_midi(
162
  audio=audio_path,
163
  fmin_note=fmin_note,