Ewan Claude Opus 4.6 committed on
Commit
aa08171
·
1 Parent(s): ecae4b3

Use solo piano optimizer for full-song melodic stem, reduce LH bass notes

Browse files

Switch full-song pipeline from optimize_other to the full solo piano optimizer
for the melodic stem — produces much better rhythm, playability, and note
accuracy. Also reduces left-hand concurrent notes limit (2 normal, 3 complex)
to avoid muddy bass chords while keeping right hand at 4-5.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

api/server.py CHANGED
@@ -157,35 +157,44 @@ def run_full_transcription(job_id, audio_path, job_dir):
157
  run_transcribe(stems["bass"], str(bass_raw))
158
 
159
  # Step 3: Optimize transcriptions
 
 
 
160
  job_status[job_id] = {"step": 3, "label": "Optimizing note accuracy...", "done": False}
161
- from optimize_other import optimize_other
162
  from optimize_bass import optimize_bass
163
 
164
- piano_opt = job_dir / "piano_optimized.mid"
165
- optimize_other(stems["other"], str(piano_raw), str(piano_opt),
166
- mix_audio_path=str(audio_path))
 
 
 
 
 
 
 
 
 
 
167
 
168
  bass_opt = job_dir / "bass_optimized.mid"
169
  optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))
170
 
171
- # Step 4: Chord detection on "other" stem
172
- job_status[job_id] = {"step": 4, "label": "Detecting chords...", "done": False}
173
- from chords import detect_chords
174
- chords_path = job_dir / "transcription_chords.json"
175
- detect_chords(str(piano_opt), str(chords_path))
176
  chord_data = None
177
  if chords_path.exists():
178
  with open(chords_path) as f:
179
  chord_data = json.load(f)
180
 
181
- # Step 5: Transcribe drums
182
- job_status[job_id] = {"step": 5, "label": "Transcribing drums...", "done": False}
183
  from drums import transcribe_drums
184
  drum_tab_path = job_dir / "drum_tab.json"
185
  transcribe_drums(stems["drums"], str(drum_tab_path))
186
 
187
- # Step 6: Generate guitar and bass tabs
188
- job_status[job_id] = {"step": 6, "label": "Generating tabs...", "done": False}
189
  from tabs import midi_to_guitar_tab, midi_to_bass_tab
190
 
191
  guitar_tab = midi_to_guitar_tab(str(piano_opt), str(chords_path))
@@ -198,8 +207,8 @@ def run_full_transcription(job_id, audio_path, job_dir):
198
  with open(bass_tab_path, 'w') as f:
199
  json.dump(bass_tab, f)
200
 
201
- # Step 7: Merge melodic + bass into final MIDI
202
- job_status[job_id] = {"step": 7, "label": "Assembling final result...", "done": False}
203
  merged_path = job_dir / "transcription.mid"
204
  merge_stems(str(piano_opt), str(bass_opt), str(merged_path))
205
 
@@ -211,7 +220,7 @@ def run_full_transcription(job_id, audio_path, job_dir):
211
  f.unlink(missing_ok=True)
212
 
213
  job_status[job_id] = {
214
- "step": 8, "label": "Done!", "done": True,
215
  "result": {
216
  "job_id": job_id,
217
  "midi_url": f"/api/jobs/{job_id}/midi",
 
157
  run_transcribe(stems["bass"], str(bass_raw))
158
 
159
  # Step 3: Optimize transcriptions
160
+ # Use the full solo piano optimizer for the melodic stem — it produces
161
+ # much better rhythm, playability, and note accuracy. Also runs chord
162
+ # detection and spectral analysis internally.
163
  job_status[job_id] = {"step": 3, "label": "Optimizing note accuracy...", "done": False}
164
+ from optimize import optimize as optimize_piano
165
  from optimize_bass import optimize_bass
166
 
167
+ piano_opt = job_dir / "transcription.tmp.mid"
168
+ optimize_piano(stems["other"], str(piano_raw), str(piano_opt))
169
+
170
+ # Solo optimizer writes chords to {stem}_chords.json next to the output
171
+ auto_chords = job_dir / "transcription.tmp_chords.json"
172
+ chords_path = job_dir / "transcription_chords.json"
173
+ if auto_chords.exists():
174
+ auto_chords.rename(chords_path)
175
+
176
+ # Rename to final path
177
+ piano_final = job_dir / "piano_optimized.mid"
178
+ piano_opt.rename(piano_final)
179
+ piano_opt = piano_final
180
 
181
  bass_opt = job_dir / "bass_optimized.mid"
182
  optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))
183
 
184
+ # Load chord data
 
 
 
 
185
  chord_data = None
186
  if chords_path.exists():
187
  with open(chords_path) as f:
188
  chord_data = json.load(f)
189
 
190
+ # Step 4: Transcribe drums
191
+ job_status[job_id] = {"step": 4, "label": "Transcribing drums...", "done": False}
192
  from drums import transcribe_drums
193
  drum_tab_path = job_dir / "drum_tab.json"
194
  transcribe_drums(stems["drums"], str(drum_tab_path))
195
 
196
+ # Step 5: Generate guitar and bass tabs
197
+ job_status[job_id] = {"step": 5, "label": "Generating tabs...", "done": False}
198
  from tabs import midi_to_guitar_tab, midi_to_bass_tab
199
 
200
  guitar_tab = midi_to_guitar_tab(str(piano_opt), str(chords_path))
 
207
  with open(bass_tab_path, 'w') as f:
208
  json.dump(bass_tab, f)
209
 
210
+ # Step 6: Merge melodic + bass into final MIDI
211
+ job_status[job_id] = {"step": 6, "label": "Assembling final result...", "done": False}
212
  merged_path = job_dir / "transcription.mid"
213
  merge_stems(str(piano_opt), str(bass_opt), str(merged_path))
214
 
 
220
  f.unlink(missing_ok=True)
221
 
222
  job_status[job_id] = {
223
+ "step": 7, "label": "Done!", "done": True,
224
  "result": {
225
  "job_id": job_id,
226
  "midi_url": f"/api/jobs/{job_id}/midi",
transcriber/optimize.py CHANGED
@@ -612,13 +612,19 @@ def apply_pitch_ceiling(midi_data, max_pitch=96):
612
  return midi_out, removed
613
 
614
 
615
- def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
616
  """Limit notes per chord to max_per_hand per hand.
617
 
618
  Groups notes by onset time (within 30ms) and splits into left/right hand.
619
  Removes excess notes — protects melody (highest RH pitch) and bass
620
  (lowest LH pitch), then removes lowest velocity.
 
 
 
 
621
  """
 
 
622
  midi_out = copy.deepcopy(midi_data)
623
  removed = 0
624
 
@@ -643,7 +649,8 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
643
  right = [idx for idx in chord_indices if notes[idx].pitch >= hand_split]
644
 
645
  for is_right, hand_indices in [(True, right), (False, left)]:
646
- if len(hand_indices) <= max_per_hand:
 
647
  continue
648
 
649
  # Protect melody (highest RH) or bass (lowest LH)
@@ -656,7 +663,7 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
656
  scored = [(notes[idx].velocity, idx) for idx in trimmable]
657
  scored.sort()
658
 
659
- excess = len(hand_indices) - max_per_hand
660
  for _, idx in scored[:excess]:
661
  to_remove.add(idx)
662
 
@@ -666,14 +673,20 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
666
  return midi_out, removed
667
 
668
 
669
- def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
670
  """Limit concurrent sounding notes to max_per_hand per hand.
671
 
672
  Splits notes into left hand (< hand_split) and right hand (>= hand_split).
673
- At each note onset, count concurrent notes in that hand. If > max_per_hand,
674
  trim sustained notes — but protect the melody (highest RH pitch) and bass
675
  (lowest LH pitch). Among the rest, trim lowest velocity first.
 
 
 
 
676
  """
 
 
677
  midi_out = copy.deepcopy(midi_data)
678
  trimmed = 0
679
 
@@ -684,6 +697,7 @@ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
684
 
685
  for i, note in enumerate(notes):
686
  is_right = note.pitch >= hand_split
 
687
 
688
  # Find all notes in the same hand currently sounding
689
  sounding = []
@@ -693,8 +707,8 @@ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
693
  if same_hand:
694
  sounding.append(j)
695
 
696
- if len(sounding) + 1 > max_per_hand:
697
- excess = len(sounding) + 1 - max_per_hand
698
  # All indices including the current note
699
  all_indices = sounding + [i]
700
 
@@ -1458,14 +1472,20 @@ def optimize(original_audio_path, midi_path, output_path=None):
1458
 
1459
  # Step 8f: Playability filter — limit per-onset chord size
1460
  # Complex pieces get 5 notes/hand to preserve dense voicings
1461
- max_hand = 5 if complexity == 'complex' else 4
1462
- print(f"\nStep 8f: Playability filter (max {max_hand} notes per hand per chord)...")
1463
- midi_data, playability_removed = limit_concurrent_notes(midi_data, max_per_hand=max_hand)
 
 
 
 
1464
  print(f" Removed {playability_removed} excess chord notes")
1465
 
1466
  # Step 8g: Limit total concurrent sounding notes
1467
- print(f"\nStep 8g: Concurrent sounding limit (max {max_hand} per hand)...")
1468
- midi_data, sustain_trimmed = limit_total_concurrent(midi_data, max_per_hand=max_hand)
 
 
1469
  print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
1470
 
1471
  # Final metrics
 
612
  return midi_out, removed
613
 
614
 
615
+ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60, max_left_hand=None):
616
  """Limit notes per chord to max_per_hand per hand.
617
 
618
  Groups notes by onset time (within 30ms) and splits into left/right hand.
619
  Removes excess notes — protects melody (highest RH pitch) and bass
620
  (lowest LH pitch), then removes lowest velocity.
621
+
622
+ Args:
623
+ max_per_hand: Max notes for right hand (default 4)
624
+ max_left_hand: Max notes for left hand (defaults to max_per_hand)
625
  """
626
+ if max_left_hand is None:
627
+ max_left_hand = max_per_hand
628
  midi_out = copy.deepcopy(midi_data)
629
  removed = 0
630
 
 
649
  right = [idx for idx in chord_indices if notes[idx].pitch >= hand_split]
650
 
651
  for is_right, hand_indices in [(True, right), (False, left)]:
652
+ limit = max_per_hand if is_right else max_left_hand
653
+ if len(hand_indices) <= limit:
654
  continue
655
 
656
  # Protect melody (highest RH) or bass (lowest LH)
 
663
  scored = [(notes[idx].velocity, idx) for idx in trimmable]
664
  scored.sort()
665
 
666
+ excess = len(hand_indices) - limit
667
  for _, idx in scored[:excess]:
668
  to_remove.add(idx)
669
 
 
673
  return midi_out, removed
674
 
675
 
676
+ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60, max_left_hand=None):
677
  """Limit concurrent sounding notes to max_per_hand per hand.
678
 
679
  Splits notes into left hand (< hand_split) and right hand (>= hand_split).
680
+ At each note onset, count concurrent notes in that hand. If > limit,
681
  trim sustained notes — but protect the melody (highest RH pitch) and bass
682
  (lowest LH pitch). Among the rest, trim lowest velocity first.
683
+
684
+ Args:
685
+ max_per_hand: Max concurrent notes for right hand (default 4)
686
+ max_left_hand: Max concurrent notes for left hand (defaults to max_per_hand)
687
  """
688
+ if max_left_hand is None:
689
+ max_left_hand = max_per_hand
690
  midi_out = copy.deepcopy(midi_data)
691
  trimmed = 0
692
 
 
697
 
698
  for i, note in enumerate(notes):
699
  is_right = note.pitch >= hand_split
700
+ limit = max_per_hand if is_right else max_left_hand
701
 
702
  # Find all notes in the same hand currently sounding
703
  sounding = []
 
707
  if same_hand:
708
  sounding.append(j)
709
 
710
+ if len(sounding) + 1 > limit:
711
+ excess = len(sounding) + 1 - limit
712
  # All indices including the current note
713
  all_indices = sounding + [i]
714
 
 
1472
 
1473
  # Step 8f: Playability filter — limit per-onset chord size
1474
  # Complex pieces get 5 notes/hand to preserve dense voicings
1475
+ # Left hand (bass) gets a tighter limit to avoid muddy chords
1476
+ max_rh = 5 if complexity == 'complex' else 4
1477
+ max_lh = 3 if complexity == 'complex' else 2
1478
+ print(f"\nStep 8f: Playability filter (RH max {max_rh}, LH max {max_lh} per chord)...")
1479
+ midi_data, playability_removed = limit_concurrent_notes(
1480
+ midi_data, max_per_hand=max_rh, max_left_hand=max_lh
1481
+ )
1482
  print(f" Removed {playability_removed} excess chord notes")
1483
 
1484
  # Step 8g: Limit total concurrent sounding notes
1485
+ print(f"\nStep 8g: Concurrent sounding limit (RH max {max_rh}, LH max {max_lh})...")
1486
+ midi_data, sustain_trimmed = limit_total_concurrent(
1487
+ midi_data, max_per_hand=max_rh, max_left_hand=max_lh
1488
+ )
1489
  print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
1490
 
1491
  # Final metrics
transcriber/optimize_other.py CHANGED
@@ -158,14 +158,19 @@ def optimize_other(original_audio_path, midi_path, output_path=None, mix_audio_p
158
  print(f" Trimmed {notes_trimmed}, enforced min duration on {durations_enforced}")
159
 
160
  # Step 8b: Playability filter — limit per-onset chord size
161
- max_hand = 5 if complexity == 'complex' else 4
162
- print(f"\nStep 8b: Playability filter (max {max_hand} notes per hand per chord)...")
163
- midi_data, playability_removed = limit_concurrent_notes(midi_data, max_per_hand=max_hand)
 
 
 
164
  print(f" Removed {playability_removed} excess chord notes")
165
 
166
  # Step 8c: Limit total concurrent sounding notes
167
- print(f"\nStep 8c: Concurrent sounding limit (max {max_hand} per hand)...")
168
- midi_data, sustain_trimmed = limit_total_concurrent(midi_data, max_per_hand=max_hand)
 
 
169
  print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
170
 
171
  # Shift to t=0 if there was leading silence
 
158
  print(f" Trimmed {notes_trimmed}, enforced min duration on {durations_enforced}")
159
 
160
  # Step 8b: Playability filter — limit per-onset chord size
161
+ max_rh = 5 if complexity == 'complex' else 4
162
+ max_lh = 3 if complexity == 'complex' else 2
163
+ print(f"\nStep 8b: Playability filter (RH max {max_rh}, LH max {max_lh} per chord)...")
164
+ midi_data, playability_removed = limit_concurrent_notes(
165
+ midi_data, max_per_hand=max_rh, max_left_hand=max_lh
166
+ )
167
  print(f" Removed {playability_removed} excess chord notes")
168
 
169
  # Step 8c: Limit total concurrent sounding notes
170
+ print(f"\nStep 8c: Concurrent sounding limit (RH max {max_rh}, LH max {max_lh})...")
171
+ midi_data, sustain_trimmed = limit_total_concurrent(
172
+ midi_data, max_per_hand=max_rh, max_left_hand=max_lh
173
+ )
174
  print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
175
 
176
  # Shift to t=0 if there was leading silence