Spaces:
Sleeping
Sleeping
Ewan Claude Opus 4.6 committed on
Commit ·
aa08171
1
Parent(s): ecae4b3
Use solo piano optimizer for full-song melodic stem, reduce LH bass notes
Browse files
Switch full-song pipeline from optimize_other to the full solo piano optimizer
for the melodic stem — produces much better rhythm, playability, and note
accuracy. Also reduces left-hand concurrent notes limit (2 normal, 3 complex)
to avoid muddy bass chords while keeping right hand at 4-5.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- api/server.py +25 -16
- transcriber/optimize.py +32 -12
- transcriber/optimize_other.py +10 -5
api/server.py
CHANGED
|
@@ -157,35 +157,44 @@ def run_full_transcription(job_id, audio_path, job_dir):
|
|
| 157 |
run_transcribe(stems["bass"], str(bass_raw))
|
| 158 |
|
| 159 |
# Step 3: Optimize transcriptions
|
|
|
|
|
|
|
|
|
|
| 160 |
job_status[job_id] = {"step": 3, "label": "Optimizing note accuracy...", "done": False}
|
| 161 |
-
from
|
| 162 |
from optimize_bass import optimize_bass
|
| 163 |
|
| 164 |
-
piano_opt = job_dir / "
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
bass_opt = job_dir / "bass_optimized.mid"
|
| 169 |
optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))
|
| 170 |
|
| 171 |
-
#
|
| 172 |
-
job_status[job_id] = {"step": 4, "label": "Detecting chords...", "done": False}
|
| 173 |
-
from chords import detect_chords
|
| 174 |
-
chords_path = job_dir / "transcription_chords.json"
|
| 175 |
-
detect_chords(str(piano_opt), str(chords_path))
|
| 176 |
chord_data = None
|
| 177 |
if chords_path.exists():
|
| 178 |
with open(chords_path) as f:
|
| 179 |
chord_data = json.load(f)
|
| 180 |
|
| 181 |
-
# Step
|
| 182 |
-
job_status[job_id] = {"step":
|
| 183 |
from drums import transcribe_drums
|
| 184 |
drum_tab_path = job_dir / "drum_tab.json"
|
| 185 |
transcribe_drums(stems["drums"], str(drum_tab_path))
|
| 186 |
|
| 187 |
-
# Step
|
| 188 |
-
job_status[job_id] = {"step":
|
| 189 |
from tabs import midi_to_guitar_tab, midi_to_bass_tab
|
| 190 |
|
| 191 |
guitar_tab = midi_to_guitar_tab(str(piano_opt), str(chords_path))
|
|
@@ -198,8 +207,8 @@ def run_full_transcription(job_id, audio_path, job_dir):
|
|
| 198 |
with open(bass_tab_path, 'w') as f:
|
| 199 |
json.dump(bass_tab, f)
|
| 200 |
|
| 201 |
-
# Step
|
| 202 |
-
job_status[job_id] = {"step":
|
| 203 |
merged_path = job_dir / "transcription.mid"
|
| 204 |
merge_stems(str(piano_opt), str(bass_opt), str(merged_path))
|
| 205 |
|
|
@@ -211,7 +220,7 @@ def run_full_transcription(job_id, audio_path, job_dir):
|
|
| 211 |
f.unlink(missing_ok=True)
|
| 212 |
|
| 213 |
job_status[job_id] = {
|
| 214 |
-
"step":
|
| 215 |
"result": {
|
| 216 |
"job_id": job_id,
|
| 217 |
"midi_url": f"/api/jobs/{job_id}/midi",
|
|
|
|
| 157 |
run_transcribe(stems["bass"], str(bass_raw))
|
| 158 |
|
| 159 |
# Step 3: Optimize transcriptions
|
| 160 |
+
# Use the full solo piano optimizer for the melodic stem — it produces
|
| 161 |
+
# much better rhythm, playability, and note accuracy. Also runs chord
|
| 162 |
+
# detection and spectral analysis internally.
|
| 163 |
job_status[job_id] = {"step": 3, "label": "Optimizing note accuracy...", "done": False}
|
| 164 |
+
from optimize import optimize as optimize_piano
|
| 165 |
from optimize_bass import optimize_bass
|
| 166 |
|
| 167 |
+
piano_opt = job_dir / "transcription.tmp.mid"
|
| 168 |
+
optimize_piano(stems["other"], str(piano_raw), str(piano_opt))
|
| 169 |
+
|
| 170 |
+
# Solo optimizer writes chords to {stem}_chords.json next to the output
|
| 171 |
+
auto_chords = job_dir / "transcription.tmp_chords.json"
|
| 172 |
+
chords_path = job_dir / "transcription_chords.json"
|
| 173 |
+
if auto_chords.exists():
|
| 174 |
+
auto_chords.rename(chords_path)
|
| 175 |
+
|
| 176 |
+
# Rename to final path
|
| 177 |
+
piano_final = job_dir / "piano_optimized.mid"
|
| 178 |
+
piano_opt.rename(piano_final)
|
| 179 |
+
piano_opt = piano_final
|
| 180 |
|
| 181 |
bass_opt = job_dir / "bass_optimized.mid"
|
| 182 |
optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))
|
| 183 |
|
| 184 |
+
# Load chord data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
chord_data = None
|
| 186 |
if chords_path.exists():
|
| 187 |
with open(chords_path) as f:
|
| 188 |
chord_data = json.load(f)
|
| 189 |
|
| 190 |
+
# Step 4: Transcribe drums
|
| 191 |
+
job_status[job_id] = {"step": 4, "label": "Transcribing drums...", "done": False}
|
| 192 |
from drums import transcribe_drums
|
| 193 |
drum_tab_path = job_dir / "drum_tab.json"
|
| 194 |
transcribe_drums(stems["drums"], str(drum_tab_path))
|
| 195 |
|
| 196 |
+
# Step 5: Generate guitar and bass tabs
|
| 197 |
+
job_status[job_id] = {"step": 5, "label": "Generating tabs...", "done": False}
|
| 198 |
from tabs import midi_to_guitar_tab, midi_to_bass_tab
|
| 199 |
|
| 200 |
guitar_tab = midi_to_guitar_tab(str(piano_opt), str(chords_path))
|
|
|
|
| 207 |
with open(bass_tab_path, 'w') as f:
|
| 208 |
json.dump(bass_tab, f)
|
| 209 |
|
| 210 |
+
# Step 6: Merge melodic + bass into final MIDI
|
| 211 |
+
job_status[job_id] = {"step": 6, "label": "Assembling final result...", "done": False}
|
| 212 |
merged_path = job_dir / "transcription.mid"
|
| 213 |
merge_stems(str(piano_opt), str(bass_opt), str(merged_path))
|
| 214 |
|
|
|
|
| 220 |
f.unlink(missing_ok=True)
|
| 221 |
|
| 222 |
job_status[job_id] = {
|
| 223 |
+
"step": 7, "label": "Done!", "done": True,
|
| 224 |
"result": {
|
| 225 |
"job_id": job_id,
|
| 226 |
"midi_url": f"/api/jobs/{job_id}/midi",
|
transcriber/optimize.py
CHANGED
|
@@ -612,13 +612,19 @@ def apply_pitch_ceiling(midi_data, max_pitch=96):
|
|
| 612 |
return midi_out, removed
|
| 613 |
|
| 614 |
|
| 615 |
-
def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
|
| 616 |
"""Limit notes per chord to max_per_hand per hand.
|
| 617 |
|
| 618 |
Groups notes by onset time (within 30ms) and splits into left/right hand.
|
| 619 |
Removes excess notes — protects melody (highest RH pitch) and bass
|
| 620 |
(lowest LH pitch), then removes lowest velocity.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
"""
|
|
|
|
|
|
|
| 622 |
midi_out = copy.deepcopy(midi_data)
|
| 623 |
removed = 0
|
| 624 |
|
|
@@ -643,7 +649,8 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
|
|
| 643 |
right = [idx for idx in chord_indices if notes[idx].pitch >= hand_split]
|
| 644 |
|
| 645 |
for is_right, hand_indices in [(True, right), (False, left)]:
|
| 646 |
-
|
|
|
|
| 647 |
continue
|
| 648 |
|
| 649 |
# Protect melody (highest RH) or bass (lowest LH)
|
|
@@ -656,7 +663,7 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
|
|
| 656 |
scored = [(notes[idx].velocity, idx) for idx in trimmable]
|
| 657 |
scored.sort()
|
| 658 |
|
| 659 |
-
excess = len(hand_indices) -
|
| 660 |
for _, idx in scored[:excess]:
|
| 661 |
to_remove.add(idx)
|
| 662 |
|
|
@@ -666,14 +673,20 @@ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
|
|
| 666 |
return midi_out, removed
|
| 667 |
|
| 668 |
|
| 669 |
-
def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
|
| 670 |
"""Limit concurrent sounding notes to max_per_hand per hand.
|
| 671 |
|
| 672 |
Splits notes into left hand (< hand_split) and right hand (>= hand_split).
|
| 673 |
-
At each note onset, count concurrent notes in that hand. If >
|
| 674 |
trim sustained notes — but protect the melody (highest RH pitch) and bass
|
| 675 |
(lowest LH pitch). Among the rest, trim lowest velocity first.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
"""
|
|
|
|
|
|
|
| 677 |
midi_out = copy.deepcopy(midi_data)
|
| 678 |
trimmed = 0
|
| 679 |
|
|
@@ -684,6 +697,7 @@ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
|
|
| 684 |
|
| 685 |
for i, note in enumerate(notes):
|
| 686 |
is_right = note.pitch >= hand_split
|
|
|
|
| 687 |
|
| 688 |
# Find all notes in the same hand currently sounding
|
| 689 |
sounding = []
|
|
@@ -693,8 +707,8 @@ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
|
|
| 693 |
if same_hand:
|
| 694 |
sounding.append(j)
|
| 695 |
|
| 696 |
-
if len(sounding) + 1 >
|
| 697 |
-
excess = len(sounding) + 1 -
|
| 698 |
# All indices including the current note
|
| 699 |
all_indices = sounding + [i]
|
| 700 |
|
|
@@ -1458,14 +1472,20 @@ def optimize(original_audio_path, midi_path, output_path=None):
|
|
| 1458 |
|
| 1459 |
# Step 8f: Playability filter — limit per-onset chord size
|
| 1460 |
# Complex pieces get 5 notes/hand to preserve dense voicings
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1464 |
print(f" Removed {playability_removed} excess chord notes")
|
| 1465 |
|
| 1466 |
# Step 8g: Limit total concurrent sounding notes
|
| 1467 |
-
print(f"\nStep 8g: Concurrent sounding limit (max {
|
| 1468 |
-
midi_data, sustain_trimmed = limit_total_concurrent(
|
|
|
|
|
|
|
| 1469 |
print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
|
| 1470 |
|
| 1471 |
# Final metrics
|
|
|
|
| 612 |
return midi_out, removed
|
| 613 |
|
| 614 |
|
| 615 |
+
def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60, max_left_hand=None):
|
| 616 |
"""Limit notes per chord to max_per_hand per hand.
|
| 617 |
|
| 618 |
Groups notes by onset time (within 30ms) and splits into left/right hand.
|
| 619 |
Removes excess notes — protects melody (highest RH pitch) and bass
|
| 620 |
(lowest LH pitch), then removes lowest velocity.
|
| 621 |
+
|
| 622 |
+
Args:
|
| 623 |
+
max_per_hand: Max notes for right hand (default 4)
|
| 624 |
+
max_left_hand: Max notes for left hand (defaults to max_per_hand)
|
| 625 |
"""
|
| 626 |
+
if max_left_hand is None:
|
| 627 |
+
max_left_hand = max_per_hand
|
| 628 |
midi_out = copy.deepcopy(midi_data)
|
| 629 |
removed = 0
|
| 630 |
|
|
|
|
| 649 |
right = [idx for idx in chord_indices if notes[idx].pitch >= hand_split]
|
| 650 |
|
| 651 |
for is_right, hand_indices in [(True, right), (False, left)]:
|
| 652 |
+
limit = max_per_hand if is_right else max_left_hand
|
| 653 |
+
if len(hand_indices) <= limit:
|
| 654 |
continue
|
| 655 |
|
| 656 |
# Protect melody (highest RH) or bass (lowest LH)
|
|
|
|
| 663 |
scored = [(notes[idx].velocity, idx) for idx in trimmable]
|
| 664 |
scored.sort()
|
| 665 |
|
| 666 |
+
excess = len(hand_indices) - limit
|
| 667 |
for _, idx in scored[:excess]:
|
| 668 |
to_remove.add(idx)
|
| 669 |
|
|
|
|
| 673 |
return midi_out, removed
|
| 674 |
|
| 675 |
|
| 676 |
+
def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60, max_left_hand=None):
|
| 677 |
"""Limit concurrent sounding notes to max_per_hand per hand.
|
| 678 |
|
| 679 |
Splits notes into left hand (< hand_split) and right hand (>= hand_split).
|
| 680 |
+
At each note onset, count concurrent notes in that hand. If > limit,
|
| 681 |
trim sustained notes — but protect the melody (highest RH pitch) and bass
|
| 682 |
(lowest LH pitch). Among the rest, trim lowest velocity first.
|
| 683 |
+
|
| 684 |
+
Args:
|
| 685 |
+
max_per_hand: Max concurrent notes for right hand (default 4)
|
| 686 |
+
max_left_hand: Max concurrent notes for left hand (defaults to max_per_hand)
|
| 687 |
"""
|
| 688 |
+
if max_left_hand is None:
|
| 689 |
+
max_left_hand = max_per_hand
|
| 690 |
midi_out = copy.deepcopy(midi_data)
|
| 691 |
trimmed = 0
|
| 692 |
|
|
|
|
| 697 |
|
| 698 |
for i, note in enumerate(notes):
|
| 699 |
is_right = note.pitch >= hand_split
|
| 700 |
+
limit = max_per_hand if is_right else max_left_hand
|
| 701 |
|
| 702 |
# Find all notes in the same hand currently sounding
|
| 703 |
sounding = []
|
|
|
|
| 707 |
if same_hand:
|
| 708 |
sounding.append(j)
|
| 709 |
|
| 710 |
+
if len(sounding) + 1 > limit:
|
| 711 |
+
excess = len(sounding) + 1 - limit
|
| 712 |
# All indices including the current note
|
| 713 |
all_indices = sounding + [i]
|
| 714 |
|
|
|
|
| 1472 |
|
| 1473 |
# Step 8f: Playability filter — limit per-onset chord size
|
| 1474 |
# Complex pieces get 5 notes/hand to preserve dense voicings
|
| 1475 |
+
# Left hand (bass) gets a tighter limit to avoid muddy chords
|
| 1476 |
+
max_rh = 5 if complexity == 'complex' else 4
|
| 1477 |
+
max_lh = 3 if complexity == 'complex' else 2
|
| 1478 |
+
print(f"\nStep 8f: Playability filter (RH max {max_rh}, LH max {max_lh} per chord)...")
|
| 1479 |
+
midi_data, playability_removed = limit_concurrent_notes(
|
| 1480 |
+
midi_data, max_per_hand=max_rh, max_left_hand=max_lh
|
| 1481 |
+
)
|
| 1482 |
print(f" Removed {playability_removed} excess chord notes")
|
| 1483 |
|
| 1484 |
# Step 8g: Limit total concurrent sounding notes
|
| 1485 |
+
print(f"\nStep 8g: Concurrent sounding limit (RH max {max_rh}, LH max {max_lh})...")
|
| 1486 |
+
midi_data, sustain_trimmed = limit_total_concurrent(
|
| 1487 |
+
midi_data, max_per_hand=max_rh, max_left_hand=max_lh
|
| 1488 |
+
)
|
| 1489 |
print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
|
| 1490 |
|
| 1491 |
# Final metrics
|
transcriber/optimize_other.py
CHANGED
|
@@ -158,14 +158,19 @@ def optimize_other(original_audio_path, midi_path, output_path=None, mix_audio_p
|
|
| 158 |
print(f" Trimmed {notes_trimmed}, enforced min duration on {durations_enforced}")
|
| 159 |
|
| 160 |
# Step 8b: Playability filter — limit per-onset chord size
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
| 164 |
print(f" Removed {playability_removed} excess chord notes")
|
| 165 |
|
| 166 |
# Step 8c: Limit total concurrent sounding notes
|
| 167 |
-
print(f"\nStep 8c: Concurrent sounding limit (max {
|
| 168 |
-
midi_data, sustain_trimmed = limit_total_concurrent(
|
|
|
|
|
|
|
| 169 |
print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
|
| 170 |
|
| 171 |
# Shift to t=0 if there was leading silence
|
|
|
|
| 158 |
print(f" Trimmed {notes_trimmed}, enforced min duration on {durations_enforced}")
|
| 159 |
|
| 160 |
# Step 8b: Playability filter — limit per-onset chord size
|
| 161 |
+
max_rh = 5 if complexity == 'complex' else 4
|
| 162 |
+
max_lh = 3 if complexity == 'complex' else 2
|
| 163 |
+
print(f"\nStep 8b: Playability filter (RH max {max_rh}, LH max {max_lh} per chord)...")
|
| 164 |
+
midi_data, playability_removed = limit_concurrent_notes(
|
| 165 |
+
midi_data, max_per_hand=max_rh, max_left_hand=max_lh
|
| 166 |
+
)
|
| 167 |
print(f" Removed {playability_removed} excess chord notes")
|
| 168 |
|
| 169 |
# Step 8c: Limit total concurrent sounding notes
|
| 170 |
+
print(f"\nStep 8c: Concurrent sounding limit (RH max {max_rh}, LH max {max_lh})...")
|
| 171 |
+
midi_data, sustain_trimmed = limit_total_concurrent(
|
| 172 |
+
midi_data, max_per_hand=max_rh, max_left_hand=max_lh
|
| 173 |
+
)
|
| 174 |
print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")
|
| 175 |
|
| 176 |
# Shift to t=0 if there was leading silence
|