# mroctopus / transcriber / arrange.py
# Ewan
# Full Song quality pass: light optimizer, better guitar sound, tab accuracy
# 78ef95d
"""Piano arrangement engine for full-song mode.
Takes separated stem transcriptions (vocals, bass) and chord detection
to produce a playable two-hand piano arrangement:
- Right hand: vocal melody + harmonic fill
- Left hand: chord voicings based on detected chords + bass roots
"""
import json
from pathlib import Path
import numpy as np
import pretty_midi
# Import chord templates from our chord detection module
from chords import CHORD_TEMPLATES, NOTE_NAMES
# ---------------------------------------------------------------------------
# Voicing helpers
# ---------------------------------------------------------------------------
def _root_name_to_pc(root_name):
    """Map a root note name such as 'Db' or 'F#' to its pitch class.

    Flat spellings are resolved through a small local table; any other
    name is looked up by position in NOTE_NAMES. Names found in neither
    place resolve to 0 (C) rather than raising.

    Args:
        root_name: Note name string

    Returns:
        Pitch class as an int (0 when the name is not recognised)
    """
    flat_spellings = {
        'Cb': 11, 'Db': 1, 'Eb': 3, 'Fb': 4, 'Gb': 6, 'Ab': 8, 'Bb': 10,
    }
    pitch_class = flat_spellings.get(root_name)
    if pitch_class is not None:
        return pitch_class
    try:
        pitch_class = NOTE_NAMES.index(root_name)
    except ValueError:
        # Unrecognised spelling: fall back to C
        pitch_class = 0
    return pitch_class
def voice_chord(root_pc, quality, register_low=48, register_high=60, max_notes=3):
    """Build a chord voicing within a MIDI register.

    Each chord tone is placed relative to the root and then folded by
    octaves toward [register_low, register_high]. When the chord has more
    distinct tones than max_notes, tones are kept by harmonic priority
    (root, then fifth, then third, then seventh, then anything else)
    instead of by how low they happen to land in the register, so the
    root is never the tone that gets dropped.

    Args:
        root_pc: Root pitch class (0-11, C=0)
        quality: Chord quality string matching CHORD_TEMPLATES keys
        register_low: Lowest MIDI note for the voicing
        register_high: Highest MIDI note for the voicing
        max_notes: Maximum number of notes in the voicing; values <= 0
            produce an empty voicing

    Returns:
        List of MIDI pitch numbers, sorted low to high
    """
    template = CHORD_TEMPLATES.get(quality)
    if template is None:
        # Unknown quality — just do root + fifth
        template = frozenset([0, 7])

    # Place the root at the bottom of the register. root_pc % 12 is never
    # negative, so the root can only overshoot the top (narrow registers).
    root_midi = register_low + root_pc % 12
    if root_midi > register_high:
        root_midi -= 12

    def tone_rank(interval):
        # Lower rank = kept first when the voicing has to be thinned out.
        degree = interval % 12
        if degree == 0:
            return 0  # root
        if degree == 7:
            return 1  # fifth
        if degree in (3, 4):
            return 2  # third
        if degree in (10, 11):
            return 3  # seventh
        return 4      # any other colour tone

    limit = max(max_notes, 0)
    chosen = []
    for interval in sorted(template, key=lambda iv: (tone_rank(iv), iv)):
        if len(chosen) >= limit:
            break
        pitch = root_midi + interval
        # Fold by octaves toward the register
        while pitch < register_low:
            pitch += 12
        while pitch > register_high:
            pitch -= 12
        # Two tones can fold onto the same key; keep it only once
        if pitch not in chosen:
            chosen.append(pitch)

    return sorted(chosen)
def apply_voice_leading(voicing_sequence):
    """Smooth register jumps between consecutive voicings.

    Every voicing after the first is tried at three positions: one octave
    down, unchanged, and one octave up (the whole voicing moves together).
    Positions with any pitch outside MIDI 36-72 are rejected. Of the rest,
    the one whose average pitch lies closest to the average pitch of the
    previously emitted voicing wins; on a tie the lower position is kept.
    If no position is acceptable the voicing is passed through untouched.

    Args:
        voicing_sequence: List of (pitches_list, start_time, end_time, velocity) tuples

    Returns:
        Sequence of the same length with re-positioned pitch lists. Inputs
        of length 0 or 1 are returned as-is.
    """
    if len(voicing_sequence) <= 1:
        return voicing_sequence

    smoothed = [voicing_sequence[0]]
    for event in voicing_sequence[1:]:
        current, t_on, t_off, velocity = event
        previous = smoothed[-1][0]

        # Nothing to compare against (or nothing to move): keep the event as given
        if not (current and previous):
            smoothed.append(event)
            continue

        anchor = sum(previous) / len(previous)
        chosen = current
        lowest_cost = float('inf')
        for shift in (-12, 0, 12):
            shifted = sorted(p + shift for p in current)
            # Sorted, so checking both ends covers every pitch
            if shifted[0] < 36 or shifted[-1] > 72:
                continue
            distance = abs(sum(shifted) / len(shifted) - anchor)
            if distance < lowest_cost:
                lowest_cost, chosen = distance, shifted

        smoothed.append((chosen, t_on, t_off, velocity))

    return smoothed
# ---------------------------------------------------------------------------
# Melody processing
# ---------------------------------------------------------------------------
def extract_melody(vocal_midi_path):
    """Read a vocal MIDI file and return its notes in onset order.

    Notes from every instrument in the file are pooled together; no
    overlap removal is performed here, so the result is only monophonic
    if the input file already is.

    Args:
        vocal_midi_path: Path to the vocal MIDI file

    Returns:
        List of (pitch, start, end, velocity) tuples sorted by start time
    """
    midi = pretty_midi.PrettyMIDI(str(vocal_midi_path))
    pooled = [
        (note.pitch, note.start, note.end, note.velocity)
        for instrument in midi.instruments
        for note in instrument.notes
    ]
    return sorted(pooled, key=lambda entry: entry[1])
def transpose_to_range(notes, range_low=60, range_high=84):
    """Move a melody into a target MIDI range using octave shifts only.

    A single octave shift is chosen for the whole melody so that its
    median pitch lands within 6 semitones of the middle of the range.
    Any individual note still outside the range after that shift is then
    folded by further octaves on its own.

    Args:
        notes: List of (pitch, start, end, velocity) tuples
        range_low: Lowest MIDI pitch of the target range
        range_high: Highest MIDI pitch of the target range

    Returns:
        New list of (pitch, start, end, velocity) tuples; an empty input
        is returned unchanged.
    """
    if not notes:
        return notes

    def fold(value, low, high):
        # Raise by octaves until not below `low`, then lower until not above `high`
        while value < low:
            value += 12
        while value > high:
            value -= 12
        return value

    median_pitch = int(np.median([entry[0] for entry in notes]))
    centre = (range_low + range_high) // 2
    # Global shift: whatever it takes to bring the median near the centre
    shift = fold(median_pitch, centre - 6, centre + 6) - median_pitch

    return [
        (fold(pitch + shift, range_low, range_high), start, end, vel)
        for pitch, start, end, vel in notes
    ]
# ---------------------------------------------------------------------------
# Arrangement builders
# ---------------------------------------------------------------------------
def build_left_hand(bass_notes, chords):
    """Generate left-hand chord voicings from the detected chord events.

    Each chord event becomes one voicing in the 48-60 register at
    velocity 70: a single root note when the event's quality is 'note',
    otherwise up to three tones from voice_chord(). The voicings are then
    passed through apply_voice_leading() and converted to MIDI notes.
    Events shorter than 0.1 s produce no notes.

    Args:
        bass_notes: List of (pitch, start, end, velocity) from bass stem.
            Accepted as part of the interface; this function does not
            currently read it.
        chords: List of chord event dicts from chords.json. 'start_time'
            and 'end_time' are required; 'root_note' defaults to 'C' and
            'quality' to 'major'.

    Returns:
        List of pretty_midi.Note objects for the left hand (empty when
        there are no chord events)
    """
    if not chords:
        return []

    voicings = []
    for event in chords:
        root_name = event.get('root_note', 'C')
        quality = event.get('quality', 'major')
        start = event['start_time']
        end = event['end_time']

        root_pc = _root_name_to_pc(root_name)
        if quality == 'note':
            # Lone note rather than a chord: just the root, in the octave starting at 48
            pitches = [48 + root_pc % 12]
        else:
            pitches = voice_chord(root_pc, quality, register_low=48, register_high=60, max_notes=3)
        voicings.append((pitches, start, end, 70))

    # Smooth the register movement from one chord to the next
    voicings = apply_voice_leading(voicings)

    lh_notes = []
    for pitches, start, end, vel in voicings:
        # Very short chord events are dropped rather than rendered as blips
        if end - start < 0.1:
            continue
        lh_notes.extend(
            pretty_midi.Note(velocity=vel, pitch=int(pitch), start=start, end=end)
            for pitch in pitches
        )
    return lh_notes
def build_right_hand(melody_notes, chords):
    """Generate the right-hand part: vocal melody plus sparse chord fill.

    Every melody note is emitted first. Velocities of 1.0 or less are
    treated as normalised and scaled by 127; all velocities are capped at
    100. Then, wherever at least 1.0 s of silence separates the end of
    one melody note from the start of the next (in onset order), a quiet
    two-note chord (velocity 45, register 60-76) is repeated through the
    gap, using the chord event active at the start of the gap. Gaps with
    no active chord, or whose chord quality is 'note', are left empty.

    Args:
        melody_notes: List of (pitch, start, end, velocity) from vocals
        chords: List of chord event dicts from chords.json

    Returns:
        List of pretty_midi.Note objects for the right hand
    """
    rh_notes = []

    # Melody first
    for pitch, start, end, vel in melody_notes:
        if vel <= 1.0:
            velocity = min(int(vel * 127), 100)
        else:
            velocity = min(vel, 100)
        rh_notes.append(
            pretty_midi.Note(velocity=velocity, pitch=int(pitch), start=start, end=end)
        )

    if not (chords and melody_notes):
        return rh_notes

    def chord_at_time(t):
        # First chord event whose [start_time, end_time) span contains t
        return next(
            (c for c in chords if c['start_time'] <= t < c['end_time']),
            None,
        )

    by_onset = sorted(melody_notes, key=lambda entry: entry[1])
    for current, upcoming in zip(by_onset, by_onset[1:]):
        gap_start = current[2]   # end of this note
        gap_end = upcoming[1]    # start of the following note
        gap_duration = gap_end - gap_start

        # Short gaps stay silent (1.0 s is about 2 beats at 120 bpm)
        if gap_duration < 1.0:
            continue

        chord = chord_at_time(gap_start)
        if not chord or chord.get('quality') == 'note':
            continue

        root_pc = _root_name_to_pc(chord.get('root_note', 'C'))
        fill_pitches = voice_chord(root_pc, chord['quality'],
                                   register_low=60, register_high=76, max_notes=2)

        # Repeat the fill every fill_step seconds, stopping just short of the next melody note
        fill_step = min(0.5, gap_duration / 2)
        fill_time = gap_start
        while fill_time + fill_step <= gap_end - 0.05:
            rh_notes.extend(
                pretty_midi.Note(
                    velocity=45,
                    pitch=int(fp),
                    start=fill_time,
                    end=fill_time + fill_step * 0.9,
                )
                for fp in fill_pitches
            )
            fill_time += fill_step

    return rh_notes
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def arrange_piano(vocal_midi_path, bass_midi_path, chords_json_path, output_path):
    """Generate a piano arrangement from separated stem transcriptions.

    Loads the vocal and bass MIDI files and the chord JSON, transposes
    the melody into the 60-84 range, builds both hands, and writes a
    two-track MIDI file (both tracks use program 0). Progress is printed
    to stdout.

    The chord JSON may be either a top-level list of chord events or an
    object holding that list under the 'chords' key. A missing file, or
    JSON of any other shape, results in an arrangement with no chord
    events.

    Args:
        vocal_midi_path: Path to optimized vocal melody MIDI
        bass_midi_path: Path to optimized bass MIDI
        chords_json_path: Path to chord detection JSON
        output_path: Where to write the arrangement MIDI

    Returns:
        pretty_midi.PrettyMIDI: The arrangement
    """
    print(" Arranging for piano...")

    # Load inputs
    melody_notes = extract_melody(vocal_midi_path)
    print(f" Vocal melody: {len(melody_notes)} notes")

    bass_midi = pretty_midi.PrettyMIDI(str(bass_midi_path))
    bass_notes = sorted(
        (
            (n.pitch, n.start, n.end, n.velocity)
            for inst in bass_midi.instruments
            for n in inst.notes
        ),
        key=lambda entry: entry[1],
    )
    print(f" Bass: {len(bass_notes)} notes")

    chords = []
    chords_path = Path(chords_json_path)
    if chords_path.exists():
        with open(chords_path, encoding="utf-8") as f:
            chord_data = json.load(f)
        # Check the container type before using dict methods: a bare JSON
        # array has no .get(), so it must be handled as its own case.
        if isinstance(chord_data, list):
            chords = chord_data
        elif isinstance(chord_data, dict):
            events = chord_data.get('chords', [])
            chords = events if isinstance(events, list) else []
    print(f" Chords: {len(chords)} events")

    # Transpose melody to piano right-hand range
    melody_notes = transpose_to_range(melody_notes, range_low=60, range_high=84)

    # Build the two hands
    lh_notes = build_left_hand(bass_notes, chords)
    rh_notes = build_right_hand(melody_notes, chords)
    print(f" Left hand: {len(lh_notes)} notes")
    print(f" Right hand: {len(rh_notes)} notes")

    # Create the output MIDI: one instrument per hand
    arrangement = pretty_midi.PrettyMIDI()

    rh_inst = pretty_midi.Instrument(program=0, name="Piano Right")
    rh_inst.notes = rh_notes
    arrangement.instruments.append(rh_inst)

    lh_inst = pretty_midi.Instrument(program=0, name="Piano Left")
    lh_inst.notes = lh_notes
    arrangement.instruments.append(lh_inst)

    arrangement.write(str(output_path))
    print(f" Arrangement complete: {len(rh_notes) + len(lh_notes)} total notes")
    return arrangement