# vynl/modules/chords.py
# Last change (rlackey, commit dfa4828): Tune chord detection - less aggressive filtering
"""
Multi-Source Chord Detection
Analyzes stems AND full mix for best results
"""
import numpy as np
import warnings

# NOTE: with no category argument this installs an "ignore" filter for every
# warning, and it takes effect for the whole process as soon as this module
# is imported.
warnings.filterwarnings('ignore')

# librosa and scipy.ndimage are treated as optional: when either import fails
# the flag below is False and the extraction functions in this module return
# an empty list instead of raising.
try:
    import librosa
    import scipy.ndimage
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False
def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5):
    """
    Detect chords by combining several audio sources.

    The full mix (when supplied and present on disk) and the first ``*.mp3``
    stem whose file name contains "bass", "guitar", "piano"/"keys" or "other"
    are each analysed on their own. Every non-empty result is paired with a
    fixed weight (full mix 4.0, bass 3.0, guitar 2.5, piano 2.0, other 1.5)
    and the weighted results are handed to ``merge_chord_detections``.

    Args:
        stems_dir: Path to the directory containing the stem files
        original_audio: Path to the original full mix (optional)
        min_duration: Minimum chord duration, forwarded to the merge step

    Returns:
        List of (timestamp, chord_name) tuples; empty when librosa is not
        installed or no source yielded any chords
    """
    if not LIBROSA_AVAILABLE:
        print(" [WARN] Chord extraction skipped (librosa not installed)")
        return []

    from pathlib import Path

    stems_dir = Path(stems_dir)
    print(" Analyzing multiple sources for chord detection...")

    chord_candidates = []

    # The full mix carries the highest weight of all sources.
    if original_audio and Path(original_audio).exists():
        print(" • Full mix (original audio)")
        mix_chords = detect_chords_from_stem(original_audio, focus='harmony')
        if mix_chords:
            chord_candidates.append(('full_mix', mix_chords, 4.0))

    # One row per stem kind, in the order they are analysed:
    # (file-name keywords, candidate label, detection focus, weight, message)
    stem_sources = (
        (('bass',), 'bass', 'bass', 3.0, " • Bass stem"),
        (('guitar',), 'guitar', 'harmony', 2.5, " • Guitar stem"),
        (('piano', 'keys'), 'piano', 'harmony', 2.0, " • Piano/Keys stem"),
        (('other',), 'other', 'harmony', 1.5, " • Other stem"),
    )
    for keywords, label, focus, weight, message in stem_sources:
        # Only the first matching file is used, even if it yields no chords.
        matching_files = (
            path for path in stems_dir.glob('*.mp3')
            if any(word in path.stem.lower() for word in keywords)
        )
        stem_path = next(matching_files, None)
        if stem_path is None:
            continue
        print(message)
        detected = detect_chords_from_stem(stem_path, focus=focus)
        if detected:
            chord_candidates.append((label, detected, weight))

    if not chord_candidates:
        print(" [WARN] No suitable sources found")
        return []

    print(f" Merging results from {len(chord_candidates)} sources...")
    return merge_chord_detections(chord_candidates, min_duration)
def detect_chords_from_stem(stem_file, focus='harmony'):
    """
    Detect chord changes across one whole audio file.

    The file is loaded with ``librosa.load(..., sr=22050, duration=None)``,
    turned into a CQT chromagram (hop length 256 for ``focus='bass'``, 512
    otherwise), median-filtered over 9 frames along the time axis, and each
    frame is matched against the chord templates.

    Args:
        stem_file: Path to the audio file to analyse
        focus: 'bass' or 'harmony'; selects the hop length and is passed on
            to the template matcher

    Returns:
        List of (time, chord_name, confidence) tuples, one per accepted chord
        change (confidence must exceed 0.12). Any failure is reported with a
        printed warning and an empty list is returned.
    """
    try:
        samples, rate = librosa.load(str(stem_file), sr=22050, duration=None)
        hop = 256 if focus == 'bass' else 512

        chroma = librosa.feature.chroma_cqt(y=samples, sr=rate, hop_length=hop)
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 9))
        templates = create_chord_templates()

        frame_count = chroma.shape[1]
        frame_times = librosa.frames_to_time(
            np.arange(frame_count), sr=rate, hop_length=hop
        )

        changes = []
        previous = None
        for index in range(frame_count):
            name, score = match_chord_template_with_confidence(
                chroma[:, index], templates, focus
            )
            # Frames that repeat the last accepted chord, or that are not
            # confident enough, leave the running state untouched.
            if name == previous or not (score > 0.12):
                continue
            changes.append((float(frame_times[index]), name, float(score)))
            previous = name
        return changes
    except Exception as e:
        from pathlib import Path
        print(f" [WARN] Failed to analyze {Path(stem_file).name}: {e}")
        return []
def merge_chord_detections(chord_candidates, min_duration=0.5):
    """
    Merge per-source chord detections into one chord timeline.

    With exactly one source its chord changes are returned unfiltered. With
    several sources, every 0.5 s grid point collects a vote from each source
    (confidence * source weight of the chord active at that time) and the
    chord with the highest total wins.

    Args:
        chord_candidates: List of (source_name, chords, weight) tuples, where
            chords is a list of (time, chord_name, confidence) tuples in
            ascending time order
        min_duration: A winning chord that is replaced before it has lasted
            this many seconds is dropped (the final chord is always kept)

    Returns:
        List of (timestamp, chord_name) tuples. The same chord is never
        listed twice in a row.
    """
    from bisect import bisect_right

    # A single source is passed through as-is, only stripping the confidence.
    if len(chord_candidates) == 1:
        _, chords, _ = chord_candidates[0]
        return [(time, chord) for time, chord, _ in chords]

    # Keep each source's change times in their own list so the chord active
    # at a grid point can be found by binary search instead of a rescan.
    sources = [
        ([time for time, _, _ in chords], chords, weight)
        for _, chords, weight in chord_candidates
    ]
    change_times = [time for times, _, _ in sources for time in times]
    if not change_times:
        return []

    merged = []

    def emit(start, chord):
        # Dropping a too-short chord can leave the same chord on both sides
        # of the gap; restating it would add a change that never happened.
        if not merged or merged[-1][1] != chord:
            merged.append((start, chord))

    current_chord = None
    current_start = 0.0
    for grid_time in np.arange(0, max(change_times) + 1, 0.5):
        votes = {}
        for times, chords, weight in sources:
            index = bisect_right(times, grid_time) - 1
            if index < 0:
                # This source has not produced a chord yet at grid_time.
                continue
            _, chord, conf = chords[index]
            votes[chord] = votes.get(chord, 0) + conf * weight

        if not votes:
            continue

        # Ties go to the chord that received its first vote earliest.
        best_chord = max(votes, key=votes.get)
        if best_chord == current_chord:
            continue

        if current_chord is not None and grid_time - current_start >= min_duration:
            emit(current_start, current_chord)
        current_chord = best_chord
        current_start = float(grid_time)

    if current_chord:
        emit(current_start, current_chord)
    return merged
def get_chord_at_time(chords, time):
    """
    Return the chord that is sounding at ``time``.

    Args:
        chords: List of (start_time, chord_name, confidence) tuples; the scan
            stops at the first entry that starts after ``time``
        time: Moment to look up, in the same unit as the start times

    Returns:
        (chord_name, confidence) of the last entry scanned that starts at or
        before ``time``, or None when there is none
    """
    current = None
    for start, name, score in chords:
        if not (start <= time):
            return current
        current = (name, score)
    return current
def create_chord_templates():
    """
    Build weighted 12-bin pitch-class templates for 60 chords.

    For each of the 12 roots, five qualities are registered in this order:
    major (plain root name), minor ('m'), dominant 7 ('7'), major 7 ('maj7')
    and minor 7 ('m7'). The root always has weight 1.0; the other chord tones
    get smaller weights.

    Returns:
        Dict mapping chord name to a numpy array of length 12
    """
    pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # (name suffix, ((semitones above the root, weight), ...))
    qualities = (
        ('', ((0, 1.0), (4, 0.8), (7, 0.6))),
        ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),
        ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),
        ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),
        ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),
    )

    templates = {}
    for root_index, root in enumerate(pitch_classes):
        for suffix, tones in qualities:
            profile = np.zeros(12)
            for interval, weight in tones:
                profile[(root_index + interval) % 12] = weight
            templates[root + suffix] = profile
    return templates
def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
    """
    Pick the chord template that best matches one chroma frame.

    Both the frame and each template are scaled to sum to 1 (when their sum
    is positive) and compared by dot product. With ``focus='bass'`` the score
    of chord names containing neither '7' nor 'm' is multiplied by 1.1.

    Args:
        chroma_frame: Numpy array holding one chroma frame
        templates: Dict mapping chord name to template array
        focus: 'bass' enables the boost described above

    Returns:
        (chord_name, score); ('C', -1) when no template scored above -1
    """
    energy = chroma_frame.sum()
    if energy > 0:
        chroma_frame = chroma_frame / energy

    favour_plain_major = focus == 'bass'
    winner, winner_score = 'C', -1
    for name, template in templates.items():
        template_total = template.sum()
        if not (template_total > 0):
            # A template without any weight cannot be normalised.
            continue
        score = np.dot(chroma_frame, template / template_total)
        if favour_plain_major and '7' not in name and 'm' not in name:
            score *= 1.1
        if score > winner_score:
            winner, winner_score = name, score
    return winner, winner_score
def extract_chords(audio_path, min_duration=0.5):
    """
    Extract chords from a single audio file.

    Every chroma frame is matched against the chord templates, then the
    frame-level results are consolidated: frames below the confidence
    threshold are ignored, and a chord that is replaced before it has lasted
    ``min_duration`` seconds is dropped.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum chord duration in seconds (default 0.5s)

    Returns:
        List of (timestamp, chord_name) tuples. The same chord is never
        listed twice in a row. Empty when librosa is not installed or the
        analysis fails (a warning is printed in the latter case).
    """
    if not LIBROSA_AVAILABLE:
        return []

    try:
        y, sr = librosa.load(audio_path, sr=22050, duration=None)

        # hop_length=1024 at 22050Hz = ~46ms per frame
        hop_length = 1024
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)

        # Median filter across 11 frames (~0.5 seconds) along the time axis
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))

        templates = create_chord_templates()

        # First pass: one (time, chord, confidence) candidate per frame.
        # All frame times are converted in a single call rather than one
        # library call per frame.
        frame_times = librosa.frames_to_time(
            np.arange(chroma.shape[1]), sr=sr, hop_length=hop_length
        )
        raw_chords = []
        for index, frame_time in enumerate(frame_times):
            chord, conf = match_chord_template_with_confidence(
                chroma[:, index], templates, 'harmony'
            )
            raw_chords.append((float(frame_time), chord, float(conf)))

        # Second pass: consolidate with minimum duration and confidence.
        consolidated = []

        def emit(start, chord):
            # When a too-short chord is dropped between two runs of the same
            # chord, the second run is a continuation, not a new change.
            if not consolidated or consolidated[-1][1] != chord:
                consolidated.append((start, chord))

        confidence_threshold = 0.15
        current_chord = None
        current_start = 0.0
        for time, chord, conf in raw_chords:
            if conf < confidence_threshold or chord == current_chord:
                continue
            if current_chord is not None and time - current_start >= min_duration:
                emit(current_start, current_chord)
            current_chord = chord
            current_start = time

        # The last chord runs until the final analysed frame.
        if current_chord is not None and raw_chords:
            if raw_chords[-1][0] - current_start >= min_duration:
                emit(current_start, current_chord)

        return consolidated
    except Exception as e:
        print(f" [WARN] Chord extraction error: {e}")
        return []
def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
    """
    Format chords aligned above lyrics like a traditional chord chart.

    The song duration is divided evenly among the non-blank lyric lines;
    each such line is given the chords whose timestamps fall inside its
    time window. Blank lines are reproduced for layout but take no time.

    Args:
        chords: List of (timestamp, chord_name) tuples
        lyrics: User-provided lyrics text (with optional [Section] markers)
        duration: Total song duration in seconds
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Formatted chord chart string. Falls back to the timestamped chord
        list when there are no lyric lines to align against.
    """
    if not lyrics or not lyrics.strip():
        return format_timestamped_chords(chords, key, bpm, song_name)

    sections = parse_lyrics_into_sections(lyrics)
    if not sections:
        return format_timestamped_chords(chords, key, bpm, song_name)

    # Only lines with text advance the clock in the loop below, so only they
    # may be counted here; counting blank lines as well would make each time
    # window too short and leave the end of the song off the chart.
    total_lines = sum(
        1 for section in sections for line in section['lines'] if line.strip()
    )
    if total_lines == 0:
        return format_timestamped_chords(chords, key, bpm, song_name)

    time_per_line = duration / total_lines

    output = []
    if song_name:
        output.append(f"# {song_name}")
    output.append(f"Key: {key}")
    output.append(f"BPM: {bpm}")
    output.append("")

    current_time = 0.0
    for section in sections:
        if section['name']:
            output.append(f"[{section['name']}]")

        for line in section['lines']:
            if not line.strip():
                output.append("")
                continue

            # Chords that fall within this line's time window
            line_end_time = current_time + time_per_line
            line_chords = get_chords_in_range(chords, current_time, line_end_time)
            if line_chords:
                output.append(
                    build_chord_line(line, line_chords, current_time, time_per_line)
                )
            output.append(line)
            current_time = line_end_time

        output.append("")  # Blank line between sections

    return "\n".join(output)
def parse_lyrics_into_sections(lyrics):
    """
    Split lyrics text into named sections.

    A line consisting only of a bracketed label, e.g. ``[Verse 1]`` or
    ``[Chorus]``, starts a new section; every other line (blank ones
    included) is added to the section currently being collected. Lines
    before the first header go into a section with an empty name.

    Returns:
        List of dicts with keys 'name' (str) and 'lines' (list of str)
    """
    import re

    header_pattern = re.compile(r'^\s*\[([^\]]+)\]\s*$')

    collected = []
    active = {'name': '', 'lines': []}
    for raw_line in lyrics.split('\n'):
        header = header_pattern.match(raw_line)
        if header is None:
            # Ordinary line; blank lines are kept for formatting.
            active['lines'].append(raw_line)
            continue
        # A header closes the section being collected, if it holds anything.
        if active['name'] or active['lines']:
            collected.append(active)
        active = {'name': header.group(1), 'lines': []}

    if active['name'] or active['lines']:
        collected.append(active)

    return [section for section in collected if section['lines'] or section['name']]
def get_chords_in_range(chords, start_time, end_time):
    """
    Select the chords whose timestamp lies in [start_time, end_time).

    Args:
        chords: Iterable of (timestamp, chord_name) pairs
        start_time: Inclusive lower bound
        end_time: Exclusive upper bound

    Returns:
        List of the matching (timestamp, chord_name) tuples, in input order
    """
    return [
        (stamp, name)
        for stamp, name in chords
        if start_time <= stamp < end_time
    ]
def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
    """
    Build a chord line positioned above lyrics.

    Each chord is placed at a column proportional to its timing within the
    line. Chords are laid out left to right in time order, are always
    written in full, and are separated from the previous chord by at least
    one space; the chord line may therefore be longer than the lyric line.

    Args:
        lyric_line: The lyric text the chords belong to
        line_chords: List of (timestamp, chord_name) tuples for this line
        line_start_time: Time at which the line starts, in seconds
        line_duration: Time span covered by the line, in seconds

    Returns:
        The chord line without trailing whitespace; "" when there are no
        chords; the chord names joined by single spaces when the lyric line
        is empty
    """
    if not line_chords:
        return ""

    line_length = len(lyric_line)
    if line_length == 0:
        # Nothing to align against: just list the chords.
        return " ".join(chord for _, chord in line_chords)

    pieces = []
    written = 0  # number of characters emitted so far
    for time, chord in sorted(line_chords, key=lambda entry: entry[0]):
        # Map the chord's timing to a column of the lyric line.
        position_ratio = (time - line_start_time) / line_duration if line_duration > 0 else 0
        column = int(position_ratio * line_length)
        column = max(0, min(column, line_length - 1))

        # Prefer to end the chord at the end of the lyric when it fits.
        if column + len(chord) > line_length:
            column = max(0, line_length - len(chord))

        # Never overlap or touch the previous chord: "C" followed by "G"
        # must not read as "CG".
        if pieces:
            column = max(column, written + 1)

        pieces.append(" " * (column - written) + chord)
        written = column + len(chord)

    return "".join(pieces).rstrip()
def find_next_free_position(chord_chars, start_pos, chord_len):
    """
    Locate a run of ``chord_len`` blank cells in ``chord_chars``.

    Candidate start columns are tried from ``start_pos`` rightwards first,
    then from ``start_pos - 1`` leftwards down to 0. Only columns where the
    whole chord fits inside the list are considered.

    Returns:
        The first candidate column whose cells are all ' ', or ``start_pos``
        itself when no candidate qualifies
    """
    last_start = len(chord_chars) - chord_len

    def is_clear(column):
        return all(chord_chars[column + offset] == ' ' for offset in range(chord_len))

    rightwards = range(start_pos, last_start + 1)
    leftwards = (column for column in range(start_pos - 1, -1, -1) if column <= last_start)

    for candidates in (rightwards, leftwards):
        for column in candidates:
            if is_clear(column):
                return column

    # Fallback: no free run anywhere
    return start_pos
def format_timestamped_chords(chords, key, bpm, song_name=''):
    """
    Fallback: format chords with timestamps (no lyrics).

    Args:
        chords: List of (timestamp_in_seconds, chord_name) tuples
        key: Key shown in the header line
        bpm: BPM shown in the header line
        song_name: Optional title; adds a "# title" line when non-empty

    Returns:
        Header lines followed by one "[m:ss.ss] chord" line per chord,
        joined by newlines
    """
    lines = []
    if song_name:
        lines.append(f"# {song_name}")
    lines.append(f"Key: {key} | BPM: {bpm}")
    lines.append("")
    lines.append("=" * 40)
    lines.append("")

    for time, chord in chords:
        # Round to hundredths before splitting into minutes and seconds;
        # otherwise a value such as 59.996 is printed as "0:60.00".
        centiseconds = int(round(float(time) * 100))
        mins, remainder = divmod(centiseconds, 6000)
        lines.append(f"[{mins}:{remainder / 100:05.2f}] {chord}")

    return "\n".join(lines)