| | """ |
| | Multi-Source Chord Detection |
| | Analyzes stems AND full mix for best results |
| | """ |
| |
|
| | import numpy as np |
| | import warnings |
| | warnings.filterwarnings('ignore') |
| |
|
| | try: |
| | import librosa |
| | import scipy.ndimage |
| | LIBROSA_AVAILABLE = True |
| | except ImportError: |
| | LIBROSA_AVAILABLE = False |
| |
|
| |
|
def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5):
    """
    Extract chords from multiple sources - stems AND full track.

    Each available source is analysed with detect_chords_from_stem and the
    per-source results are combined by merge_chord_detections, using a fixed
    weight per source (full mix 4.0, bass 3.0, guitar 2.5, piano/keys 2.0,
    other 1.5).

    Args:
        stems_dir: Path to directory containing stems (only ``*.mp3`` files
            are considered)
        original_audio: Path to original full mix (optional but recommended)
        min_duration: Minimum chord duration, forwarded to the merge step

    Returns:
        List of (timestamp, chord_name) tuples; empty when librosa is
        unavailable or no source produced any chords
    """
    if not LIBROSA_AVAILABLE:
        print(" [WARN] Chord extraction skipped (librosa not installed)")
        return []

    from pathlib import Path

    stems_dir = Path(stems_dir)

    print(" Analyzing multiple sources for chord detection...")

    # Entries are (source_name, detections, weight).
    chord_candidates = []

    if original_audio and Path(original_audio).exists():
        print(" • Full mix (original audio)")
        full_chords = detect_chords_from_stem(original_audio, focus='harmony')
        if full_chords:
            chord_candidates.append(('full_mix', full_chords, 4.0))

    # (source_name, filename keywords, progress message, focus, weight),
    # listed in the order the sources are analysed.
    stem_sources = (
        ('bass', ('bass',), " • Bass stem", 'bass', 3.0),
        ('guitar', ('guitar',), " • Guitar stem", 'harmony', 2.5),
        ('piano', ('piano', 'keys'), " • Piano/Keys stem", 'harmony', 2.0),
        ('other', ('other',), " • Other stem", 'harmony', 1.5),
    )

    # Scan the directory once. Sorting makes the "first matching file" choice
    # deterministic; glob() itself yields entries in arbitrary order.
    stem_files = sorted(stems_dir.glob('*.mp3'))

    for source_name, keywords, message, focus, weight in stem_sources:
        chosen = next(
            (
                stem_file
                for stem_file in stem_files
                if any(keyword in stem_file.stem.lower() for keyword in keywords)
            ),
            None,
        )
        if chosen is None:
            continue

        # Only the first matching file per category is analysed.
        print(message)
        stem_chords = detect_chords_from_stem(chosen, focus=focus)
        if stem_chords:
            chord_candidates.append((source_name, stem_chords, weight))

    if not chord_candidates:
        print(" [WARN] No suitable sources found")
        return []

    print(f" Merging results from {len(chord_candidates)} sources...")
    return merge_chord_detections(chord_candidates, min_duration)
| |
|
| |
|
def detect_chords_from_stem(stem_file, focus='harmony'):
    """
    Detect chord changes across the whole audio file.

    Args:
        stem_file: Path to the audio file to analyse
        focus: 'bass' uses a hop length of 256 samples, anything else 512;
            the value is also forwarded to the template matcher

    Returns:
        List of (timestamp, chord_name, confidence) tuples, one per detected
        chord change; an empty list if anything fails during analysis
    """
    try:
        samples, rate = librosa.load(str(stem_file), sr=22050, duration=None)

        if focus == 'bass':
            hop = 256
        else:
            hop = 512

        chroma = librosa.feature.chroma_cqt(y=samples, sr=rate, hop_length=hop)
        # Median-filter along the frame axis only.
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 9))

        templates = create_chord_templates()
        detected = []
        previous = None

        for frame_index, column in enumerate(chroma.T):
            label, score = match_chord_template_with_confidence(column, templates, focus)

            # Record a frame only when it is a confident change of chord.
            if label == previous or not score > 0.12:
                continue

            stamp = librosa.frames_to_time(frame_index, sr=rate, hop_length=hop)
            detected.append((float(stamp), label, float(score)))
            previous = label

        return detected

    except Exception as e:
        # Best effort: one unreadable source must not abort the whole run.
        from pathlib import Path
        print(f" [WARN] Failed to analyze {Path(stem_file).name}: {e}")
        return []
| |
|
| |
|
def merge_chord_detections(chord_candidates, min_duration=0.5):
    """
    Merge chord detections from several sources by weighted voting.

    With exactly one source its detections are returned directly (confidence
    dropped, no minimum-duration filtering). Otherwise the timeline is sampled
    every 0.5 seconds; at each sample every source votes for its currently
    active chord with strength ``confidence * weight`` and the chord with the
    highest total wins.

    Args:
        chord_candidates: List of (source_name, detections, weight) tuples,
            where detections is a list of (timestamp, chord_name, confidence)
            tuples. Detections are assumed to be sorted by timestamp.
        min_duration: A winning chord that is replaced before lasting this
            many seconds is dropped. The final chord is always kept.

    Returns:
        List of (timestamp, chord_name) tuples
    """
    from bisect import bisect_right

    if len(chord_candidates) == 1:
        _, chords, _ = chord_candidates[0]
        return [(time, chord) for time, chord, _ in chords]

    latest = max(
        (time for _, chords, _ in chord_candidates for time, _, _ in chords),
        default=None,
    )
    if latest is None:
        return []

    # Pre-extract onset times so the active chord can be found by bisection
    # instead of rescanning each detection list at every grid point.
    sources = [
        ([time for time, _, _ in chords], chords, weight)
        for _, chords, weight in chord_candidates
    ]

    merged = []
    last_chord = None
    last_time = 0.0

    # .tolist() yields plain Python floats, so callers do not receive
    # numpy scalars in the result tuples.
    for grid_time in np.arange(0, latest + 1, 0.5).tolist():
        votes = {}

        for onsets, chords, weight in sources:
            index = bisect_right(onsets, grid_time) - 1
            if index < 0:
                # This source has not produced a chord yet at grid_time.
                continue
            _, chord, conf = chords[index]
            votes[chord] = votes.get(chord, 0.0) + conf * weight

        if not votes:
            continue

        # Ties resolve to the chord that received its first vote earliest.
        best_chord = max(votes, key=votes.get)
        if best_chord == last_chord:
            continue

        if last_chord is not None and grid_time - last_time >= min_duration:
            merged.append((last_time, last_chord))

        last_chord = best_chord
        last_time = grid_time

    if last_chord is not None:
        merged.append((last_time, last_chord))

    return merged
| |
|
| |
|
def get_chord_at_time(chords, time):
    """
    Return the chord active at the given time.

    Args:
        chords: List of (timestamp, chord_name, confidence) tuples; the scan
            stops at the first entry that starts after ``time``
        time: Time in seconds to look up

    Returns:
        (chord_name, confidence) of the last entry seen that starts at or
        before ``time``, or None if there is none
    """
    found = None
    for onset, label, confidence in chords:
        if onset > time:
            break
        found = (label, confidence)
    return found
| |
|
| |
|
def create_chord_templates():
    """
    Build weighted 12-bin pitch-class templates for every supported chord.

    For each of the 12 roots, five chord types are generated: major (no
    suffix), minor ('m'), dominant seventh ('7'), major seventh ('maj7') and
    minor seventh ('m7').

    Returns:
        Dict mapping chord name to a length-12 numpy array of weights
    """
    pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # (name suffix, ((semitones above root, weight), ...)). The order here
    # fixes the insertion order of the returned dict.
    chord_types = (
        ('', ((0, 1.0), (4, 0.8), (7, 0.6))),
        ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),
        ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),
        ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),
        ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),
    )

    templates = {}
    for root_index, root in enumerate(pitch_names):
        for suffix, tones in chord_types:
            profile = np.zeros(12)
            for interval, weight in tones:
                profile[(root_index + interval) % 12] = weight
            templates[root + suffix] = profile

    return templates
| |
|
| |
|
def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
    """
    Pick the chord template that best matches one chroma frame.

    Both the frame and each template are scaled to sum to 1 before their dot
    product is taken. With ``focus='bass'``, names containing neither '7' nor
    'm' get a 10% score boost.

    Args:
        chroma_frame: Array of pitch-class energies for one frame
        templates: Dict mapping chord name to template array
        focus: 'bass' enables the boost described above

    Returns:
        (chord_name, score); ('C', -1) if no template could be scored
    """
    frame_total = chroma_frame.sum()
    if frame_total > 0:
        chroma_frame = chroma_frame / frame_total

    boost_plain_names = focus == 'bass'
    winner = 'C'
    top_score = -1

    for label, profile in templates.items():
        profile_total = profile.sum()
        if not profile_total > 0:
            # A template without positive mass cannot be normalised.
            continue

        similarity = np.dot(chroma_frame, profile / profile_total)

        if boost_plain_names and '7' not in label and 'm' not in label:
            similarity *= 1.1

        # Strict comparison: on ties the earliest template wins.
        if similarity > top_score:
            winner, top_score = label, similarity

    return winner, top_score
| |
|
| |
|
def extract_chords(audio_path, min_duration=0.5):
    """
    Extract a chord progression from a single audio file.

    Every chroma frame is matched against the chord templates; confident
    frames are then consolidated into chord segments.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum chord duration in seconds; shorter segments are
            dropped

    Returns:
        List of (timestamp, chord_name) tuples; empty if librosa is
        unavailable or the analysis fails
    """
    if not LIBROSA_AVAILABLE:
        return []

    try:
        signal, rate = librosa.load(audio_path, sr=22050, duration=None)
        hop = 1024

        chroma = librosa.feature.chroma_cqt(y=signal, sr=rate, hop_length=hop)
        # Median-filter along the frame axis only.
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))

        templates = create_chord_templates()

        # One (time, chord, confidence) entry per frame.
        per_frame = []
        for frame_index, column in enumerate(chroma.T):
            stamp = librosa.frames_to_time(frame_index, sr=rate, hop_length=hop)
            label, score = match_chord_template_with_confidence(column, templates, 'harmony')
            per_frame.append((float(stamp), label, float(score)))

        segments = []
        active = None
        active_since = 0.0
        threshold = 0.15

        for stamp, label, score in per_frame:
            # Low-confidence frames and repeats of the active chord are ignored.
            if not score >= threshold or label == active:
                continue

            if active is not None and stamp - active_since >= min_duration:
                segments.append((active_since, active))

            active = label
            active_since = stamp

        # Close the last segment against the time of the final frame.
        if active is not None and per_frame:
            if per_frame[-1][0] - active_since >= min_duration:
                segments.append((active_since, active))

        return segments

    except Exception as e:
        print(f" [WARN] Chord extraction error: {e}")
        return []
| |
|
| |
|
def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
    """
    Format chords aligned above lyrics like a traditional chord chart.

    The song duration is divided evenly between the non-blank lyric lines;
    each line is shown with the chords whose timestamps fall inside its time
    slot. Blank lines are reproduced in the output but take no time.

    Args:
        chords: List of (timestamp, chord_name) tuples
        lyrics: User-provided lyrics text (with optional [Section] markers)
        duration: Total song duration in seconds
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Formatted chord chart string; falls back to the timestamped chord
        list when there are no non-blank lyric lines
    """
    if not lyrics or not lyrics.strip():
        return format_timestamped_chords(chords, key, bpm, song_name)

    sections = parse_lyrics_into_sections(lyrics)

    # Only non-blank lines advance the clock below, so only they may share
    # the duration. Counting blank lines too would shrink every slot and
    # leave chords near the end of the song outside any line.
    timed_lines = sum(
        1 for section in sections for line in section['lines'] if line.strip()
    )
    if timed_lines == 0:
        return format_timestamped_chords(chords, key, bpm, song_name)

    time_per_line = duration / timed_lines

    output = []

    if song_name:
        output.append(f"# {song_name}")
    output.append(f"Key: {key}")
    output.append(f"BPM: {bpm}")
    output.append("")

    current_time = 0.0

    for section in sections:
        if section['name']:
            output.append(f"[{section['name']}]")

        for line in section['lines']:
            if not line.strip():
                output.append("")
                continue

            line_end_time = current_time + time_per_line
            line_chords = get_chords_in_range(chords, current_time, line_end_time)

            if line_chords:
                output.append(
                    build_chord_line(line, line_chords, current_time, time_per_line)
                )

            output.append(line)
            current_time = line_end_time

        # Blank separator after every section.
        output.append("")

    return "\n".join(output)
| |
|
| |
|
def parse_lyrics_into_sections(lyrics):
    """
    Split lyrics text into named sections.

    A line consisting only of ``[Name]`` (optionally surrounded by
    whitespace) starts a new section, for example:
        [Verse 1]
        Line 1
        Line 2

        [Chorus]
        Line 3

    Lines before the first marker go into a section with an empty name.
    Blank lines are kept as entries in the section's line list.

    Returns:
        List of dicts with keys 'name' (str) and 'lines' (list of str)
    """
    import re

    header_pattern = re.compile(r'^\s*\[([^\]]+)\]\s*$')

    parsed = []
    active = {'name': '', 'lines': []}

    for raw_line in lyrics.split('\n'):
        header = header_pattern.match(raw_line)

        if header is None:
            active['lines'].append(raw_line)
            continue

        # A new marker closes the section in progress, unless it is the
        # still-empty unnamed one.
        if active['name'] or active['lines']:
            parsed.append(active)
        active = {'name': header.group(1), 'lines': []}

    if active['name'] or active['lines']:
        parsed.append(active)

    return [entry for entry in parsed if entry['lines'] or entry['name']]
| |
|
| |
|
def get_chords_in_range(chords, start_time, end_time):
    """
    Select the chords whose timestamp lies in [start_time, end_time).

    Args:
        chords: Iterable of (timestamp, chord_name) pairs
        start_time: Inclusive lower bound
        end_time: Exclusive upper bound

    Returns:
        List of (timestamp, chord_name) tuples in their original order
    """
    return [
        (moment, label)
        for moment, label in chords
        if start_time <= moment < end_time
    ]
| |
|
| |
|
def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
    """
    Build a chord line positioned above lyrics.

    Places chords at positions proportional to their timing within the line.
    A chord name is never truncated or written over another chord: when it
    does not fit inside the lyric width, or no separated free slot exists,
    it is appended after the last chord and the chord line grows beyond the
    lyric length.

    Args:
        lyric_line: The lyric text the chords belong to
        line_chords: List of (timestamp, chord_name) tuples for this line
        line_start_time: Time in seconds at which the line starts
        line_duration: Time in seconds allotted to the line

    Returns:
        The chord line with trailing whitespace removed; "" when there are
        no chords
    """
    if not line_chords:
        return ""

    line_length = len(lyric_line)
    if line_length == 0:
        return " ".join(chord for _, chord in line_chords)

    chord_chars = [' '] * line_length

    def slot_is_clear(start, width):
        # True when the slot lies inside the buffer and is blank, including
        # one character on each side so neighbouring chord names cannot fuse.
        if start + width > len(chord_chars):
            return False
        guard = chord_chars[max(0, start - 1):start + width + 1]
        return all(cell == ' ' for cell in guard)

    for time, chord in line_chords:
        time_offset = time - line_start_time
        position_ratio = time_offset / line_duration if line_duration > 0 else 0

        char_pos = int(position_ratio * line_length)
        char_pos = max(0, min(char_pos, line_length - 1))

        chord_len = len(chord)

        # Pull the chord left so it ends at the lyric's right edge if possible.
        if char_pos + chord_len > line_length:
            char_pos = max(0, line_length - chord_len)

        # Another chord within three characters counts as a collision.
        nearby = chord_chars[max(0, char_pos - 3):char_pos + chord_len + 3]
        if any(cell != ' ' for cell in nearby):
            char_pos = find_next_free_position(chord_chars, char_pos, chord_len)

        # The helper returns its start position when nothing was found, and
        # a chord may be wider than the lyric: verify before writing.
        if not slot_is_clear(char_pos, chord_len):
            used = len(''.join(chord_chars).rstrip())
            char_pos = used + 1 if used else 0

        end_pos = char_pos + chord_len
        if end_pos > len(chord_chars):
            chord_chars.extend(' ' * (end_pos - len(chord_chars)))
        chord_chars[char_pos:end_pos] = list(chord)

    return ''.join(chord_chars).rstrip()
| |
|
| |
|
def find_next_free_position(chord_chars, start_pos, chord_len):
    """
    Find a run of blank cells wide enough for a chord.

    Positions from ``start_pos`` rightwards are tried first, then positions
    to the left of ``start_pos``, nearest first.

    Args:
        chord_chars: List of single characters making up the chord line
        start_pos: Preferred start index
        chord_len: Number of cells required

    Returns:
        Start index of the first free run found, or ``start_pos`` itself
        when no run is free
    """
    last_start = len(chord_chars) - chord_len

    def is_free(at):
        return all(chord_chars[at + offset] == ' ' for offset in range(chord_len))

    candidates = list(range(start_pos, last_start + 1))
    candidates += [
        at for at in range(start_pos - 1, -1, -1) if at <= last_start
    ]

    for at in candidates:
        if is_free(at):
            return at

    return start_pos
| |
|
| |
|
def format_timestamped_chords(chords, key, bpm, song_name=''):
    """
    Fallback: format chords with timestamps (no lyrics).

    Args:
        chords: List of (timestamp, chord_name) tuples, timestamps in seconds
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song; when given it is emitted as a heading

    Returns:
        Multi-line string: header, separator, then one
        ``[M:SS.cc] chord`` line per chord
    """
    lines = []

    if song_name:
        lines.append(f"# {song_name}")
    lines.append(f"Key: {key} | BPM: {bpm}")
    lines.append("")
    lines.append("=" * 40)
    lines.append("")

    for time, chord in chords:
        # Round to centiseconds before splitting off the minutes; rounding
        # the seconds afterwards turns 59.999 s into "0:60.00".
        centiseconds = int(round(float(time) * 100))
        mins, remainder = divmod(centiseconds, 6000)
        secs, cents = divmod(remainder, 100)
        lines.append(f"[{mins}:{secs:02d}.{cents:02d}] {chord}")

    return "\n".join(lines)
| |
|