"""
Multi-Source Chord Detection
Analyzes stems AND full mix for best results
"""

import re
import warnings
from collections import defaultdict
from operator import itemgetter
from pathlib import Path

import numpy as np

# NOTE: this silences every warning process-wide. It is kept because callers
# may rely on the quiet output; it runs before the optional librosa import.
warnings.filterwarnings('ignore')

try:
    import librosa
    import scipy.ndimage
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False


# Vote weight given to the full mix when merging sources.
_FULL_MIX_WEIGHT = 4.0

# Stem sources in priority order:
# (printed label, candidate name, filename keywords, detection focus, weight)
_STEM_SOURCES = (
    ("Bass stem", "bass", ("bass",), "bass", 3.0),
    ("Guitar stem", "guitar", ("guitar",), "harmony", 2.5),
    ("Piano/Keys stem", "piano", ("piano", "keys"), "harmony", 2.0),
    ("Other stem", "other", ("other",), "harmony", 1.5),
)

# Minimum template-match score for a frame to count as a chord change.
_STEM_CONFIDENCE_THRESHOLD = 0.12
_EXTRACT_CONFIDENCE_THRESHOLD = 0.15

# Spacing (seconds) of the voting grid used when merging several sources.
_MERGE_GRID_STEP = 0.5

_NOTE_NAMES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

# (chord suffix, ((semitones above root, template weight), ...)).
# The order is significant: it fixes the insertion order of the template
# dict, and ties in template matching go to the earliest entry.
_CHORD_QUALITIES = (
    ('', ((0, 1.0), (4, 0.8), (7, 0.6))),
    ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),
    ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),
    ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),
    ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),
)

# Matches a line consisting only of a section header such as "[Verse 1]".
_SECTION_HEADER_RE = re.compile(r'^\s*\[([^\]]+)\]\s*$')


def _append_chord_change(changes, start_time, chord):
    """Append (start_time, chord) unless it repeats the last emitted chord."""
    if changes and changes[-1][1] == chord:
        return
    changes.append((start_time, chord))


def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5,
                              stem_pattern='*.mp3'):
    """
    Extract chords from multiple sources - stems AND full track

    Args:
        stems_dir: Path to directory containing stems
        original_audio: Path to original full mix (optional but recommended)
        min_duration: Minimum chord duration
        stem_pattern: Glob pattern used to list stem files in stems_dir

    Returns:
        List of (timestamp, chord_name) tuples
    """
    if not LIBROSA_AVAILABLE:
        print(" [WARN] Chord extraction skipped (librosa not installed)")
        return []

    stems_dir = Path(stems_dir)
    print(" Analyzing multiple sources for chord detection...")
    chord_candidates = []

    # SOURCE 1: Original full mix (HIGHEST WEIGHT)
    if original_audio and Path(original_audio).exists():
        print(" • Full mix (original audio)")
        full_chords = detect_chords_from_stem(original_audio, focus='harmony')
        if full_chords:
            chord_candidates.append(('full_mix', full_chords, _FULL_MIX_WEIGHT))

    # SOURCES 2+: one stem per category. The directory is listed once and
    # sorted so that the file chosen for each category is deterministic.
    stem_files = sorted(stems_dir.glob(stem_pattern))
    for label, name, keywords, focus, weight in _STEM_SOURCES:
        stem_file = next(
            (candidate for candidate in stem_files
             if any(keyword in candidate.stem.lower() for keyword in keywords)),
            None,
        )
        if stem_file is None:
            continue
        print(f" • {label}")
        stem_chords = detect_chords_from_stem(stem_file, focus=focus)
        if stem_chords:
            chord_candidates.append((name, stem_chords, weight))

    if not chord_candidates:
        print(" [WARN] No suitable sources found")
        return []

    print(f" Merging results from {len(chord_candidates)} sources...")
    return merge_chord_detections(chord_candidates, min_duration)


def detect_chords_from_stem(stem_file, focus='harmony'):
    """
    Detect chord changes over the FULL length of one audio file.

    Args:
        stem_file: Path to the audio file to analyze
        focus: 'bass' uses a shorter hop and favours plain major triads;
            any other value is treated as general harmony

    Returns:
        List of (timestamp, chord_name, confidence) tuples, one per detected
        change. Empty list if the file cannot be analyzed.
    """
    # Best-effort boundary: one unreadable stem must not abort the whole run.
    try:
        y, sr = librosa.load(str(stem_file), sr=22050, duration=None)
        hop_length = 256 if focus == 'bass' else 512
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
        # Smooth each pitch class along the time axis only.
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 9))
        templates = create_chord_templates()

        # One vectorised conversion instead of one call per frame.
        frame_times = librosa.frames_to_time(
            np.arange(chroma.shape[1]), sr=sr, hop_length=hop_length
        )

        chords = []
        last_chord = None
        for time, frame in zip(frame_times, chroma.T):
            chord, confidence = match_chord_template_with_confidence(
                frame, templates, focus
            )
            if chord != last_chord and confidence > _STEM_CONFIDENCE_THRESHOLD:
                chords.append((float(time), chord, float(confidence)))
                last_chord = chord
        return chords
    except Exception as e:
        print(f" [WARN] Failed to analyze {Path(stem_file).name}: {e}")
        return []


def merge_chord_detections(chord_candidates, min_duration=0.5):
    """
    Merge chord detections from several sources by weighted voting.

    Args:
        chord_candidates: List of (name, chords, weight) where chords is a
            time-ordered list of (timestamp, chord_name, confidence)
        min_duration: Chords held for less than this many seconds are dropped
            (the final chord is always kept)

    Returns:
        List of (timestamp, chord_name) tuples. With a single source the
        detections are passed through unfiltered.
    """
    # If only one source, don't filter - just use it
    if len(chord_candidates) == 1:
        _, chords, _ = chord_candidates[0]
        return [(time, chord) for time, chord, _ in chords]

    all_times = [
        time for _, chords, _ in chord_candidates for time, _, _ in chords
    ]
    if not all_times:
        return []

    merged = []
    last_chord = None
    last_time = 0.0
    for grid_time in np.arange(0, max(all_times) + 1, _MERGE_GRID_STEP):
        grid_time = float(grid_time)  # plain floats in the result
        votes = defaultdict(float)
        for _, chords, weight in chord_candidates:
            active = get_chord_at_time(chords, grid_time)
            if active:
                chord, conf = active
                votes[chord] += conf * weight
        if not votes:
            continue

        # Ties go to the chord that received its first vote earliest.
        best_chord = max(votes.items(), key=itemgetter(1))[0]
        if best_chord == last_chord:
            continue
        if last_chord is not None and grid_time - last_time >= min_duration:
            # Skipping a too-short chord can leave the same chord on both
            # sides of the gap; the helper collapses that repeat.
            _append_chord_change(merged, last_time, last_chord)
        last_chord = best_chord
        last_time = grid_time

    if last_chord is not None:
        _append_chord_change(merged, last_time, last_chord)
    return merged


def get_chord_at_time(chords, time):
    """
    Find the chord active at a given time.

    Args:
        chords: Time-ordered list of (timestamp, chord_name, confidence)
        time: Query time in seconds

    Returns:
        (chord_name, confidence) of the last entry starting at or before
        `time`, or None if no entry has started yet.
    """
    active = None
    for chord_time, chord, conf in chords:
        if chord_time > time:
            break
        active = (chord, conf)
    return active


def create_chord_templates():
    """
    Build weighted 12-bin chroma templates for every root and chord quality.

    Returns:
        Dict mapping chord name (e.g. 'C', 'Am', 'G7', 'Fmaj7', 'Dm7') to a
        length-12 numpy array indexed by pitch class (C = 0).
    """
    templates = {}
    for root_index, root in enumerate(_NOTE_NAMES):
        for suffix, intervals in _CHORD_QUALITIES:
            template = np.zeros(12)
            for semitones, weight in intervals:
                template[(root_index + semitones) % 12] = weight
            templates[root + suffix] = template
    return templates


def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
    """
    Match one chroma frame against the chord templates.

    Args:
        chroma_frame: Length-12 numpy array of pitch-class energies
        templates: Dict of chord name -> template array
        focus: 'bass' boosts plain major triads by 10%

    Returns:
        (best_chord_name, best_score). Falls back to ('C', -1) when no
        template is usable.
    """
    frame_total = chroma_frame.sum()
    if frame_total > 0:
        chroma_frame = chroma_frame / frame_total

    best_chord = 'C'
    best_score = -1
    for chord_name, template in templates.items():
        template_total = template.sum()
        if not template_total > 0:
            continue
        score = np.dot(chroma_frame, template / template_total)
        is_plain_major = '7' not in chord_name and 'm' not in chord_name
        if focus == 'bass' and is_plain_major:
            score *= 1.1
        # Strict '>' keeps the earliest template on ties.
        if score > best_score:
            best_score = score
            best_chord = chord_name
    return best_chord, best_score


def extract_chords(audio_path, min_duration=0.5):
    """
    Extract chords from audio file with musical timing.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum chord duration in seconds
            (default 0.5s for half-bar changes)

    Returns:
        List of (timestamp, chord_name) tuples
    """
    if not LIBROSA_AVAILABLE:
        return []

    # Best-effort boundary: report the failure and return no chords.
    try:
        y, sr = librosa.load(audio_path, sr=22050, duration=None)

        # hop_length=1024 at 22050Hz = ~46ms per frame
        hop_length = 1024
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)

        # Moderate smoothing - median filter across 11 frames (~0.5 seconds)
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))
        templates = create_chord_templates()

        frame_times = librosa.frames_to_time(
            np.arange(chroma.shape[1]), sr=sr, hop_length=hop_length
        )

        consolidated = []
        current_chord = None
        current_start = 0.0
        for time, frame in zip(frame_times, chroma.T):
            chord, conf = match_chord_template_with_confidence(
                frame, templates, 'harmony'
            )
            # Low-confidence frames never start a new chord.
            if conf >= _EXTRACT_CONFIDENCE_THRESHOLD and chord != current_chord:
                time = float(time)
                # Emit the previous chord only if it lasted long enough.
                if (current_chord is not None
                        and time - current_start >= min_duration):
                    _append_chord_change(consolidated, current_start, current_chord)
                current_chord = chord
                current_start = time

        # Don't forget the last chord; it runs up to the final frame.
        if current_chord is not None and frame_times.size:
            if float(frame_times[-1]) - current_start >= min_duration:
                _append_chord_change(consolidated, current_start, current_chord)

        return consolidated
    except Exception as e:
        print(f" [WARN] Chord extraction error: {e}")
        return []


def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
    """
    Format chords aligned above lyrics like a traditional chord chart.

    Each non-blank lyric line gets an equal share of the song duration.

    Args:
        chords: List of (timestamp, chord_name) tuples
        lyrics: User-provided lyrics text (with optional [Section] markers)
        duration: Total song duration in seconds
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Formatted chord chart string. Falls back to the timestamped format
        when there are no lyric lines to align against.
    """
    if not lyrics or not lyrics.strip():
        return format_timestamped_chords(chords, key, bpm, song_name)

    sections = parse_lyrics_into_sections(lyrics)
    if not sections:
        return format_timestamped_chords(chords, key, bpm, song_name)

    # Only non-blank lines consume time. Counting blank lines here would
    # shrink every window and leave chords near the end of the song unshown.
    total_lines = sum(
        1 for section in sections for line in section['lines'] if line.strip()
    )
    if total_lines == 0:
        return format_timestamped_chords(chords, key, bpm, song_name)

    time_per_line = duration / total_lines

    output = []
    if song_name:
        output.append(f"# {song_name}")
    output.append(f"Key: {key}")
    output.append(f"BPM: {bpm}")
    output.append("")

    line_index = 0  # index among non-blank lyric lines
    for section in sections:
        if section['name']:
            output.append(f"[{section['name']}]")

        for line in section['lines']:
            if not line.strip():
                output.append("")
                continue

            # Index-based windows avoid floating-point accumulation drift.
            line_start_time = line_index * time_per_line
            line_end_time = (line_index + 1) * time_per_line
            line_index += 1

            line_chords = get_chords_in_range(chords, line_start_time, line_end_time)
            if line_chords:
                output.append(
                    build_chord_line(line, line_chords, line_start_time, time_per_line)
                )
            output.append(line)

        output.append("")  # Blank line between sections

    return "\n".join(output)


def parse_lyrics_into_sections(lyrics):
    """
    Parse lyrics text into sections.

    Handles formats like:
        [Verse 1]
        Line 1
        Line 2

        [Chorus]
        Line 3

    Args:
        lyrics: Lyrics text; LF and CRLF line endings are both accepted

    Returns:
        List of {'name': str, 'lines': [str, ...]} dicts in input order.
        Lines before the first header go into a section named ''. Blank
        lines are preserved.
    """
    sections = []
    current = {'name': '', 'lines': []}

    for raw_line in lyrics.split('\n'):
        line = raw_line.rstrip('\r')  # tolerate Windows line endings
        header = _SECTION_HEADER_RE.match(line)
        if header is None:
            current['lines'].append(line)
            continue
        # A header closes the section in progress, if it has any content.
        if current['lines'] or current['name']:
            sections.append(current)
        current = {'name': header.group(1), 'lines': []}

    if current['lines'] or current['name']:
        sections.append(current)
    return sections


def get_chords_in_range(chords, start_time, end_time):
    """Return the (time, chord) entries with start_time <= time < end_time."""
    return [
        (time, chord) for time, chord in chords if start_time <= time < end_time
    ]


def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
    """
    Build a chord line positioned above lyrics.

    Places chords at positions proportional to their timing within the line.
    Chords are never truncated, never reordered, and always separated by at
    least one space; the result may therefore be longer than the lyric line.

    Args:
        lyric_line: The lyric text the chords sit above
        line_chords: List of (timestamp, chord_name) tuples for this line
        line_start_time: Time in seconds at which the line starts
        line_duration: Time in seconds allotted to the line

    Returns:
        The chord line with trailing whitespace removed, or "" if there are
        no chords.
    """
    if not line_chords:
        return ""

    ordered = sorted(line_chords, key=itemgetter(0))
    line_length = len(lyric_line)
    if line_length == 0:
        return " ".join(chord for _, chord in ordered)

    pieces = []
    written = 0  # characters emitted so far
    earliest = 0  # first column where the next chord may start
    for time, chord in ordered:
        ratio = (time - line_start_time) / line_duration if line_duration > 0 else 0
        column = max(0, min(int(ratio * line_length), line_length - 1))
        # Prefer to keep the chord within the lyric's width...
        if column + len(chord) > line_length:
            column = max(0, line_length - len(chord))
        # ...but never on top of, or touching, the previous chord.
        column = max(column, earliest)
        pieces.append(" " * (column - written) + chord)
        written = column + len(chord)
        earliest = written + 1

    return "".join(pieces).rstrip()


def find_next_free_position(chord_chars, start_pos, chord_len):
    """
    Find next position with enough space for a chord.

    Searches forward from start_pos first, then backward.

    Args:
        chord_chars: List of single characters; ' ' marks a free cell
        start_pos: Preferred starting index
        chord_len: Number of consecutive free cells required

    Returns:
        Index of the first suitable position, or start_pos if none exists.
    """
    last_start = len(chord_chars) - chord_len

    def is_free(pos):
        return all(cell == ' ' for cell in chord_chars[pos:pos + chord_len])

    for pos in range(start_pos, last_start + 1):
        if is_free(pos):
            return pos
    for pos in range(start_pos - 1, -1, -1):
        if pos <= last_start and is_free(pos):
            return pos
    return start_pos  # Fallback


def format_timestamped_chords(chords, key, bpm, song_name=''):
    """
    Fallback: format chords with timestamps (no lyrics).

    Args:
        chords: List of (timestamp, chord_name) tuples
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Header followed by one "[M:SS.ss] chord" line per chord.
    """
    lines = []
    if song_name:
        lines.append(f"# {song_name}")
    lines.append(f"Key: {key} | BPM: {bpm}")
    lines.append("")
    lines.append("=" * 40)
    lines.append("")

    for time, chord in chords:
        # Round to centiseconds BEFORE splitting, so 59.999s is shown as
        # 1:00.00 rather than 0:60.00.
        total_centis = int(round(float(time) * 100))
        mins, centis = divmod(total_centis, 6000)
        lines.append(f"[{mins}:{centis / 100:05.2f}] {chord}")

    return "\n".join(lines)