Spaces:

rlackey
/

vynl

Running on Zero

File size: 17,604 Bytes

"""
Multi-Source Chord Detection
Analyzes stems AND full mix for best results
"""

import numpy as np
import warnings
warnings.filterwarnings('ignore')

try:
    import librosa
    import scipy.ndimage
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False


def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5):
    """
    Detect chords by combining the full mix with individual stem files.

    Each available source is analysed with detect_chords_from_stem and
    tagged with a fixed weight; the weighted results are then handed to
    merge_chord_detections.

    Args:
        stems_dir: Directory that is searched for ``*.mp3`` stem files
        original_audio: Path to the original full mix (optional but recommended)
        min_duration: Minimum chord duration, forwarded to the merge step

    Returns:
        List of (timestamp, chord_name) tuples; empty when librosa is
        missing or no source produced any chord
    """
    if not LIBROSA_AVAILABLE:
        print("   [WARN]  Chord extraction skipped (librosa not installed)")
        return []

    from pathlib import Path
    stems_dir = Path(stems_dir)

    print("   Analyzing multiple sources for chord detection...")

    chord_candidates = []

    # The full mix carries the highest weight of all sources.
    if original_audio and Path(original_audio).exists():
        print("   • Full mix (original audio)")
        mix_chords = detect_chords_from_stem(original_audio, focus='harmony')
        if mix_chords:
            chord_candidates.append(('full_mix', mix_chords, 4.0))

    # (source name, filename keywords, progress label, analysis focus, weight)
    stem_sources = (
        ('bass', ('bass',), "   • Bass stem", 'bass', 3.0),
        ('guitar', ('guitar',), "   • Guitar stem", 'harmony', 2.5),
        ('piano', ('piano', 'keys'), "   • Piano/Keys stem", 'harmony', 2.0),
        ('other', ('other',), "   • Other stem", 'harmony', 1.5),
    )

    for source_name, keywords, label, focus, weight in stem_sources:
        # Only the first matching file (in glob order) is used per source.
        stem_path = next(
            (
                candidate
                for candidate in stems_dir.glob('*.mp3')
                if any(word in candidate.stem.lower() for word in keywords)
            ),
            None,
        )
        if stem_path is None:
            continue

        print(label)
        detected = detect_chords_from_stem(stem_path, focus=focus)
        if detected:
            chord_candidates.append((source_name, detected, weight))

    if not chord_candidates:
        print("   [WARN]  No suitable sources found")
        return []

    print(f"   Merging results from {len(chord_candidates)} sources...")
    return merge_chord_detections(chord_candidates, min_duration)


def detect_chords_from_stem(stem_file, focus='harmony'):
    """
    Detect chord changes over the whole length of one audio file.

    Args:
        stem_file: Path to the audio file to analyse
        focus: 'bass' uses a hop length of 256, anything else uses 512;
            the value is also forwarded to the template matcher

    Returns:
        List of (time_seconds, chord_name, confidence) tuples, one per
        detected chord change. Any failure is reported with a warning
        and yields an empty list.
    """
    try:
        samples, sr = librosa.load(str(stem_file), sr=22050, duration=None)
        hop_length = 512 if focus != 'bass' else 256

        chroma = librosa.feature.chroma_cqt(y=samples, sr=sr, hop_length=hop_length)
        # Median-filter along the time axis (window of 9 frames).
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 9))

        templates = create_chord_templates()
        changes = []
        previous = None

        for frame_idx, frame in enumerate(chroma.T):
            chord, confidence = match_chord_template_with_confidence(frame, templates, focus)

            # Record only confident frames whose chord differs from the
            # last recorded one.
            if chord == previous or not confidence > 0.12:
                continue

            timestamp = librosa.frames_to_time(frame_idx, sr=sr, hop_length=hop_length)
            changes.append((float(timestamp), chord, float(confidence)))
            previous = chord

        return changes

    except Exception as e:
        from pathlib import Path
        print(f"   [WARN]  Failed to analyze {Path(stem_file).name}: {e}")
        return []


def merge_chord_detections(chord_candidates, min_duration=0.5):
    """
    Merge chord detections from several sources by weighted voting.

    With exactly one source its detections are passed through unfiltered
    (only the confidence is stripped). With several sources, every
    source votes on a 0.5 second grid for the chord it considers active,
    with strength ``confidence * weight``; the chord with the largest
    total wins that grid point.

    Args:
        chord_candidates: List of (source_name, chords, weight) where
            chords is a time-ordered list of (time, chord, confidence)
        min_duration: Segments shorter than this (seconds) are dropped,
            except for the final segment which is always kept

    Returns:
        List of (timestamp, chord_name) tuples. Consecutive entries never
        repeat the same chord, and timestamps are plain Python floats in
        the multi-source path.
    """
    # Single source: nothing to vote on, use it directly.
    if len(chord_candidates) == 1:
        _, chords, _ = chord_candidates[0]
        return [(time, chord) for time, chord, _ in chords]

    latest = max(
        (time for _, chords, _ in chord_candidates for time, _, _ in chords),
        default=None,
    )
    if latest is None:
        return []

    merged = []

    def emit(start, chord):
        # When a too-short segment was dropped between two runs of the
        # same chord, the earlier entry is still in effect; adding the
        # chord again would only create a duplicate.
        if not merged or merged[-1][1] != chord:
            merged.append((float(start), chord))

    last_chord = None
    last_time = 0.0

    for grid_time in np.arange(0, latest + 1, 0.5):
        votes = {}
        for _, chords, weight in chord_candidates:
            active = get_chord_at_time(chords, grid_time)
            if active:
                chord, conf = active
                votes[chord] = votes.get(chord, 0.0) + conf * weight

        if not votes:
            continue

        best_chord = max(votes.items(), key=lambda item: item[1])[0]
        if best_chord == last_chord:
            continue

        # The winning chord changed: close the previous segment if it
        # lasted long enough, then start the new one.
        if last_chord is not None and grid_time - last_time >= min_duration:
            emit(last_time, last_chord)

        last_chord = best_chord
        last_time = grid_time

    # The segment still open at the end is kept regardless of length.
    if last_chord is not None:
        emit(last_time, last_chord)

    return merged


def get_chord_at_time(chords, time):
    """
    Return the chord in effect at ``time``.

    Walks ``chords`` (a list of (start, chord, confidence) tuples) and
    stops at the first entry that starts after ``time``, so the list is
    expected to be ordered by start time.

    Returns:
        (chord, confidence) of the last entry starting at or before
        ``time``, or None when no entry has started yet.
    """
    found = None
    for start, name, confidence in chords:
        if not start <= time:
            break
        found = (name, confidence)
    return found


def create_chord_templates():
    """
    Build weighted 12-bin chroma templates for common chord types.

    For each of the 12 roots, five templates are created in this order:
    major (name is the bare root), minor ('m'), dominant 7 ('7'),
    major 7 ('maj7') and minor 7 ('m7'). The root always has weight 1.0
    and the other chord tones progressively less.

    Returns:
        Dict mapping chord name to a length-12 numpy array. Insertion
        order is root by root, quality by quality.
    """
    pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # (name suffix, ((semitones above root, weight), ...))
    qualities = (
        ('', ((0, 1.0), (4, 0.8), (7, 0.6))),
        ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),
        ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),
        ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),
        ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),
    )

    templates = {}
    for root_index, root in enumerate(pitch_classes):
        for suffix, tones in qualities:
            profile = np.zeros(12)
            for semitones, weight in tones:
                profile[(root_index + semitones) % 12] = weight
            templates[root + suffix] = profile

    return templates


def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
    """
    Pick the template that best matches one chroma frame.

    Frame and templates are each scaled to sum to 1 and compared with a
    dot product. With focus == 'bass', templates whose name contains
    neither '7' nor 'm' get their score multiplied by 1.1. Templates
    with a non-positive sum are skipped, and ties keep the template
    that comes first in ``templates``.

    Returns:
        (chord_name, score). Defaults to ('C', -1) when no template
        could be scored.
    """
    frame_total = chroma_frame.sum()
    if frame_total > 0:
        chroma_frame = chroma_frame / frame_total

    boost_plain_triads = focus == 'bass'
    winner, top_score = 'C', -1

    for name, template in templates.items():
        template_total = template.sum()
        if not template_total > 0:
            continue

        score = np.dot(chroma_frame, template / template_total)

        if boost_plain_triads and '7' not in name and 'm' not in name:
            score *= 1.1

        if score > top_score:
            winner, top_score = name, score

    return winner, top_score


def _consolidate_chords(raw_chords, min_duration, confidence_threshold):
    """Collapse per-frame (time, chord, confidence) into (start, chord) changes."""
    consolidated = []

    def keep(start, chord):
        # A short flicker that was dropped between two runs of the same
        # chord must not produce a second entry for that chord.
        if not consolidated or consolidated[-1][1] != chord:
            consolidated.append((start, chord))

    current_chord = None
    current_start = 0.0

    for time, chord, conf in raw_chords:
        # Low-confidence frames neither end nor start a segment.
        if not conf >= confidence_threshold or chord == current_chord:
            continue

        # Close the previous segment only if it lasted long enough.
        if current_chord is not None and time - current_start >= min_duration:
            keep(current_start, current_chord)

        current_chord = chord
        current_start = time

    # The segment still open at the end is measured against the time of
    # the last analysed frame.
    if current_chord is not None and raw_chords:
        if raw_chords[-1][0] - current_start >= min_duration:
            keep(current_start, current_chord)

    return consolidated


def extract_chords(audio_path, min_duration=0.5):
    """
    Extract chords from audio file with musical timing.

    The audio is loaded at 22050 Hz, converted to CQT chroma with a hop
    of 1024 samples (~46 ms per frame), median-smoothed over 11 frames
    (~0.5 s) and matched frame by frame against the chord templates.
    Frames below a confidence of 0.15 are ignored, and chords that last
    less than ``min_duration`` are dropped.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum chord duration in seconds (default 0.5s for half-bar changes)

    Returns:
        List of (timestamp, chord_name) tuples without consecutive
        repeats of the same chord. Returns an empty list when librosa is
        unavailable or analysis fails (a warning is printed on failure).
    """
    if not LIBROSA_AVAILABLE:
        return []

    try:
        y, sr = librosa.load(audio_path, sr=22050, duration=None)

        hop_length = 1024
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))

        templates = create_chord_templates()

        # Convert all frame indices to seconds in one call instead of
        # once per frame.
        frame_times = librosa.frames_to_time(
            np.arange(chroma.shape[1]), sr=sr, hop_length=hop_length
        )

        # First pass: best chord and confidence for every frame.
        raw_chords = []
        for time, frame in zip(frame_times, chroma.T):
            chord, conf = match_chord_template_with_confidence(frame, templates, 'harmony')
            raw_chords.append((float(time), chord, float(conf)))

        # Second pass: merge frames into chord changes.
        return _consolidate_chords(raw_chords, min_duration, confidence_threshold=0.15)

    except Exception as e:
        print(f"   [WARN] Chord extraction error: {e}")
        return []


def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
    """
    Format chords aligned above lyrics like a traditional chord chart.

    The song duration is divided equally among the non-blank lyric
    lines; each line is shown with the chords whose timestamps fall
    into its time slice. Blank lines are kept for layout but take up no
    time. Without usable lyrics the timestamped chord list is returned
    instead.

    Args:
        chords: List of (timestamp, chord_name) tuples
        lyrics: User-provided lyrics text (with optional [Section] markers)
        duration: Total song duration in seconds; when missing or not
            positive, 4 seconds per lyric line are assumed
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Formatted chord chart string
    """
    if not lyrics or not lyrics.strip():
        # No lyrics - return timestamped format
        return format_timestamped_chords(chords, key, bpm, song_name)

    sections = parse_lyrics_into_sections(lyrics)
    if not sections:
        return format_timestamped_chords(chords, key, bpm, song_name)

    # Only lines with text consume time. Blank lines are skipped in the
    # loop below without advancing the clock, so counting them here
    # would leave the end of the song without any lyric line.
    total_lines = sum(
        1 for section in sections for line in section['lines'] if line.strip()
    )
    if total_lines == 0:
        return format_timestamped_chords(chords, key, bpm, song_name)

    if duration and duration > 0:
        time_per_line = duration / total_lines
    else:
        time_per_line = 4.0

    output = []

    # Header
    if song_name:
        output.append(f"# {song_name}")
    output.append(f"Key: {key}")
    output.append(f"BPM: {bpm}")
    output.append("")

    current_time = 0.0

    for section in sections:
        if section['name']:
            output.append(f"[{section['name']}]")

        for line in section['lines']:
            if not line.strip():
                output.append("")
                continue

            # Chords whose timestamp falls inside this line's time slice
            line_end_time = current_time + time_per_line
            line_chords = get_chords_in_range(chords, current_time, line_end_time)

            if line_chords:
                output.append(
                    build_chord_line(line, line_chords, current_time, time_per_line)
                )

            output.append(line)
            current_time = line_end_time

        output.append("")  # Blank line between sections

    return "\n".join(output)


def parse_lyrics_into_sections(lyrics):
    """
    Split lyrics text into named sections.

    A line consisting only of ``[Name]`` (optionally surrounded by
    whitespace) starts a new section; every other line, including blank
    ones, is appended to the current section. Text before the first
    header goes into a section with an empty name.

    Example input:
        [Verse 1]
        Line 1
        Line 2

        [Chorus]
        Line 3

    Returns:
        List of {'name': str, 'lines': list[str]} dicts in input order.
        Sections that have neither a name nor any line are omitted.
    """
    import re

    header = re.compile(r'^\s*\[([^\]]+)\]\s*$')

    parsed = []
    active = {'name': '', 'lines': []}

    for raw_line in lyrics.split('\n'):
        found = header.match(raw_line)

        if found is None:
            # Ordinary line (blank lines are preserved for formatting)
            active['lines'].append(raw_line)
            continue

        # Header: store the section collected so far, then open a new one
        if active['name'] or active['lines']:
            parsed.append(active)
        active = {'name': found.group(1), 'lines': []}

    if active['name'] or active['lines']:
        parsed.append(active)

    return parsed


def get_chords_in_range(chords, start_time, end_time):
    """
    Select the chords whose timestamp lies in [start_time, end_time).

    Args:
        chords: Iterable of (timestamp, chord_name) pairs
        start_time: Inclusive lower bound
        end_time: Exclusive upper bound

    Returns:
        List of (timestamp, chord_name) tuples in their original order
    """
    return [
        (stamp, name)
        for stamp, name in chords
        if start_time <= stamp < end_time
    ]


def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
    """
    Build a chord line positioned above lyrics.

    Each chord is placed at a column proportional to its timing within
    the line. Chords are laid out in time order, are always written in
    full, and are kept at least one space apart; when the ideal column
    is already taken (or the name does not fit), the chord moves right
    and the chord line may become longer than the lyric line.

    Args:
        lyric_line: The lyric text the chords belong to
        line_chords: List of (timestamp, chord_name) tuples for this line
        line_start_time: Time in seconds at which the line starts
        line_duration: Time in seconds covered by the line; if not
            positive, all chords are anchored at the start of the line

    Returns:
        The chord line without trailing whitespace; "" when there are no
        chords, or the chords joined by two spaces when the lyric line
        is empty.
    """
    if not line_chords:
        return ""

    line_length = len(lyric_line)
    if line_length == 0:
        # Nothing to align against: just list the chords.
        return "  ".join(chord for _, chord in line_chords)

    chord_chars = [' '] * line_length
    next_free = 0  # first column a chord may start at without touching its predecessor

    for time, chord in sorted(line_chords, key=lambda item: item[0]):
        ratio = (time - line_start_time) / line_duration if line_duration > 0 else 0

        # Ideal column from timing, clamped to the lyric line.
        char_pos = max(0, min(int(ratio * line_length), line_length - 1))

        # Prefer keeping the whole chord name within the lyric width.
        if char_pos + len(chord) > line_length:
            char_pos = max(0, line_length - len(chord))

        # Never overlap or touch the previous chord: that would turn
        # e.g. "C" and "Am" into the unreadable "CAm".
        char_pos = max(char_pos, next_free)

        end = char_pos + len(chord)
        if end > len(chord_chars):
            # Grow the line instead of cutting the chord name short.
            chord_chars.extend(' ' * (end - len(chord_chars)))

        chord_chars[char_pos:end] = chord
        next_free = end + 1

    return ''.join(chord_chars).rstrip()


def find_next_free_position(chord_chars, start_pos, chord_len):
    """
    Locate a run of ``chord_len`` blank characters in ``chord_chars``.

    The search first moves right from ``start_pos`` and, if nothing
    fits there, left from ``start_pos - 1`` down to 0.

    Returns:
        The first suitable start index, or ``start_pos`` unchanged when
        no free run exists.
    """
    last_start = len(chord_chars) - chord_len

    def is_free(pos):
        return all(chord_chars[pos + offset] == ' ' for offset in range(chord_len))

    # Candidates at or after the requested position
    for candidate in range(start_pos, last_start + 1):
        if is_free(candidate):
            return candidate

    # Candidates before the requested position, nearest first
    for candidate in range(start_pos - 1, -1, -1):
        if candidate <= last_start and is_free(candidate):
            return candidate

    return start_pos  # Fallback


def format_timestamped_chords(chords, key, bpm, song_name=''):
    """
    Fallback: format chords with timestamps (no lyrics).

    Args:
        chords: List of (timestamp_seconds, chord_name) tuples
        key: Key shown in the header
        bpm: BPM shown in the header
        song_name: Optional title; adds a "# title" line when non-empty

    Returns:
        Header followed by one "[M:SS.hh] chord" line per chord, joined
        with newlines
    """
    lines = []

    if song_name:
        lines.append(f"# {song_name}")
    lines.append(f"Key: {key} | BPM: {bpm}")
    lines.append("")
    lines.append("=" * 40)
    lines.append("")

    for time, chord in chords:
        # Round to hundredths before splitting into minutes and seconds;
        # rounding the seconds afterwards would print 59.999 s as
        # "0:60.00" instead of "1:00.00".
        total_hundredths = int(round(float(time) * 100))
        mins, remainder = divmod(total_hundredths, 6000)
        secs, hundredths = divmod(remainder, 100)
        lines.append(f"[{mins}:{secs:02d}.{hundredths:02d}] {chord}")

    return "\n".join(lines)