File size: 17,604 Bytes
1e9fec0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfa4828
e01bdfd
 
 
 
 
dfa4828
e01bdfd
 
 
 
1e9fec0
 
e01bdfd
1e9fec0
e01bdfd
1e9fec0
e01bdfd
dfa4828
 
 
e01bdfd
 
 
 
dfa4828
 
e01bdfd
1e9fec0
e01bdfd
 
 
1e9fec0
 
e01bdfd
1e9fec0
e01bdfd
 
4ec9679
e01bdfd
 
 
dfa4828
e01bdfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e9fec0
1290a42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
"""
Multi-Source Chord Detection
Analyzes stems AND full mix for best results
"""

import numpy as np
import warnings
warnings.filterwarnings('ignore')

try:
    import librosa
    import scipy.ndimage
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False


def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5):
    """
    Detect chords from several audio sources and merge them into one list.

    The full mix (when given and present on disk) is analysed first, then
    the first ``*.mp3`` file in ``stems_dir`` whose name contains each stem
    keyword. Every source that yields detections is handed to
    ``merge_chord_detections`` together with a fixed vote weight.

    Args:
        stems_dir: Path to directory containing stems
        original_audio: Path to original full mix (optional but recommended)
        min_duration: Minimum chord duration, forwarded to the merge step

    Returns:
        List of (timestamp, chord_name) tuples; empty when librosa is
        unavailable or no source produced any detection
    """
    if not LIBROSA_AVAILABLE:
        print("   [WARN]  Chord extraction skipped (librosa not installed)")
        return []

    from pathlib import Path
    stems_path = Path(stems_dir)

    print("   Analyzing multiple sources for chord detection...")

    # One row per stem type, in analysis order:
    # (filename keywords, console label, source name, focus, vote weight)
    stem_sources = (
        (('bass',), "   • Bass stem", 'bass', 'bass', 3.0),
        (('guitar',), "   • Guitar stem", 'guitar', 'harmony', 2.5),
        (('piano', 'keys'), "   • Piano/Keys stem", 'piano', 'harmony', 2.0),
        (('other',), "   • Other stem", 'other', 'harmony', 1.5),
    )

    candidates = []

    # The full mix carries the highest vote weight.
    if original_audio and Path(original_audio).exists():
        print("   • Full mix (original audio)")
        detected = detect_chords_from_stem(original_audio, focus='harmony')
        if detected:
            candidates.append(('full_mix', detected, 4.0))

    for keywords, label, source_name, focus, weight in stem_sources:
        for stem_file in stems_path.glob('*.mp3'):
            stem_name = stem_file.stem.lower()
            if not any(keyword in stem_name for keyword in keywords):
                continue
            # Only the first matching file per stem type is analysed.
            print(label)
            detected = detect_chords_from_stem(stem_file, focus=focus)
            if detected:
                candidates.append((source_name, detected, weight))
            break

    if not candidates:
        print("   [WARN]  No suitable sources found")
        return []

    print(f"   Merging results from {len(candidates)} sources...")
    return merge_chord_detections(candidates, min_duration)


def detect_chords_from_stem(stem_file, focus='harmony'):
    """
    Detect chord changes over the whole of one audio file.

    The file is loaded at 22050 Hz, converted to CQT chroma (hop of 256
    samples when ``focus`` is 'bass', 512 otherwise) and median-filtered
    with a (1, 9) window. Each frame is matched against the chord
    templates; an entry is recorded whenever the best chord differs from
    the last recorded one and its confidence exceeds 0.12.

    Args:
        stem_file: Path to the audio file
        focus: 'bass' or 'harmony'; also forwarded to the template matcher

    Returns:
        List of (timestamp, chord_name, confidence) tuples, or an empty
        list if anything fails (a warning is printed in that case)
    """
    try:
        y, sr = librosa.load(str(stem_file), sr=22050, duration=None)

        hop_length = 512
        if focus == 'bass':
            hop_length = 256

        chroma = scipy.ndimage.median_filter(
            librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length),
            size=(1, 9),
        )
        templates = create_chord_templates()

        detected = []
        previous = None
        for frame_index in range(chroma.shape[1]):
            stamp = librosa.frames_to_time(frame_index, sr=sr, hop_length=hop_length)
            label, score = match_chord_template_with_confidence(
                chroma[:, frame_index], templates, focus
            )

            # Skip repeats of the last recorded chord and weak matches.
            if label == previous or not (score > 0.12):
                continue

            detected.append((float(stamp), label, float(score)))
            previous = label

        return detected

    except Exception as e:
        from pathlib import Path
        print(f"   [WARN]  Failed to analyze {Path(stem_file).name}: {e}")
        return []


def merge_chord_detections(chord_candidates, min_duration=0.5):
    """
    Combine per-source chord detections into one (time, chord) list.

    Args:
        chord_candidates: List of (source_name, chords, weight) tuples,
            where chords is a list of (time, chord, confidence) tuples
        min_duration: Minimum length in seconds a chord must last before
            it is emitted (not applied to the final chord, nor when there
            is only one source)

    Returns:
        List of (timestamp, chord_name) tuples
    """
    # A single source is passed through unfiltered, minus the confidence.
    if len(chord_candidates) == 1:
        _, only_chords, _ = chord_candidates[0]
        return [(start, label) for start, label, _ in only_chords]

    change_times = sorted(
        {start for _, chords, _ in chord_candidates for start, _, _ in chords}
    )
    if not change_times:
        return []

    # Vote on a fixed 0.5 s grid covering every detected change time.
    grid = np.arange(0, max(change_times) + 1, 0.5)

    merged = []
    running_chord = None
    running_start = 0

    for tick in grid:
        # Each source votes for its active chord with confidence * weight.
        tally = {}
        for _, chords, weight in chord_candidates:
            hit = get_chord_at_time(chords, tick)
            if not hit:
                continue
            label, conf = hit
            strength = conf * weight
            tally[label] = tally[label] + strength if label in tally else strength

        if not tally:
            continue

        winner = max(tally, key=tally.get)
        if winner == running_chord:
            continue

        # The outgoing chord is kept only if it lasted long enough.
        if running_chord is not None and tick - running_start >= min_duration:
            merged.append((running_start, running_chord))

        running_chord = winner
        running_start = tick

    if running_chord:
        merged.append((running_start, running_chord))

    return merged


def get_chord_at_time(chords, time):
    """
    Return the (chord, confidence) in effect at ``time``, or None.

    Scans ``chords`` (tuples of (start_time, chord, confidence)) in order
    and stops at the first entry that starts after ``time``, so the list
    is expected to be sorted by start time.
    """
    current = None
    for start, label, confidence in chords:
        if not (start <= time):
            break
        current = (label, confidence)
    return current


def create_chord_templates():
    """
    Build weighted 12-bin chroma templates for every root and chord type.

    Returns:
        Dict mapping chord name (e.g. 'C', 'C#m', 'D7', 'Emaj7', 'Fm7') to
        a length-12 numpy array; the root has weight 1.0 and the other
        chord tones have smaller weights.
    """
    pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    # (name suffix, ((semitones above root, weight), ...)) in output order.
    chord_shapes = (
        ('', ((0, 1.0), (4, 0.8), (7, 0.6))),                  # Major
        ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),                 # Minor
        ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),      # Dominant 7
        ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),   # Major 7
        ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),     # Minor 7
    )

    templates = {}
    for root_index, root in enumerate(pitch_classes):
        for suffix, intervals in chord_shapes:
            profile = np.zeros(12)
            for semitones, weight in intervals:
                profile[(root_index + semitones) % 12] = weight
            templates[root + suffix] = profile

    return templates


def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
    """
    Pick the template that best matches one chroma frame.

    Both the frame and each template are scaled to sum to 1 before taking
    their dot product. With ``focus='bass'`` the score of chord names that
    contain neither '7' nor 'm' is multiplied by 1.1.

    Returns:
        (chord_name, score) for the highest-scoring template; ('C', -1)
        when no template could be scored.
    """
    frame_total = chroma_frame.sum()
    if frame_total > 0:
        chroma_frame = chroma_frame / frame_total

    boost_plain_chords = focus == 'bass'
    winner, winner_score = 'C', -1

    for name, template in templates.items():
        template_total = template.sum()
        if not (template_total > 0):
            # A template without positive mass cannot be normalised.
            continue

        score = np.dot(chroma_frame, template / template_total)

        if boost_plain_chords and '7' not in name and 'm' not in name:
            score *= 1.1

        # Strict comparison: on ties the earliest template wins.
        if score > winner_score:
            winner, winner_score = name, score

    return winner, winner_score


def extract_chords(audio_path, min_duration=0.5):
    """
    Detect the chord progression of a single audio file.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum time in seconds a chord must last to be kept
            (default 0.5s)

    Returns:
        List of (timestamp, chord_name) tuples; empty when librosa is
        unavailable or the analysis fails (a warning is printed on failure)
    """
    if not LIBROSA_AVAILABLE:
        return []

    try:
        y, sr = librosa.load(audio_path, sr=22050, duration=None)

        # 1024 samples at 22050 Hz is roughly 46 ms per frame.
        hop_length = 1024
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)

        # Median filter over 11 frames (about half a second) per pitch class.
        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))

        templates = create_chord_templates()

        def analyse_frame(index):
            """Return (time, best chord, confidence) for one chroma frame."""
            stamp = librosa.frames_to_time(index, sr=sr, hop_length=hop_length)
            label, score = match_chord_template_with_confidence(
                chroma[:, index], templates, 'harmony'
            )
            return float(stamp), label, float(score)

        # Pass 1: best template match for every frame.
        frames = [analyse_frame(index) for index in range(chroma.shape[1])]

        # Pass 2: collapse runs and drop chords that end too quickly.
        min_confidence = 0.15
        progression = []
        active = None
        active_since = 0.0

        for stamp, label, score in frames:
            # Weak frames and repeats of the active chord change nothing.
            if not (score >= min_confidence) or label == active:
                continue

            if active is not None and stamp - active_since >= min_duration:
                progression.append((active_since, active))

            active = label
            active_since = stamp

        # The chord still active at the end is measured against the last frame.
        if active is not None and frames:
            if frames[-1][0] - active_since >= min_duration:
                progression.append((active_since, active))

        return progression

    except Exception as e:
        print(f"   [WARN] Chord extraction error: {e}")
        return []


def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
    """
    Format chords aligned above lyrics like a traditional chord chart.

    The song duration is divided evenly among the non-blank lyric lines;
    each line is given the chords whose timestamps fall inside its time
    window. Falls back to the timestamped format when there are no usable
    lyric lines.

    Args:
        chords: List of (timestamp, chord_name) tuples
        lyrics: User-provided lyrics text (with optional [Section] markers)
        duration: Total song duration in seconds
        key: Detected key
        bpm: Detected BPM
        song_name: Name of the song

    Returns:
        Formatted chord chart string
    """
    if not lyrics or not lyrics.strip():
        # No lyrics - return timestamped format
        return format_timestamped_chords(chords, key, bpm, song_name)

    sections = parse_lyrics_into_sections(lyrics)
    if not sections:
        return format_timestamped_chords(chords, key, bpm, song_name)

    # Only non-blank lines consume song time. Blank lines are kept in the
    # parsed sections for layout, so counting them here would shrink every
    # line's window and leave the end of the song without any lyric line.
    total_lines = sum(
        1 for section in sections for line in section['lines'] if line.strip()
    )
    if total_lines == 0:
        return format_timestamped_chords(chords, key, bpm, song_name)

    time_per_line = duration / total_lines

    output = []

    # Header
    if song_name:
        output.append(f"# {song_name}")
    output.append(f"Key: {key}")
    output.append(f"BPM: {bpm}")
    output.append("")

    lines_placed = 0  # number of non-blank lyric lines emitted so far

    for section in sections:
        if section['name']:
            output.append(f"[{section['name']}]")

        for line in section['lines']:
            if not line.strip():
                # Blank lines are layout only and take no song time.
                output.append("")
                continue

            # Windows are derived from the line index rather than a running
            # sum so floating-point error does not accumulate.
            line_start_time = lines_placed * time_per_line
            line_end_time = (lines_placed + 1) * time_per_line
            line_chords = get_chords_in_range(chords, line_start_time, line_end_time)

            if line_chords:
                output.append(
                    build_chord_line(line, line_chords, line_start_time, time_per_line)
                )

            output.append(line)
            lines_placed += 1

        output.append("")  # Blank line between sections

    return "\n".join(output)


def parse_lyrics_into_sections(lyrics):
    """
    Split lyrics text into named sections.

    A line consisting only of ``[Name]`` (optionally surrounded by
    whitespace) starts a new section; every other line, blank ones
    included, is appended to the section currently open. Text before the
    first header goes into a section with an empty name.

    Returns:
        List of dicts shaped like {'name': str, 'lines': list of str}
    """
    import re

    header_pattern = re.compile(r'^\s*\[([^\]]+)\]\s*$')

    parsed = []
    active = {'name': '', 'lines': []}

    for raw_line in lyrics.split('\n'):
        header = header_pattern.match(raw_line)

        if header is None:
            # Ordinary line (blank lines are preserved for formatting).
            active['lines'].append(raw_line)
            continue

        # A header closes the open section, unless that section is still
        # the untouched, unnamed initial one.
        if active['name'] or active['lines']:
            parsed.append(active)
        active = {'name': header.group(1), 'lines': []}

    # Close whatever section was open when the text ended.
    if active['name'] or active['lines']:
        parsed.append(active)

    return parsed


def get_chords_in_range(chords, start_time, end_time):
    """Return the (time, chord) pairs with start_time <= time < end_time, in input order."""
    return [
        (stamp, label)
        for stamp, label in chords
        if start_time <= stamp < end_time
    ]


def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
    """
    Build a chord line positioned above lyrics.

    Each chord is placed at a column proportional to its timing within the
    line. Chord names are always written in full and are separated by at
    least one space, so the returned line may be longer than the lyric
    when chords crowd together or a chord name is longer than the lyric.

    Args:
        lyric_line: The lyric text the chords sit above
        line_chords: List of (timestamp, chord_name) tuples for this line
        line_start_time: Time in seconds at which the line starts
        line_duration: Time in seconds allotted to the line

    Returns:
        The chord line without trailing whitespace; "" when there are no
        chords, and the chord names joined by two spaces when the lyric is
        empty
    """
    if not line_chords:
        return ""

    line_length = len(lyric_line)
    if line_length == 0:
        # Nothing to align against: just list the chords.
        return "  ".join(chord for _, chord in line_chords)

    pieces = []
    written = 0    # number of characters emitted so far
    next_free = 0  # first column at which the next chord may start

    # Place chords left to right in time order so a later chord can only
    # be pushed rightwards, never over an earlier one.
    for time, chord in sorted(line_chords, key=lambda item: item[0]):
        ratio = (time - line_start_time) / line_duration if line_duration > 0 else 0

        # Map timing to a column inside the lyric.
        column = int(ratio * line_length)
        column = max(0, min(column, line_length - 1))

        # Prefer to end the chord within the lyric's width when possible.
        if column + len(chord) > line_length:
            column = max(0, line_length - len(chord))

        # Never start before the previous chord plus one separating space.
        column = max(column, next_free)

        pieces.append(' ' * (column - written))
        pieces.append(chord)
        written = column + len(chord)
        next_free = written + 1

    return ''.join(pieces).rstrip()


def find_next_free_position(chord_chars, start_pos, chord_len):
    """
    Find a start index with ``chord_len`` consecutive spaces in ``chord_chars``.

    Candidates from ``start_pos`` rightwards are tried first, then those to
    the left of it moving towards index 0. Returns ``start_pos`` unchanged
    when no candidate has enough room.
    """
    last_valid_start = len(chord_chars) - chord_len

    def is_clear(pos):
        """True when every cell the chord would occupy at pos is a space."""
        return all(chord_chars[pos + offset] == ' ' for offset in range(chord_len))

    rightwards = range(start_pos, last_valid_start + 1)
    leftwards = (
        pos for pos in range(start_pos - 1, -1, -1) if pos <= last_valid_start
    )

    for candidates in (rightwards, leftwards):
        for pos in candidates:
            if is_clear(pos):
                return pos

    return start_pos  # Fallback


def format_timestamped_chords(chords, key, bpm, song_name=''):
    """
    Fallback: format chords with timestamps (no lyrics).

    Args:
        chords: List of (timestamp, chord_name) tuples, timestamps in seconds
        key: Key shown in the header
        bpm: BPM shown in the header
        song_name: Optional title; adds a "# title" first line when set

    Returns:
        Header lines followed by one "[m:ss.cc] chord" line per chord
    """
    lines = []

    if song_name:
        lines.append(f"# {song_name}")
    lines.append(f"Key: {key} | BPM: {bpm}")
    lines.append("")
    lines.append("=" * 40)
    lines.append("")

    for time, chord in chords:
        # Round to centiseconds before splitting off the minutes; otherwise
        # a value such as 59.996 s is shown as "0:60.00" instead of "1:00.00".
        centiseconds = int(round(float(time) * 100))
        mins, remainder = divmod(centiseconds, 6000)
        lines.append(f"[{mins}:{remainder / 100:05.2f}] {chord}")

    return "\n".join(lines)