Spaces:

rlackey
/

vynl

Running on Zero

App Files Files Community

vynl / modules /chords.py

rlackey

Tune chord detection - less aggressive filtering

dfa4828 2 months ago

raw

history blame contribute delete

17.6 kB

	"""
	Multi-Source Chord Detection
	Analyzes stems AND full mix for best results
	"""

	import numpy as np
	import warnings
	warnings.filterwarnings('ignore')

	try:
	import librosa
	import scipy.ndimage
	LIBROSA_AVAILABLE = True
	except ImportError:
	LIBROSA_AVAILABLE = False


	def extract_chords_multi_stem(stems_dir, original_audio=None, min_duration=0.5):
	    """
	    Detect chords on the full mix and on each recognised stem, then merge them.

	    Sources are tried in a fixed order, each with a fixed voting weight:
	    full mix (4.0), bass (3.0), guitar (2.5), piano/keys (2.0), other (1.5).
	    For every stem type only the first ``*.mp3`` file in ``stems_dir`` whose
	    name contains the keyword is analysed.

	    Args:
	        stems_dir: Path to the directory that is searched for ``*.mp3`` stems
	        original_audio: Path to the original full mix (optional)
	        min_duration: Minimum chord duration, passed to merge_chord_detections

	    Returns:
	        The result of merge_chord_detections for the collected sources, or an
	        empty list when librosa is unavailable or no source produced chords
	    """
	    if not LIBROSA_AVAILABLE:
	        print(" [WARN] Chord extraction skipped (librosa not installed)")
	        return []

	    from pathlib import Path
	    stems_dir = Path(stems_dir)

	    print(" Analyzing multiple sources for chord detection...")

	    chord_candidates = []

	    # The full mix is optional and is not looked up inside stems_dir.
	    if original_audio and Path(original_audio).exists():
	        print(" • Full mix (original audio)")
	        full_chords = detect_chords_from_stem(original_audio, focus='harmony')
	        if full_chords:
	            chord_candidates.append(('full_mix', full_chords, 4.0))

	    # (file-name keywords, progress message, detection focus, label, weight)
	    stem_sources = (
	        (('bass',), " • Bass stem", 'bass', 'bass', 3.0),
	        (('guitar',), " • Guitar stem", 'harmony', 'guitar', 2.5),
	        (('piano', 'keys'), " • Piano/Keys stem", 'harmony', 'piano', 2.0),
	        (('other',), " • Other stem", 'harmony', 'other', 1.5),
	    )

	    for keywords, banner, focus, label, weight in stem_sources:
	        for stem_file in stems_dir.glob('*.mp3'):
	            stem_name = stem_file.stem.lower()
	            if not any(word in stem_name for word in keywords):
	                continue

	            print(banner)
	            detected = detect_chords_from_stem(stem_file, focus=focus)
	            if detected:
	                chord_candidates.append((label, detected, weight))
	            # Only the first matching file is considered for each source.
	            break

	    if not chord_candidates:
	        print(" [WARN] No suitable sources found")
	        return []

	    print(f" Merging results from {len(chord_candidates)} sources...")
	    return merge_chord_detections(chord_candidates, min_duration)


	def detect_chords_from_stem(stem_file, focus='harmony'):
	    """
	    Detect chord changes over the whole of one audio file.

	    The file is loaded at 22050 Hz, converted to CQT chroma (hop 256 for
	    ``focus='bass'``, 512 otherwise), median-filtered over 9 frames and matched
	    frame by frame against the chord templates.

	    Args:
	        stem_file: Path to the audio file
	        focus: Passed to match_chord_template_with_confidence; 'bass' also
	            selects the shorter hop length

	    Returns:
	        List of (time_seconds, chord_name, confidence) tuples, one per detected
	        chord change; an empty list if anything fails
	    """
	    try:
	        samples, rate = librosa.load(str(stem_file), sr=22050, duration=None)

	        hop = 512
	        if focus == 'bass':
	            hop = 256

	        chroma = librosa.feature.chroma_cqt(y=samples, sr=rate, hop_length=hop)
	        chroma = scipy.ndimage.median_filter(chroma, size=(1, 9))

	        templates = create_chord_templates()
	        changes = []
	        previous = None

	        for index in range(chroma.shape[1]):
	            timestamp = librosa.frames_to_time(index, sr=rate, hop_length=hop)
	            name, score = match_chord_template_with_confidence(
	                chroma[:, index], templates, focus
	            )

	            # Record a change only when the chord differs from the last
	            # recorded one and the match clears the 0.12 confidence floor.
	            if name == previous or not (score > 0.12):
	                continue

	            changes.append((float(timestamp), name, float(score)))
	            previous = name

	        return changes

	    except Exception as e:
	        # Best effort: report the failure and let the caller use other sources.
	        from pathlib import Path
	        print(f" [WARN] Failed to analyze {Path(stem_file).name}: {e}")
	        return []


	def merge_chord_detections(chord_candidates, min_duration=0.5):
	    """
	    Combine per-source chord detections into one chord timeline.

	    With exactly one source its detections are returned unfiltered (only the
	    confidence is dropped). With several sources, a 0.5 s grid is walked and at
	    each grid point every source votes for its active chord with strength
	    ``confidence * weight``; the chord with the highest total wins.

	    Args:
	        chord_candidates: List of (name, chords, weight) where chords is a list
	            of (time, chord_name, confidence) tuples
	        min_duration: A chord that is replaced before lasting this long is
	            dropped (multi-source path only; the final chord is always kept)

	    Returns:
	        List of (timestamp, chord_name) tuples
	    """
	    # Single source: nothing to vote on, pass it straight through.
	    if len(chord_candidates) == 1:
	        _, only_chords, _ = chord_candidates[0]
	        return [(start, name) for start, name, _ in only_chords]

	    onset_times = {
	        start
	        for _, chords, _ in chord_candidates
	        for start, _, _ in chords
	    }
	    if not onset_times:
	        return []

	    time_grid = np.arange(0, max(onset_times) + 1, 0.5)

	    merged = []
	    current_chord = None
	    current_start = 0

	    for grid_time in time_grid:
	        votes = {}
	        for _, chords, weight in chord_candidates:
	            active = get_chord_at_time(chords, grid_time)
	            if not active:
	                continue
	            name, confidence = active
	            votes[name] = votes.get(name, 0) + confidence * weight

	        if not votes:
	            continue

	        winner = max(votes, key=votes.get)
	        if winner == current_chord:
	            continue

	        # The winning chord changed: close the previous one if it lasted
	        # long enough, then start timing the new one.
	        if current_chord is not None and grid_time - current_start >= min_duration:
	            merged.append((current_start, current_chord))

	        current_chord = winner
	        current_start = grid_time

	    if current_chord:
	        merged.append((current_start, current_chord))

	    return merged


	def get_chord_at_time(chords, time):
	    """
	    Return the chord that is sounding at ``time``.

	    Scans ``chords`` front to back and stops at the first entry that starts
	    after ``time``, so the list is expected to be ordered by start time.

	    Args:
	        chords: List of (start_time, chord_name, confidence) tuples
	        time: Query time

	    Returns:
	        (chord_name, confidence) of the last entry starting at or before
	        ``time``, or None if there is no such entry
	    """
	    current = None
	    for start, name, confidence in chords:
	        if not (start <= time):
	            break
	        current = (name, confidence)
	    return current


	def create_chord_templates():
	    """
	    Build the 12-bin chroma templates for every supported chord.

	    For each of the 12 roots five chord types are generated, in this order:
	    major (no suffix), minor ('m'), dominant 7 ('7'), major 7 ('maj7') and
	    minor 7 ('m7'). Each template weights the root highest and the other
	    chord tones progressively lower.

	    Returns:
	        Dict mapping chord name to a length-12 numpy array, 60 entries total
	    """
	    notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

	    # (name suffix, ((semitones above root, weight), ...))
	    chord_types = (
	        ('', ((0, 1.0), (4, 0.8), (7, 0.6))),
	        ('m', ((0, 1.0), (3, 0.8), (7, 0.6))),
	        ('7', ((0, 1.0), (4, 0.7), (7, 0.5), (10, 0.4))),
	        ('maj7', ((0, 1.0), (4, 0.7), (7, 0.5), (11, 0.4))),
	        ('m7', ((0, 1.0), (3, 0.7), (7, 0.5), (10, 0.4))),
	    )

	    templates = {}
	    for root_index, root in enumerate(notes):
	        for suffix, tones in chord_types:
	            profile = np.zeros(12)
	            for interval, weight in tones:
	                profile[(root_index + interval) % 12] = weight
	            templates[root + suffix] = profile

	    return templates


	def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony'):
	    """
	    Pick the template that best matches one chroma frame.

	    The frame and each template are scaled to sum to 1 and compared with a dot
	    product. With ``focus='bass'`` plain chords (names containing neither '7'
	    nor 'm') get a 10% score boost. On equal scores the template that comes
	    first in ``templates`` wins.

	    Args:
	        chroma_frame: 1-D numpy array of chroma energies
	        templates: Dict of chord name -> template array
	        focus: 'bass' enables the boost for plain chords

	    Returns:
	        (chord_name, score); ('C', -1) when no template could be scored
	    """
	    frame_total = chroma_frame.sum()
	    if frame_total > 0:
	        chroma_frame = chroma_frame / frame_total

	    winner, top_score = 'C', -1
	    boost_plain = focus == 'bass'

	    for name, template in templates.items():
	        template_total = template.sum()
	        if not (template_total > 0):
	            # An all-zero template cannot be normalised; skip it.
	            continue

	        score = np.dot(chroma_frame, template / template_total)

	        if boost_plain and '7' not in name and 'm' not in name:
	            score *= 1.1

	        if score > top_score:
	            winner, top_score = name, score

	    return winner, top_score


	def extract_chords(audio_path, min_duration=0.5):
	    """
	    Extract a chord timeline from a single audio file.

	    The audio is loaded at 22050 Hz, converted to CQT chroma with a hop of
	    1024 samples (~46 ms per frame), median-filtered over 11 frames and matched
	    against the chord templates. Chords that are replaced before lasting
	    ``min_duration`` seconds are dropped.

	    Args:
	        audio_path: Path to audio file
	        min_duration: Minimum chord duration in seconds

	    Returns:
	        List of (timestamp, chord_name) tuples; an empty list when librosa is
	        unavailable or the analysis fails
	    """
	    if not LIBROSA_AVAILABLE:
	        return []

	    try:
	        signal, rate = librosa.load(audio_path, sr=22050, duration=None)

	        hop_length = 1024
	        chroma = librosa.feature.chroma_cqt(y=signal, sr=rate, hop_length=hop_length)
	        chroma = scipy.ndimage.median_filter(chroma, size=(1, 11))

	        templates = create_chord_templates()

	        # Pass 1: best-matching chord for every frame.
	        frames = []
	        for index in range(chroma.shape[1]):
	            stamp = librosa.frames_to_time(index, sr=rate, hop_length=hop_length)
	            name, score = match_chord_template_with_confidence(
	                chroma[:, index], templates, 'harmony'
	            )
	            frames.append((float(stamp), name, float(score)))

	        # Pass 2: collapse runs of the same chord, ignoring frames below the
	        # confidence threshold and chords shorter than min_duration.
	        confidence_threshold = 0.15
	        segments = []
	        active = None
	        active_start = 0.0

	        for stamp, name, score in frames:
	            if not (score >= confidence_threshold) or name == active:
	                continue

	            if active is not None and stamp - active_start >= min_duration:
	                segments.append((active_start, active))

	            active, active_start = name, stamp

	        # The chord still active at the end is measured up to the last frame.
	        if active is not None and frames:
	            if frames[-1][0] - active_start >= min_duration:
	                segments.append((active_start, active))

	        return segments

	    except Exception as e:
	        print(f" [WARN] Chord extraction error: {e}")
	        return []


	def format_chord_chart(chords, lyrics, duration, key='C', bpm=120, song_name=''):
	    """
	    Format chords aligned above lyrics like a traditional chord chart.

	    The song duration is divided evenly across the non-blank lyric lines; each
	    line is given the chords whose timestamps fall inside its time window.
	    Falls back to format_timestamped_chords when there are no usable lyrics.

	    Args:
	        chords: List of (timestamp, chord_name) tuples
	        lyrics: User-provided lyrics text (with optional [Section] markers)
	        duration: Total song duration in seconds
	        key: Detected key
	        bpm: Detected BPM
	        song_name: Name of the song

	    Returns:
	        Formatted chord chart string
	    """
	    if not lyrics or not lyrics.strip():
	        # No lyrics - return timestamped format
	        return format_timestamped_chords(chords, key, bpm, song_name)

	    sections = parse_lyrics_into_sections(lyrics)

	    if not sections:
	        return format_timestamped_chords(chords, key, bpm, song_name)

	    # Count only lines that carry lyrics. Blank lines are echoed below without
	    # advancing the clock, so counting them would shrink every line's time
	    # window and leave the chords near the end of the song unassigned.
	    total_lines = sum(
	        1
	        for section in sections
	        for line in section['lines']
	        if line.strip()
	    )

	    if total_lines == 0:
	        return format_timestamped_chords(chords, key, bpm, song_name)

	    time_per_line = duration / total_lines

	    output = []

	    # Header
	    if song_name:
	        output.append(f"# {song_name}")
	    output.append(f"Key: {key}")
	    output.append(f"BPM: {bpm}")
	    output.append("")

	    current_time = 0.0

	    for section in sections:
	        if section['name']:
	            output.append(f"[{section['name']}]")

	        for line in section['lines']:
	            if not line.strip():
	                # Blank lines are kept for layout but take no song time.
	                output.append("")
	                continue

	            # Find chords that fall within this line's time window
	            line_end_time = current_time + time_per_line
	            line_chords = get_chords_in_range(chords, current_time, line_end_time)

	            if line_chords:
	                output.append(
	                    build_chord_line(line, line_chords, current_time, time_per_line)
	                )

	            output.append(line)
	            current_time = line_end_time

	        output.append("")  # Blank line between sections

	    return "\n".join(output)


	def parse_lyrics_into_sections(lyrics):
	    """
	    Parse lyrics text into sections.

	    A line consisting only of ``[Name]`` (optionally surrounded by whitespace)
	    starts a new section; every other line, including blank ones, is added to
	    the current section. Lines before the first header go into a section with
	    an empty name.

	    Handles formats like:
	        [Verse 1]
	        Line 1
	        Line 2

	        [Chorus]
	        Line 3

	    Args:
	        lyrics: Lyrics text, lines separated by '\\n' (a trailing '\\r' on a
	            line is removed)

	    Returns:
	        List of {'name': str, 'lines': list of str} dicts in input order
	    """
	    import re

	    # \s* (not a single \s) so that headers with no, or any amount of,
	    # surrounding whitespace are recognised.
	    header_pattern = re.compile(r'^\s*\[([^\]]+)\]\s*$')

	    sections = []
	    current_section = {'name': '', 'lines': []}

	    for line in lyrics.split('\n'):
	        # Tolerate Windows line endings in pasted text.
	        line = line.rstrip('\r')

	        section_match = header_pattern.match(line)

	        if section_match:
	            # Save previous section if it has content
	            if current_section['lines'] or current_section['name']:
	                sections.append(current_section)

	            current_section = {'name': section_match.group(1), 'lines': []}
	        else:
	            # Preserve blank lines for formatting
	            current_section['lines'].append(line)

	    # Don't forget the last section
	    if current_section['lines'] or current_section['name']:
	        sections.append(current_section)

	    return sections


	def get_chords_in_range(chords, start_time, end_time):
	    """
	    Select the chords whose timestamp lies in [start_time, end_time).

	    Args:
	        chords: List of (timestamp, chord_name) tuples
	        start_time: Inclusive lower bound
	        end_time: Exclusive upper bound

	    Returns:
	        New list of (timestamp, chord_name) tuples, in input order
	    """
	    return [
	        (stamp, name)
	        for stamp, name in chords
	        if start_time <= stamp < end_time
	    ]


	def build_chord_line(lyric_line, line_chords, line_start_time, line_duration):
	    """
	    Build a chord line positioned above lyrics.

	    Chords are laid out left to right in time order. Each chord aims for the
	    column proportional to its time within the line; a chord that would run
	    past the end of the lyric is pulled left to end with it, and a chord that
	    would touch or overlap the previous chord is pushed right so that one
	    space separates them. The result may be longer than the lyric, but chord
	    names are never truncated, overwritten or reordered.

	    Args:
	        lyric_line: The lyric text the chords sit above
	        line_chords: List of (timestamp, chord_name) tuples for this line
	        line_start_time: Time at which the line starts
	        line_duration: Time span covered by the line; if not positive, every
	            chord aims for column 0

	    Returns:
	        The chord line, or "" when there are no chords. For an empty lyric the
	        chord names are simply joined with single spaces.
	    """
	    if not line_chords:
	        return ""

	    line_length = len(lyric_line)
	    if line_length == 0:
	        # Nothing to align against: just list the chords.
	        return " ".join(chord for _, chord in line_chords)

	    chord_line = ""

	    for time, chord in sorted(line_chords, key=lambda entry: entry[0]):
	        # Column proportional to the chord's position in the line's time span.
	        ratio = (time - line_start_time) / line_duration if line_duration > 0 else 0
	        column = max(0, min(int(ratio * line_length), line_length - 1))

	        # Prefer keeping the whole chord name above the lyric.
	        if column + len(chord) > line_length:
	            column = max(0, line_length - len(chord))

	        # Never overlap or touch what has already been written.
	        if chord_line:
	            column = max(column, len(chord_line) + 1)

	        chord_line += " " * (column - len(chord_line)) + chord

	    return chord_line.rstrip()


	def find_next_free_position(chord_chars, start_pos, chord_len):
	    """
	    Locate a run of ``chord_len`` blank cells in ``chord_chars``.

	    Positions from ``start_pos`` rightwards are tried first, then positions to
	    the left of ``start_pos``, nearest first.

	    Args:
	        chord_chars: List of single characters; ' ' marks a free cell
	        start_pos: Preferred starting index
	        chord_len: Number of consecutive free cells required

	    Returns:
	        The first suitable index, or ``start_pos`` itself if none exists
	    """
	    width = len(chord_chars)

	    def is_free(pos):
	        return all(chord_chars[pos + offset] == ' ' for offset in range(chord_len))

	    rightwards = range(start_pos, width - chord_len + 1)
	    leftwards = (
	        pos
	        for pos in range(start_pos - 1, -1, -1)
	        if pos + chord_len <= width
	    )

	    for candidates in (rightwards, leftwards):
	        for pos in candidates:
	            if is_free(pos):
	                return pos

	    return start_pos  # Fallback


	def format_timestamped_chords(chords, key, bpm, song_name=''):
	    """
	    Fallback: format chords with timestamps (no lyrics).

	    Output is an optional "# name" title, a "Key: ... | BPM: ..." line, a rule
	    of 40 '=' characters and then one "[m:ss.cc] chord" line per chord.

	    Args:
	        chords: List of (timestamp_seconds, chord_name) tuples
	        key: Detected key
	        bpm: Detected BPM
	        song_name: Name of the song; the title line is omitted when empty

	    Returns:
	        The formatted text, lines joined with '\\n'
	    """
	    lines = []

	    if song_name:
	        lines.append(f"# {song_name}")
	    # Plain '|' separator: a backslash before it is not a valid escape and
	    # would be printed literally.
	    lines.append(f"Key: {key} | BPM: {bpm}")
	    lines.append("")
	    lines.append("=" * 40)
	    lines.append("")

	    for time, chord in chords:
	        # Round to hundredths before splitting into minutes and seconds so a
	        # value such as 59.999 is shown as 1:00.00 rather than 0:60.00.
	        centiseconds = int(round(float(time) * 100))
	        mins, remainder = divmod(centiseconds, 6000)
	        secs = remainder / 100
	        lines.append(f"[{mins}:{secs:05.2f}] {chord}")

	    return "\n".join(lines)