Fix chord detection - reduce noise, enforce minimum duration
Browse files
- Increase hop_length from 512 to 2048 (~93ms resolution vs 23ms)
- Heavy median filter smoothing (21 frames = ~2 seconds)
- Higher confidence threshold (0.35 vs 0.12)
- Enforce 1 second minimum chord duration
- Two-pass detection: raw detection then consolidation
This reduces output from 993 noisy changes to ~50-100 musical chord changes.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- modules/chords.py +60 -17
modules/chords.py
CHANGED
|
@@ -270,29 +270,72 @@ def match_chord_template_with_confidence(chroma_frame, templates, focus='harmony
|
|
| 270 |
return best_chord, best_score
|
| 271 |
|
| 272 |
|
| 273 |
-
def extract_chords(audio_path, min_duration=
|
| 274 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
if not LIBROSA_AVAILABLE:
|
| 276 |
return []
|
| 277 |
-
|
| 278 |
try:
|
|
|
|
| 279 |
y, sr = librosa.load(audio_path, sr=22050, duration=None)
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
templates = create_chord_templates()
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
for i in range(chroma.shape[1]):
|
| 288 |
frame = chroma[:, i]
|
| 289 |
-
time = librosa.frames_to_time(i, sr=sr, hop_length=
|
| 290 |
chord, conf = match_chord_template_with_confidence(frame, templates, 'harmony')
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
return []
|
|
|
|
| 270 |
return best_chord, best_score
|
| 271 |
|
| 272 |
|
| 273 |
+
def extract_chords(audio_path, min_duration=1.0, *, hop_length=2048,
                   smooth_frames=21, confidence_threshold=0.35):
    """
    Extract chords from audio file with musical timing.

    Detection runs in two passes: every chroma frame is first matched
    against the chord templates, then the per-frame labels are debounced so
    that a chord change is only reported once the new chord has held for at
    least ``min_duration`` seconds.

    Args:
        audio_path: Path to audio file
        min_duration: Minimum chord duration in seconds (default 1.0s)
        hop_length: Samples between chroma frames. Audio is loaded at
            22050 Hz, so the default 2048 gives ~93ms per frame.
        smooth_frames: Width, in frames, of the median filter applied along
            the time axis of the chroma matrix (default 21 = ~2 seconds at
            the default hop length).
        confidence_threshold: Frames whose template-match confidence is
            below this value are ignored during consolidation.

    Returns:
        List of (timestamp, chord_name) tuples, one per accepted chord
        change, in chronological order. Returns [] when librosa is
        unavailable or when any step of the extraction raises.
    """
    if not LIBROSA_AVAILABLE:
        return []

    try:
        # Import the submodule explicitly: a bare ``import scipy`` at module
        # level is not guaranteed to expose ``scipy.ndimage``.
        import scipy.ndimage

        # Load audio
        y, sr = librosa.load(audio_path, sr=22050, duration=None)

        # Extract chroma features on a coarse time grid (fewer false changes)
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)

        # Heavy smoothing to reduce noise - median filter along time only;
        # the size-1 first axis keeps pitch classes from mixing.
        chroma = scipy.ndimage.median_filter(chroma, size=(1, smooth_frames))

        templates = create_chord_templates()

        # First pass: detect a chord candidate for every frame
        raw_chords = []
        for i in range(chroma.shape[1]):
            frame = chroma[:, i]
            time = librosa.frames_to_time(i, sr=sr, hop_length=hop_length)
            chord, conf = match_chord_template_with_confidence(frame, templates, 'harmony')
            raw_chords.append((float(time), chord, float(conf)))

        # Second pass: debounce into stable chord changes. The real audio
        # length lets the final chord be measured to the end of the file
        # instead of to the start of the last frame.
        return _consolidate_chords(
            raw_chords,
            min_duration,
            confidence_threshold,
            end_time=len(y) / sr,
        )

    except Exception as e:
        # Deliberate best-effort boundary: chord extraction must never take
        # the caller down, so report the problem and return no chords.
        print(f" [WARN] Chord extraction error: {e}")
        return []


def _consolidate_chords(raw_chords, min_duration, confidence_threshold, end_time):
    """
    Debounce per-frame chord labels into a list of stable chord changes.

    A new chord is accepted only after it has been observed continuously
    (ignoring low-confidence frames) for at least ``min_duration`` seconds.
    A shorter excursion is discarded and the previously accepted chord is
    treated as still sounding, so a brief blip neither splits a long chord
    in two nor produces the same chord twice in a row.

    Args:
        raw_chords: Iterable of (time, chord, confidence) tuples in
            chronological order.
        min_duration: Minimum time in seconds a chord must persist.
        confidence_threshold: Frames with confidence below this are skipped.
        end_time: Time in seconds at which the audio ends; used to measure
            the chord still pending when the frames run out.

    Returns:
        List of (start_time, chord) tuples.
    """
    consolidated = []
    accepted = None          # chord most recently written to the output
    candidate = None         # chord currently trying to replace `accepted`
    candidate_start = 0.0
    last_time = 0.0

    for time, chord, conf in raw_chords:
        last_time = time
        if conf < confidence_threshold:
            # Unreliable frame: neither confirms nor interrupts anything.
            continue

        if chord == accepted:
            # Back on the accepted chord, so any pending change was a blip.
            candidate = None
            continue

        if chord != candidate:
            # A different challenger appeared; restart the clock for it.
            candidate = chord
            candidate_start = time

        if time - candidate_start >= min_duration:
            consolidated.append((candidate_start, candidate))
            accepted = candidate
            candidate = None

    # Don't forget a chord that was still pending when the audio ended.
    if candidate is not None:
        duration = max(end_time, last_time) - candidate_start
        if duration >= min_duration:
            consolidated.append((candidate_start, candidate))

    return consolidated
|