#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys

# The pretrained model files are loaded via relative paths inside the
# ChordRecognitionMIDITrainedExtractor package, so put that directory on
# sys.path and make it the working directory before importing from it.
base_dir = os.path.dirname(os.path.abspath(__file__))
model_dir = os.path.join(base_dir, "ChordRecognitionMIDITrainedExtractor")
sys.path.insert(0, model_dir)
os.chdir(model_dir)
import gradio as gr
import numpy as np
from librosa.core import cqt, load, note_to_hz
from pyharp import AudioLabel, LabelList, ModelCard, build_endpoint

import const as C
import networks as N
import utils as U
# ── Load models once at startup ──────────────────────────────────────────────
# CNN front end: extracts deep features from the Harmonic CQT
cnn_feat_extractor = N.FullCNNFeatExtractor()
cnn_feat_extractor.load(C.DEFAULT_CONVNETFILE)

# BLSTM-CRF back end: decodes the feature sequence into chord labels
decoder = N.NBLSTMCRF()
decoder.load("nblstm_crf.model")
# ── Model card ───────────────────────────────────────────────────────────────
model_card = ModelCard(
    name="Automatic Chord Recognition",
    description=(
        "Estimates chord progressions from audio using a CNN feature extractor "
        "trained on MIDI data and a BLSTM-CRF sequence decoder. "
        "Outputs time-stamped chord labels."
    ),
    author="Wu & Li (2019), wrapped by PyHARP",
    tags=["chord recognition", "harmony", "MIR"],
)
# ── Processing function ───────────────────────────────────────────────────────
def process_fn(input_audio_path: str) -> LabelList:
    """Estimate a chord progression for one audio file and return HARP labels."""
    # Load audio at the sample rate the model was trained on
    y, sr = load(input_audio_path, sr=C.SR)

    # Extract a Harmonic CQT: one magnitude CQT per harmonic h, with fmin
    # scaled to the h-th harmonic of C1, stacked along a new leading axis
    fmin = note_to_hz("C1")
    hcqt = np.stack([
        np.abs(cqt(
            y, sr=C.SR, hop_length=C.H, n_bins=C.BIN_CNT,
            bins_per_octave=C.OCT_BIN, fmin=fmin * (h + 1),
            filter_scale=2, tuning=None,
        )).T.astype(np.float32)
        for h in range(C.CQT_H)
    ])
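
    # At this point hcqt should have shape (C.CQT_H, n_frames, C.BIN_CNT):
    # one magnitude CQT per harmonic, frames along axis 1 (expected layout
    # inferred from the .T transpose and np.stack above, not verified here).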
    # Run the CNN over the spectrogram, then decode the most likely
    # frame-level chord label sequence with the BLSTM-CRF
    feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data
    labels = decoder.argmax(feat)
    # Walk the frame-level labels and emit one AudioLabel per chord segment
    output_labels = LabelList()
    cur_label = labels[0]
    st = 0  # start frame of the current segment
    for i in range(labels.size):
        if labels[i] != cur_label:
            ed = i
            feat_seg = feat[st:ed, :]
            chord_sign = U.voc.ChordSignature7thbass(cur_label, feat_seg, sevenths=True, inv=True)
            start_sec = float(st * C.H) / C.SR
            end_sec = float(ed * C.H) / C.SR
            if chord_sign != "N":  # "N" marks no-chord segments
                output_labels.labels.append(
                    AudioLabel(
                        t=start_sec,
                        label=chord_sign,
                        duration=end_sec - start_sec,
                        description=f"Chord: {chord_sign} ({start_sec:.2f}s - {end_sec:.2f}s)",
                    )
                )
            cur_label = labels[i]
            st = i
    # Emit the final segment (same signature options as in the loop above)
    feat_seg = feat[st:labels.size, :]
    chord_sign = U.voc.ChordSignature7thbass(cur_label, feat_seg, sevenths=True, inv=True)
    start_sec = float(st * C.H) / C.SR
    end_sec = float(labels.size * C.H) / C.SR
    if chord_sign != "N":
        output_labels.labels.append(
            AudioLabel(
                t=start_sec,
                label=chord_sign,
                duration=end_sec - start_sec,
                description=f"Chord: {chord_sign} ({start_sec:.2f}s - {end_sec:.2f}s)",
            )
        )
    return output_labels
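
# A minimal smoke test, bypassing Gradio and calling the endpoint function
# directly. "example.wav" is a hypothetical path, and this assumes AudioLabel
# exposes its constructor arguments (t, label, duration) as attributes:
#
#   labels = process_fn("example.wav")
#   for lab in labels.labels:
#       print(f"{lab.t:.2f}s  {lab.label}  ({lab.duration:.2f}s)")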
# ── Gradio endpoint ───────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    input_components = [
        gr.Audio(type="filepath", label="Input Audio").harp_required(True),
    ]
    output_components = [
        gr.JSON(label="Output Labels"),
    ]
    app = build_endpoint(
        model_card=model_card,
        input_components=input_components,
        output_components=output_components,
        process_fn=process_fn,
    )

# Launch after the Blocks context closes so the app is fully built
demo.queue().launch(share=True, show_error=False, pwa=True)
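
# The public URL that share=True prints can be pasted into a HARP-compatible
# host (e.g. the HARP plugin in a DAW) to run this endpoint remotely; with
# share=False the app is served on localhost only.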