File size: 4,219 Bytes
d4afb7f
 
 
 
 
 
e0c145a
 
 
 
 
 
d4afb7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""PyHARP/Gradio wrapper exposing an automatic chord recognition model.

Pipeline: audio -> Harmonic CQT -> CNN feature extractor (MIDI-trained)
-> BLSTM-CRF decoder -> time-stamped chord labels returned as a HARP
LabelList.
"""

import sys
import os

# Locate the bundled model package relative to this script.
base_dir = os.path.dirname(os.path.abspath(__file__))
model_dir = os.path.join(base_dir, "ChordRecognitionMIDITrainedExtractor")

# Make the model package importable AND chdir into it so that relative
# model-file paths (e.g. "nblstm_crf.model" below) resolve. This must
# happen BEFORE the `networks`/`const`/`utils` imports that follow.
# NOTE(review): os.chdir at import time affects the whole process —
# any later relative paths (including uploaded-audio temp paths passed
# as absolute paths by Gradio) are resolved against model_dir.
sys.path.insert(0, model_dir)
os.chdir(model_dir)

import numpy as np
import networks as N
from librosa.core import cqt, load, note_to_hz
import const as C
import utils as U

from pyharp import ModelCard, build_endpoint, LabelList, AudioLabel, OutputLabel
import gradio as gr

# ── Load models once at startup ──────────────────────────────────────────────
# Frame-level feature extractor: CNN trained on MIDI-synthesized data.
# Weights path comes from the package's const module.
cnn_feat_extractor = N.FullCNNFeatExtractor()
cnn_feat_extractor.load(C.DEFAULT_CONVNETFILE)

# Sequence decoder: BLSTM + CRF over the CNN features. The relative
# model path works because of the os.chdir(model_dir) above.
decoder = N.NBLSTMCRF()
decoder.load("nblstm_crf.model")

# ── Model card ───────────────────────────────────────────────────────────────
model_card = ModelCard(
    name="Automatic Chord Recognition",
    description="Estimates chord progressions from audio using a CNN feature extractor trained on MIDI data and a BLSTM-CRF sequence decoder. Outputs time-stamped chord labels.",
    author="Wu & Li (2019), wrapped by PyHARP",
    tags=["chord recognition", "harmony", "MIR"],
)

# ── Processing function ───────────────────────────────────────────────────────
def _segment_audio_label(label_id, feat, st, ed):
    """Build an AudioLabel for the frame span [st, ed), or None for no-chord.

    Converts the decoder label id plus the feature segment into a chord
    signature (with sevenths and inversions enabled), then maps frame
    indices to seconds using the hop length C.H and sample rate C.SR.
    Returns None when the signature is "N" (no chord).
    """
    chord_sign = U.voc.ChordSignature7thbass(
        label_id, feat[st:ed, :], sevenths=True, inv=True
    )
    if chord_sign == "N":
        return None
    start_sec = float(st * C.H) / C.SR
    end_sec = float(ed * C.H) / C.SR
    return AudioLabel(
        t=start_sec,
        label=chord_sign,
        duration=end_sec - start_sec,
        description=f"Chord: {chord_sign} ({start_sec:.2f}s - {end_sec:.2f}s)",
    )


# ── Processing function ───────────────────────────────────────────────────────
def process_fn(input_audio_path: str) -> LabelList:
    """Estimate a chord progression for an audio file.

    Parameters
    ----------
    input_audio_path : str
        Path to the input audio file (any format librosa can load).

    Returns
    -------
    LabelList
        Time-stamped chord labels; "N" (no-chord) frames are omitted.
    """
    # Load audio at the model's expected sample rate.
    y, sr = load(input_audio_path, sr=C.SR)

    # Extract the Harmonic CQT: C.CQT_H stacked CQTs whose fmin values
    # are integer multiples of C1, capturing harmonics of each pitch.
    fmin = note_to_hz("C1")
    hcqt = np.stack([
        np.abs(cqt(
            y, sr=C.SR, hop_length=C.H, n_bins=C.BIN_CNT,
            bins_per_octave=C.OCT_BIN, fmin=fmin * (h + 1),
            filter_scale=2, tuning=None
        )).T.astype(np.float32)
        for h in range(C.CQT_H)
    ])

    # Deep frame-level features, then CRF-decoded chord-label sequence.
    feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data
    labels = decoder.argmax(feat)

    output_labels = LabelList()
    # Guard: an empty label sequence would crash on labels[0] below.
    if labels.size == 0:
        return output_labels

    # Run-length encode the label sequence into chord segments.
    st = 0
    cur_label = labels[0]
    for i in range(1, labels.size):
        if labels[i] != cur_label:
            audio_label = _segment_audio_label(cur_label, feat, st, i)
            if audio_label is not None:
                output_labels.labels.append(audio_label)
            cur_label = labels[i]
            st = i

    # Final segment. BUGFIX: the original omitted sevenths=True/inv=True
    # here, so the last chord of every file was labeled less specifically
    # than all the others; now consistent with the in-loop call.
    audio_label = _segment_audio_label(cur_label, feat, st, labels.size)
    if audio_label is not None:
        output_labels.labels.append(audio_label)

    return output_labels


# ── Gradio endpoint ───────────────────────────────────────────────────────────
# Build the HARP-compatible Gradio app: one required audio input (passed
# to process_fn as a file path) and a JSON output carrying the LabelList.
with gr.Blocks() as demo:
    input_components = [
        gr.Audio(type="filepath", label="Input Audio").harp_required(True),
    ]

    output_components = [
        gr.JSON(label="Output Labels"),
    ]

    # build_endpoint wires model_card + components + process_fn into the
    # endpoint layout HARP clients expect.
    app = build_endpoint(
        model_card=model_card,
        input_components=input_components,
        output_components=output_components,
        process_fn=process_fn,
    )

# share=True publishes a temporary public URL.
# NOTE(review): show_error=False hides server-side tracebacks from the
# client, which makes failures hard to diagnose — consider True in dev.
demo.queue().launch(share=True, show_error=False, pwa=True)