File size: 4,219 Bytes
d4afb7f
 
 
 
 
 
e0c145a
 
 
 
 
 
d4afb7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""PyHARP/Gradio wrapper exposing an automatic chord recognition model.

Pipeline: audio -> Harmonic CQT -> CNN feature extractor (MIDI-trained)
-> BLSTM-CRF decoder -> time-stamped chord labels returned as a HARP
LabelList.
"""

import sys
import os

# Locate the bundled model package relative to this script.
base_dir = os.path.dirname(os.path.abspath(__file__))
model_dir = os.path.join(base_dir, "ChordRecognitionMIDITrainedExtractor")

# Make the model package importable AND chdir into it so that relative
# model-file paths (e.g. "nblstm_crf.model" below) resolve. This must
# happen BEFORE the `networks`/`const`/`utils` imports that follow.
# NOTE(review): os.chdir at import time affects the whole process —
# any later relative paths (including uploaded-audio temp paths passed
# as absolute paths by Gradio) are resolved against model_dir.
sys.path.insert(0, model_dir)
os.chdir(model_dir)

import numpy as np
import networks as N
from librosa.core import cqt, load, note_to_hz
import const as C
import utils as U

from pyharp import ModelCard, build_endpoint, LabelList, AudioLabel, OutputLabel
import gradio as gr

# ── Load models once at startup ──────────────────────────────────────────────
# Frame-level feature extractor: CNN trained on MIDI-synthesized data.
# Weights path comes from the package's const module.
cnn_feat_extractor = N.FullCNNFeatExtractor()
cnn_feat_extractor.load(C.DEFAULT_CONVNETFILE)

# Sequence decoder: BLSTM + CRF over the CNN features. The relative
# model path works because of the os.chdir(model_dir) above.
decoder = N.NBLSTMCRF()
decoder.load("nblstm_crf.model")

# ── Model card ───────────────────────────────────────────────────────────────
model_card = ModelCard(
    name="Automatic Chord Recognition",
    description="Estimates chord progressions from audio using a CNN feature extractor trained on MIDI data and a BLSTM-CRF sequence decoder. Outputs time-stamped chord labels.",
    author="Wu & Li (2019), wrapped by PyHARP",
    tags=["chord recognition", "harmony", "MIR"],
)

# ── Processing function ───────────────────────────────────────────────────────
def _segment_audio_label(label_id, feat, st, ed):
    """Build an AudioLabel for the frame span [st, ed), or None for no-chord.

    Converts the decoder label id plus the feature segment into a chord
    signature (with sevenths and inversions enabled), then maps frame
    indices to seconds using the hop length C.H and sample rate C.SR.
    Returns None when the signature is "N" (no chord).
    """
    chord_sign = U.voc.ChordSignature7thbass(
        label_id, feat[st:ed, :], sevenths=True, inv=True
    )
    if chord_sign == "N":
        return None
    start_sec = float(st * C.H) / C.SR
    end_sec = float(ed * C.H) / C.SR
    return AudioLabel(
        t=start_sec,
        label=chord_sign,
        duration=end_sec - start_sec,
        description=f"Chord: {chord_sign} ({start_sec:.2f}s - {end_sec:.2f}s)",
    )


# ── Processing function ───────────────────────────────────────────────────────
def process_fn(input_audio_path: str) -> LabelList:
    """Estimate a chord progression for an audio file.

    Parameters
    ----------
    input_audio_path : str
        Path to the input audio file (any format librosa can load).

    Returns
    -------
    LabelList
        Time-stamped chord labels; "N" (no-chord) frames are omitted.
    """
    # Load audio at the model's expected sample rate.
    y, sr = load(input_audio_path, sr=C.SR)

    # Extract the Harmonic CQT: C.CQT_H stacked CQTs whose fmin values
    # are integer multiples of C1, capturing harmonics of each pitch.
    fmin = note_to_hz("C1")
    hcqt = np.stack([
        np.abs(cqt(
            y, sr=C.SR, hop_length=C.H, n_bins=C.BIN_CNT,
            bins_per_octave=C.OCT_BIN, fmin=fmin * (h + 1),
            filter_scale=2, tuning=None
        )).T.astype(np.float32)
        for h in range(C.CQT_H)
    ])

    # Deep frame-level features, then CRF-decoded chord-label sequence.
    feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data
    labels = decoder.argmax(feat)

    output_labels = LabelList()
    # Guard: an empty label sequence would crash on labels[0] below.
    if labels.size == 0:
        return output_labels

    # Run-length encode the label sequence into chord segments.
    st = 0
    cur_label = labels[0]
    for i in range(1, labels.size):
        if labels[i] != cur_label:
            audio_label = _segment_audio_label(cur_label, feat, st, i)
            if audio_label is not None:
                output_labels.labels.append(audio_label)
            cur_label = labels[i]
            st = i

    # Final segment. BUGFIX: the original omitted sevenths=True/inv=True
    # here, so the last chord of every file was labeled less specifically
    # than all the others; now consistent with the in-loop call.
    audio_label = _segment_audio_label(cur_label, feat, st, labels.size)
    if audio_label is not None:
        output_labels.labels.append(audio_label)

    return output_labels


# ── Gradio endpoint ───────────────────────────────────────────────────────────
# Build the HARP-compatible Gradio app: one required audio input (passed
# to process_fn as a file path) and a JSON output carrying the LabelList.
with gr.Blocks() as demo:
    input_components = [
        gr.Audio(type="filepath", label="Input Audio").harp_required(True),
    ]

    output_components = [
        gr.JSON(label="Output Labels"),
    ]

    # build_endpoint wires model_card + components + process_fn into the
    # endpoint layout HARP clients expect.
    app = build_endpoint(
        model_card=model_card,
        input_components=input_components,
        output_components=output_components,
        process_fn=process_fn,
    )

# share=True publishes a temporary public URL.
# NOTE(review): show_error=False hides server-side tracebacks from the
# client, which makes failures hard to diagnose — consider True in dev.
demo.queue().launch(share=True, show_error=False, pwa=True)