# FIX: `sys` was imported twice (once at the top, once via `import logging, sys`
# next to the logging setup). Deduplicated and regrouped per PEP 8:
# stdlib / third-party / local, one import per line, alphabetized.
import logging
import os
import sys
import uuid
from contextlib import contextmanager
from pathlib import Path

import gradio as gr
import librosa
import numpy as np
import torch
from audiotools import AudioSignal

from BeatNet.BeatNet import BeatNet
from pyharp import AudioLabel, LabelList, OutputLabel
from pyharp.core import ModelCard, build_endpoint
from pyharp.media.audio import save_audio

# NOTE(review): uuid, Path, contextmanager, numpy, torch, librosa, save_audio,
# and AudioSignal are not referenced anywhere in this file — they look like
# leftovers from an earlier (watermarking?) version. Kept to avoid breaking
# anything unseen; confirm before removing.

# Log to stderr so messages show up in hosted environments that capture
# stdout separately.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[logging.StreamHandler(sys.stderr)],
)
log = logging.getLogger("app")

# Audio constants.
# NOTE(review): none of these four constants are referenced in this file —
# they appear to be leftovers from a watermarking (EnCodec/AudioSeal) demo;
# confirm before removing.
LOUDNESS_DB = -16.
SAMPLE_RATE = 48_000
ENCODEC_SAMPLE_RATE = 16_000
AUDIOSEAL_SAMPLE_RATE = 16_000

# HARP model card: metadata shown to HARP client hosts for this endpoint.
model_card = ModelCard(
    name="BeatNet Beat Detection",
    description=("Beat detection for audio."),
    author="Mojtaba Heydari, Frank Cwitkowitz, Zhiyao Duan",
    tags=["beat detection"]
)

log.info("Initializing BeatNet model...")
# Loaded once at module import so every request reuses the same model.
# Args: model 1, offline (whole-file) processing, DBN post-processing,
# no plotting, single-threaded.
estimator = BeatNet(1, mode="offline", inference_model="DBN", plot=[], thread=False)

def process_fn(inp_audio):
    """Run BeatNet on an audio file and build one HARP label per beat.

    Args:
        inp_audio: Path to the input audio file.

    Returns:
        A ``(inp_audio, LabelList)`` tuple — the audio path is passed
        through unchanged, and the list holds one ``AudioLabel`` per
        detected ``(time, beat_position)`` pair from BeatNet.
    """
    beats = estimator.process(inp_audio)

    # Same color for every beat marker; computed once outside the loop.
    beat_color = OutputLabel.rgb_color_to_int(0, 164, 235)

    labels = LabelList()
    labels.labels.extend(
        AudioLabel(
            t=beat_time,
            label=f"{beat_pos}",
            description=f"Beat: {beat_pos}",
            color=beat_color,
            # Downbeats (position 1.0) get full amplitude, other beats none.
            amplitude=1.0 if beat_pos == 1.0 else 0.0,
        )
        for beat_time, beat_pos in beats
    )

    return inp_audio, labels


# Gradio UI + HARP endpoint wiring.
with gr.Blocks() as app:
    gr.Markdown("## BeatNet Beat Detection")

    # Inputs
    input_audio = gr.Audio(
        label="Input Audio",
        type="filepath",
        sources=["upload", "microphone"]
    )

    # Outputs.
    # FIX: labels previously read "Watermarked Speech" / "Watermark
    # Confidence" — copy-paste leftovers from a watermarking demo. This app
    # returns the input audio unchanged plus a list of detected beats.
    output_wav = gr.Audio(
        type="filepath",
        label="Output Audio"
    )
    output_label = gr.JSON(label="Detected Beats")

    # Register process_fn as a HARP endpoint over these components so
    # HARP client hosts (e.g. a DAW plugin) can call it remotely.
    _ = build_endpoint(
        model_card=model_card,
        input_components=[
            input_audio
        ],
        output_components=[
            output_wav,
            output_label
        ],
        process_fn=process_fn
    )

if __name__ == '__main__':
    # Bind to all interfaces; the port comes from $PORT (default 7860),
    # matching common hosting conventions (e.g. HF Spaces / containers).
    port = int(os.getenv("PORT", "7860"))
    app.launch(server_name="0.0.0.0", server_port=port)