Huiran Yu commited on
Commit
53094d2
·
1 Parent(s): 82667c3

Transkun Model

Browse files
Files changed (4) hide show
  1. .vscode/settings.json +5 -0
  2. README.md +3 -3
  3. app.py +96 -0
  4. requirements.txt +3 -0
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:conda",
3
+ "python-envs.defaultPackageManager": "ms-python.python:conda",
4
+ "python-envs.pythonProjects": []
5
+ }
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Transkun
3
  emoji: 🌍
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Transkun Piano Transcription
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Transkun Piano Transcription
3
  emoji: 🌍
4
  colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.28.0
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Transkun Piano Transcription by Yujia Yan and Zhiyao Duan
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ from transkun.ModelTransformer import TransKun, writeMidi
3
+ import numpy as np
4
+ import torch
5
+ from functools import lru_cache
6
+ import librosa
7
+
8
+ import pkg_resources
9
+ import moduleconf
10
+
11
+ from pyharp.core import ModelCard, build_endpoint
12
+ from pyharp.media.audio import load_audio
13
+ import gradio as gr
14
+
15
# HARP model card shown to remote hosts (e.g. the HARP DAW plugin).
model_card = ModelCard(
    name="Transkun Piano Transcription",
    # Fix: keyword must be ``description`` — the original ``decription``
    # is not a ModelCard field and would raise a TypeError at import time.
    description="Transcribes solo piano performance into MIDI notation",
    author="Yujia Yan, Zhiyao Duan",
    tags=["transcription"],
)
21
+
22
@lru_cache(maxsize=2)
def load_model(device: str):
    """Load the pretrained TransKun 2.0 checkpoint onto *device*.

    The result is memoized per device string, so repeated GPU requests
    reuse the already-constructed model instead of re-reading the
    checkpoint from disk on every call.

    Parameters
    ----------
    device : str
        torch device string, e.g. ``"cuda"`` or ``"cpu"``.

    Returns
    -------
    TransKun
        The model in eval mode, moved to ``device``.
    """
    # Weights and config are bundled inside the installed transkun package.
    weight_path = pkg_resources.resource_filename("transkun", "pretrained/2.0.pt")
    conf_path = pkg_resources.resource_filename("transkun", "pretrained/2.0.conf")

    conf_manager = moduleconf.parseFromFile(conf_path)
    conf = conf_manager["Model"].config

    checkpoint = torch.load(weight_path, map_location=device)
    model = TransKun(conf=conf).to(device)

    # Checkpoints may carry either the best-so-far or the last state dict;
    # strict=False tolerates minor key differences between package versions.
    state_key = "best_state_dict" if "best_state_dict" in checkpoint else "state_dict"
    model.load_state_dict(checkpoint[state_key], strict=False)

    model.eval()
    return model
41
+
42
@spaces.GPU
def transcribe(input_file):
    """Transcribe a solo-piano recording into a MIDI file.

    Parameters
    ----------
    input_file : str
        Path to the input audio file (any format ``load_audio`` accepts).

    Returns
    -------
    str
        Path to the written ``.mid`` file.
    """
    device = "cuda"
    model = load_model(device)

    signal = load_audio(input_file)
    waveform = np.squeeze(np.asarray(signal.audio_data))
    sr = int(signal.sample_rate)

    # Collapse to mono.  After squeeze, a remaining 2-D array is assumed
    # to be (channels, samples) — TODO confirm against load_audio's layout.
    if waveform.ndim == 2:
        if waveform.shape[0] > 1:
            waveform = waveform.mean(axis=0)
        else:
            waveform = waveform.reshape(-1)

    # Fix: cast to float32 unconditionally.  The original only cast inside
    # the resampling branch, so integer-PCM audio already at the model's
    # native rate would reach torch.from_numpy with an integer dtype.
    waveform = waveform.astype(np.float32)

    if sr != model.fs:
        waveform = librosa.resample(waveform, orig_sr=sr, target_sr=model.fs)
        sr = model.fs

    x = torch.from_numpy(waveform).to(device)

    notes_est = model.transcribe(x, discardSecondHalf=False)

    output_midi = writeMidi(notes_est)

    # Fix: write to a unique temp file instead of a fixed name in the CWD,
    # so concurrent requests cannot clobber each other's output.
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".mid", prefix="transkun_", delete=False) as tmp:
        out_path = tmp.name
    output_midi.write(out_path)
    return out_path
72
+
73
def process_fn(input_audio_path: str) -> str:
    """HARP processing entry point: run transcription, return the MIDI path."""
    return transcribe(input_audio_path)
76
+
77
# Build the Gradio/HARP UI: one required audio input, one MIDI file output,
# wired to process_fn through pyharp's build_endpoint.
with gr.Blocks() as demo:
    # .harp_required(True) marks the component as mandatory for HARP hosts.
    input_audio = gr.Audio(label="Upload Solo Piano Audio", type="filepath").harp_required(True)

    # File output restricted to MIDI so HARP serializes it as a midi_track.
    output_midi = gr.File(
        label="Output MIDI File",
        file_types=[".mid", ".midi"],
        type="filepath",
    ).harp_required(True)

    app = build_endpoint(
        model_card=model_card,
        input_components=[input_audio],
        output_components=[output_midi],
        process_fn=process_fn,
    )

# NOTE(review): share=True is ignored (with a warning) when running on
# Hugging Face Spaces, but is kept for parity with local runs.
demo.queue().launch(share=True, show_error=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/TEAMuP-dev/pyharp.git@v0.3.0
2
+ transkun
3
+ librosa