hugo flores garcia committed on
Commit
11f651c
·
1 Parent(s): 3419098
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +191 -0
  2. LICENSE +21 -0
  3. README.md +113 -6
  4. app.py +607 -0
  5. assets/example.wav +0 -0
  6. conf/c2f.yml +14 -0
  7. conf/generated/bbc-humans/c2f.yml +15 -0
  8. conf/generated/bbc-humans/coarse.yml +8 -0
  9. conf/generated/bbc-humans/interface.yml +6 -0
  10. conf/generated/boleros/c2f.yml +15 -0
  11. conf/generated/boleros/coarse.yml +8 -0
  12. conf/generated/boleros/interface.yml +6 -0
  13. conf/generated/bowl/c2f.yml +16 -0
  14. conf/generated/bowl/coarse.yml +9 -0
  15. conf/generated/bowl/interface.yml +7 -0
  16. conf/generated/breaks-steps/interface.yml +8 -0
  17. conf/generated/choir/interface.yml +9 -0
  18. conf/generated/church-bells/c2f.yml +15 -0
  19. conf/generated/church-bells/coarse.yml +8 -0
  20. conf/generated/church-bells/interface.yml +6 -0
  21. conf/generated/copepod/c2f.yml +15 -0
  22. conf/generated/copepod/coarse.yml +8 -0
  23. conf/generated/copepod/interface.yml +6 -0
  24. conf/generated/die/c2f.yml +15 -0
  25. conf/generated/die/coarse.yml +8 -0
  26. conf/generated/die/interface.yml +8 -0
  27. conf/generated/dnb/c2f.yml +15 -0
  28. conf/generated/dnb/coarse.yml +8 -0
  29. conf/generated/dnb/interface.yml +6 -0
  30. conf/generated/earlymachines/c2f.yml +15 -0
  31. conf/generated/earlymachines/coarse.yml +8 -0
  32. conf/generated/earlymachines/interface.yml +8 -0
  33. conf/generated/funk/c2f.yml +15 -0
  34. conf/generated/funk/coarse.yml +8 -0
  35. conf/generated/funk/interface.yml +8 -0
  36. conf/generated/growl/c2f.yml +16 -0
  37. conf/generated/growl/coarse.yml +9 -0
  38. conf/generated/growl/interface.yml +7 -0
  39. conf/generated/ismir-birds/c2f.yml +15 -0
  40. conf/generated/ismir-birds/coarse.yml +8 -0
  41. conf/generated/ismir-birds/interface.yml +8 -0
  42. conf/generated/ismir-machines/c2f.yml +15 -0
  43. conf/generated/ismir-machines/coarse.yml +8 -0
  44. conf/generated/ismir-machines/interface.yml +8 -0
  45. conf/generated/machines/c2f.yml +15 -0
  46. conf/generated/machines/coarse.yml +8 -0
  47. conf/generated/machines/interface.yml +8 -0
  48. conf/generated/musdb/c2f.yml +40 -0
  49. conf/generated/musdb/coarse.yml +31 -0
  50. conf/generated/musdb/interface.yml +8 -0
.gitignore ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/env.sh
108
+ venv/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ # Files created by experiments
131
+ output/
132
+ snapshot/
133
+ *.m4a
134
+ notebooks/scratch.ipynb
135
+ notebooks/inspect.ipynb
136
+ notebooks/effects.ipynb
137
+ notebooks/*.ipynb
138
+ notebooks/*.gif
139
+ notebooks/*.wav
140
+ notebooks/*.mp4
141
+ *runs/
142
+ boards/
143
+ samples/
144
+ *.ipynb
145
+
146
+ results.json
147
+ metrics.csv
148
+ mprofile_*
149
+ mem.png
150
+
151
+ results/
152
+ mprofile*
153
+ *.png
154
+ # do not ignore the test wav file
155
+ !tests/audio/short_test_audio.wav
156
+ !tests/audio/output.wav
157
+ */.DS_Store
158
+ .DS_Store
159
+ env.sh
160
+ _codebraid/
161
+ **/*.html
162
+ **/*.exec.md
163
+ flagged/
164
+ log.txt
165
+ ckpt/
166
+ .syncthing*
167
+ tests/assets/
168
+ archived/
169
+
170
+ scratch/
171
+
172
+ runs-archive
173
+ lyrebird-audiotools
174
+ lyrebird-audio-codec
175
+ samples-*/**
176
+
177
+ gradio-outputs/
178
+ samples*/
179
+ models-all/
180
+ models.zip
181
+ .git-old
182
+
183
+
184
+
185
+ gtzan.zip
186
+ .gtzan_emb_cache
187
+
188
+
189
+ data/
190
+ data
191
+ pyharp
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Hugo Flores García and Prem Seetharaman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,120 @@
1
  ---
2
  title: Salad Bowl
3
- emoji:
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.12.0
8
  app_file: app.py
9
  pinned: false
10
- license: cc-by-nc-sa-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Salad Bowl
3
+ emoji: 🥗
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.50.2
8
  app_file: app.py
9
  pinned: false
10
+ license: cc-by-nc-4.0
11
  ---
12
 
13
+ # VampNet
14
+
15
+ This repository contains recipes for training generative music models on top of the Descript Audio Codec.
16
+
17
+ ## try `unloop`
18
+ you can try vampnet in a co-creative looper called unloop. see this link: https://github.com/hugofloresgarcia/unloop
19
+
20
+ # Setting up
21
+
22
+ **Requires Python 3.9**.
23
+
24
+ you'll need a Python 3.9 environment to run VampNet. This is due to a [known issue with madmom](https://github.com/hugofloresgarcia/vampnet/issues/15).
25
+
26
+ (for example, using conda)
27
+ ```bash
28
+ conda create -n vampnet python=3.9
29
+ conda activate vampnet
30
+ ```
31
+
32
+
33
+ install VampNet
34
+
35
+ ```bash
36
+ git clone https://github.com/hugofloresgarcia/vampnet.git
37
+ pip install -e ./vampnet
38
+ ```
39
+
40
+ ## A note on argbind
41
+ This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files.
42
+ Config files are stored in the `conf/` folder.
43
+
44
+ ## Getting the Pretrained Models
45
+
46
+ ### Licensing for Pretrained Models:
47
+ The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml).
48
+
49
+ Download the pretrained models from [this link](https://zenodo.org/record/8136629). Then, extract the models to the `models/` folder.
50
+
51
+
52
+ # Usage
53
+
54
+ ## Launching the Gradio Interface
55
+ You can launch a gradio UI to play with vampnet.
56
+
57
+ ```bash
58
+ python app.py --args.load conf/interface.yml --Interface.device cuda
59
+ ```
60
+
61
+ # Training / Fine-tuning
62
+
63
+ ## Training a model
64
+
65
+ To train a model, run the following script:
66
+
67
+ ```bash
68
+ python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
69
+ ```
70
+
71
+ for multi-gpu training, use torchrun:
72
+
73
+ ```bash
74
+ torchrun --nproc_per_node gpu scripts/exp/train.py --args.load conf/vampnet.yml --save_path path/to/ckpt
75
+ ```
76
+
77
+ You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
78
+
79
+ For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
80
+
81
+ See `python scripts/exp/train.py -h` for a list of options.
82
+
83
+ ## Debugging training
84
+
85
+ To debug training, it's easier to debug with 1 gpu and 0 workers
86
+
87
+ ```bash
88
+ CUDA_VISIBLE_DEVICES=0 python -m pdb scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints --num_workers 0
89
+ ```
90
+
91
+ ## Fine-tuning
92
+ To fine-tune a model, use the script in `scripts/exp/fine_tune.py` to generate 3 configuration files: `c2f.yml`, `coarse.yml`, and `interface.yml`.
93
+ The first two are used to fine-tune the coarse and fine models, respectively. The last one is used to launch the gradio interface.
94
+
95
+ ```bash
96
+ python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav" <fine_tune_name>
97
+ ```
98
+
99
+ This will create a folder under `conf/<fine_tune_name>/` with the 3 configuration files.
100
+
101
+ The save_paths will be set to `runs/<fine_tune_name>/coarse` and `runs/<fine_tune_name>/c2f`.
102
+
103
+ launch the coarse job:
104
+ ```bash
105
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/coarse.yml
106
+ ```
107
+
108
+ this will save the coarse model to `runs/<fine_tune_name>/coarse/ckpt/best/`.
109
+
110
+ launch the c2f job:
111
+ ```bash
112
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/c2f.yml
113
+ ```
114
+
115
+ launch the interface:
116
+ ```bash
117
+ python app.py --args.load conf/generated/<fine_tune_name>/interface.yml
118
+ ```
119
+
120
+
app.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import yaml
3
+ import uuid
4
+
5
+ import numpy as np
6
+ import audiotools as at
7
+ import argbind
8
+ import shutil
9
+ import torch
10
+ from datetime import datetime
11
+
12
+ import gradio as gr
13
+ from vampnet.interface import Interface, signal_concat
14
+ from vampnet import mask as pmask
15
+
16
+ device = "cuda" if torch.cuda.is_available() else "cpu"
17
+
18
+
19
+ interface = Interface(
20
+ device=device,
21
+ coarse_ckpt="models/vampnet/coarse.pth",
22
+ coarse2fine_ckpt="models/vampnet/c2f.pth",
23
+ codec_ckpt="models/vampnet/codec.pth",
24
+ )
25
+
26
+ # populate the model choices with any interface.yml files in the generated confs
27
+ MODEL_CHOICES = {
28
+ "default": {
29
+ "Interface.coarse_ckpt": str(interface.coarse_path),
30
+ "Interface.coarse2fine_ckpt": str(interface.c2f_path),
31
+ "Interface.codec_ckpt": str(interface.codec_path),
32
+ }
33
+ }
34
+ generated_confs = Path("conf/generated")
35
+ for conf_file in generated_confs.glob("*/interface.yml"):
36
+ with open(conf_file) as f:
37
+ _conf = yaml.safe_load(f)
38
+
39
+ # check if the coarse, c2f, and codec ckpts exist
40
 + # otherwise, don't add this model choice
41
+ if not (
42
+ Path(_conf["Interface.coarse_ckpt"]).exists() and
43
+ Path(_conf["Interface.coarse2fine_ckpt"]).exists() and
44
+ Path(_conf["Interface.codec_ckpt"]).exists()
45
+ ):
46
+ continue
47
+
48
+ MODEL_CHOICES[conf_file.parent.name] = _conf
49
+
50
+
51
+
52
+ OUT_DIR = Path("gradio-outputs")
53
+ OUT_DIR.mkdir(exist_ok=True, parents=True)
54
+
55
+ MAX_DURATION_S = 60
56
+ def load_audio(file):
57
+ print(file)
58
+ filepath = file.name
59
+ sig = at.AudioSignal.salient_excerpt(
60
+ filepath, duration=MAX_DURATION_S
61
+ )
62
+ # sig = interface.preprocess(sig)
63
+ sig = at.AudioSignal(filepath)
64
+
65
+ out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
66
+ out_dir.mkdir(parents=True, exist_ok=True)
67
+ sig.write(out_dir / "input.wav")
68
+ return sig.path_to_file
69
+
70
+
71
+ def load_example_audio():
72
+ return "./assets/example.wav"
73
+
74
+ from torch_pitch_shift import pitch_shift, get_fast_shifts
75
+ def shift_pitch(signal, interval: int):
76
+ signal.samples = pitch_shift(
77
+ signal.samples,
78
+ shift=interval,
79
+ sample_rate=signal.sample_rate
80
+ )
81
+ return signal
82
+
83
+ def _vamp(seed, input_audio, model_choice, pitch_shift_amt, periodic_p, p2, n_mask_codebooks, n_mask_codebooks_2, rand_mask_intensity, prefix_s, suffix_s, periodic_w, onset_mask_width, dropout, masktemp, sampletemp, typical_filtering, typical_mass, typical_min_tokens, top_p, sample_cutoff, win_dur, num_feedback_steps, stretch_factor, api=False):
84
+ _seed = seed if seed > 0 else None
85
+ if _seed is None:
86
+ _seed = int(torch.randint(0, 2**32, (1,)).item())
87
+ at.util.seed(_seed)
88
+
89
+ datentime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
90
+ out_dir = OUT_DIR / f"{Path(input_audio).stem}-{datentime}-seed-{_seed}-model-{model_choice}"
91
+ out_dir.mkdir(parents=True)
92
+ sig = at.AudioSignal(input_audio)
93
+ sig.write(out_dir / "input.wav")
94
+
95
+ # reload the model if necessary
96
+ interface.reload(
97
+ coarse_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse_ckpt"],
98
+ c2f_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse2fine_ckpt"],
99
+ )
100
+
101
+ loudness = sig.loudness()
102
+ print(f"input loudness is {loudness}")
103
+
104
+ if pitch_shift_amt != 0:
105
+ sig = shift_pitch(sig, pitch_shift_amt)
106
+
107
+ _p2 = periodic_p if p2 == 0 else p2
108
+ _n_codebooks_2 = n_mask_codebooks if n_mask_codebooks_2 == 0 else n_mask_codebooks_2
109
+
110
+ build_mask_kwargs = dict(
111
+ rand_mask_intensity=rand_mask_intensity,
112
+ prefix_s=prefix_s,
113
+ suffix_s=suffix_s,
114
+ periodic_prompt=int(periodic_p),
115
+ periodic_prompt2=int(_p2),
116
+ periodic_prompt_width=periodic_w,
117
+ onset_mask_width=onset_mask_width,
118
+ _dropout=dropout,
119
+ upper_codebook_mask=int(n_mask_codebooks),
120
+ upper_codebook_mask_2=int(_n_codebooks_2),
121
+ )
122
+
123
+ vamp_kwargs = dict(
124
+ mask_temperature=masktemp*10,
125
+ sampling_temperature=sampletemp,
126
+ typical_filtering=typical_filtering,
127
+ typical_mass=typical_mass,
128
+ typical_min_tokens=typical_min_tokens,
129
+ top_p=top_p if top_p > 0 else None,
130
+ seed=_seed,
131
+ sample_cutoff=sample_cutoff,
132
+ )
133
+
134
+ # save the mask as a txt file
135
+ interface.set_chunk_size(win_dur)
136
+ sig, mask, codes = interface.ez_vamp(
137
+ sig,
138
+ batch_size=4 if not api else 1,
139
+ feedback_steps=num_feedback_steps,
140
+ time_stretch_factor=stretch_factor,
141
+ build_mask_kwargs=build_mask_kwargs,
142
+ vamp_kwargs=vamp_kwargs,
143
+ return_mask=True,
144
+ )
145
+
146
+ if api:
147
+ sig.write(out_dir / "out.wav")
148
+
149
+ return sig.path_to_file
150
+
151
+ if not api:
152
+ # write codes to numpy file
153
+ np.save(out_dir / "codes.npy", codes.cpu().numpy())
154
+ metadata = {}
155
+ metadata["seed"] = _seed
156
+ metadata["model_choice"] = model_choice
157
+ metadata["mask_kwargs"] = build_mask_kwargs
158
+ metadata["vamp_kwargs"] = vamp_kwargs
159
+ metadata["loudness"] = loudness
160
+ # save the metadata
161
+ with open(out_dir / "metadata.yml", "w") as f:
162
+ yaml.dump(metadata, f)
163
+
164
+ sig0 = sig[0].write(out_dir / "out1.wav")
165
+ sig1 = sig[1].write(out_dir / "out2.wav")
166
+ sig2 = sig[2].write(out_dir / "out3.wav")
167
+ sig3 = sig[3].write(out_dir / "out4.wav")
168
+
169
+ # write the mask to txt
170
+ with open(out_dir / "mask.txt", "w") as f:
171
+ m = mask[0].cpu().numpy()
172
+ # write to txt, each time step on a new line
173
+ for i in range(m.shape[-1]):
174
+ f.write(f"{m[:, i]}\n")
175
+
176
+
177
+ import matplotlib.pyplot as plt
178
+ plt.clf()
179
+ interface.visualize_codes(mask)
180
+ plt.savefig(out_dir / "mask.png")
181
+ plt.clf()
182
+ interface.visualize_codes(codes)
183
+ plt.savefig(out_dir / "codes.png")
184
+ plt.close()
185
+
186
+ # zip out dir, and return the path to the zip
187
+ shutil.make_archive(out_dir, 'zip', out_dir)
188
+
189
+ # chunk in groups of 1024 timesteps
190
+ _mask_sigs = []
191
+ for i in range(0, mask.shape[-1], 1024):
192
+ _mask_sigs.append(interface.to_signal(mask[:, :, i:i+1024].to(interface.device)).cpu())
193
+ mask = signal_concat(_mask_sigs)
194
+ mask.write(out_dir / "mask.wav")
195
+
196
+ return (
197
+ sig0.path_to_file, sig1.path_to_file,
198
+ sig2.path_to_file, sig3.path_to_file,
199
+ mask.path_to_file, str(out_dir.with_suffix(".zip")), out_dir / "mask.png"
200
+ )
201
+
202
+ def vamp(data):
203
+ return _vamp(
204
+ seed=data[seed],
205
+ input_audio=data[input_audio],
206
+ model_choice=data[model_choice],
207
+ pitch_shift_amt=data[pitch_shift_amt],
208
+ periodic_p=data[periodic_p],
209
+ p2=data[p2],
210
+ n_mask_codebooks=data[n_mask_codebooks],
211
+ n_mask_codebooks_2=data[n_mask_codebooks_2],
212
+ rand_mask_intensity=data[rand_mask_intensity],
213
+ prefix_s=data[prefix_s],
214
+ suffix_s=data[suffix_s],
215
+ periodic_w=data[periodic_w],
216
+ onset_mask_width=data[onset_mask_width],
217
+ dropout=data[dropout],
218
+ masktemp=data[masktemp],
219
+ sampletemp=data[sampletemp],
220
+ typical_filtering=data[typical_filtering],
221
+ typical_mass=data[typical_mass],
222
+ typical_min_tokens=data[typical_min_tokens],
223
+ top_p=data[top_p],
224
+ sample_cutoff=data[sample_cutoff],
225
+ win_dur=data[win_dur],
226
+ num_feedback_steps=data[num_feedback_steps],
227
+ stretch_factor=data[stretch_factor],
228
+ api=False,
229
+ )
230
+
231
+ def api_vamp(data):
232
+ return _vamp(
233
+ seed=data[seed],
234
+ input_audio=data[input_audio],
235
+ model_choice=data[model_choice],
236
+ pitch_shift_amt=data[pitch_shift_amt],
237
+ periodic_p=data[periodic_p],
238
+ p2=data[p2],
239
+ n_mask_codebooks=data[n_mask_codebooks],
240
+ n_mask_codebooks_2=data[n_mask_codebooks_2],
241
+ rand_mask_intensity=data[rand_mask_intensity],
242
+ prefix_s=data[prefix_s],
243
+ suffix_s=data[suffix_s],
244
+ periodic_w=data[periodic_w],
245
+ onset_mask_width=data[onset_mask_width],
246
+ dropout=data[dropout],
247
+ masktemp=data[masktemp],
248
+ sampletemp=data[sampletemp],
249
+ typical_filtering=data[typical_filtering],
250
+ typical_mass=data[typical_mass],
251
+ typical_min_tokens=data[typical_min_tokens],
252
+ top_p=data[top_p],
253
+ sample_cutoff=data[sample_cutoff],
254
+ win_dur=data[win_dur],
255
+ num_feedback_steps=data[num_feedback_steps],
256
+ stretch_factor=data[stretch_factor],
257
+ api=True,
258
+ )
259
+
260
+
261
+ def harp_vamp(input_audio,
262
+ periodic_p,
263
+ n_mask_codebooks,
264
+ pitch_shift_amt,
265
+ win_dur):
266
+ return _vamp(
267
+ seed=0,
268
+ input_audio=input_audio,
269
+ model_choice="default",
270
+ pitch_shift_amt=pitch_shift_amt,
271
+ periodic_p=periodic_p,
272
+ p2=0,
273
+ n_mask_codebooks=n_mask_codebooks,
274
+ n_mask_codebooks_2=0,
275
+ rand_mask_intensity=1.0,
276
+ prefix_s=0.0,
277
+ suffix_s=0.0,
278
+ periodic_w=1,
279
+ onset_mask_width=0,
280
+ dropout=0.0,
281
+ masktemp=1.5,
282
+ sampletemp=1.0,
283
+ typical_filtering=True,
284
+ typical_mass=0.15,
285
+ typical_min_tokens=64,
286
+ top_p=0.9,
287
+ sample_cutoff=1.0,
288
+ win_dur=win_dur,
289
+ num_feedback_steps=1,
290
+ stretch_factor=1.0,
291
+ api=True,
292
+ )
293
+
294
+
295
+
296
+ with gr.Blocks() as demo:
297
+ with gr.Row():
298
+ with gr.Column():
299
+ manual_audio_upload = gr.File(
300
+ label=f"upload some audio (will be randomly trimmed to max of 100s)",
301
+ file_types=["audio"]
302
+ )
303
+ load_example_audio_button = gr.Button("or load example audio")
304
+
305
+ input_audio = gr.Audio(
306
+ label="input audio",
307
+ interactive=False,
308
+ type="filepath",
309
+ )
310
+
311
+ audio_mask = gr.Audio(
312
+ label="audio mask (listen to this to hear the mask hints)",
313
+ interactive=False,
314
+ type="filepath",
315
+ )
316
+
317
+ # connect widgets
318
+ load_example_audio_button.click(
319
+ fn=load_example_audio,
320
+ inputs=[],
321
+ outputs=[ input_audio]
322
+ )
323
+
324
+ manual_audio_upload.change(
325
+ fn=load_audio,
326
+ inputs=[manual_audio_upload],
327
+ outputs=[ input_audio]
328
+ )
329
+
330
+
331
+
332
+ # mask settings
333
+ with gr.Column():
334
+ with gr.Accordion("manual controls", open=True):
335
+ periodic_p = gr.Slider(
336
+ label="periodic prompt",
337
+ minimum=0,
338
+ maximum=128,
339
+ step=1,
340
+ value=3,
341
+ )
342
+ p2 = gr.Slider(
343
+ label="periodic prompt 2 (0 - same as p1, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
344
+ minimum=0,
345
+ maximum=128,
346
+ step=1,
347
+ value=0,
348
+ )
349
+
350
+ onset_mask_width = gr.Slider(
351
+ label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
352
+ minimum=0,
353
+ maximum=100,
354
+ step=1,
355
+ value=0,
356
+ )
357
+
358
+ n_mask_codebooks = gr.Slider(
359
+ label="compression prompt ",
360
+ value=3,
361
+ minimum=0,
362
+ maximum=14,
363
+ step=1,
364
+ )
365
+ n_mask_codebooks_2 = gr.Number(
366
+ label="compression prompt 2 via linear interpolation (0 == constant)",
367
+ value=0,
368
+ )
369
+
370
+ with gr.Accordion("extras ", open=False):
371
+ pitch_shift_amt = gr.Slider(
372
+ label="pitch shift amount (semitones)",
373
+ minimum=-12,
374
+ maximum=12,
375
+ step=1,
376
+ value=0,
377
+ )
378
+
379
+ stretch_factor = gr.Slider(
380
+ label="time stretch factor",
381
+ minimum=0,
382
+ maximum=64,
383
+ step=1,
384
+ value=1,
385
+ )
386
+
387
+ rand_mask_intensity = gr.Slider(
388
+ label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
389
+ minimum=0.0,
390
+ maximum=1.0,
391
+ value=1.0
392
+ )
393
+
394
+ periodic_w = gr.Slider(
395
+ label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
396
+ minimum=1,
397
+ maximum=20,
398
+ step=1,
399
+ value=1,
400
+ )
401
+
402
+ with gr.Accordion("prefix/suffix prompts", open=True):
403
+ prefix_s = gr.Slider(
404
+ label="prefix hint length (seconds)",
405
+ minimum=0.0,
406
+ maximum=10.0,
407
+ value=0.0
408
+ )
409
+ suffix_s = gr.Slider(
410
+ label="suffix hint length (seconds)",
411
+ minimum=0.0,
412
+ maximum=10.0,
413
+ value=0.0
414
+ )
415
+
416
+ masktemp = gr.Slider(
417
+ label="mask temperature",
418
+ minimum=0.0,
419
+ maximum=100.0,
420
+ value=1.5
421
+ )
422
+ sampletemp = gr.Slider(
423
+ label="sample temperature",
424
+ minimum=0.1,
425
+ maximum=10.0,
426
+ value=1.0,
427
+ step=0.001
428
+ )
429
+
430
+
431
+
432
+ with gr.Accordion("sampling settings", open=False):
433
+ top_p = gr.Slider(
434
+ label="top p (0.0 = off)",
435
+ minimum=0.0,
436
+ maximum=1.0,
437
+ value=0.9
438
+ )
439
+ typical_filtering = gr.Checkbox(
440
+ label="typical filtering ",
441
+ value=True
442
+ )
443
+ typical_mass = gr.Slider(
444
+ label="typical mass (should probably stay between 0.1 and 0.5)",
445
+ minimum=0.01,
446
+ maximum=0.99,
447
+ value=0.15
448
+ )
449
+ typical_min_tokens = gr.Slider(
450
+ label="typical min tokens (should probably stay between 1 and 256)",
451
+ minimum=1,
452
+ maximum=256,
453
+ step=1,
454
+ value=64
455
+ )
456
+ sample_cutoff = gr.Slider(
457
+ label="sample cutoff",
458
+ minimum=0.0,
459
+ maximum=1.0,
460
+ value=1.0,
461
+ step=0.01
462
+ )
463
+
464
+ dropout = gr.Slider(
465
+ label="mask dropout",
466
+ minimum=0.0,
467
+ maximum=1.0,
468
+ step=0.01,
469
+ value=0.0
470
+ )
471
+
472
+
473
+ seed = gr.Number(
474
+ label="seed (0 for random)",
475
+ value=0,
476
+ precision=0,
477
+ )
478
+
479
+
480
+
481
+ # mask settings
482
+ with gr.Column():
483
+
484
+ model_choice = gr.Dropdown(
485
+ label="model choice",
486
+ choices=list(MODEL_CHOICES.keys()),
487
+ value="default",
488
+ visible=True
489
+ )
490
+
491
+ num_feedback_steps = gr.Slider(
492
+ label="number of feedback steps (each one takes a while)",
493
+ minimum=1,
494
+ maximum=16,
495
+ step=1,
496
+ value=1
497
+ )
498
+
499
+ win_dur= gr.Slider(
500
+ label="window duration (seconds)",
501
+ minimum=2,
502
+ maximum=10,
503
+ value=6)
504
+
505
+
506
+ vamp_button = gr.Button("generate (vamp)!!!")
507
+ maskimg = gr.Image(
508
+ label="mask image",
509
+ interactive=False,
510
+ type="filepath"
511
+ )
512
+ out1 = gr.Audio(
513
+ label="output audio 1",
514
+ interactive=False,
515
+ type="filepath"
516
+ )
517
+ out2 = gr.Audio(
518
+ label="output audio 2",
519
+ interactive=False,
520
+ type="filepath"
521
+ )
522
+ out3 = gr.Audio(
523
+ label="output audio 3",
524
+ interactive=False,
525
+ type="filepath"
526
+ )
527
+ out4 = gr.Audio(
528
+ label="output audio 4",
529
+ interactive=False,
530
+ type="filepath"
531
+ )
532
+
533
+ thank_you = gr.Markdown("")
534
+
535
+ # download all the outputs
536
+ download = gr.File(type="file", label="download outputs")
537
+
538
+
539
+ _inputs = {
540
+ input_audio,
541
+ masktemp,
542
+ sampletemp,
543
+ top_p,
544
+ prefix_s, suffix_s,
545
+ rand_mask_intensity,
546
+ periodic_p, periodic_w,
547
+ dropout,
548
+ stretch_factor,
549
+ onset_mask_width,
550
+ typical_filtering,
551
+ typical_mass,
552
+ typical_min_tokens,
553
+ seed,
554
+ model_choice,
555
+ n_mask_codebooks,
556
+ pitch_shift_amt,
557
+ sample_cutoff,
558
+ num_feedback_steps,
559
+ p2,
560
+ n_mask_codebooks_2,
561
+ win_dur
562
+ }
563
+
564
+ # connect widgets
565
+ vamp_button.click(
566
+ fn=vamp,
567
+ inputs=_inputs,
568
+ outputs=[out1, out2, out3, out4, audio_mask, download, maskimg],
569
+ )
570
+
571
+ api_vamp_button = gr.Button("api vamp", visible=False)
572
+ api_vamp_button.click(
573
+ fn=api_vamp,
574
+ inputs=_inputs,
575
+ outputs=[out1],
576
+ api_name="vamp"
577
+ )
578
+
579
+ from pyharp import ModelCard, build_endpoint
580
+
581
+ model_card = ModelCard(
582
+ name="percussion vampnet",
583
+ description="",
584
+ author="hugo flores garcía",
585
+ tags=["generative","sound"],
586
+ )
587
+
588
+ build_endpoint(
589
+ inputs=[
590
+ input_audio,
591
+ periodic_p,
592
+ n_mask_codebooks,
593
+ pitch_shift_amt,
594
+ win_dur,
595
+ ],
596
+ output=out1,
597
+ process_fn=harp_vamp,
598
+ card=model_card
599
+ )
600
+
601
+
602
+ try:
603
+ demo.queue()
604
+ demo.launch(share=True)
605
+ except KeyboardInterrupt:
606
+ shutil.rmtree("gradio-outputs", ignore_errors=True)
607
+ raise
assets/example.wav ADDED
Binary file (883 kB). View file
 
conf/c2f.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/vampnet.yml
3
+
4
+ VampNet.n_codebooks: 14
5
+ VampNet.n_conditioning_codebooks: 4
6
+
7
+ VampNet.embedding_dim: 1280
8
+ VampNet.n_layers: 16
9
+ VampNet.n_heads: 20
10
+
11
+ AudioDataset.duration: 3.0
12
+
13
+
14
+ AudioDataset.loudness_cutoff: -40.0
conf/generated/bbc-humans/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bbc-humans/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /home/hugo/Humans/
15
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bbc-humans/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /home/hugo/Humans/
8
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /home/hugo/Humans/
3
+ Interface.coarse2fine_ckpt: ./runs/bbc-humans/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/bbc-humans/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/boleros/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/boleros/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/loras/boleros
15
+ val/AudioLoader.sources: *id001
conf/generated/boleros/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/boleros/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/loras/boleros
8
+ val/AudioLoader.sources: *id001
conf/generated/boleros/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/loras/boleros
3
+ Interface.coarse2fine_ckpt: ./runs/boleros/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/boleros/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/bowl/c2f.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bowl/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/seagate_prosound/prosound_core_complete/Anns
15
+ - Animals
16
+ val/AudioLoader.sources: *id001
conf/generated/bowl/coarse.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bowl/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/seagate_prosound/prosound_core_complete/Anns
8
+ - Animals
9
+ val/AudioLoader.sources: *id001
conf/generated/bowl/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/seagate_prosound/prosound_core_complete/Anns
3
+ - Animals
4
+ Interface.coarse2fine_ckpt: ./runs/bowl/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/bowl/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/breaks-steps/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs-june-23/breaks-steps/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs-june-23/breaks-steps/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs-june-23/breaks-steps/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs-june-23/breaks-steps/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/choir/interface.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/choir/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/choir/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/choir/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/choir/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
9
+ Interface.coarse_chunk_size_s: 15
conf/generated/church-bells/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/church-bells/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/church-bells
15
+ val/AudioLoader.sources: *id001
conf/generated/church-bells/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/church-bells/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/church-bells
8
+ val/AudioLoader.sources: *id001
conf/generated/church-bells/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/church-bells
3
+ Interface.coarse2fine_ckpt: ./runs/church-bells/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/church-bells/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/copepod/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/copepod/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/copepod
15
+ val/AudioLoader.sources: *id001
conf/generated/copepod/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/copepod/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/copepod
8
+ val/AudioLoader.sources: *id001
conf/generated/copepod/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/copepod
3
+ Interface.coarse2fine_ckpt: ./runs/copepod/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/copepod/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/die/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/die/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK2/prosound_redacted/Chris Diebold General
15
+ val/AudioLoader.sources: *id001
conf/generated/die/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/die/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK2/prosound_redacted/Chris Diebold General
8
+ val/AudioLoader.sources: *id001
conf/generated/die/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK2/prosound_redacted/Chris
3
+ - Diebold
4
+ - General
5
+ Interface.coarse2fine_ckpt: ./runs/die/c2f/latest/vampnet/weights.pth
6
+ Interface.coarse_ckpt: ./runs/die/coarse/latest/vampnet/weights.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/dnb/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/dnb/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /home/hugo/ccm/data/audio/dnb
15
+ val/AudioLoader.sources: *id001
conf/generated/dnb/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/dnb/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /home/hugo/ccm/data/audio/dnb
8
+ val/AudioLoader.sources: *id001
conf/generated/dnb/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /home/hugo/ccm/data/audio/dnb
3
+ Interface.coarse2fine_ckpt: ./runs/dnb/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/dnb/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/earlymachines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/funk/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/knower/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/knower
15
+ val/AudioLoader.sources: *id001
conf/generated/funk/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/knower/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/knower
8
+ val/AudioLoader.sources: *id001
conf/generated/funk/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/knower/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/knower/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/knower/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/knower/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/growl/c2f.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/growl/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/growly
15
+ - animals/
16
+ val/AudioLoader.sources: *id001
conf/generated/growl/coarse.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/growl/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/growly
8
+ - animals/
9
+ val/AudioLoader.sources: *id001
conf/generated/growl/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/growly
3
+ - animals/
4
+ Interface.coarse2fine_ckpt: ./runs/growl/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/growl/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-birds/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-birds/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-birds/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-birds/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-birds/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-birds/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-birds/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-machines/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-machines/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-machines/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-machines/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/musdb/c2f.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/xeno-canto/c2f
13
+
14
+ AudioDataset.aligned: true
15
+ train/build_dataset.folders:
16
+ bass:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
18
+ ext: "bass.wav"
19
+ drums:
20
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
21
+ ext: "drums.wav"
22
+ other:
23
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
24
+ ext: "other.wav"
25
+ vocals:
26
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
27
+ ext: "vocals.wav"
28
+ val/build_dataset.folders:
29
+ bass:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
31
+ ext: "bass.wav"
32
+ drums:
33
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
34
+ ext: "drums.wav"
35
+ other:
36
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
37
+ ext: "other.wav"
38
+ vocals:
39
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
40
+ ext: "vocals.wav"
conf/generated/musdb/coarse.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/xeno-canto/coarse
6
+ train/build_dataset.folders:
7
+ bass:
8
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
9
+ ext: ["bass.wav"]
10
+ drums:
11
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
12
+ ext: ["drums.wav"]
13
+ other:
14
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
15
+ ext: ["other.wav"]
16
+ vocals:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
18
+ ext: ["vocals.wav"]
19
+ val/build_dataset.folders:
20
+ bass:
21
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
22
+ ext: ["bass.wav"]
23
+ drums:
24
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
25
+ ext: ["drums.wav"]
26
+ other:
27
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
28
+ ext: ["other.wav"]
29
+ vocals:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
31
+ ext: ["vocals.wav"]
conf/generated/musdb/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth
2
+ Interface.coarse2fine_lora_ckpt: null
3
+
4
+ Interface.coarse_ckpt: ./runs/musdb-cond-clfdrop/best/vampnet/weights.pth
5
+ Interface.coarse_lora_ckpt: null
6
+
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth