Spaces:

hugggof
/

vampnet-percussion-old

Runtime error

App Files Files Community

hugo flores garcia commited on Jun 4, 2024

Commit

11f651c

1 Parent(s): 3419098

s

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +191 -0
LICENSE +21 -0
README.md +113 -6
app.py +607 -0
assets/example.wav +0 -0
conf/c2f.yml +14 -0
conf/generated/bbc-humans/c2f.yml +15 -0
conf/generated/bbc-humans/coarse.yml +8 -0
conf/generated/bbc-humans/interface.yml +6 -0
conf/generated/boleros/c2f.yml +15 -0
conf/generated/boleros/coarse.yml +8 -0
conf/generated/boleros/interface.yml +6 -0
conf/generated/bowl/c2f.yml +16 -0
conf/generated/bowl/coarse.yml +9 -0
conf/generated/bowl/interface.yml +7 -0
conf/generated/breaks-steps/interface.yml +8 -0
conf/generated/choir/interface.yml +9 -0
conf/generated/church-bells/c2f.yml +15 -0
conf/generated/church-bells/coarse.yml +8 -0
conf/generated/church-bells/interface.yml +6 -0
conf/generated/copepod/c2f.yml +15 -0
conf/generated/copepod/coarse.yml +8 -0
conf/generated/copepod/interface.yml +6 -0
conf/generated/die/c2f.yml +15 -0
conf/generated/die/coarse.yml +8 -0
conf/generated/die/interface.yml +8 -0
conf/generated/dnb/c2f.yml +15 -0
conf/generated/dnb/coarse.yml +8 -0
conf/generated/dnb/interface.yml +6 -0
conf/generated/earlymachines/c2f.yml +15 -0
conf/generated/earlymachines/coarse.yml +8 -0
conf/generated/earlymachines/interface.yml +8 -0
conf/generated/funk/c2f.yml +15 -0
conf/generated/funk/coarse.yml +8 -0
conf/generated/funk/interface.yml +8 -0
conf/generated/growl/c2f.yml +16 -0
conf/generated/growl/coarse.yml +9 -0
conf/generated/growl/interface.yml +7 -0
conf/generated/ismir-birds/c2f.yml +15 -0
conf/generated/ismir-birds/coarse.yml +8 -0
conf/generated/ismir-birds/interface.yml +8 -0
conf/generated/ismir-machines/c2f.yml +15 -0
conf/generated/ismir-machines/coarse.yml +8 -0
conf/generated/ismir-machines/interface.yml +8 -0
conf/generated/machines/c2f.yml +15 -0
conf/generated/machines/coarse.yml +8 -0
conf/generated/machines/interface.yml +8 -0
conf/generated/musdb/c2f.yml +40 -0
conf/generated/musdb/coarse.yml +31 -0
conf/generated/musdb/interface.yml +8 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,191 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/env.sh
+venv/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# Files created by experiments
+output/
+snapshot/
+*.m4a
+notebooks/scratch.ipynb
+notebooks/inspect.ipynb
+notebooks/effects.ipynb
+notebooks/*.ipynb
+notebooks/*.gif
+notebooks/*.wav
+notebooks/*.mp4
+*runs/
+boards/
+samples/
+*.ipynb
+results.json
+metrics.csv
+mprofile_*
+mem.png
+results/
+mprofile*
+*.png
+# do not ignore the test wav file
+!tests/audio/short_test_audio.wav
+!tests/audio/output.wav
+*/.DS_Store
+.DS_Store
+env.sh
+_codebraid/
+**/*.html
+**/*.exec.md
+flagged/
+log.txt
+ckpt/
+.syncthing*
+tests/assets/
+archived/
+scratch/
+runs-archive
+lyrebird-audiotools
+lyrebird-audio-codec
+samples-*/**
+gradio-outputs/
+samples*/
+models-all/
+models.zip
+.git-old
+gtzan.zip
+.gtzan_emb_cache
+data/
+data
+pyharp

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Hugo Flores García and Prem Seetharaman
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1,120 @@
 ---
 title: Salad Bowl
-emoji: ⚡
-colorFrom: pink
-colorTo: gray
 sdk: gradio
-sdk_version: 4.12.0
 app_file: app.py
 pinned: false
-license: cc-by-nc-sa-4.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Salad Bowl
+emoji: 🥗
+colorFrom: yellow
+colorTo: green
 sdk: gradio
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
+license: cc-by-nc-4.0
 ---
+# VampNet
+This repository contains recipes for training generative music models on top of the Descript Audio Codec.
+## try `unloop`
+you can try vampnet in a co-creative looper called unloop. see this link: https://github.com/hugofloresgarcia/unloop
+# Setting up
+**Requires Python 3.9**.
+you'll need a Python 3.9 environment to run VampNet. This is due to a [known issue with madmom](https://github.com/hugofloresgarcia/vampnet/issues/15).
+(for example, using conda)
+```bash
+conda create -n vampnet python=3.9
+conda activate vampnet
+```
+install VampNet
+```bash
+git clone https://github.com/hugofloresgarcia/vampnet.git
+pip install -e ./vampnet
+```
+## A note on argbind
+This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files.
+Config files are stored in the `conf/` folder.
+## Getting the Pretrained Models
+### Licensing for Pretrained Models:
+The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml).
+Download the pretrained models from [this link](https://zenodo.org/record/8136629). Then, extract the models to the `models/` folder.
+# Usage
+## Launching the Gradio Interface
+You can launch a gradio UI to play with vampnet.
+```bash
+python app.py --args.load conf/interface.yml --Interface.device cuda
+```
+# Training / Fine-tuning
+## Training a model
+To train a model, run the following script:
+```bash
+python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
+```
+for multi-gpu training, use torchrun:
+```bash
+torchrun --nproc_per_node gpu scripts/exp/train.py --args.load conf/vampnet.yml --save_path path/to/ckpt
+```
+You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
+For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
+See `python scripts/exp/train.py -h` for a list of options.
+## Debugging training
+To debug training, it is easiest to run with a single GPU and 0 workers:
+```bash
+CUDA_VISIBLE_DEVICES=0 python -m pdb scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints --num_workers 0
+```
+## Fine-tuning
+To fine-tune a model, use the script in `scripts/exp/fine_tune.py` to generate 3 configuration files: `coarse.yml`, `c2f.yml`, and `interface.yml`.
+`coarse.yml` and `c2f.yml` are used to fine-tune the coarse and coarse-to-fine (c2f) models, respectively. `interface.yml` is used to launch the gradio interface.
+```bash
+python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav" <fine_tune_name>
+```
+This will create a folder under `conf/generated/<fine_tune_name>/` with the 3 configuration files.
+The save_paths will be set to `runs/<fine_tune_name>/coarse` and `runs/<fine_tune_name>/c2f`.
+launch the coarse job:
+```bash
+python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/coarse.yml
+```
+this will save the coarse model to `runs/<fine_tune_name>/coarse/ckpt/best/`.
+launch the c2f job:
+```bash
+python  scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/c2f.yml
+```
+launch the interface:
+```bash
+python  app.py --args.load conf/generated/<fine_tune_name>/interface.yml
+```

app.py ADDED Viewed

	@@ -0,0 +1,607 @@

+from pathlib import Path
+import yaml
+import uuid
+import numpy as np
+import audiotools as at
+import argbind
+import shutil
+import torch
+from datetime import datetime
+import gradio as gr
+from vampnet.interface import Interface, signal_concat
+from vampnet import mask as pmask
+device = "cuda" if torch.cuda.is_available() else "cpu"
+interface = Interface(
+    device=device,
+    coarse_ckpt="models/vampnet/coarse.pth",
+    coarse2fine_ckpt="models/vampnet/c2f.pth",
+    codec_ckpt="models/vampnet/codec.pth",
+)
+# populate the model choices with any interface.yml files in the generated confs
+MODEL_CHOICES = {
+    "default": {
+        "Interface.coarse_ckpt": str(interface.coarse_path),
+        "Interface.coarse2fine_ckpt": str(interface.c2f_path),
+        "Interface.codec_ckpt": str(interface.codec_path),
+    }
+}
+generated_confs = Path("conf/generated")
+for conf_file in generated_confs.glob("*/interface.yml"):
+    with open(conf_file) as f:
+        _conf = yaml.safe_load(f)
+        # check if the coarse, c2f, and codec ckpts exist
+        # otherwise, dont' add this model choice
+        if not (
+            Path(_conf["Interface.coarse_ckpt"]).exists() and
+            Path(_conf["Interface.coarse2fine_ckpt"]).exists() and
+            Path(_conf["Interface.codec_ckpt"]).exists()
+        ):
+            continue
+        MODEL_CHOICES[conf_file.parent.name] = _conf
+OUT_DIR = Path("gradio-outputs")
+OUT_DIR.mkdir(exist_ok=True, parents=True)
+MAX_DURATION_S = 60
+def load_audio(file):
+    print(file)
+    filepath = file.name
+    sig = at.AudioSignal.salient_excerpt(
+        filepath, duration=MAX_DURATION_S
+    )
+    # sig = interface.preprocess(sig)
+    sig = at.AudioSignal(filepath)
+    out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
+    out_dir.mkdir(parents=True, exist_ok=True)
+    sig.write(out_dir / "input.wav")
+    return sig.path_to_file
+def load_example_audio():
+    return "./assets/example.wav"
+from torch_pitch_shift import pitch_shift, get_fast_shifts
+def shift_pitch(signal, interval: int):
+    signal.samples = pitch_shift(
+        signal.samples,
+        shift=interval,
+        sample_rate=signal.sample_rate
+    )
+    return signal
+def _vamp(seed, input_audio, model_choice, pitch_shift_amt, periodic_p, p2, n_mask_codebooks, n_mask_codebooks_2, rand_mask_intensity, prefix_s, suffix_s, periodic_w, onset_mask_width, dropout, masktemp, sampletemp, typical_filtering, typical_mass, typical_min_tokens, top_p, sample_cutoff, win_dur, num_feedback_steps, stretch_factor, api=False):
+    """Run one generation ("vamp") on `input_audio` and write the results under OUT_DIR.
+
+    A per-call output directory is created, the input is copied into it, the
+    checkpoints of `model_choice` are (re)loaded, and `interface.ez_vamp` is
+    called with the mask and sampling settings assembled below.
+
+    Returns:
+        api=True:  the path of the single written output (`out.wav`).
+        api=False: a 7-tuple (out1, out2, out3, out4, mask audio path,
+                   zip archive path, mask image path).
+    """
+    # a seed that is not > 0 means "random": draw one so it can still be logged and reused
+    _seed = seed if seed > 0 else None
+    if _seed is None:
+        _seed = int(torch.randint(0, 2**32, (1,)).item())
+    at.util.seed(_seed)
+    # the output directory name encodes input stem, timestamp, seed and model choice
+    datentime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+    out_dir = OUT_DIR / f"{Path(input_audio).stem}-{datentime}-seed-{_seed}-model-{model_choice}"
+    out_dir.mkdir(parents=True)
+    sig = at.AudioSignal(input_audio)
+    sig.write(out_dir / "input.wav")
+    # reload the model if necessary
+    interface.reload(
+        coarse_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse_ckpt"],
+        c2f_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse2fine_ckpt"],
+    )
+    # loudness is measured before any pitch shifting and stored in the metadata below
+    loudness = sig.loudness()
+    print(f"input loudness is {loudness}")
+    if pitch_shift_amt != 0:
+        sig = shift_pitch(sig, pitch_shift_amt)
+    # a value of 0 for either "2" control means "same as the first control"
+    _p2 = periodic_p if p2 == 0 else p2
+    _n_codebooks_2 = n_mask_codebooks if n_mask_codebooks_2 == 0 else n_mask_codebooks_2
+    build_mask_kwargs = dict(
+        rand_mask_intensity=rand_mask_intensity,
+        prefix_s=prefix_s,
+        suffix_s=suffix_s,
+        periodic_prompt=int(periodic_p),
+        periodic_prompt2=int(_p2),
+        periodic_prompt_width=periodic_w,
+        onset_mask_width=onset_mask_width,
+        _dropout=dropout,
+        upper_codebook_mask=int(n_mask_codebooks),
+        upper_codebook_mask_2=int(_n_codebooks_2),
+    )
+    vamp_kwargs = dict(
+        # the UI value is scaled by 10 before being handed to the model
+        mask_temperature=masktemp*10,
+        sampling_temperature=sampletemp,
+        typical_filtering=typical_filtering,
+        typical_mass=typical_mass,
+        typical_min_tokens=typical_min_tokens,
+        # a top_p that is not > 0 disables top-p by passing None
+        top_p=top_p if top_p > 0 else None,
+        seed=_seed,
+        sample_cutoff=sample_cutoff,
+    )
+    # set the chunk size from the window duration, then generate
+    interface.set_chunk_size(win_dur)
+    sig, mask, codes = interface.ez_vamp(
+        sig,
+        # the UI path asks for 4 variations (written as out1..out4 below); the api path for 1
+        batch_size=4 if not api else 1,
+        feedback_steps=num_feedback_steps,
+        time_stretch_factor=stretch_factor,
+        build_mask_kwargs=build_mask_kwargs,
+        vamp_kwargs=vamp_kwargs,
+        return_mask=True,
+    )
+    # api callers only get the generated audio; no codes, plots or archive are written
+    if api:
+        sig.write(out_dir / "out.wav")
+        return sig.path_to_file
+    # NOTE(review): always true here, because the api branch above has already returned
+    if not api:
+        # write codes to numpy file
+        np.save(out_dir / "codes.npy", codes.cpu().numpy())
+        metadata = {}
+        metadata["seed"] = _seed
+        metadata["model_choice"] = model_choice
+        metadata["mask_kwargs"] = build_mask_kwargs
+        metadata["vamp_kwargs"] = vamp_kwargs
+        metadata["loudness"] = loudness
+        # save the metadata
+        with open(out_dir / "metadata.yml", "w") as f:
+            yaml.dump(metadata, f)
+        # one wav per batch item; the value returned by write() is what supplies path_to_file below
+        sig0 = sig[0].write(out_dir / "out1.wav")
+        sig1 = sig[1].write(out_dir / "out2.wav")
+        sig2 = sig[2].write(out_dir / "out3.wav")
+        sig3 = sig[3].write(out_dir / "out4.wav")
+        # write the mask to txt
+        with open(out_dir / "mask.txt", "w") as f:
+            # only the first batch item's mask is dumped
+            m = mask[0].cpu().numpy()
+            # write to txt, each time step on a new line
+            for i in range(m.shape[-1]):
+                f.write(f"{m[:, i]}\n")
+        # imported lazily, so matplotlib is only needed on the non-api path
+        import matplotlib.pyplot as plt
+        plt.clf()
+        interface.visualize_codes(mask)
+        plt.savefig(out_dir / "mask.png")
+        plt.clf()
+        interface.visualize_codes(codes)
+        plt.savefig(out_dir / "codes.png")
+        plt.close()
+        # zip out dir, and return the path to the zip
+        # (mask.wav is written after this call, so it is not part of the archive)
+        shutil.make_archive(out_dir, 'zip', out_dir)
+        # chunk in groups of 1024 timesteps
+        _mask_sigs = []
+        for i in range(0, mask.shape[-1], 1024):
+            _mask_sigs.append(interface.to_signal(mask[:, :, i:i+1024].to(interface.device)).cpu())
+        # from here on `mask` is the concatenated audio signal, not the mask array
+        mask = signal_concat(_mask_sigs)
+        mask.write(out_dir / "mask.wav")
+        return (
+            sig0.path_to_file, sig1.path_to_file,
+            sig2.path_to_file, sig3.path_to_file,
+            mask.path_to_file, str(out_dir.with_suffix(".zip")), out_dir / "mask.png"
+        )
+def vamp(data):
+    return _vamp(
+        seed=data[seed],
+        input_audio=data[input_audio],
+        model_choice=data[model_choice],
+        pitch_shift_amt=data[pitch_shift_amt],
+        periodic_p=data[periodic_p],
+        p2=data[p2],
+        n_mask_codebooks=data[n_mask_codebooks],
+        n_mask_codebooks_2=data[n_mask_codebooks_2],
+        rand_mask_intensity=data[rand_mask_intensity],
+        prefix_s=data[prefix_s],
+        suffix_s=data[suffix_s],
+        periodic_w=data[periodic_w],
+        onset_mask_width=data[onset_mask_width],
+        dropout=data[dropout],
+        masktemp=data[masktemp],
+        sampletemp=data[sampletemp],
+        typical_filtering=data[typical_filtering],
+        typical_mass=data[typical_mass],
+        typical_min_tokens=data[typical_min_tokens],
+        top_p=data[top_p],
+        sample_cutoff=data[sample_cutoff],
+        win_dur=data[win_dur],
+        num_feedback_steps=data[num_feedback_steps],
+        stretch_factor=data[stretch_factor],
+        api=False,
+    )
+def api_vamp(data):
+    return _vamp(
+        seed=data[seed],
+        input_audio=data[input_audio],
+        model_choice=data[model_choice],
+        pitch_shift_amt=data[pitch_shift_amt],
+        periodic_p=data[periodic_p],
+        p2=data[p2],
+        n_mask_codebooks=data[n_mask_codebooks],
+        n_mask_codebooks_2=data[n_mask_codebooks_2],
+        rand_mask_intensity=data[rand_mask_intensity],
+        prefix_s=data[prefix_s],
+        suffix_s=data[suffix_s],
+        periodic_w=data[periodic_w],
+        onset_mask_width=data[onset_mask_width],
+        dropout=data[dropout],
+        masktemp=data[masktemp],
+        sampletemp=data[sampletemp],
+        typical_filtering=data[typical_filtering],
+        typical_mass=data[typical_mass],
+        typical_min_tokens=data[typical_min_tokens],
+        top_p=data[top_p],
+        sample_cutoff=data[sample_cutoff],
+        win_dur=data[win_dur],
+        num_feedback_steps=data[num_feedback_steps],
+        stretch_factor=data[stretch_factor],
+        api=True,
+    )
+def harp_vamp(input_audio,
+            periodic_p,
+            n_mask_codebooks,
+            pitch_shift_amt,
+            win_dur):
+    return _vamp(
+        seed=0,
+        input_audio=input_audio,
+        model_choice="default",
+        pitch_shift_amt=pitch_shift_amt,
+        periodic_p=periodic_p,
+        p2=0,
+        n_mask_codebooks=n_mask_codebooks,
+        n_mask_codebooks_2=0,
+        rand_mask_intensity=1.0,
+        prefix_s=0.0,
+        suffix_s=0.0,
+        periodic_w=1,
+        onset_mask_width=0,
+        dropout=0.0,
+        masktemp=1.5,
+        sampletemp=1.0,
+        typical_filtering=True,
+        typical_mass=0.15,
+        typical_min_tokens=64,
+        top_p=0.9,
+        sample_cutoff=1.0,
+        win_dur=win_dur,
+        num_feedback_steps=1,
+        stretch_factor=1.0,
+        api=True,
+    )
+# Build the gradio UI: one row with three columns (audio in, controls, model choice + outputs),
+# followed by the event wiring and the pyharp endpoint.
+with gr.Blocks() as demo:
+    with gr.Row():
+        # column 1: audio input and mask preview
+        with gr.Column():
+            # NOTE(review): the label says 100s but MAX_DURATION_S is 60, and this
+            # f-string has no placeholders; confirm which limit is intended
+            manual_audio_upload = gr.File(
+                label=f"upload some audio (will be randomly trimmed to max of 100s)",
+                file_types=["audio"]
+            )
+            load_example_audio_button = gr.Button("or load example audio")
+            # read-only player: it is filled by the upload/example handlers below
+            input_audio = gr.Audio(
+                label="input audio",
+                interactive=False,
+                type="filepath",
+            )
+            audio_mask = gr.Audio(
+                label="audio mask (listen to this to hear the mask hints)",
+                interactive=False,
+                type="filepath",
+            )
+            # connect widgets
+            load_example_audio_button.click(
+                fn=load_example_audio,
+                inputs=[],
+                outputs=[ input_audio]
+            )
+            manual_audio_upload.change(
+                fn=load_audio,
+                inputs=[manual_audio_upload],
+                outputs=[ input_audio]
+            )
+        # mask settings
+        with gr.Column():
+            with gr.Accordion("manual controls", open=True):
+                periodic_p = gr.Slider(
+                    label="periodic prompt",
+                    minimum=0,
+                    maximum=128,
+                    step=1,
+                    value=3,
+                )
+                # 0 means "reuse periodic_p" (see _vamp)
+                p2 = gr.Slider(
+                    label="periodic prompt 2 (0 - same as p1, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
+                    minimum=0,
+                    maximum=128,
+                    step=1,
+                    value=0,
+                )
+                onset_mask_width = gr.Slider(
+                    label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
+                    minimum=0,
+                    maximum=100,
+                    step=1,
+                    value=0,
+                )
+                n_mask_codebooks = gr.Slider(
+                    label="compression prompt ",
+                    value=3,
+                    minimum=0,
+                    maximum=14,
+                    step=1,
+                )
+                # 0 means "reuse n_mask_codebooks" (see _vamp)
+                n_mask_codebooks_2 = gr.Number(
+                    label="compression prompt 2 via linear interpolation (0 == constant)",
+                    value=0,
+                )
+            with gr.Accordion("extras ", open=False):
+                pitch_shift_amt = gr.Slider(
+                    label="pitch shift amount (semitones)",
+                    minimum=-12,
+                    maximum=12,
+                    step=1,
+                    value=0,
+                )
+                stretch_factor = gr.Slider(
+                    label="time stretch factor",
+                    minimum=0,
+                    maximum=64,
+                    step=1,
+                    value=1,
+                )
+                rand_mask_intensity = gr.Slider(
+                    label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=1.0
+                )
+                periodic_w = gr.Slider(
+                    label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
+                    minimum=1,
+                    maximum=20,
+                    step=1,
+                    value=1,
+                )
+            with gr.Accordion("prefix/suffix prompts", open=True):
+                prefix_s = gr.Slider(
+                    label="prefix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+                suffix_s = gr.Slider(
+                    label="suffix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+            # _vamp multiplies this value by 10 before passing it on
+            masktemp = gr.Slider(
+                label="mask temperature",
+                minimum=0.0,
+                maximum=100.0,
+                value=1.5
+            )
+            sampletemp = gr.Slider(
+                label="sample temperature",
+                minimum=0.1,
+                maximum=10.0,
+                value=1.0,
+                step=0.001
+            )
+            with gr.Accordion("sampling settings", open=False):
+                top_p = gr.Slider(
+                    label="top p (0.0 = off)",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.9
+                )
+                typical_filtering = gr.Checkbox(
+                    label="typical filtering ",
+                    value=True
+                )
+                typical_mass = gr.Slider(
+                    label="typical mass (should probably stay between 0.1 and 0.5)",
+                    minimum=0.01,
+                    maximum=0.99,
+                    value=0.15
+                )
+                typical_min_tokens = gr.Slider(
+                    label="typical min tokens (should probably stay between 1 and 256)",
+                    minimum=1,
+                    maximum=256,
+                    step=1,
+                    value=64
+                )
+                sample_cutoff = gr.Slider(
+                    label="sample cutoff",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=1.0,
+                    step=0.01
+                )
+            dropout = gr.Slider(
+                label="mask dropout",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.0
+            )
+            # _vamp treats any value that is not > 0 as "pick a random seed"
+            seed = gr.Number(
+                label="seed (0 for random)",
+                value=0,
+                precision=0,
+            )
+        # column 3: model choice, generation trigger and outputs
+        with gr.Column():
+            # choices come from MODEL_CHOICES, built at import time
+            model_choice = gr.Dropdown(
+                label="model choice",
+                choices=list(MODEL_CHOICES.keys()),
+                value="default",
+                visible=True
+            )
+            num_feedback_steps = gr.Slider(
+                label="number of feedback steps (each one takes a while)",
+                minimum=1,
+                maximum=16,
+                step=1,
+                value=1
+            )
+            win_dur= gr.Slider(
+                label="window duration (seconds)",
+                minimum=2,
+                maximum=10,
+                value=6)
+            vamp_button = gr.Button("generate (vamp)!!!")
+            maskimg = gr.Image(
+                label="mask image",
+                interactive=False,
+                type="filepath"
+            )
+            # four players, one per file written by _vamp (out1.wav .. out4.wav)
+            out1 = gr.Audio(
+                label="output audio 1",
+                interactive=False,
+                type="filepath"
+            )
+            out2 = gr.Audio(
+                label="output audio 2",
+                interactive=False,
+                type="filepath"
+            )
+            out3 = gr.Audio(
+                label="output audio 3",
+                interactive=False,
+                type="filepath"
+            )
+            out4 = gr.Audio(
+                label="output audio 4",
+                interactive=False,
+                type="filepath"
+            )
+            thank_you = gr.Markdown("")
+            # download all the outputs
+            download = gr.File(type="file", label="download outputs")
+    # a set (not a list) of components: the handlers `vamp` / `api_vamp` take a
+    # single `data` argument and look values up by component object
+    _inputs = {
+            input_audio,
+            masktemp,
+            sampletemp,
+            top_p,
+            prefix_s, suffix_s,
+            rand_mask_intensity,
+            periodic_p, periodic_w,
+            dropout,
+            stretch_factor,
+            onset_mask_width,
+            typical_filtering,
+            typical_mass,
+            typical_min_tokens,
+            seed,
+            model_choice,
+            n_mask_codebooks,
+            pitch_shift_amt,
+            sample_cutoff,
+            num_feedback_steps,
+            p2,
+            n_mask_codebooks_2,
+            win_dur
+        }
+    # connect widgets
+    # the output order matches the 7-tuple returned by _vamp when api=False
+    vamp_button.click(
+        fn=vamp,
+        inputs=_inputs,
+        outputs=[out1, out2, out3, out4, audio_mask, download, maskimg],
+    )
+    # hidden button whose click event is registered under the api name "vamp";
+    # it fills a single output
+    api_vamp_button = gr.Button("api vamp", visible=False)
+    api_vamp_button.click(
+        fn=api_vamp,
+        inputs=_inputs,
+        outputs=[out1],
+        api_name="vamp"
+    )
+    # pyharp endpoint: passes five of the controls above to harp_vamp and sends the result to out1
+    from pyharp import ModelCard, build_endpoint
+    model_card = ModelCard(
+        name="percussion vampnet",
+        description="",
+        author="hugo flores garcía",
+        tags=["generative","sound"],
+    )
+    build_endpoint(
+        inputs=[
+            input_audio,
+            periodic_p,
+            n_mask_codebooks,
+            pitch_shift_amt,
+            win_dur,
+        ],
+        output=out1,
+        process_fn=harp_vamp,
+        card=model_card
+    )
+# Turn on request queuing and start the app with share=True.
+try:
+    demo.queue()
+    demo.launch(share=True)
+except KeyboardInterrupt:
+    # on Ctrl-C, delete the generated outputs directory (errors ignored), then re-raise
+    shutil.rmtree("gradio-outputs", ignore_errors=True)
+    raise

assets/example.wav ADDED Viewed

Binary file (883 kB). View file

conf/c2f.yml ADDED Viewed

	@@ -0,0 +1,14 @@

+# Coarse-to-fine (c2f) configuration; the README suggests it as a starting point for c2f models.
+# It includes conf/vampnet.yml and sets the values below.
+# NOTE(review): presumably keys set here override the included ones; confirm against argbind.
+$include:
+  - conf/vampnet.yml
+# model settings
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.embedding_dim: 1280
+VampNet.n_layers: 16
+VampNet.n_heads: 20
+# dataset settings
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0

conf/generated/bbc-humans/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/bbc-humans/c2f
+train/AudioLoader.sources: &id001
+- /home/hugo/Humans/
+val/AudioLoader.sources: *id001

conf/generated/bbc-humans/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/bbc-humans/coarse
+train/AudioLoader.sources: &id001
+- /home/hugo/Humans/
+val/AudioLoader.sources: *id001

conf/generated/bbc-humans/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:
+- - /home/hugo/Humans/
+Interface.coarse2fine_ckpt: ./runs/bbc-humans/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/bbc-humans/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/boleros/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/boleros/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/boleros
+val/AudioLoader.sources: *id001

conf/generated/boleros/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/boleros/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/boleros
+val/AudioLoader.sources: *id001

conf/generated/boleros/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/boleros
+Interface.coarse2fine_ckpt: ./runs/boleros/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/boleros/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/bowl/c2f.yml ADDED Viewed

	@@ -0,0 +1,16 @@

+# Generated fine-tuning config for the c2f model of the "bowl" run.
+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/bowl/c2f
+# NOTE(review): the two entries below look like ONE path containing a space
+# (".../Anns Animals") that was split on whitespace when this file was
+# generated; "Animals" alone is a relative path. Confirm the intended source.
+train/AudioLoader.sources: &id001
+- /media/seagate_prosound/prosound_core_complete/Anns
+- Animals
+# the alias makes validation use the same source list as training
+val/AudioLoader.sources: *id001

conf/generated/bowl/coarse.yml ADDED Viewed

	@@ -0,0 +1,9 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/bowl/coarse
+# The folder name contains a space; keep it as ONE quoted entry (was split).
+train/AudioLoader.sources: &id001
+- '/media/seagate_prosound/prosound_core_complete/Anns Animals'
+val/AudioLoader.sources: *id001

conf/generated/bowl/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+# The folder name contains a space; keep it as ONE quoted entry (was split).
+AudioLoader.sources:
+- - '/media/seagate_prosound/prosound_core_complete/Anns Animals'
+Interface.coarse2fine_ckpt: ./runs/bowl/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/bowl/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/breaks-steps/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/machines  # NOTE(review): same folder as machines/interface.yml -- confirm
+Interface.coarse2fine_ckpt: ./runs-june-23/breaks-steps/c2f/best/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs-june-23/breaks-steps/c2f/best/lora.pth
+Interface.coarse_ckpt: ./runs-june-23/breaks-steps/coarse/best/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs-june-23/breaks-steps/coarse/best/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs-june-23
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/choir/interface.yml ADDED Viewed

	@@ -0,0 +1,9 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/knower  # NOTE(review): same folder as the funk configs -- confirm
+Interface.coarse2fine_ckpt: ./runs/choir/c2f/latest/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/choir/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./runs/choir/coarse/latest/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/choir/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth
+Interface.coarse_chunk_size_s: 15  # presumably seconds, per the _s suffix -- confirm

conf/generated/church-bells/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/church-bells/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/church-bells  # relative path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/church-bells/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/church-bells/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/church-bells  # relative path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/church-bells/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - data/church-bells  # relative path
+Interface.coarse2fine_ckpt: ./runs/church-bells/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/church-bells/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/copepod/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/copepod/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/copepod  # relative path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/copepod/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/copepod/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/copepod  # relative path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/copepod/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - data/copepod  # relative path
+Interface.coarse2fine_ckpt: ./runs/copepod/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/copepod/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/die/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/die/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK2/prosound_redacted/Chris Diebold General  # one entry; the spaces are part of the path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/die/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/die/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK2/prosound_redacted/Chris Diebold General  # one entry; the spaces are part of the path
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/die/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+# The source folder name contains spaces; it is quoted and kept as a single
+# entry, matching die/c2f.yml and die/coarse.yml (it was split into three).
+AudioLoader.sources:
+- - '/media/CHONK2/prosound_redacted/Chris Diebold General'
+Interface.coarse2fine_ckpt: ./runs/die/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/die/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/dnb/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/dnb/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /home/hugo/ccm/data/audio/dnb  # absolute path under a user's home directory
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/dnb/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/dnb/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /home/hugo/ccm/data/audio/dnb  # absolute path under a user's home directory
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/dnb/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /home/hugo/ccm/data/audio/dnb  # absolute path under a user's home directory
+Interface.coarse2fine_ckpt: ./runs/dnb/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/dnb/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/earlymachines/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/machines/c2f  # NOTE(review): same save_path as machines/c2f.yml -- confirm
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/machines
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/earlymachines/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/machines/coarse  # NOTE(review): same save_path as machines/coarse.yml -- confirm
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/machines
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/earlymachines/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/machines
+# NOTE(review): every checkpoint below is under ./runs/machines, as in machines/interface.yml -- confirm
+Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
+Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/funk/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/knower/c2f  # run directory is named "knower", not "funk"
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/knower
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/funk/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/knower/coarse  # run directory is named "knower", not "funk"
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/knower
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/funk/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/knower
+Interface.coarse2fine_ckpt: ./runs/knower/c2f/latest/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/knower/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./runs/knower/coarse/latest/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/knower/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/growl/c2f.yml ADDED Viewed

	@@ -0,0 +1,16 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/growl/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/growly
+- animals/  # NOTE(review): possibly the tail of one path 'data/growly animals/' -- confirm
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/growl/coarse.yml ADDED Viewed

	@@ -0,0 +1,9 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/growl/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- data/growly
+- animals/  # NOTE(review): possibly the tail of one path 'data/growly animals/' -- confirm
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/growl/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:  # nested list: one group holding two entries
+- - data/growly
+  - animals/  # NOTE(review): possibly the tail of one path 'data/growly animals/' -- confirm
+Interface.coarse2fine_ckpt: ./runs/growl/c2f/latest/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/growl/coarse/latest/vampnet/weights.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/ismir-birds/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/ismir-birds/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/ismir-birds/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/ismir-birds/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/ismir-birds/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
+Interface.coarse2fine_ckpt: ./runs/ismir-birds/c2f/latest/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/ismir-birds/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./runs/ismir-birds/coarse/latest/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/ismir-birds/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/ismir-machines/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/ismir-machines/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/ismir-machines/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/ismir-machines/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/ismir-machines/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
+Interface.coarse2fine_ckpt: ./runs/ismir-machines/c2f/latest/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/ismir-machines/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./runs/ismir-machines/coarse/latest/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/ismir-machines/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/machines/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0  # presumably seconds -- confirm
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/machines/c2f
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/machines
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/machines/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/machines/coarse
+train/AudioLoader.sources: &id001  # anchor: this list is reused below
+- /media/CHONK/hugo/machines
+val/AudioLoader.sources: *id001  # alias: same list as the training sources

conf/generated/machines/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:  # nested list: one group holding one folder
+- - /media/CHONK/hugo/machines
+Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
+Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
+Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
+Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth  # under ./models, not ./runs
+Interface.wavebeat_ckpt: ./models/wavebeat.pth

conf/generated/musdb/c2f.yml ADDED Viewed

	@@ -0,0 +1,40 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/c2f.pth
+save_path: ./runs/xeno-canto/c2f  # NOTE(review): musdb config saving under xeno-canto -- confirm
+AudioDataset.aligned: true
+train/build_dataset.folders:  # one entry per stem; ext is a list, as in musdb/coarse.yml
+  bass:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
+    ext: ["bass.wav"]
+  drums:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
+    ext: ["drums.wav"]
+  other:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
+    ext: ["other.wav"]
+  vocals:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
+    ext: ["vocals.wav"]
+val/build_dataset.folders:  # NOTE(review): musdb/coarse.yml validates on .../test, this uses .../val -- confirm
+  bass:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
+    ext: ["bass.wav"]
+  drums:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
+    ext: ["drums.wav"]
+  other:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
+    ext: ["other.wav"]
+  vocals:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
+    ext: ["vocals.wav"]

conf/generated/musdb/coarse.yml ADDED Viewed

	@@ -0,0 +1,31 @@

+$include:  # include list; presumably merged by the config loader -- confirm
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/vampnet/coarse.pth
+save_path: ./runs/xeno-canto/coarse  # NOTE(review): musdb config saving under xeno-canto -- confirm
+train/build_dataset.folders:  # one entry per stem, each with its own sources and ext list
+  bass:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
+    ext: ["bass.wav"]
+  drums:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
+    ext: ["drums.wav"]
+  other:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
+    ext: ["other.wav"]
+  vocals:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
+    ext: ["vocals.wav"]
+val/build_dataset.folders:  # validation entries read the musdb18hq/test folder
+  bass:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
+    ext: ["bass.wav"]
+  drums:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
+    ext: ["drums.wav"]
+  other:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
+    ext: ["other.wav"]
+  vocals:
+    sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
+    ext: ["vocals.wav"]

conf/generated/musdb/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth
+Interface.coarse2fine_lora_ckpt: null
+Interface.coarse_ckpt: ./runs/musdb-cond-clfdrop/best/vampnet/weights.pth
+Interface.coarse_lora_ckpt: null
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.wavebeat_ckpt: ./models/wavebeat.pth