Spaces:

descript
/

vampnet

Runtime error

App Files Files Community

pseeth

hugggof commited on Jul 11, 2023

Commit

c91e8cc

0 Parent(s):

Duplicate from hugggof/vampnet

Browse files

Co-authored-by: Hugo Flores <hugggof@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +36 -0
.gitignore +184 -0
.pre-commit-config.yaml +15 -0
LICENSE +21 -0
README.md +95 -0
app.py +511 -0
assets/example.wav +0 -0
conf/c2f.yml +14 -0
conf/generated-v0/berta-goldman-speech/c2f.yml +15 -0
conf/generated-v0/berta-goldman-speech/coarse.yml +8 -0
conf/generated-v0/berta-goldman-speech/interface.yml +5 -0
conf/generated-v0/gamelan-xeno-canto/c2f.yml +17 -0
conf/generated-v0/gamelan-xeno-canto/coarse.yml +10 -0
conf/generated-v0/gamelan-xeno-canto/interface.yml +6 -0
conf/generated-v0/nasralla/c2f.yml +15 -0
conf/generated-v0/nasralla/coarse.yml +8 -0
conf/generated-v0/nasralla/interface.yml +5 -0
conf/generated/breaks-steps/c2f.yml +15 -0
conf/generated/breaks-steps/coarse.yml +8 -0
conf/generated/breaks-steps/interface.yml +7 -0
conf/generated/bulgarian-tv-choir/c2f.yml +15 -0
conf/generated/bulgarian-tv-choir/coarse.yml +8 -0
conf/generated/bulgarian-tv-choir/interface.yml +7 -0
conf/generated/dariacore/c2f.yml +15 -0
conf/generated/dariacore/coarse.yml +8 -0
conf/generated/dariacore/interface.yml +7 -0
conf/generated/musica-bolero-marimba/c2f.yml +18 -0
conf/generated/musica-bolero-marimba/coarse.yml +11 -0
conf/generated/musica-bolero-marimba/interface.yml +8 -0
conf/generated/panchos/c2f.yml +15 -0
conf/generated/panchos/coarse.yml +8 -0
conf/generated/panchos/interface.yml +7 -0
conf/generated/titi-monkey/c2f.yml +15 -0
conf/generated/titi-monkey/coarse.yml +8 -0
conf/generated/titi-monkey/interface.yml +7 -0
conf/generated/xeno-canto/c2f.yml +15 -0
conf/generated/xeno-canto/coarse.yml +8 -0
conf/generated/xeno-canto/interface.yml +7 -0
conf/interface.yml +10 -0
conf/lora/birds.yml +10 -0
conf/lora/birdss.yml +12 -0
conf/lora/constructions.yml +10 -0
conf/lora/ella-baila-sola.yml +10 -0
conf/lora/gas-station.yml +10 -0
conf/lora/lora-is-this-charlie-parker.yml +10 -0
conf/lora/lora.yml +22 -0
conf/lora/underworld.yml +10 -0
conf/lora/xeno-canto/c2f.yml +21 -0
conf/lora/xeno-canto/coarse.yml +10 -0
conf/vampnet-musdb-drums.yml +22 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+.pth filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,184 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/env.sh
+venv/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# Files created by experiments
+output/
+snapshot/
+*.m4a
+notebooks/scratch.ipynb
+notebooks/inspect.ipynb
+notebooks/effects.ipynb
+notebooks/*.ipynb
+notebooks/*.gif
+notebooks/*.wav
+notebooks/*.mp4
+*runs/
+boards/
+samples/
+*.ipynb
+results.json
+metrics.csv
+mprofile_*
+mem.png
+results/
+mprofile*
+*.png
+# do not ignore the test wav file
+!tests/audio/short_test_audio.wav
+!tests/audio/output.wav
+*/.DS_Store
+.DS_Store
+env.sh
+_codebraid/
+**/*.html
+**/*.exec.md
+flagged/
+log.txt
+ckpt/
+.syncthing*
+tests/assets/
+archived/
+scratch/
+runs-archive
+lyrebird-audiotools
+lyrebird-audio-codec
+samples-*/**
+gradio-outputs/
+samples*/
+models-all/
+models.zip
+audiotools/
+descript-audio-codec/
+# *.pth
+.git-old

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+repos:
+- repo: https://github.com/asottile/reorder_python_imports
+  rev: v2.5.0
+  hooks:
+    - id: reorder-python-imports
+- repo: https://github.com/psf/black
+  rev: 23.1.0
+  hooks:
+    - id: black
+      language_version: python3
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.0.1
+  hooks:
+    - id: end-of-file-fixer
+    - id: trailing-whitespace

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Hugo Flores García and Prem Seetharaman
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,95 @@

+---
+title: 'VampNet: Music Generation with Masked Transformers'
+emoji: 🤖
+colorFrom: gray
+colorTo: gray
+sdk: gradio
+sdk_version: 3.36.1
+app_file: app.py
+pinned: false
+duplicated_from: hugggof/vampnet
+---
+# VampNet
+This repository contains recipes for training generative music models on top of the Lyrebird Audio Codec.
+# Setting up
+Requires Python 3.9 or later.
+install VampNet
+```bash
+git clone https://github.com/hugofloresgarcia/vampnet.git
+pip install -e ./vampnet
+```
+## A note on argbind
+This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files.
+Config files are stored in the `conf/` folder.
+## Getting the Pretrained Models
+### Licensing for Pretrained Models:
+The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml).
+Download the pretrained models from [this link](https://zenodo.org/record/8136545). Then, extract the models to the `models/` folder.
+# Usage
+## Launching the Gradio Interface
+You can launch a gradio UI to play with vampnet.
+```bash
+python app.py --args.load conf/interface.yml --Interface.device cuda
+```
+# Training / Fine-tuning
+## Training a model
+To train a model, run the following script:
+```bash
+python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
+```
+You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
+For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
+See `python scripts/exp/train.py -h` for a list of options.
+## Fine-tuning
+To fine-tune a model, use the script in `scripts/exp/fine_tune.py` to generate 3 configuration files: `c2f.yml`, `coarse.yml`, and `interface.yml`.
+`coarse.yml` and `c2f.yml` are used to fine-tune the coarse and coarse-to-fine models, respectively. `interface.yml` is used to launch the gradio interface.
+```bash
+python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav" <fine_tune_name>
+```
+This will create a folder under `conf/generated/<fine_tune_name>/` with the 3 configuration files.
+The save_paths will be set to `runs/<fine_tune_name>/coarse` and `runs/<fine_tune_name>/c2f`.
+Launch the coarse job:
+```bash
+python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/coarse.yml
+```
+This will save the coarse model to `runs/<fine_tune_name>/coarse/ckpt/best/`.
+Launch the c2f job:
+```bash
+python  scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/c2f.yml
+```
+launch the interface:
+```bash
+python  demo.py --args.load conf/generated/<fine_tune_name>/interface.yml
+```

app.py ADDED Viewed

	@@ -0,0 +1,511 @@

+from pathlib import Path
+from typing import Tuple
+import yaml
+import tempfile
+import uuid
+import shutil
+from dataclasses import dataclass, asdict
+import numpy as np
+import audiotools as at
+import argbind
+import torch
+import gradio as gr
+from vampnet.interface import Interface
+from vampnet import mask as pmask
+# Interface = argbind.bind(Interface)
+# AudioLoader = argbind.bind(at.data.datasets.AudioLoader)
+interface = Interface(
+    coarse_ckpt="./models/vampnet/coarse.pth",
+    coarse2fine_ckpt="./models/vampnet/c2f.pth",
+    codec_ckpt="./models/vampnet/codec.pth",
+    device="cuda" if torch.cuda.is_available() else "cpu",
+)
+# loader = AudioLoader()
+print(f"interface device is {interface.device}")
+# dataset = at.data.datasets.AudioDataset(
+#     loader,
+#     sample_rate=interface.codec.sample_rate,
+#     duration=interface.coarse.chunk_size_s,
+#     n_examples=5000,
+#     without_replacement=True,
+# )
+OUT_DIR = Path("gradio-outputs")
+OUT_DIR.mkdir(exist_ok=True, parents=True)
+def load_audio(file):
+    print(file)
+    filepath = file.name
+    sig = at.AudioSignal.salient_excerpt(
+        filepath,
+        duration=interface.coarse.chunk_size_s
+    )
+    sig = interface.preprocess(sig)
+    out_dir = OUT_DIR / str(uuid.uuid4())
+    out_dir.mkdir(parents=True, exist_ok=True)
+    sig.write(out_dir / "input.wav")
+    return sig.path_to_file
+def load_example_audio():
+    return "./assets/example.wav"
+def _vamp(data, return_mask=False):
+    # remove any old files in the output directory (from previous runs)
+    shutil.rmtree(OUT_DIR)
+    OUT_DIR.mkdir()
+    out_dir = OUT_DIR / str(uuid.uuid4())
+    out_dir.mkdir()
+    sig = at.AudioSignal(data[input_audio])
+    z = interface.encode(sig)
+    ncc = data[n_conditioning_codebooks]
+    # build the mask
+    mask = pmask.linear_random(z, data[rand_mask_intensity])
+    mask = pmask.mask_and(
+        mask, pmask.inpaint(
+            z,
+            interface.s2t(data[prefix_s]),
+            interface.s2t(data[suffix_s])
+        )
+    )
+    mask = pmask.mask_and(
+        mask, pmask.periodic_mask(
+            z,
+            data[periodic_p],
+            data[periodic_w],
+            random_roll=True
+        )
+    )
+    if data[onset_mask_width] > 0:
+        mask = pmask.mask_or(
+            mask, pmask.onset_mask(sig, z, interface, width=data[onset_mask_width])
+        )
+    if data[beat_mask_width] > 0:
+        beat_mask = interface.make_beat_mask(
+            sig,
+            after_beat_s=(data[beat_mask_width]/1000),
+            mask_upbeats=not data[beat_mask_downbeats],
+        )
+        mask = pmask.mask_and(mask, beat_mask)
+    # these should be the last two mask ops
+    mask = pmask.dropout(mask, data[dropout])
+    mask = pmask.codebook_unmask(mask, ncc)
+    print(f"created mask with: linear random {data[rand_mask_intensity]}, inpaint {data[prefix_s]}:{data[suffix_s]}, periodic {data[periodic_p]}:{data[periodic_w]}, dropout {data[dropout]}, codebook unmask {ncc}, onset mask {data[onset_mask_width]}, num steps {data[num_steps]}, init temp {data[temp]},  use coarse2fine {data[use_coarse2fine]}")
+    # save the mask as a txt file
+    np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
+    zv, mask_z = interface.coarse_vamp(
+        z,
+        mask=mask,
+        sampling_steps=data[num_steps],
+        temperature=data[temp]*10,
+        return_mask=True,
+        typical_filtering=data[typical_filtering],
+        typical_mass=data[typical_mass],
+        typical_min_tokens=data[typical_min_tokens],
+        gen_fn=interface.coarse.generate,
+    )
+    if use_coarse2fine:
+        zv = interface.coarse_to_fine(zv, temperature=data[temp])
+    sig = interface.to_signal(zv).cpu()
+    print("done")
+    sig.write(out_dir / "output.wav")
+    if return_mask:
+        mask = interface.to_signal(mask_z).cpu()
+        mask.write(out_dir / "mask.wav")
+        return sig.path_to_file, mask.path_to_file
+    else:
+        return sig.path_to_file
+def vamp(data):
+    return _vamp(data, return_mask=True)
+def api_vamp(data):
+    return _vamp(data, return_mask=False)
+def save_vamp(data):
+    out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
+    out_dir.mkdir(parents=True, exist_ok=True)
+    sig_in = at.AudioSignal(data[input_audio])
+    sig_out = at.AudioSignal(data[output_audio])
+    sig_in.write(out_dir / "input.wav")
+    sig_out.write(out_dir / "output.wav")
+    _data = {
+        "temp": data[temp],
+        "prefix_s": data[prefix_s],
+        "suffix_s": data[suffix_s],
+        "rand_mask_intensity": data[rand_mask_intensity],
+        "num_steps": data[num_steps],
+        "notes": data[notes_text],
+        "periodic_period": data[periodic_p],
+        "periodic_width": data[periodic_w],
+        "n_conditioning_codebooks": data[n_conditioning_codebooks],
+        "use_coarse2fine": data[use_coarse2fine],
+        "stretch_factor": data[stretch_factor],
+    }
+    # save with yaml
+    with open(out_dir / "data.yaml", "w") as f:
+        yaml.dump(_data, f)
+    import zipfile
+    zip_path = out_dir.with_suffix(".zip")
+    with zipfile.ZipFile(zip_path, "w") as zf:
+        for file in out_dir.iterdir():
+            zf.write(file, file.name)
+    return f"saved! your save code is {out_dir.stem}", zip_path
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("# VampNet Audio Vamping")
+            gr.Markdown("""## Description:
+            This is a demo of the VampNet, a generative audio model that transforms the input audio based on the chosen settings.
+            You can control the extent and nature of variation with a set of manual controls and presets.
+            Use this interface to experiment with different mask settings and explore the audio outputs.
+            """)
+            gr.Markdown("""
+            ## Instructions:
+            1. You can start by uploading some audio, or by loading the example audio.
+            2. Choose a preset for the vamp operation, or manually adjust the controls to customize the mask settings.
+            3. Click the "generate (vamp)!!!" button to apply the vamp operation. Listen to the output audio.
+            4. Optionally, you can add some notes and save the result.
+            5. You can also use the output as the new input and continue experimenting!
+            """)
+    with gr.Row():
+        with gr.Column():
+            manual_audio_upload = gr.File(
+                label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
+                file_types=["audio"]
+            )
+            load_example_audio_button = gr.Button("or load example audio")
+            input_audio = gr.Audio(
+                label="input audio",
+                interactive=False,
+                type="filepath",
+            )
+            audio_mask = gr.Audio(
+                label="audio mask (listen to this to hear the mask hints)",
+                interactive=False,
+                type="filepath",
+            )
+            # connect widgets
+            load_example_audio_button.click(
+                fn=load_example_audio,
+                inputs=[],
+                outputs=[ input_audio]
+            )
+            manual_audio_upload.change(
+                fn=load_audio,
+                inputs=[manual_audio_upload],
+                outputs=[ input_audio]
+            )
+        # mask settings
+        with gr.Column():
+            presets = {
+                    "unconditional": {
+                        "periodic_p": 0,
+                        "onset_mask_width": 0,
+                        "beat_mask_width": 0,
+                        "beat_mask_downbeats": False,
+                    },
+                    "slight periodic variation": {
+                        "periodic_p": 5,
+                        "onset_mask_width": 5,
+                        "beat_mask_width": 0,
+                        "beat_mask_downbeats": False,
+                    },
+                    "moderate periodic variation": {
+                        "periodic_p": 13,
+                        "onset_mask_width": 5,
+                        "beat_mask_width": 0,
+                        "beat_mask_downbeats": False,
+                    },
+                    "strong periodic variation": {
+                        "periodic_p": 17,
+                        "onset_mask_width": 5,
+                        "beat_mask_width": 0,
+                        "beat_mask_downbeats": False,
+                    },
+                    "very strong periodic variation": {
+                        "periodic_p": 21,
+                        "onset_mask_width": 5,
+                        "beat_mask_width": 0,
+                        "beat_mask_downbeats": False,
+                    },
+                    "beat-driven variation": {
+                        "periodic_p": 0,
+                        "onset_mask_width": 0,
+                        "beat_mask_width": 50,
+                        "beat_mask_downbeats": False,
+                    },
+                    "beat-driven variation (downbeats only)": {
+                        "periodic_p": 0,
+                        "onset_mask_width": 0,
+                        "beat_mask_width": 50,
+                        "beat_mask_downbeats": True,
+                    },
+                    "beat-driven variation (downbeats only, strong)": {
+                        "periodic_p": 0,
+                        "onset_mask_width": 0,
+                        "beat_mask_width": 20,
+                        "beat_mask_downbeats": True,
+                    },
+                }
+            preset = gr.Dropdown(
+                label="preset",
+                choices=list(presets.keys()),
+                value="strong periodic variation",
+            )
+            load_preset_button = gr.Button("load_preset")
+            with gr.Accordion("manual controls", open=True):
+                periodic_p = gr.Slider(
+                    label="periodic prompt  (0 - unconditional, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
+                    minimum=0,
+                    maximum=128,
+                    step=1,
+                    value=3,
+                )
+                onset_mask_width = gr.Slider(
+                    label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
+                    minimum=0,
+                    maximum=20,
+                    step=1,
+                    value=5,
+                )
+                beat_mask_width = gr.Slider(
+                    label="beat mask width (in milliseconds)",
+                    minimum=0,
+                    maximum=200,
+                    value=0,
+                )
+                beat_mask_downbeats = gr.Checkbox(
+                    label="beat mask downbeats only?",
+                    value=False
+                )
+                with gr.Accordion("extras ", open=False):
+                    rand_mask_intensity = gr.Slider(
+                        label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
+                        minimum=0.0,
+                        maximum=1.0,
+                        value=1.0
+                    )
+                    periodic_w = gr.Slider(
+                        label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
+                        minimum=1,
+                        maximum=20,
+                        step=1,
+                        value=1,
+                    )
+                    n_conditioning_codebooks = gr.Number(
+                        label="number of conditioning codebooks. probably 0",
+                        value=0,
+                        precision=0,
+                    )
+                    stretch_factor = gr.Slider(
+                        label="time stretch factor",
+                        minimum=0,
+                        maximum=64,
+                        step=1,
+                        value=1,
+                    )
+            preset_outputs = {
+                periodic_p,
+                onset_mask_width,
+                beat_mask_width,
+                beat_mask_downbeats,
+            }
+            def load_preset(_preset):
+                return tuple(presets[_preset].values())
+            load_preset_button.click(
+                fn=load_preset,
+                inputs=[preset],
+                outputs=preset_outputs
+            )
+            with gr.Accordion("prefix/suffix prompts", open=False):
+                prefix_s = gr.Slider(
+                    label="prefix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+                suffix_s = gr.Slider(
+                    label="suffix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+            temp = gr.Slider(
+                label="temperature",
+                minimum=0.0,
+                maximum=10.0,
+                value=0.8
+            )
+            with gr.Accordion("sampling settings", open=False):
+                typical_filtering = gr.Checkbox(
+                    label="typical filtering ",
+                    value=False
+                )
+                typical_mass = gr.Slider(
+                    label="typical mass (should probably stay between 0.1 and 0.5)",
+                    minimum=0.01,
+                    maximum=0.99,
+                    value=0.15
+                )
+                typical_min_tokens = gr.Slider(
+                    label="typical min tokens (should probably stay between 1 and 256)",
+                    minimum=1,
+                    maximum=256,
+                    step=1,
+                    value=64
+                )
+            use_coarse2fine = gr.Checkbox(
+                label="use coarse2fine",
+                value=True
+            )
+            num_steps = gr.Slider(
+                label="number of steps (should normally be between 12 and 36)",
+                minimum=1,
+                maximum=128,
+                step=1,
+                value=36
+            )
+            dropout = gr.Slider(
+                label="mask dropout",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.01,
+                value=0.0
+            )
+        # generate button, output audio, notes and save controls
+        with gr.Column():
+            vamp_button = gr.Button("generate (vamp)!!!")
+            output_audio = gr.Audio(
+                label="output audio",
+                interactive=False,
+                type="filepath"
+            )
+            notes_text = gr.Textbox(
+                label="type any notes about the generated audio here",
+                value="",
+                interactive=True
+            )
+            save_button = gr.Button("save vamp")
+            download_file = gr.File(
+                label="vamp to download will appear here",
+                interactive=False
+            )
+            use_as_input_button = gr.Button("use output as input")
+            thank_you = gr.Markdown("")
+    _inputs = {
+            input_audio,
+            num_steps,
+            temp,
+            prefix_s, suffix_s,
+            rand_mask_intensity,
+            periodic_p, periodic_w,
+            n_conditioning_codebooks,
+            dropout,
+            use_coarse2fine,
+            stretch_factor,
+            onset_mask_width,
+            typical_filtering,
+            typical_mass,
+            typical_min_tokens,
+            beat_mask_width,
+            beat_mask_downbeats
+        }
+    # connect widgets
+    vamp_button.click(
+        fn=vamp,
+        inputs=_inputs,
+        outputs=[output_audio, audio_mask],
+    )
+    api_vamp_button = gr.Button("api vamp", visible=False)
+    api_vamp_button.click(
+        fn=api_vamp,
+        inputs=_inputs,
+        outputs=[output_audio],
+        api_name="vamp"
+    )
+    use_as_input_button.click(
+        fn=lambda x: x,
+        inputs=[output_audio],
+        outputs=[input_audio]
+    )
+    save_button.click(
+        fn=save_vamp,
+        inputs=_inputs | {notes_text, output_audio},
+        outputs=[thank_you, download_file]
+    )
+demo.queue().launch()

assets/example.wav ADDED Viewed

Binary file (883 kB). View file

conf/c2f.yml ADDED Viewed

	@@ -0,0 +1,14 @@

+$include:
+  - conf/vampnet.yml
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.embedding_dim: 1280
+VampNet.n_layers: 16
+VampNet.n_heads: 20
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0

conf/generated-v0/berta-goldman-speech/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+save_path: ./runs/berta-goldman-speech/c2f
+train/AudioLoader.sources:
+- /media/CHONK/hugo/Berta-Caceres-2015-Goldman-Speech.mp3
+val/AudioLoader.sources:
+- /media/CHONK/hugo/Berta-Caceres-2015-Goldman-Speech.mp3

conf/generated-v0/berta-goldman-speech/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+save_path: ./runs/berta-goldman-speech/coarse
+train/AudioLoader.sources:
+- /media/CHONK/hugo/Berta-Caceres-2015-Goldman-Speech.mp3
+val/AudioLoader.sources:
+- /media/CHONK/hugo/Berta-Caceres-2015-Goldman-Speech.mp3

conf/generated-v0/berta-goldman-speech/interface.yml ADDED Viewed

	@@ -0,0 +1,5 @@

+AudioLoader.sources:
+- /media/CHONK/hugo/Berta-Caceres-2015-Goldman-Speech.mp3
+Interface.coarse2fine_ckpt: ./runs/berta-goldman-speech/c2f/best/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/berta-goldman-speech/coarse/best/vampnet/weights.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated-v0/gamelan-xeno-canto/c2f.yml ADDED Viewed

	@@ -0,0 +1,17 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+save_path: ./runs/gamelan-xeno-canto/c2f
+train/AudioLoader.sources:
+- /media/CHONK/hugo/loras/Sound Tracker - Gamelan (Indonesia) [UEWCCSuHsuQ].mp3
+- /media/CHONK/hugo/loras/xeno-canto-2
+val/AudioLoader.sources:
+- /media/CHONK/hugo/loras/Sound Tracker - Gamelan (Indonesia) [UEWCCSuHsuQ].mp3
+- /media/CHONK/hugo/loras/xeno-canto-2

conf/generated-v0/gamelan-xeno-canto/coarse.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+save_path: ./runs/gamelan-xeno-canto/coarse
+train/AudioLoader.sources:
+- /media/CHONK/hugo/loras/Sound Tracker - Gamelan (Indonesia) [UEWCCSuHsuQ].mp3
+- /media/CHONK/hugo/loras/xeno-canto-2
+val/AudioLoader.sources:
+- /media/CHONK/hugo/loras/Sound Tracker - Gamelan (Indonesia) [UEWCCSuHsuQ].mp3
+- /media/CHONK/hugo/loras/xeno-canto-2

conf/generated-v0/gamelan-xeno-canto/interface.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+AudioLoader.sources:
+- /media/CHONK/hugo/loras/Sound Tracker - Gamelan (Indonesia) [UEWCCSuHsuQ].mp3
+- /media/CHONK/hugo/loras/xeno-canto-2
+Interface.coarse2fine_ckpt: ./runs/gamelan-xeno-canto/c2f/best/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/gamelan-xeno-canto/coarse/best/vampnet/weights.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated-v0/nasralla/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+save_path: ./runs/nasralla/c2f
+train/AudioLoader.sources:
+- /media/CHONK/hugo/nasralla
+val/AudioLoader.sources:
+- /media/CHONK/hugo/nasralla

conf/generated-v0/nasralla/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+save_path: ./runs/nasralla/coarse
+train/AudioLoader.sources:
+- /media/CHONK/hugo/nasralla
+val/AudioLoader.sources:
+- /media/CHONK/hugo/nasralla

conf/generated-v0/nasralla/interface.yml ADDED Viewed

	@@ -0,0 +1,5 @@

+AudioLoader.sources:
+- /media/CHONK/hugo/nasralla
+Interface.coarse2fine_ckpt: ./runs/nasralla/c2f/best/vampnet/weights.pth
+Interface.coarse_ckpt: ./runs/nasralla/coarse/best/vampnet/weights.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/breaks-steps/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/breaks-steps/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/breaks-steps
+val/AudioLoader.sources: *id001

conf/generated/breaks-steps/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/breaks-steps/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/breaks-steps
+val/AudioLoader.sources: *id001

conf/generated/breaks-steps/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/breaks-steps
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/breaks-steps/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/breaks-steps/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/bulgarian-tv-choir/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/bulgarian-tv-choir/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/bulgarian-female-tv-choir/
+val/AudioLoader.sources: *id001

conf/generated/bulgarian-tv-choir/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/bulgarian-tv-choir/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/bulgarian-female-tv-choir/
+val/AudioLoader.sources: *id001

conf/generated/bulgarian-tv-choir/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/bulgarian-female-tv-choir/
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/bulgarian-tv-choir/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/bulgarian-tv-choir/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/dariacore/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/dariacore/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/dariacore
+val/AudioLoader.sources: *id001

conf/generated/dariacore/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/dariacore/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/dariacore
+val/AudioLoader.sources: *id001

conf/generated/dariacore/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/dariacore
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/dariacore/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/dariacore/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/musica-bolero-marimba/c2f.yml ADDED Viewed

	@@ -0,0 +1,18 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/musica-bolero-marimba/c2f
+train/AudioLoader.sources:
+- /media/CHONK/hugo/loras/boleros
+- /media/CHONK/hugo/loras/marimba-honduras
+val/AudioLoader.sources:
+- /media/CHONK/hugo/loras/boleros
+- /media/CHONK/hugo/loras/marimba-honduras

conf/generated/musica-bolero-marimba/coarse.yml ADDED Viewed

	@@ -0,0 +1,11 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/musica-bolero-marimba/coarse
+train/AudioLoader.sources:
+- /media/CHONK/hugo/loras/boleros
+- /media/CHONK/hugo/loras/marimba-honduras
+val/AudioLoader.sources:
+- /media/CHONK/hugo/loras/boleros
+- /media/CHONK/hugo/loras/marimba-honduras

conf/generated/musica-bolero-marimba/interface.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+AudioLoader.sources:
+- /media/CHONK/hugo/loras/boleros
+- /media/CHONK/hugo/loras/marimba-honduras
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/musica-bolero-marimba/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/musica-bolero-marimba/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/panchos/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/panchos/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/panchos/
+val/AudioLoader.sources: *id001

conf/generated/panchos/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/panchos/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/panchos/
+val/AudioLoader.sources: *id001

conf/generated/panchos/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/panchos/
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/panchos/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/panchos/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/titi-monkey/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/titi-monkey/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/titi-monkey.mp3
+val/AudioLoader.sources: *id001

conf/generated/titi-monkey/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/titi-monkey/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/titi-monkey.mp3
+val/AudioLoader.sources: *id001

conf/generated/titi-monkey/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/titi-monkey.mp3
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/titi-monkey/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/titi-monkey/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/generated/xeno-canto/c2f.yml ADDED Viewed

	@@ -0,0 +1,15 @@

+$include:
+- conf/lora/lora.yml
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0
+VampNet.embedding_dim: 1280
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.n_heads: 20
+VampNet.n_layers: 16
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/c2f.pth
+save_path: ./runs/xeno-canto/c2f
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/xeno-canto-2/
+val/AudioLoader.sources: *id001

conf/generated/xeno-canto/coarse.yml ADDED Viewed

	@@ -0,0 +1,8 @@

+$include:
+- conf/lora/lora.yml
+fine_tune: true
+fine_tune_checkpoint: ./models/spotdl/coarse.pth
+save_path: ./runs/xeno-canto/coarse
+train/AudioLoader.sources: &id001
+- /media/CHONK/hugo/loras/xeno-canto-2/
+val/AudioLoader.sources: *id001

conf/generated/xeno-canto/interface.yml ADDED Viewed

	@@ -0,0 +1,7 @@

+AudioLoader.sources:
+- - /media/CHONK/hugo/loras/xeno-canto-2/
+Interface.coarse2fine_ckpt: ./models/spotdl/c2f.pth
+Interface.coarse2fine_lora_ckpt: ./runs/xeno-canto/c2f/latest/lora.pth
+Interface.coarse_ckpt: ./models/spotdl/coarse.pth
+Interface.coarse_lora_ckpt: ./runs/xeno-canto/coarse/latest/lora.pth
+Interface.codec_ckpt: ./models/spotdl/codec.pth

conf/interface.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+Interface.coarse_ckpt: ./models/vampnet/coarse.pth
+Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth
+Interface.codec_ckpt: ./models/vampnet/codec.pth
+Interface.coarse_chunk_size_s: 10
+Interface.coarse2fine_chunk_size_s: 3
+Interface.wavebeat_ckpt: ./models/wavebeat.pth
+# AudioLoader.sources:
+#   - /media/CHONK/null

conf/lora/birds.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/birds
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/birds

conf/lora/birdss.yml ADDED Viewed

	@@ -0,0 +1,12 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/birds
+  - /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/birds
+  - /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/

conf/lora/constructions.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/constructions/third.mp3
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/constructions/third.mp3

conf/lora/ella-baila-sola.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/ella-baila-sola.mp3
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/ella-baila-sola.mp3

conf/lora/gas-station.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/gas-station-sushi.mp3
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/gas-station-sushi.mp3

conf/lora/lora-is-this-charlie-parker.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/Charlie Parker - Donna Lee.mp3
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/this-is-charlie-parker/Charlie Parker - Donna Lee.mp3

conf/lora/lora.yml ADDED Viewed

	@@ -0,0 +1,22 @@

+$include:
+  - conf/vampnet.yml
+fine_tune: True
+train/AudioDataset.n_examples: 10000000
+val/AudioDataset.n_examples: 10
+NoamScheduler.warmup: 500
+batch_size: 7
+num_workers: 7
+epoch_length: 100
+save_audio_epochs: 10
+AdamW.lr: 0.0001
+# lets us organize sound classes into folders and choose from those sound classes uniformly
+AudioDataset.without_replacement: False
+max_epochs: 500

conf/lora/underworld.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/underworld.mp3
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/spotdl/subsets/underworld.mp3

conf/lora/xeno-canto/c2f.yml ADDED Viewed

	@@ -0,0 +1,21 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/xeno-canto-2
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/xeno-canto-2
+VampNet.n_codebooks: 14
+VampNet.n_conditioning_codebooks: 4
+VampNet.embedding_dim: 1280
+VampNet.n_layers: 16
+VampNet.n_heads: 20
+AudioDataset.duration: 3.0
+AudioDataset.loudness_cutoff: -40.0

conf/lora/xeno-canto/coarse.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+$include:
+  - conf/lora/lora.yml
+fine_tune: True
+train/AudioLoader.sources:
+  - /media/CHONK/hugo/xeno-canto-2
+val/AudioLoader.sources:
+  - /media/CHONK/hugo/xeno-canto-2

conf/vampnet-musdb-drums.yml ADDED Viewed

	@@ -0,0 +1,22 @@

+$include:
+  - conf/vampnet.yml
+VampNet.embedding_dim: 512
+VampNet.n_layers: 12
+VampNet.n_heads: 8
+AudioDataset.duration: 12.0
+train/AudioDataset.n_examples: 10000000
+train/AudioLoader.sources:
+  - /data/musdb18hq/train/**/*drums.wav
+val/AudioDataset.n_examples: 500
+val/AudioLoader.sources:
+  - /data/musdb18hq/test/**/*drums.wav
+test/AudioDataset.n_examples: 1000
+test/AudioLoader.sources:
+  - /data/musdb18hq/test/**/*drums.wav