vidmuse
Browse files- audiocraft/grids_/__init__.py +0 -6
- audiocraft/grids_/_base_explorers.py +0 -80
- audiocraft/grids_/audiogen/__init__.py +0 -6
- audiocraft/grids_/audiogen/audiogen_base_16khz.py +0 -23
- audiocraft/grids_/audiogen/audiogen_pretrained_16khz_eval.py +0 -68
- audiocraft/grids_/compression/__init__.py +0 -6
- audiocraft/grids_/compression/_explorers.py +0 -55
- audiocraft/grids_/compression/debug.py +0 -31
- audiocraft/grids_/compression/encodec_audiogen_16khz.py +0 -29
- audiocraft/grids_/compression/encodec_base_24khz.py +0 -28
- audiocraft/grids_/compression/encodec_musicgen_32khz.py +0 -34
- audiocraft/grids_/diffusion/4_bands_base_32khz.py +0 -27
- audiocraft/grids_/diffusion/__init__.py +0 -6
- audiocraft/grids_/diffusion/_explorers.py +0 -66
- audiocraft/grids_/musicgen/__init__.py +0 -6
- audiocraft/grids_/musicgen/_explorers.py +0 -93
- audiocraft/grids_/musicgen/musicgen_base_32khz.py +0 -44
- audiocraft/grids_/musicgen/musicgen_base_cached_32khz.py +0 -67
- audiocraft/grids_/musicgen/musicgen_clapemb_32khz.py +0 -32
- audiocraft/grids_/musicgen/musicgen_melody_32khz.py +0 -65
- audiocraft/grids_/musicgen/musicgen_pretrained_32khz_eval.py +0 -99
- audiocraft/grids_/musicgen/musicgen_stereo_finetune_32khz.py +0 -57
- audiocraft/models/__init__.py +1 -1
- audiocraft/models/{musicgen.py → vidmuse.py} +0 -0
audiocraft/grids_/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
"""Dora Grids."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/_base_explorers.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from abc import ABC, abstractmethod
|
| 8 |
-
import time
|
| 9 |
-
import typing as tp
|
| 10 |
-
from dora import Explorer
|
| 11 |
-
import treetable as tt
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def get_sheep_ping(sheep) -> tp.Optional[str]:
    """Return how long ago the Sheep last updated its log file.

    Args:
        sheep: Object exposing a `log` attribute that is either None or a
            path-like object supporting `stat()` (e.g. a `pathlib.Path`).

    Returns:
        The time elapsed since the log's last modification, formatted with one
        decimal in the largest fitting unit ('d', 'h', 'm' or 's'), or None
        when the Sheep has no log or the log file does not exist.
    """
    if sheep.log is None:
        return None
    try:
        # Stat directly instead of checking `exists()` first: the log file can
        # disappear between the two calls, which would otherwise raise here.
        mtime = sheep.log.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        return None

    delta = time.time() - mtime
    if delta > 3600 * 24:
        return f'{delta / (3600 * 24):.1f}d'
    if delta > 3600:
        return f'{delta / 3600:.1f}h'
    if delta > 60:
        return f'{delta / 60:.1f}m'
    return f'{delta:.1f}s'
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
class BaseExplorer(ABC, Explorer):
    """Base explorer for AudioCraft grids.

    All task specific solvers are expected to implement the `get_grid_metrics`
    method to specify logic about metrics to display for a given task.

    If additional stages are used, the child explorer must define how to handle
    these new stages in the `process_history` and `process_sheep` methods.
    """
    def stages(self):
        """Return the names of the stages reported by this explorer."""
        return ["train", "valid", "evaluate"]

    def get_grid_meta(self):
        """Returns the list of Meta information to display for each XP/job.
        """
        return [
            tt.leaf("index", align=">"),
            tt.leaf("name", wrap=140),
            tt.leaf("state"),
            tt.leaf("sig", align=">"),
            tt.leaf("sid", align="<"),
        ]

    @abstractmethod
    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table.
        """
        ...

    def process_sheep(self, sheep, history):
        """Aggregate the history of a Sheep into one dict of metrics per stage.

        Args:
            sheep: The Sheep being displayed; only used to compute the ping
                through `get_sheep_ping`.
            history: Sequence of per-epoch mappings `{stage: {metric: value}}`.
                It is left untouched by this method.

        Returns:
            A dict mapping each stage name to its merged metrics, where later
            history entries override earlier ones. 'train' always holds the
            number of epochs, any 'duration' is expressed in minutes, and when
            a ping is available it is added to every stage from `stages()`.
        """
        parts = {"train": {"epoch": len(history)}}
        for metrics in history:
            for key, sub in metrics.items():
                part = parts.setdefault(key, {})
                part.update(sub)
                if 'duration' in sub:
                    # Convert to minutes for readability. The converted value is
                    # written to the aggregated part only, never back into
                    # `history`, so processing the same history twice does not
                    # divide the duration a second time.
                    part['duration'] = sub['duration'] / 60.
        ping = get_sheep_ping(sheep)
        if ping is not None:
            for name in self.stages():
                # Add the ping to each part for convenience.
                parts.setdefault(name, {})['ping'] = ping
        return parts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/audiogen/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
"""AudioGen grids."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/audiogen/audiogen_base_16khz.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from ..musicgen._explorers import LMExplorer
|
| 8 |
-
from ...environment import AudioCraftEnvironment
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@LMExplorer
def explorer(launcher):
    """Schedule the AudioGen base 16 kHz XP: 64 GPUs, FSDP, medium model scale."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=64, partition=slurm_partitions)
    launcher.bind_(solver='audiogen/audiogen_base_16khz')
    # replace this by the desired environmental sound dataset
    launcher.bind_(dset='internal/sounds_16khz')

    # Options shared by every XP of this grid.
    fsdp_opts = {'autocast': False, 'fsdp.use': True}
    launcher.bind_(fsdp_opts)

    # The single XP scheduled by this grid.
    medium_scale = {'model/lm/model_scale': 'medium'}
    launcher(medium_scale)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/audiogen/audiogen_pretrained_16khz_eval.py
DELETED
|
@@ -1,68 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Evaluation with objective metrics for the pretrained AudioGen models.
|
| 9 |
-
This grid takes signature from the training grid and runs evaluation-only stage.
|
| 10 |
-
|
| 11 |
-
When running the grid for the first time, please use:
|
| 12 |
-
REGEN=1 dora grid audiogen.audiogen_pretrained_16khz_eval
|
| 13 |
-
and re-use the REGEN=1 option when the grid is changed to force regenerating it.
|
| 14 |
-
|
| 15 |
-
Note that you need the proper metrics external libraries setup to use all
|
| 16 |
-
the objective metrics activated in this grid. Refer to the README for more information.
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
import os
|
| 20 |
-
|
| 21 |
-
from ..musicgen._explorers import GenerationEvalExplorer
|
| 22 |
-
from ...environment import AudioCraftEnvironment
|
| 23 |
-
from ... import train
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def eval(launcher, batch_size: int = 32):
    """Schedule one evaluation-only XP with objective metrics.

    Args:
        launcher: Launcher to derive the evaluation XP from; it is not modified,
            the options are bound on the object returned by `launcher.bind`.
        batch_size: Value bound to `+dataset.evaluate.batch_size`.
    """
    # binary for FAD computation: replace this path with your own path
    fad_opts = {
        'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
    }
    eval_opts = {
        'dset': 'audio/audiocaps_16khz',
        'solver/audiogen/evaluation': 'objective_eval',
        'execute_only': 'evaluate',
        '+dataset.evaluate.batch_size': batch_size,
        '+metrics.fad.tf.batch_size': 32,
    }
    sampling_opts = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
    cfg_opts = {'transformer_lm.two_step_cfg': True}

    evaluator = launcher.bind(eval_opts)
    evaluator.bind_(fad_opts)

    # base objective metrics
    evaluator(sampling_opts, cfg_opts)
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
@GenerationEvalExplorer
def explorer(launcher):
    """Schedule the evaluation XPs for the pretrained AudioGen medium model.

    When the REGEN environment variable is not set, the XPs are rebuilt from
    the signatures symlinked in this grid's folder. Otherwise the grid is
    regenerated from the pretrained checkpoint.
    """
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=4, partition=slurm_partitions)

    if 'REGEN' not in os.environ:
        grid_name = __name__.split('.', 2)[-1]
        grid_folder = train.main.dora.dir / 'grids' / grid_name
        with launcher.job_array():
            for entry in grid_folder.iterdir():
                # Only symlinked entries are treated as XP signatures.
                if entry.is_symlink():
                    launcher(train.main.get_xp_from_sig(entry.name).argv)
        return

    base = launcher.bind(solver="audiogen/audiogen_base_16khz")
    base.bind_({'autocast': False, 'fsdp.use': True})

    medium = base.bind({'continue_from': '//pretrained/facebook/audiogen-medium'})
    medium.bind_({'model/lm/model_scale': 'medium'})
    eval(medium, batch_size=128)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
"""EnCodec grids."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/_explorers.py
DELETED
|
@@ -1,55 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
import treetable as tt
|
| 8 |
-
|
| 9 |
-
from .._base_explorers import BaseExplorer
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
class CompressionExplorer(BaseExplorer):
    """Explorer describing the tracking table of the compression grids."""
    # Metrics shown in the 'evaluate' group.
    eval_metrics = ["sisnr", "visqol"]

    def stages(self):
        """Return the names of the stages reported by this explorer."""
        return ["train", "valid", "evaluate"]

    def get_grid_meta(self):
        """Returns the list of Meta information to display for each XP/job.
        """
        index = tt.leaf("index", align=">")
        name = tt.leaf("name", wrap=140)
        state = tt.leaf("state")
        sig = tt.leaf("sig", align=">")
        return [index, name, state, sig]

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table.
        """
        train_leaves = [
            tt.leaf("epoch"),
            tt.leaf("bandwidth", ".2f"),
            tt.leaf("adv", ".4f"),
            tt.leaf("d_loss", ".4f"),
        ]
        valid_leaves = [
            tt.leaf("bandwidth", ".2f"),
            tt.leaf("adv", ".4f"),
            tt.leaf("msspec", ".4f"),
            tt.leaf("sisnr", ".2f"),
        ]
        evaluate_leaves = [tt.leaf(metric, ".3f") for metric in self.eval_metrics]
        return [
            tt.group("train", train_leaves, align=">"),
            tt.group("valid", valid_leaves, align=">"),
            tt.group("evaluate", evaluate_leaves, align=">"),
        ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/debug.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Grid search file, simply list all the exp you want in `explorer`.
|
| 9 |
-
Any new exp added there will be scheduled.
|
| 10 |
-
You can cancel an experiment by commenting out its line.
|
| 11 |
-
|
| 12 |
-
This grid is a minimal example for debugging compression task
|
| 13 |
-
and how to override parameters directly in a grid.
|
| 14 |
-
Learn more about dora grids: https://github.com/facebookresearch/dora
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from ._explorers import CompressionExplorer
|
| 18 |
-
from ...environment import AudioCraftEnvironment
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
@CompressionExplorer
def explorer(launcher):
    """Schedule the two XPs of the compression debug grid in one job array."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=2, partition=slurm_partitions)
    launcher.bind_(solver='compression/debug')

    rvq_overrides = {'rvq.bins': 2048, 'rvq.n_q': 4}
    with launcher.job_array():
        # base debug task using config from solver=compression/debug
        launcher()
        # we can override parameters in the grid to launch additional xps
        launcher(rvq_overrides)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/encodec_audiogen_16khz.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Grid search file, simply list all the exp you want in `explorer`.
|
| 9 |
-
Any new exp added there will be scheduled.
|
| 10 |
-
You can cancel an experiment by commenting out its line.
|
| 11 |
-
|
| 12 |
-
This grid shows how to train the new AudioGen EnCodec model at 16 kHz.
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
from ._explorers import CompressionExplorer
|
| 16 |
-
from ...environment import AudioCraftEnvironment
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
@CompressionExplorer
def explorer(launcher):
    """Schedule the training XP of AudioGen's EnCodec model at 16 kHz on 8 GPUs."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=8, partition=slurm_partitions)

    # use configuration for AudioGen's EnCodec model trained on monophonic audio sampled at 16 kHz
    # AudioGen's EnCodec is trained with a total stride of 320 leading to a frame rate of 50 hz
    solver_name = 'compression/encodec_audiogen_16khz'
    launcher.bind_(solver=solver_name)

    # replace this by the desired sound dataset
    dataset_name = 'internal/sounds_16khz'
    launcher.bind_(dset=dataset_name)

    # launch xp
    launcher()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/encodec_base_24khz.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Grid search file, simply list all the exp you want in `explorer`.
|
| 9 |
-
Any new exp added there will be scheduled.
|
| 10 |
-
You can cancel an experiment by commenting out its line.
|
| 11 |
-
|
| 12 |
-
This grid shows how to train a base causal EnCodec model at 24 kHz.
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
from ._explorers import CompressionExplorer
|
| 16 |
-
from ...environment import AudioCraftEnvironment
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
@CompressionExplorer
def explorer(launcher):
    """Schedule the training XP of the base causal EnCodec model at 24 kHz on 8 GPUs."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=8, partition=slurm_partitions)

    # base causal EnCodec trained on monophonic audio sampled at 24 kHz
    solver_name = 'compression/encodec_base_24khz'
    launcher.bind_(solver=solver_name)

    # replace this by the desired dataset
    dataset_name = 'audio/example'
    launcher.bind_(dset=dataset_name)

    # launch xp
    launcher()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/compression/encodec_musicgen_32khz.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Grid search file, simply list all the exp you want in `explorer`.
|
| 9 |
-
Any new exp added there will be scheduled.
|
| 10 |
-
You can cancel an experiment by commenting out its line.
|
| 11 |
-
|
| 12 |
-
This grid shows how to train a MusicGen EnCodec model at 32 kHz.
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
from ._explorers import CompressionExplorer
|
| 16 |
-
from ...environment import AudioCraftEnvironment
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
@CompressionExplorer
def explorer(launcher):
    """Schedule MusicGen's EnCodec 32 kHz training XPs: a base XP and a 'visqol' variant."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=8, partition=slurm_partitions)

    # use configuration for MusicGen's EnCodec model trained on monophonic audio sampled at 32 kHz
    # MusicGen's EnCodec is trained with a total stride of 640 leading to a frame rate of 50 hz
    launcher.bind_(solver='compression/encodec_musicgen_32khz')
    # replace this by the desired music dataset
    launcher.bind_(dset='internal/music_400k_32khz')

    # launch xp
    launcher()

    # Same XP with the ViSQOL evaluation metric switched on.
    visqol_overrides = {
        'metrics.visqol.bin': '/data/home/jadecopet/local/usr/opt/visqol',
        'label': 'visqol',
        'evaluate.metrics.visqol': True
    }
    launcher(visqol_overrides)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/diffusion/4_bands_base_32khz.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Training of the 4 diffusion models described in
|
| 9 |
-
"From Discrete Tokens to High-Fidelity Audio Using Multi-Band Diffusion"
|
| 10 |
-
(paper link).
|
| 11 |
-
"""
|
| 12 |
-
|
| 13 |
-
from ._explorers import DiffusionExplorer
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
@DiffusionExplorer
def explorer(launcher):
    """Schedule one diffusion XP per `filter.idx_band` value (0 to 3) in a job array."""
    launcher.slurm_(gpus=4, partition='learnfair')

    shared_opts = {'solver': 'diffusion/default',
                   'dset': 'internal/music_10k_32khz'}
    launcher.bind_(shared_opts)

    # (filter.idx_band, processor.use, processor.power_std) for each XP.
    band_settings = [
        (0, False, 0.4),
        (1, False, 0.4),
        (2, True, 0.4),
        (3, True, 0.75),
    ]
    with launcher.job_array():
        for idx_band, use_processor, power_std in band_settings:
            launcher({
                'filter.use': True,
                'filter.idx_band': idx_band,
                "processor.use": use_processor,
                'processor.power_std': power_std,
            })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/diffusion/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
"""Diffusion grids."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/diffusion/_explorers.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
import treetable as tt
|
| 8 |
-
|
| 9 |
-
from .._base_explorers import BaseExplorer
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
class DiffusionExplorer(BaseExplorer):
    """Explorer describing the tracking table of the diffusion grids."""
    eval_metrics = ["sisnr", "visqol"]

    def stages(self):
        """Return the names of the stages reported by this explorer."""
        return ["train", "valid", "valid_ema", "evaluate", "evaluate_ema"]

    def get_grid_meta(self):
        """Returns the list of Meta information to display for each XP/job.
        """
        index = tt.leaf("index", align=">")
        name = tt.leaf("name", wrap=140)
        state = tt.leaf("state")
        sig = tt.leaf("sig", align=">")
        return [index, name, state, sig]

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table.
        """
        def rvm_leaves():
            # Fresh leaves on every call: "rvm" followed by "rvm_0" .. "rvm_3".
            names = ["rvm"] + [f"rvm_{band}" for band in range(4)]
            return [tt.leaf(name, ".4f") for name in names]

        train_group = tt.group(
            "train",
            [tt.leaf("epoch"), tt.leaf("loss", ".3%")],
            align=">",
        )
        valid_group = tt.group("valid", [tt.leaf("loss", ".3%")], align=">")
        valid_ema_group = tt.group("valid_ema", [tt.leaf("loss", ".3%")], align=">")
        evaluate_group = tt.group("evaluate", rvm_leaves(), align=">")
        evaluate_ema_group = tt.group("evaluate_ema", rvm_leaves(), align=">")
        return [
            train_group,
            valid_group,
            valid_ema_group,
            evaluate_group,
            evaluate_ema_group,
        ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/__init__.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
"""MusicGen grids."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/_explorers.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
import typing as tp
|
| 8 |
-
|
| 9 |
-
import treetable as tt
|
| 10 |
-
|
| 11 |
-
from .._base_explorers import BaseExplorer
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class LMExplorer(BaseExplorer):
    """Explorer describing the tracking table of the language model grids."""
    eval_metrics: tp.List[str] = []

    def stages(self) -> tp.List[str]:
        """Return the names of the stages reported by this explorer."""
        return ['train', 'valid']

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table."""
        train_leaves = [
            tt.leaf('epoch'),
            tt.leaf('duration', '.1f'),  # duration in minutes
            tt.leaf('ping'),
            tt.leaf('ce', '.4f'),  # cross entropy
            tt.leaf("ppl", '.3f'),  # perplexity
        ]
        valid_leaves = [
            tt.leaf('ce', '.4f'),
            tt.leaf('ppl', '.3f'),
            tt.leaf('best_ppl', '.3f'),
        ]
        return [
            tt.group('train', train_leaves, align='>'),
            tt.group('valid', valid_leaves, align='>'),
        ]

    def process_sheep(self, sheep, history):
        """Extend the base aggregation with the best validation metrics.

        For each tracked metric, the best value seen in the 'valid' entries of
        `history` is stored under `best_<metric>` in the 'valid' part, when
        that part exists.
        """
        parts = super().process_sheep(sheep, history)

        track_by = {'ppl': 'lower'}  # values should be in ['lower', 'higher']
        # Start from the worst possible value for each tracking mode.
        best_metrics = {}
        for name, mode in track_by.items():
            best_metrics[name] = float('inf') if mode == 'lower' else -float('inf')

        def improves(mode, candidate, incumbent):
            if mode == 'lower':
                return candidate < incumbent
            return candidate > incumbent

        # for the validation set, keep track of best metrics (ppl in this example)
        # this is so we can conveniently compare metrics between runs in the grid
        for metrics in history:
            for stage, values in metrics.items():
                if stage != 'valid':
                    continue
                for name, mode in track_by.items():
                    if name in values and improves(mode, values[name], best_metrics[name]):
                        best_metrics[name] = values[name]

        if 'valid' in parts:
            parts['valid'].update({f'best_{k}': v for k, v in best_metrics.items()})
        return parts
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
class GenerationEvalExplorer(BaseExplorer):
    """Explorer describing the tracking table of the evaluation-only grids."""
    eval_metrics: tp.List[str] = []

    def stages(self) -> tp.List[str]:
        """Return the names of the stages reported by this explorer."""
        return ['evaluate']

    def get_grid_metrics(self):
        """Return the metrics that should be displayed in the tracking table."""
        evaluate_leaves = [
            tt.leaf('epoch', '.3f'),
            tt.leaf('duration', '.1f'),
            tt.leaf('ping'),
        ]
        # The remaining columns are formatted differently: 'ce' uses four
        # decimals, the other metrics use three.
        evaluate_leaves.append(tt.leaf('ce', '.4f'))
        for metric in ('ppl', 'fad', 'kld', 'text_consistency', 'chroma_cosine'):
            evaluate_leaves.append(tt.leaf(metric, '.3f'))
        return [tt.group('evaluate', evaluate_leaves, align='>')]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_base_32khz.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from ._explorers import LMExplorer
|
| 8 |
-
from ...environment import AudioCraftEnvironment
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@LMExplorer
def explorer(launcher):
    """Schedule the MusicGen base 32 kHz XPs as three job arrays (32, 64 and 96 GPUs)."""
    slurm_partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
    launcher.slurm_(gpus=32, partition=slurm_partitions)
    launcher.bind_(solver='musicgen/musicgen_base_32khz')
    # replace this by the desired music dataset
    launcher.bind_(dset='internal/music_400k_32khz')

    fsdp = {'autocast': False, 'fsdp.use': True}
    # `small` is not used by any XP below.
    small = {'model/lm/model_scale': 'small'}
    medium = {'model/lm/model_scale': 'medium'}
    large = {'model/lm/model_scale': 'large'}

    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
    wd_low = {'conditioners.description.t5.word_dropout': 0.2}

    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}

    launcher.bind_(fsdp)

    # (number of GPUs, label, overrides passed to the XP) for each job array.
    job_arrays = [
        (32, '32gpus', ()),
        (64, '64gpus', (medium, adam)),
        (96, '96gpus', (large, cfg_low, wd_low, adam, {'optim.max_norm': 3})),
    ]
    for gpus, label, overrides in job_arrays:
        launcher.slurm_(gpus=gpus).bind_(label=label)
        with launcher.job_array():
            xp_launcher = launcher.bind()
            xp_launcher(*overrides)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_base_cached_32khz.py
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from ._explorers import LMExplorer
|
| 8 |
-
from ...environment import AudioCraftEnvironment
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@LMExplorer
|
| 12 |
-
def explorer(launcher):
|
| 13 |
-
partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
|
| 14 |
-
launcher.slurm_(gpus=32, partition=partitions)
|
| 15 |
-
launcher.bind_(solver='musicgen/musicgen_base_32khz')
|
| 16 |
-
# replace this by the desired music dataset
|
| 17 |
-
launcher.bind_(dset='internal/music_400k_32khz')
|
| 18 |
-
|
| 19 |
-
fsdp = {'autocast': False, 'fsdp.use': True}
|
| 20 |
-
medium = {'model/lm/model_scale': 'medium'}
|
| 21 |
-
large = {'model/lm/model_scale': 'large'}
|
| 22 |
-
|
| 23 |
-
cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
|
| 24 |
-
wd_low = {'conditioners.description.t5.word_dropout': 0.2}
|
| 25 |
-
|
| 26 |
-
adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}
|
| 27 |
-
|
| 28 |
-
# BEGINNING OF CACHE WRITING JOBS.
|
| 29 |
-
cache_write = {
|
| 30 |
-
'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
|
| 31 |
-
'cache.write': True,
|
| 32 |
-
'generate.every': 500,
|
| 33 |
-
'evaluate.every': 500,
|
| 34 |
-
'logging.log_updates': 50,
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
cache_sub = launcher.bind({'model/lm/model_scale': 'xsmall', 'conditioner': 'none'})
|
| 38 |
-
cache_sub.bind_({'deadlock.use': True})
|
| 39 |
-
cache_sub.slurm_(gpus=8)
|
| 40 |
-
with launcher.job_array():
|
| 41 |
-
num_shards = 10 # total number of jobs running in parallel.
|
| 42 |
-
for shard in range(0, num_shards):
|
| 43 |
-
launcher(cache_write, {'cache.write_num_shards': num_shards, 'cache.write_shard': shard})
|
| 44 |
-
|
| 45 |
-
# REMOVE THE FOLLOWING RETURN STATEMENT ONCE THE ABOVE JOBS ARE DONE,
|
| 46 |
-
# OR SUFFICIENTLY AHEAD.
|
| 47 |
-
return
|
| 48 |
-
|
| 49 |
-
cache = {
|
| 50 |
-
'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
|
| 51 |
-
}
|
| 52 |
-
launcher.bind_(fsdp, cache)
|
| 53 |
-
|
| 54 |
-
launcher.slurm_(gpus=32).bind_(label='32gpus')
|
| 55 |
-
with launcher.job_array():
|
| 56 |
-
sub = launcher.bind()
|
| 57 |
-
sub()
|
| 58 |
-
|
| 59 |
-
launcher.slurm_(gpus=64).bind_(label='64gpus')
|
| 60 |
-
with launcher.job_array():
|
| 61 |
-
sub = launcher.bind()
|
| 62 |
-
sub(medium, adam)
|
| 63 |
-
|
| 64 |
-
launcher.slurm_(gpus=96).bind_(label='96gpus')
|
| 65 |
-
with launcher.job_array():
|
| 66 |
-
sub = launcher.bind()
|
| 67 |
-
sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_clapemb_32khz.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from ._explorers import LMExplorer
|
| 8 |
-
from ...environment import AudioCraftEnvironment
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@LMExplorer
|
| 12 |
-
def explorer(launcher):
|
| 13 |
-
partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
|
| 14 |
-
launcher.slurm_(gpus=32, partition=partitions)
|
| 15 |
-
launcher.bind_(solver='musicgen/musicgen_base_32khz')
|
| 16 |
-
# replace this by the desired music dataset
|
| 17 |
-
launcher.bind_(dset='internal/music_400k_32khz')
|
| 18 |
-
launcher.bind_(conditioner='clapemb2music')
|
| 19 |
-
|
| 20 |
-
fsdp = {'autocast': False, 'fsdp.use': True}
|
| 21 |
-
cache_path = {'conditioners.description.clap.cache_path':
|
| 22 |
-
'/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/clap_embed_music'}
|
| 23 |
-
text_wav_training_opt = {'conditioners.description.clap.text_p': 0.5}
|
| 24 |
-
|
| 25 |
-
launcher.bind_(fsdp)
|
| 26 |
-
|
| 27 |
-
launcher.slurm_(gpus=32).bind_(label='32gpus')
|
| 28 |
-
with launcher.job_array():
|
| 29 |
-
launcher()
|
| 30 |
-
launcher(text_wav_training_opt)
|
| 31 |
-
launcher(cache_path)
|
| 32 |
-
launcher(cache_path, text_wav_training_opt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_melody_32khz.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from ._explorers import LMExplorer
|
| 8 |
-
from ...environment import AudioCraftEnvironment
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@LMExplorer
|
| 12 |
-
def explorer(launcher):
|
| 13 |
-
partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
|
| 14 |
-
launcher.slurm_(gpus=32, partition=partitions)
|
| 15 |
-
launcher.bind_(solver='musicgen/musicgen_melody_32khz')
|
| 16 |
-
# replace this by the desired music dataset
|
| 17 |
-
launcher.bind_(dset='internal/music_400k_32khz')
|
| 18 |
-
|
| 19 |
-
fsdp = {'autocast': False, 'fsdp.use': True}
|
| 20 |
-
medium = {'model/lm/model_scale': 'medium'}
|
| 21 |
-
large = {'model/lm/model_scale': 'large'}
|
| 22 |
-
|
| 23 |
-
cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
|
| 24 |
-
wd_low = {'conditioners.description.t5.word_dropout': 0.2}
|
| 25 |
-
|
| 26 |
-
adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
|
| 27 |
-
|
| 28 |
-
cache_path = {'conditioners.self_wav.chroma_stem.cache_path':
|
| 29 |
-
'/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/chroma_stem'}
|
| 30 |
-
|
| 31 |
-
# CACHE GENERATION JOBS
|
| 32 |
-
n_cache_gen_jobs = 4
|
| 33 |
-
gen_sub = launcher.slurm(gpus=1)
|
| 34 |
-
gen_sub.bind_(
|
| 35 |
-
cache_path, {
|
| 36 |
-
# the cache is always computed over the whole file, so duration doesn't matter here.
|
| 37 |
-
'dataset.segment_duration': 2.,
|
| 38 |
-
'dataset.batch_size': 8,
|
| 39 |
-
'dataset.train.permutation_on_files': True, # try to not repeat files.
|
| 40 |
-
'optim.epochs': 10,
|
| 41 |
-
'model/lm/model_scale': 'xsmall',
|
| 42 |
-
|
| 43 |
-
})
|
| 44 |
-
with gen_sub.job_array():
|
| 45 |
-
for gen_job in range(n_cache_gen_jobs):
|
| 46 |
-
gen_sub({'dataset.train.shuffle_seed': gen_job})
|
| 47 |
-
|
| 48 |
-
# ACTUAL TRAINING JOBS.
|
| 49 |
-
launcher.bind_(fsdp)
|
| 50 |
-
|
| 51 |
-
launcher.slurm_(gpus=32).bind_(label='32gpus')
|
| 52 |
-
with launcher.job_array():
|
| 53 |
-
sub = launcher.bind()
|
| 54 |
-
sub()
|
| 55 |
-
sub(cache_path)
|
| 56 |
-
|
| 57 |
-
launcher.slurm_(gpus=64).bind_(label='64gpus')
|
| 58 |
-
with launcher.job_array():
|
| 59 |
-
sub = launcher.bind()
|
| 60 |
-
sub(medium, adam)
|
| 61 |
-
|
| 62 |
-
launcher.slurm_(gpus=96).bind_(label='96gpus')
|
| 63 |
-
with launcher.job_array():
|
| 64 |
-
sub = launcher.bind()
|
| 65 |
-
sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_pretrained_32khz_eval.py
DELETED
|
@@ -1,99 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
Evaluation with objective metrics for the pretrained MusicGen models.
|
| 9 |
-
This grid takes signature from the training grid and runs evaluation-only stage.
|
| 10 |
-
|
| 11 |
-
When running the grid for the first time, please use:
|
| 12 |
-
REGEN=1 dora grid musicgen.musicgen_pretrained_32khz_eval
|
| 13 |
-
and re-use the REGEN=1 option when the grid is changed to force regenerating it.
|
| 14 |
-
|
| 15 |
-
Note that you need the proper metrics external libraries setup to use all
|
| 16 |
-
the objective metrics activated in this grid. Refer to the README for more information.
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
import os
|
| 20 |
-
|
| 21 |
-
from ._explorers import GenerationEvalExplorer
|
| 22 |
-
from ...environment import AudioCraftEnvironment
|
| 23 |
-
from ... import train
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def eval(launcher, batch_size: int = 32, eval_melody: bool = False):
|
| 27 |
-
opts = {
|
| 28 |
-
'dset': 'audio/musiccaps_32khz',
|
| 29 |
-
'solver/musicgen/evaluation': 'objective_eval',
|
| 30 |
-
'execute_only': 'evaluate',
|
| 31 |
-
'+dataset.evaluate.batch_size': batch_size,
|
| 32 |
-
'+metrics.fad.tf.batch_size': 16,
|
| 33 |
-
}
|
| 34 |
-
# chroma-specific evaluation
|
| 35 |
-
chroma_opts = {
|
| 36 |
-
'dset': 'internal/music_400k_32khz',
|
| 37 |
-
'dataset.evaluate.segment_duration': 30,
|
| 38 |
-
'dataset.evaluate.num_samples': 1000,
|
| 39 |
-
'evaluate.metrics.chroma_cosine': True,
|
| 40 |
-
'evaluate.metrics.fad': False,
|
| 41 |
-
'evaluate.metrics.kld': False,
|
| 42 |
-
'evaluate.metrics.text_consistency': False,
|
| 43 |
-
}
|
| 44 |
-
# binary for FAD computation: replace this path with your own path
|
| 45 |
-
metrics_opts = {
|
| 46 |
-
'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
|
| 47 |
-
}
|
| 48 |
-
opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
|
| 49 |
-
opt2 = {'transformer_lm.two_step_cfg': True}
|
| 50 |
-
|
| 51 |
-
sub = launcher.bind(opts)
|
| 52 |
-
sub.bind_(metrics_opts)
|
| 53 |
-
|
| 54 |
-
# base objective metrics
|
| 55 |
-
sub(opt1, opt2)
|
| 56 |
-
|
| 57 |
-
if eval_melody:
|
| 58 |
-
# chroma-specific metrics
|
| 59 |
-
sub(opt1, opt2, chroma_opts)
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
@GenerationEvalExplorer
|
| 63 |
-
def explorer(launcher):
|
| 64 |
-
partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
|
| 65 |
-
launcher.slurm_(gpus=4, partition=partitions)
|
| 66 |
-
|
| 67 |
-
if 'REGEN' not in os.environ:
|
| 68 |
-
folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
|
| 69 |
-
with launcher.job_array():
|
| 70 |
-
for sig in folder.iterdir():
|
| 71 |
-
if not sig.is_symlink():
|
| 72 |
-
continue
|
| 73 |
-
xp = train.main.get_xp_from_sig(sig.name)
|
| 74 |
-
launcher(xp.argv)
|
| 75 |
-
return
|
| 76 |
-
|
| 77 |
-
with launcher.job_array():
|
| 78 |
-
musicgen_base = launcher.bind(solver="musicgen/musicgen_base_32khz")
|
| 79 |
-
musicgen_base.bind_({'autocast': False, 'fsdp.use': True})
|
| 80 |
-
|
| 81 |
-
# base musicgen models
|
| 82 |
-
musicgen_base_small = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-small'})
|
| 83 |
-
eval(musicgen_base_small, batch_size=128)
|
| 84 |
-
|
| 85 |
-
musicgen_base_medium = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-medium'})
|
| 86 |
-
musicgen_base_medium.bind_({'model/lm/model_scale': 'medium'})
|
| 87 |
-
eval(musicgen_base_medium, batch_size=128)
|
| 88 |
-
|
| 89 |
-
musicgen_base_large = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-large'})
|
| 90 |
-
musicgen_base_large.bind_({'model/lm/model_scale': 'large'})
|
| 91 |
-
eval(musicgen_base_large, batch_size=128)
|
| 92 |
-
|
| 93 |
-
# melody musicgen model
|
| 94 |
-
musicgen_melody = launcher.bind(solver="musicgen/musicgen_melody_32khz")
|
| 95 |
-
musicgen_melody.bind_({'autocast': False, 'fsdp.use': True})
|
| 96 |
-
|
| 97 |
-
musicgen_melody_medium = musicgen_melody.bind({'continue_from': '//pretrained/facebook/musicgen-melody'})
|
| 98 |
-
musicgen_melody_medium.bind_({'model/lm/model_scale': 'medium'})
|
| 99 |
-
eval(musicgen_melody_medium, batch_size=128, eval_melody=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/grids_/musicgen/musicgen_stereo_finetune_32khz.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
from ._explorers import LMExplorer
|
| 9 |
-
from ...environment import AudioCraftEnvironment
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
@LMExplorer
|
| 13 |
-
def explorer(launcher):
|
| 14 |
-
partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
|
| 15 |
-
launcher.slurm_(gpus=32, partition=partitions)
|
| 16 |
-
launcher.bind_(solver='musicgen/musicgen_base_32khz')
|
| 17 |
-
# replace this by the desired music dataset, which needs to be stereo
|
| 18 |
-
launcher.bind_(dset='audio/example')
|
| 19 |
-
|
| 20 |
-
fsdp = {'autocast': False, 'fsdp.use': True}
|
| 21 |
-
medium = {'model/lm/model_scale': 'medium'}
|
| 22 |
-
large = {'model/lm/model_scale': 'large'}
|
| 23 |
-
|
| 24 |
-
cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
|
| 25 |
-
wd_low = {'conditioners.description.t5.word_dropout': 0.2}
|
| 26 |
-
|
| 27 |
-
adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
|
| 28 |
-
|
| 29 |
-
stereo = {
|
| 30 |
-
'codebooks_pattern.delay.delays': [0, 0, 1, 1, 2, 2, 3, 3],
|
| 31 |
-
'transformer_lm.n_q': 8,
|
| 32 |
-
'interleave_stereo_codebooks.use': True,
|
| 33 |
-
'channels': 2,
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
# You must follow the instructions in docs/MUSICGEN.md about the creation
|
| 37 |
-
# of the proper fine tuning checkpoints. We will assume they are stored under
|
| 38 |
-
# ~/checkpoints/{mode_name}.
|
| 39 |
-
|
| 40 |
-
checkpoints = Path.home() / 'checkpoints'
|
| 41 |
-
|
| 42 |
-
launcher.bind_(fsdp, stereo, {'optim.epochs': 100})
|
| 43 |
-
|
| 44 |
-
launcher.slurm_(gpus=32).bind_(label='32gpus')
|
| 45 |
-
with launcher.job_array():
|
| 46 |
-
sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-small.th')})
|
| 47 |
-
sub()
|
| 48 |
-
|
| 49 |
-
launcher.slurm_(gpus=64).bind_(label='64gpus')
|
| 50 |
-
with launcher.job_array():
|
| 51 |
-
sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-medium.th')})
|
| 52 |
-
sub(medium, adam)
|
| 53 |
-
|
| 54 |
-
launcher.slurm_(gpus=96).bind_(label='96gpus')
|
| 55 |
-
with launcher.job_array():
|
| 56 |
-
sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-large.th')})
|
| 57 |
-
sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audiocraft/models/__init__.py
CHANGED
|
@@ -14,5 +14,5 @@ from .encodec import (
|
|
| 14 |
from .audiogen import AudioGen
|
| 15 |
from .lm import LMModel
|
| 16 |
from .multibanddiffusion import MultiBandDiffusion
|
| 17 |
-
from .musicgen import MusicGen
|
| 18 |
from .unet import DiffusionUnet
|
|
|
|
| 14 |
from .audiogen import AudioGen
|
| 15 |
from .lm import LMModel
|
| 16 |
from .multibanddiffusion import MultiBandDiffusion
|
| 17 |
+
from .vidmuse import VidMuse
|
| 18 |
from .unet import DiffusionUnet
|
audiocraft/models/{musicgen.py → vidmuse.py}
RENAMED
|
File without changes
|