Zeyue7 committed on
Commit
82158e3
·
1 Parent(s): 9d45348
audiocraft/grids_/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- """Dora Grids."""
 
 
 
 
 
 
 
audiocraft/grids_/_base_explorers.py DELETED
@@ -1,80 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from abc import ABC, abstractmethod
8
- import time
9
- import typing as tp
10
- from dora import Explorer
11
- import treetable as tt
12
-
13
-
14
- def get_sheep_ping(sheep) -> tp.Optional[str]:
15
- """Return the amount of time since the Sheep made some update
16
- to its log. Returns a str using the relevant time unit."""
17
- ping = None
18
- if sheep.log is not None and sheep.log.exists():
19
- delta = time.time() - sheep.log.stat().st_mtime
20
- if delta > 3600 * 24:
21
- ping = f'{delta / (3600 * 24):.1f}d'
22
- elif delta > 3600:
23
- ping = f'{delta / (3600):.1f}h'
24
- elif delta > 60:
25
- ping = f'{delta / 60:.1f}m'
26
- else:
27
- ping = f'{delta:.1f}s'
28
- return ping
29
-
30
-
31
- class BaseExplorer(ABC, Explorer):
32
- """Base explorer for AudioCraft grids.
33
-
34
- All task-specific solvers are expected to implement the `get_grid_metrics`
35
- method to specify logic about metrics to display for a given task.
36
-
37
- If additional stages are used, the child explorer must define how to handle
38
- these new stages in the `process_history` and `process_sheep` methods.
39
- """
40
- def stages(self):
41
- return ["train", "valid", "evaluate"]
42
-
43
- def get_grid_meta(self):
44
- """Returns the list of Meta information to display for each XP/job.
45
- """
46
- return [
47
- tt.leaf("index", align=">"),
48
- tt.leaf("name", wrap=140),
49
- tt.leaf("state"),
50
- tt.leaf("sig", align=">"),
51
- tt.leaf("sid", align="<"),
52
- ]
53
-
54
- @abstractmethod
55
- def get_grid_metrics(self):
56
- """Return the metrics that should be displayed in the tracking table.
57
- """
58
- ...
59
-
60
- def process_sheep(self, sheep, history):
61
- train = {
62
- "epoch": len(history),
63
- }
64
- parts = {"train": train}
65
- for metrics in history:
66
- for key, sub in metrics.items():
67
- part = parts.get(key, {})
68
- if 'duration' in sub:
69
- # Convert to minutes for readability.
70
- sub['duration'] = sub['duration'] / 60.
71
- part.update(sub)
72
- parts[key] = part
73
- ping = get_sheep_ping(sheep)
74
- if ping is not None:
75
- for name in self.stages():
76
- if name not in parts:
77
- parts[name] = {}
78
- # Add the ping to each part for convenience.
79
- parts[name]['ping'] = ping
80
- return parts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/audiogen/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- """AudioGen grids."""
 
 
 
 
 
 
 
audiocraft/grids_/audiogen/audiogen_base_16khz.py DELETED
@@ -1,23 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from ..musicgen._explorers import LMExplorer
8
- from ...environment import AudioCraftEnvironment
9
-
10
-
11
- @LMExplorer
12
- def explorer(launcher):
13
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
14
- launcher.slurm_(gpus=64, partition=partitions)
15
- launcher.bind_(solver='audiogen/audiogen_base_16khz')
16
- # replace this by the desired environmental sound dataset
17
- launcher.bind_(dset='internal/sounds_16khz')
18
-
19
- fsdp = {'autocast': False, 'fsdp.use': True}
20
- medium = {'model/lm/model_scale': 'medium'}
21
-
22
- launcher.bind_(fsdp)
23
- launcher(medium)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/audiogen/audiogen_pretrained_16khz_eval.py DELETED
@@ -1,68 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Evaluation with objective metrics for the pretrained AudioGen models.
9
- This grid takes the signatures from the training grid and runs the evaluation-only stage.
10
-
11
- When running the grid for the first time, please use:
12
- REGEN=1 dora grid audiogen.audiogen_pretrained_16khz_eval
13
- and re-use the REGEN=1 option when the grid is changed to force regenerating it.
14
-
15
- Note that you need the proper external metrics libraries set up to use all
16
- the objective metrics activated in this grid. Refer to the README for more information.
17
- """
18
-
19
- import os
20
-
21
- from ..musicgen._explorers import GenerationEvalExplorer
22
- from ...environment import AudioCraftEnvironment
23
- from ... import train
24
-
25
-
26
- def eval(launcher, batch_size: int = 32):
27
- opts = {
28
- 'dset': 'audio/audiocaps_16khz',
29
- 'solver/audiogen/evaluation': 'objective_eval',
30
- 'execute_only': 'evaluate',
31
- '+dataset.evaluate.batch_size': batch_size,
32
- '+metrics.fad.tf.batch_size': 32,
33
- }
34
- # binary for FAD computation: replace this path with your own path
35
- metrics_opts = {
36
- 'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
37
- }
38
- opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
39
- opt2 = {'transformer_lm.two_step_cfg': True}
40
-
41
- sub = launcher.bind(opts)
42
- sub.bind_(metrics_opts)
43
-
44
- # base objective metrics
45
- sub(opt1, opt2)
46
-
47
-
48
- @GenerationEvalExplorer
49
- def explorer(launcher):
50
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
51
- launcher.slurm_(gpus=4, partition=partitions)
52
-
53
- if 'REGEN' not in os.environ:
54
- folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
55
- with launcher.job_array():
56
- for sig in folder.iterdir():
57
- if not sig.is_symlink():
58
- continue
59
- xp = train.main.get_xp_from_sig(sig.name)
60
- launcher(xp.argv)
61
- return
62
-
63
- audiogen_base = launcher.bind(solver="audiogen/audiogen_base_16khz")
64
- audiogen_base.bind_({'autocast': False, 'fsdp.use': True})
65
-
66
- audiogen_base_medium = audiogen_base.bind({'continue_from': '//pretrained/facebook/audiogen-medium'})
67
- audiogen_base_medium.bind_({'model/lm/model_scale': 'medium'})
68
- eval(audiogen_base_medium, batch_size=128)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/compression/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- """EnCodec grids."""
 
 
 
 
 
 
 
audiocraft/grids_/compression/_explorers.py DELETED
@@ -1,55 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- import treetable as tt
8
-
9
- from .._base_explorers import BaseExplorer
10
-
11
-
12
- class CompressionExplorer(BaseExplorer):
13
- eval_metrics = ["sisnr", "visqol"]
14
-
15
- def stages(self):
16
- return ["train", "valid", "evaluate"]
17
-
18
- def get_grid_meta(self):
19
- """Returns the list of Meta information to display for each XP/job.
20
- """
21
- return [
22
- tt.leaf("index", align=">"),
23
- tt.leaf("name", wrap=140),
24
- tt.leaf("state"),
25
- tt.leaf("sig", align=">"),
26
- ]
27
-
28
- def get_grid_metrics(self):
29
- """Return the metrics that should be displayed in the tracking table.
30
- """
31
- return [
32
- tt.group(
33
- "train",
34
- [
35
- tt.leaf("epoch"),
36
- tt.leaf("bandwidth", ".2f"),
37
- tt.leaf("adv", ".4f"),
38
- tt.leaf("d_loss", ".4f"),
39
- ],
40
- align=">",
41
- ),
42
- tt.group(
43
- "valid",
44
- [
45
- tt.leaf("bandwidth", ".2f"),
46
- tt.leaf("adv", ".4f"),
47
- tt.leaf("msspec", ".4f"),
48
- tt.leaf("sisnr", ".2f"),
49
- ],
50
- align=">",
51
- ),
52
- tt.group(
53
- "evaluate", [tt.leaf(name, ".3f") for name in self.eval_metrics], align=">"
54
- ),
55
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/compression/debug.py DELETED
@@ -1,31 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Grid search file, simply list all the exp you want in `explorer`.
9
- Any new exp added there will be scheduled.
10
- You can cancel an experiment by commenting out its line.
11
-
12
- This grid is a minimal example for debugging compression task
13
- and how to override parameters directly in a grid.
14
- Learn more about dora grids: https://github.com/facebookresearch/dora
15
- """
16
-
17
- from ._explorers import CompressionExplorer
18
- from ...environment import AudioCraftEnvironment
19
-
20
-
21
- @CompressionExplorer
22
- def explorer(launcher):
23
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
24
- launcher.slurm_(gpus=2, partition=partitions)
25
- launcher.bind_(solver='compression/debug')
26
-
27
- with launcher.job_array():
28
- # base debug task using config from solver=compression/debug
29
- launcher()
30
- # we can override parameters in the grid to launch additional xps
31
- launcher({'rvq.bins': 2048, 'rvq.n_q': 4})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/compression/encodec_audiogen_16khz.py DELETED
@@ -1,29 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Grid search file, simply list all the exp you want in `explorer`.
9
- Any new exp added there will be scheduled.
10
- You can cancel an experiment by commenting out its line.
11
-
12
- This grid shows how to train the new AudioGen EnCodec model at 16 kHz.
13
- """
14
-
15
- from ._explorers import CompressionExplorer
16
- from ...environment import AudioCraftEnvironment
17
-
18
-
19
- @CompressionExplorer
20
- def explorer(launcher):
21
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
22
- launcher.slurm_(gpus=8, partition=partitions)
23
- # use configuration for AudioGen's EnCodec model trained on monophonic audio sampled at 16 kHz
24
- # AudioGen's EnCodec is trained with a total stride of 320 leading to a frame rate of 50 Hz
25
- launcher.bind_(solver='compression/encodec_audiogen_16khz')
26
- # replace this by the desired sound dataset
27
- launcher.bind_(dset='internal/sounds_16khz')
28
- # launch xp
29
- launcher()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/compression/encodec_base_24khz.py DELETED
@@ -1,28 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Grid search file, simply list all the exp you want in `explorer`.
9
- Any new exp added there will be scheduled.
10
- You can cancel an experiment by commenting out its line.
11
-
12
- This grid shows how to train a base causal EnCodec model at 24 kHz.
13
- """
14
-
15
- from ._explorers import CompressionExplorer
16
- from ...environment import AudioCraftEnvironment
17
-
18
-
19
- @CompressionExplorer
20
- def explorer(launcher):
21
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
22
- launcher.slurm_(gpus=8, partition=partitions)
23
- # base causal EnCodec trained on monophonic audio sampled at 24 kHz
24
- launcher.bind_(solver='compression/encodec_base_24khz')
25
- # replace this by the desired dataset
26
- launcher.bind_(dset='audio/example')
27
- # launch xp
28
- launcher()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/compression/encodec_musicgen_32khz.py DELETED
@@ -1,34 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Grid search file, simply list all the exp you want in `explorer`.
9
- Any new exp added there will be scheduled.
10
- You can cancel an experiment by commenting out its line.
11
-
12
- This grid shows how to train a MusicGen EnCodec model at 32 kHz.
13
- """
14
-
15
- from ._explorers import CompressionExplorer
16
- from ...environment import AudioCraftEnvironment
17
-
18
-
19
- @CompressionExplorer
20
- def explorer(launcher):
21
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
22
- launcher.slurm_(gpus=8, partition=partitions)
23
- # use configuration for MusicGen's EnCodec model trained on monophonic audio sampled at 32 kHz
24
- # MusicGen's EnCodec is trained with a total stride of 640 leading to a frame rate of 50 Hz
25
- launcher.bind_(solver='compression/encodec_musicgen_32khz')
26
- # replace this by the desired music dataset
27
- launcher.bind_(dset='internal/music_400k_32khz')
28
- # launch xp
29
- launcher()
30
- launcher({
31
- 'metrics.visqol.bin': '/data/home/jadecopet/local/usr/opt/visqol',
32
- 'label': 'visqol',
33
- 'evaluate.metrics.visqol': True
34
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/diffusion/4_bands_base_32khz.py DELETED
@@ -1,27 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Training of the 4 diffusion models described in
9
- "From Discrete Tokens to High-Fidelity Audio Using Multi-Band Diffusion"
10
- (paper link).
11
- """
12
-
13
- from ._explorers import DiffusionExplorer
14
-
15
-
16
- @DiffusionExplorer
17
- def explorer(launcher):
18
- launcher.slurm_(gpus=4, partition='learnfair')
19
-
20
- launcher.bind_({'solver': 'diffusion/default',
21
- 'dset': 'internal/music_10k_32khz'})
22
-
23
- with launcher.job_array():
24
- launcher({'filter.use': True, 'filter.idx_band': 0, "processor.use": False, 'processor.power_std': 0.4})
25
- launcher({'filter.use': True, 'filter.idx_band': 1, "processor.use": False, 'processor.power_std': 0.4})
26
- launcher({'filter.use': True, 'filter.idx_band': 2, "processor.use": True, 'processor.power_std': 0.4})
27
- launcher({'filter.use': True, 'filter.idx_band': 3, "processor.use": True, 'processor.power_std': 0.75})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/diffusion/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- """Diffusion grids."""
 
 
 
 
 
 
 
audiocraft/grids_/diffusion/_explorers.py DELETED
@@ -1,66 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- import treetable as tt
8
-
9
- from .._base_explorers import BaseExplorer
10
-
11
-
12
- class DiffusionExplorer(BaseExplorer):
13
- eval_metrics = ["sisnr", "visqol"]
14
-
15
- def stages(self):
16
- return ["train", "valid", "valid_ema", "evaluate", "evaluate_ema"]
17
-
18
- def get_grid_meta(self):
19
- """Returns the list of Meta information to display for each XP/job.
20
- """
21
- return [
22
- tt.leaf("index", align=">"),
23
- tt.leaf("name", wrap=140),
24
- tt.leaf("state"),
25
- tt.leaf("sig", align=">"),
26
- ]
27
-
28
- def get_grid_metrics(self):
29
- """Return the metrics that should be displayed in the tracking table.
30
- """
31
- return [
32
- tt.group(
33
- "train",
34
- [
35
- tt.leaf("epoch"),
36
- tt.leaf("loss", ".3%"),
37
- ],
38
- align=">",
39
- ),
40
- tt.group(
41
- "valid",
42
- [
43
- tt.leaf("loss", ".3%"),
44
- # tt.leaf("loss_0", ".3%"),
45
- ],
46
- align=">",
47
- ),
48
- tt.group(
49
- "valid_ema",
50
- [
51
- tt.leaf("loss", ".3%"),
52
- # tt.leaf("loss_0", ".3%"),
53
- ],
54
- align=">",
55
- ),
56
- tt.group(
57
- "evaluate", [tt.leaf("rvm", ".4f"), tt.leaf("rvm_0", ".4f"),
58
- tt.leaf("rvm_1", ".4f"), tt.leaf("rvm_2", ".4f"),
59
- tt.leaf("rvm_3", ".4f"), ], align=">"
60
- ),
61
- tt.group(
62
- "evaluate_ema", [tt.leaf("rvm", ".4f"), tt.leaf("rvm_0", ".4f"),
63
- tt.leaf("rvm_1", ".4f"), tt.leaf("rvm_2", ".4f"),
64
- tt.leaf("rvm_3", ".4f")], align=">"
65
- ),
66
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/__init__.py DELETED
@@ -1,6 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
- """MusicGen grids."""
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/_explorers.py DELETED
@@ -1,93 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- import typing as tp
8
-
9
- import treetable as tt
10
-
11
- from .._base_explorers import BaseExplorer
12
-
13
-
14
- class LMExplorer(BaseExplorer):
15
- eval_metrics: tp.List[str] = []
16
-
17
- def stages(self) -> tp.List[str]:
18
- return ['train', 'valid']
19
-
20
- def get_grid_metrics(self):
21
- """Return the metrics that should be displayed in the tracking table."""
22
- return [
23
- tt.group(
24
- 'train',
25
- [
26
- tt.leaf('epoch'),
27
- tt.leaf('duration', '.1f'), # duration in minutes
28
- tt.leaf('ping'),
29
- tt.leaf('ce', '.4f'), # cross entropy
30
- tt.leaf("ppl", '.3f'), # perplexity
31
- ],
32
- align='>',
33
- ),
34
- tt.group(
35
- 'valid',
36
- [
37
- tt.leaf('ce', '.4f'),
38
- tt.leaf('ppl', '.3f'),
39
- tt.leaf('best_ppl', '.3f'),
40
- ],
41
- align='>',
42
- ),
43
- ]
44
-
45
- def process_sheep(self, sheep, history):
46
- parts = super().process_sheep(sheep, history)
47
-
48
- track_by = {'ppl': 'lower'} # values should be in ['lower', 'higher']
49
- best_metrics = {k: (1 if v == 'lower' else -1) * float('inf') for k, v in track_by.items()}
50
-
51
- def comparator(mode, a, b):
52
- return a < b if mode == 'lower' else a > b
53
-
54
- for metrics in history:
55
- for key, sub in metrics.items():
56
- for metric in track_by:
57
- # for the validation set, keep track of best metrics (ppl in this example)
58
- # this is so we can conveniently compare metrics between runs in the grid
59
- if key == 'valid' and metric in sub and comparator(
60
- track_by[metric], sub[metric], best_metrics[metric]
61
- ):
62
- best_metrics[metric] = sub[metric]
63
-
64
- if 'valid' in parts:
65
- parts['valid'].update({f'best_{k}': v for k, v in best_metrics.items()})
66
- return parts
67
-
68
-
69
- class GenerationEvalExplorer(BaseExplorer):
70
- eval_metrics: tp.List[str] = []
71
-
72
- def stages(self) -> tp.List[str]:
73
- return ['evaluate']
74
-
75
- def get_grid_metrics(self):
76
- """Return the metrics that should be displayed in the tracking table."""
77
- return [
78
- tt.group(
79
- 'evaluate',
80
- [
81
- tt.leaf('epoch', '.3f'),
82
- tt.leaf('duration', '.1f'),
83
- tt.leaf('ping'),
84
- tt.leaf('ce', '.4f'),
85
- tt.leaf('ppl', '.3f'),
86
- tt.leaf('fad', '.3f'),
87
- tt.leaf('kld', '.3f'),
88
- tt.leaf('text_consistency', '.3f'),
89
- tt.leaf('chroma_cosine', '.3f'),
90
- ],
91
- align='>',
92
- ),
93
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_base_32khz.py DELETED
@@ -1,44 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from ._explorers import LMExplorer
8
- from ...environment import AudioCraftEnvironment
9
-
10
-
11
- @LMExplorer
12
- def explorer(launcher):
13
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
14
- launcher.slurm_(gpus=32, partition=partitions)
15
- launcher.bind_(solver='musicgen/musicgen_base_32khz')
16
- # replace this by the desired music dataset
17
- launcher.bind_(dset='internal/music_400k_32khz')
18
-
19
- fsdp = {'autocast': False, 'fsdp.use': True}
20
- small = {'model/lm/model_scale': 'small'}
21
- medium = {'model/lm/model_scale': 'medium'}
22
- large = {'model/lm/model_scale': 'large'}
23
-
24
- cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
25
- wd_low = {'conditioners.description.t5.word_dropout': 0.2}
26
-
27
- adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}
28
-
29
- launcher.bind_(fsdp)
30
-
31
- launcher.slurm_(gpus=32).bind_(label='32gpus')
32
- with launcher.job_array():
33
- sub = launcher.bind()
34
- sub()
35
-
36
- launcher.slurm_(gpus=64).bind_(label='64gpus')
37
- with launcher.job_array():
38
- sub = launcher.bind()
39
- sub(medium, adam)
40
-
41
- launcher.slurm_(gpus=96).bind_(label='96gpus')
42
- with launcher.job_array():
43
- sub = launcher.bind()
44
- sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_base_cached_32khz.py DELETED
@@ -1,67 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from ._explorers import LMExplorer
8
- from ...environment import AudioCraftEnvironment
9
-
10
-
11
- @LMExplorer
12
- def explorer(launcher):
13
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
14
- launcher.slurm_(gpus=32, partition=partitions)
15
- launcher.bind_(solver='musicgen/musicgen_base_32khz')
16
- # replace this by the desired music dataset
17
- launcher.bind_(dset='internal/music_400k_32khz')
18
-
19
- fsdp = {'autocast': False, 'fsdp.use': True}
20
- medium = {'model/lm/model_scale': 'medium'}
21
- large = {'model/lm/model_scale': 'large'}
22
-
23
- cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
24
- wd_low = {'conditioners.description.t5.word_dropout': 0.2}
25
-
26
- adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}
27
-
28
- # BEGINNING OF CACHE WRITING JOBS.
29
- cache_write = {
30
- 'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
31
- 'cache.write': True,
32
- 'generate.every': 500,
33
- 'evaluate.every': 500,
34
- 'logging.log_updates': 50,
35
- }
36
-
37
- cache_sub = launcher.bind({'model/lm/model_scale': 'xsmall', 'conditioner': 'none'})
38
- cache_sub.bind_({'deadlock.use': True})
39
- cache_sub.slurm_(gpus=8)
40
- with launcher.job_array():
41
- num_shards = 10 # total number of jobs running in parallel.
42
- for shard in range(0, num_shards):
43
- launcher(cache_write, {'cache.write_num_shards': num_shards, 'cache.write_shard': shard})
44
-
45
- # REMOVE THE FOLLOWING RETURN STATEMENT ONCE THE ABOVE JOBS ARE DONE,
46
- # OR SUFFICIENTLY AHEAD.
47
- return
48
-
49
- cache = {
50
- 'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
51
- }
52
- launcher.bind_(fsdp, cache)
53
-
54
- launcher.slurm_(gpus=32).bind_(label='32gpus')
55
- with launcher.job_array():
56
- sub = launcher.bind()
57
- sub()
58
-
59
- launcher.slurm_(gpus=64).bind_(label='64gpus')
60
- with launcher.job_array():
61
- sub = launcher.bind()
62
- sub(medium, adam)
63
-
64
- launcher.slurm_(gpus=96).bind_(label='96gpus')
65
- with launcher.job_array():
66
- sub = launcher.bind()
67
- sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_clapemb_32khz.py DELETED
@@ -1,32 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from ._explorers import LMExplorer
8
- from ...environment import AudioCraftEnvironment
9
-
10
-
11
- @LMExplorer
12
- def explorer(launcher):
13
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
14
- launcher.slurm_(gpus=32, partition=partitions)
15
- launcher.bind_(solver='musicgen/musicgen_base_32khz')
16
- # replace this by the desired music dataset
17
- launcher.bind_(dset='internal/music_400k_32khz')
18
- launcher.bind_(conditioner='clapemb2music')
19
-
20
- fsdp = {'autocast': False, 'fsdp.use': True}
21
- cache_path = {'conditioners.description.clap.cache_path':
22
- '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/clap_embed_music'}
23
- text_wav_training_opt = {'conditioners.description.clap.text_p': 0.5}
24
-
25
- launcher.bind_(fsdp)
26
-
27
- launcher.slurm_(gpus=32).bind_(label='32gpus')
28
- with launcher.job_array():
29
- launcher()
30
- launcher(text_wav_training_opt)
31
- launcher(cache_path)
32
- launcher(cache_path, text_wav_training_opt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_melody_32khz.py DELETED
@@ -1,65 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from ._explorers import LMExplorer
8
- from ...environment import AudioCraftEnvironment
9
-
10
-
11
- @LMExplorer
12
- def explorer(launcher):
13
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
14
- launcher.slurm_(gpus=32, partition=partitions)
15
- launcher.bind_(solver='musicgen/musicgen_melody_32khz')
16
- # replace this by the desired music dataset
17
- launcher.bind_(dset='internal/music_400k_32khz')
18
-
19
- fsdp = {'autocast': False, 'fsdp.use': True}
20
- medium = {'model/lm/model_scale': 'medium'}
21
- large = {'model/lm/model_scale': 'large'}
22
-
23
- cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
24
- wd_low = {'conditioners.description.t5.word_dropout': 0.2}
25
-
26
- adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
27
-
28
- cache_path = {'conditioners.self_wav.chroma_stem.cache_path':
29
- '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/chroma_stem'}
30
-
31
- # CACHE GENERATION JOBS
32
- n_cache_gen_jobs = 4
33
- gen_sub = launcher.slurm(gpus=1)
34
- gen_sub.bind_(
35
- cache_path, {
36
- # the cache is always computed over the whole file, so duration doesn't matter here.
37
- 'dataset.segment_duration': 2.,
38
- 'dataset.batch_size': 8,
39
- 'dataset.train.permutation_on_files': True, # try to not repeat files.
40
- 'optim.epochs': 10,
41
- 'model/lm/model_scale': 'xsmall',
42
-
43
- })
44
- with gen_sub.job_array():
45
- for gen_job in range(n_cache_gen_jobs):
46
- gen_sub({'dataset.train.shuffle_seed': gen_job})
47
-
48
- # ACTUAL TRAINING JOBS.
49
- launcher.bind_(fsdp)
50
-
51
- launcher.slurm_(gpus=32).bind_(label='32gpus')
52
- with launcher.job_array():
53
- sub = launcher.bind()
54
- sub()
55
- sub(cache_path)
56
-
57
- launcher.slurm_(gpus=64).bind_(label='64gpus')
58
- with launcher.job_array():
59
- sub = launcher.bind()
60
- sub(medium, adam)
61
-
62
- launcher.slurm_(gpus=96).bind_(label='96gpus')
63
- with launcher.job_array():
64
- sub = launcher.bind()
65
- sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_pretrained_32khz_eval.py DELETED
@@ -1,99 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- Evaluation with objective metrics for the pretrained MusicGen models.
9
- This grid takes the signatures from the training grid and runs the evaluation-only stage.
10
-
11
- When running the grid for the first time, please use:
12
- REGEN=1 dora grid musicgen.musicgen_pretrained_32khz_eval
13
- and re-use the REGEN=1 option when the grid is changed to force regenerating it.
14
-
15
- Note that you need the proper external metrics libraries set up to use all
16
- the objective metrics activated in this grid. Refer to the README for more information.
17
- """
18
-
19
- import os
20
-
21
- from ._explorers import GenerationEvalExplorer
22
- from ...environment import AudioCraftEnvironment
23
- from ... import train
24
-
25
-
26
- def eval(launcher, batch_size: int = 32, eval_melody: bool = False):
27
- opts = {
28
- 'dset': 'audio/musiccaps_32khz',
29
- 'solver/musicgen/evaluation': 'objective_eval',
30
- 'execute_only': 'evaluate',
31
- '+dataset.evaluate.batch_size': batch_size,
32
- '+metrics.fad.tf.batch_size': 16,
33
- }
34
- # chroma-specific evaluation
35
- chroma_opts = {
36
- 'dset': 'internal/music_400k_32khz',
37
- 'dataset.evaluate.segment_duration': 30,
38
- 'dataset.evaluate.num_samples': 1000,
39
- 'evaluate.metrics.chroma_cosine': True,
40
- 'evaluate.metrics.fad': False,
41
- 'evaluate.metrics.kld': False,
42
- 'evaluate.metrics.text_consistency': False,
43
- }
44
- # binary for FAD computation: replace this path with your own path
45
- metrics_opts = {
46
- 'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
47
- }
48
- opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
49
- opt2 = {'transformer_lm.two_step_cfg': True}
50
-
51
- sub = launcher.bind(opts)
52
- sub.bind_(metrics_opts)
53
-
54
- # base objective metrics
55
- sub(opt1, opt2)
56
-
57
- if eval_melody:
58
- # chroma-specific metrics
59
- sub(opt1, opt2, chroma_opts)
60
-
61
-
62
- @GenerationEvalExplorer
63
- def explorer(launcher):
64
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
65
- launcher.slurm_(gpus=4, partition=partitions)
66
-
67
- if 'REGEN' not in os.environ:
68
- folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
69
- with launcher.job_array():
70
- for sig in folder.iterdir():
71
- if not sig.is_symlink():
72
- continue
73
- xp = train.main.get_xp_from_sig(sig.name)
74
- launcher(xp.argv)
75
- return
76
-
77
- with launcher.job_array():
78
- musicgen_base = launcher.bind(solver="musicgen/musicgen_base_32khz")
79
- musicgen_base.bind_({'autocast': False, 'fsdp.use': True})
80
-
81
- # base musicgen models
82
- musicgen_base_small = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-small'})
83
- eval(musicgen_base_small, batch_size=128)
84
-
85
- musicgen_base_medium = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-medium'})
86
- musicgen_base_medium.bind_({'model/lm/model_scale': 'medium'})
87
- eval(musicgen_base_medium, batch_size=128)
88
-
89
- musicgen_base_large = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-large'})
90
- musicgen_base_large.bind_({'model/lm/model_scale': 'large'})
91
- eval(musicgen_base_large, batch_size=128)
92
-
93
- # melody musicgen model
94
- musicgen_melody = launcher.bind(solver="musicgen/musicgen_melody_32khz")
95
- musicgen_melody.bind_({'autocast': False, 'fsdp.use': True})
96
-
97
- musicgen_melody_medium = musicgen_melody.bind({'continue_from': '//pretrained/facebook/musicgen-melody'})
98
- musicgen_melody_medium.bind_({'model/lm/model_scale': 'medium'})
99
- eval(musicgen_melody_medium, batch_size=128, eval_melody=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/grids_/musicgen/musicgen_stereo_finetune_32khz.py DELETED
@@ -1,57 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from pathlib import Path
8
- from ._explorers import LMExplorer
9
- from ...environment import AudioCraftEnvironment
10
-
11
-
12
- @LMExplorer
13
- def explorer(launcher):
14
- partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
15
- launcher.slurm_(gpus=32, partition=partitions)
16
- launcher.bind_(solver='musicgen/musicgen_base_32khz')
17
- # replace this by the desired music dataset, which needs to be stereo
18
- launcher.bind_(dset='audio/example')
19
-
20
- fsdp = {'autocast': False, 'fsdp.use': True}
21
- medium = {'model/lm/model_scale': 'medium'}
22
- large = {'model/lm/model_scale': 'large'}
23
-
24
- cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
25
- wd_low = {'conditioners.description.t5.word_dropout': 0.2}
26
-
27
- adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
28
-
29
- stereo = {
30
- 'codebooks_pattern.delay.delays': [0, 0, 1, 1, 2, 2, 3, 3],
31
- 'transformer_lm.n_q': 8,
32
- 'interleave_stereo_codebooks.use': True,
33
- 'channels': 2,
34
- }
35
-
36
- # You must follow the instructions in docs/MUSICGEN.md about the creation
37
- # of the proper fine tuning checkpoints. We will assume they are stored under
38
- # ~/checkpoints/{mode_name}.
39
-
40
- checkpoints = Path.home() / 'checkpoints'
41
-
42
- launcher.bind_(fsdp, stereo, {'optim.epochs': 100})
43
-
44
- launcher.slurm_(gpus=32).bind_(label='32gpus')
45
- with launcher.job_array():
46
- sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-small.th')})
47
- sub()
48
-
49
- launcher.slurm_(gpus=64).bind_(label='64gpus')
50
- with launcher.job_array():
51
- sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-medium.th')})
52
- sub(medium, adam)
53
-
54
- launcher.slurm_(gpus=96).bind_(label='96gpus')
55
- with launcher.job_array():
56
- sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-large.th')})
57
- sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audiocraft/models/__init__.py CHANGED
@@ -14,5 +14,5 @@ from .encodec import (
14
  from .audiogen import AudioGen
15
  from .lm import LMModel
16
  from .multibanddiffusion import MultiBandDiffusion
17
- from .musicgen import VidMuse
18
  from .unet import DiffusionUnet
 
14
  from .audiogen import AudioGen
15
  from .lm import LMModel
16
  from .multibanddiffusion import MultiBandDiffusion
17
+ from .vidmuse import VidMuse
18
  from .unet import DiffusionUnet
audiocraft/models/{musicgen.py → vidmuse.py} RENAMED
File without changes