Zeyue7 committed on Feb 28, 2025

Commit

82158e3

1 Parent(s): 9d45348

vidmuse

Browse files

Files changed (24) hide show

audiocraft/grids_/__init__.py +0 -6
audiocraft/grids_/_base_explorers.py +0 -80
audiocraft/grids_/audiogen/__init__.py +0 -6
audiocraft/grids_/audiogen/audiogen_base_16khz.py +0 -23
audiocraft/grids_/audiogen/audiogen_pretrained_16khz_eval.py +0 -68
audiocraft/grids_/compression/__init__.py +0 -6
audiocraft/grids_/compression/_explorers.py +0 -55
audiocraft/grids_/compression/debug.py +0 -31
audiocraft/grids_/compression/encodec_audiogen_16khz.py +0 -29
audiocraft/grids_/compression/encodec_base_24khz.py +0 -28
audiocraft/grids_/compression/encodec_musicgen_32khz.py +0 -34
audiocraft/grids_/diffusion/4_bands_base_32khz.py +0 -27
audiocraft/grids_/diffusion/__init__.py +0 -6
audiocraft/grids_/diffusion/_explorers.py +0 -66
audiocraft/grids_/musicgen/__init__.py +0 -6
audiocraft/grids_/musicgen/_explorers.py +0 -93
audiocraft/grids_/musicgen/musicgen_base_32khz.py +0 -44
audiocraft/grids_/musicgen/musicgen_base_cached_32khz.py +0 -67
audiocraft/grids_/musicgen/musicgen_clapemb_32khz.py +0 -32
audiocraft/grids_/musicgen/musicgen_melody_32khz.py +0 -65
audiocraft/grids_/musicgen/musicgen_pretrained_32khz_eval.py +0 -99
audiocraft/grids_/musicgen/musicgen_stereo_finetune_32khz.py +0 -57
audiocraft/models/__init__.py +1 -1
audiocraft/models/{musicgen.py → vidmuse.py} +0 -0

audiocraft/grids_/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""Dora Grids."""

audiocraft/grids_/_base_explorers.py DELETED Viewed

@@ -1,80 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from abc import ABC, abstractmethod
-import time
-import typing as tp
-from dora import Explorer
-import treetable as tt
-def get_sheep_ping(sheep) -> tp.Optional[str]:
-    """Return the amount of time since the Sheep made some update
-    to its log. Returns a str using the relevant time unit."""
-    ping = None
-    if sheep.log is not None and sheep.log.exists():
-        delta = time.time() - sheep.log.stat().st_mtime
-        if delta > 3600 * 24:
-            ping = f'{delta / (3600 * 24):.1f}d'
-        elif delta > 3600:
-            ping = f'{delta / (3600):.1f}h'
-        elif delta > 60:
-            ping = f'{delta / 60:.1f}m'
-        else:
-            ping = f'{delta:.1f}s'
-    return ping
-class BaseExplorer(ABC, Explorer):
-    """Base explorer for AudioCraft grids.
-    All task specific solvers are expected to implement the `get_grid_metrics`
-    method to specify logic about metrics to display for a given task.
-    If additional stages are used, the child explorer must define how to handle
-    these new stages in the `process_history` and `process_sheep` methods.
-    """
-    def stages(self):
-        return ["train", "valid", "evaluate"]
-    def get_grid_meta(self):
-        """Returns the list of Meta information to display for each XP/job.
-        """
-        return [
-            tt.leaf("index", align=">"),
-            tt.leaf("name", wrap=140),
-            tt.leaf("state"),
-            tt.leaf("sig", align=">"),
-            tt.leaf("sid", align="<"),
-        ]
-    @abstractmethod
-    def get_grid_metrics(self):
-        """Return the metrics that should be displayed in the tracking table.
-        """
-        ...
-    def process_sheep(self, sheep, history):
-        train = {
-            "epoch": len(history),
-        }
-        parts = {"train": train}
-        for metrics in history:
-            for key, sub in metrics.items():
-                part = parts.get(key, {})
-                if 'duration' in sub:
-                    # Convert to minutes for readability.
-                    sub['duration'] = sub['duration'] / 60.
-                part.update(sub)
-                parts[key] = part
-        ping = get_sheep_ping(sheep)
-        if ping is not None:
-            for name in self.stages():
-                if name not in parts:
-                    parts[name] = {}
-                # Add the ping to each part for convenience.
-                parts[name]['ping'] = ping
-        return parts

audiocraft/grids_/audiogen/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""AudioGen grids."""

audiocraft/grids_/audiogen/audiogen_base_16khz.py DELETED Viewed

@@ -1,23 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from ..musicgen._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=64, partition=partitions)
-    launcher.bind_(solver='audiogen/audiogen_base_16khz')
-    # replace this by the desired environmental sound dataset
-    launcher.bind_(dset='internal/sounds_16khz')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    medium = {'model/lm/model_scale': 'medium'}
-    launcher.bind_(fsdp)
-    launcher(medium)

audiocraft/grids_/audiogen/audiogen_pretrained_16khz_eval.py DELETED Viewed

@@ -1,68 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Evaluation with objective metrics for the pretrained AudioGen models.
-This grid takes signature from the training grid and runs evaluation-only stage.
-When running the grid for the first time, please use:
-REGEN=1 dora grid audiogen.audiogen_pretrained_16khz_eval
-and re-use the REGEN=1 option when the grid is changed to force regenerating it.
-Note that you need the proper metrics external libraries setup to use all
-the objective metrics activated in this grid. Refer to the README for more information.
-"""
-import os
-from ..musicgen._explorers import GenerationEvalExplorer
-from ...environment import AudioCraftEnvironment
-from ... import train
-def eval(launcher, batch_size: int = 32):
-    opts = {
-        'dset': 'audio/audiocaps_16khz',
-        'solver/audiogen/evaluation': 'objective_eval',
-        'execute_only': 'evaluate',
-        '+dataset.evaluate.batch_size': batch_size,
-        '+metrics.fad.tf.batch_size': 32,
-    }
-    # binary for FAD computation: replace this path with your own path
-    metrics_opts = {
-        'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
-    }
-    opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
-    opt2 = {'transformer_lm.two_step_cfg': True}
-    sub = launcher.bind(opts)
-    sub.bind_(metrics_opts)
-    # base objective metrics
-    sub(opt1, opt2)
-@GenerationEvalExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=4, partition=partitions)
-    if 'REGEN' not in os.environ:
-        folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
-        with launcher.job_array():
-            for sig in folder.iterdir():
-                if not sig.is_symlink():
-                    continue
-                xp = train.main.get_xp_from_sig(sig.name)
-                launcher(xp.argv)
-        return
-    audiogen_base = launcher.bind(solver="audiogen/audiogen_base_16khz")
-    audiogen_base.bind_({'autocast': False, 'fsdp.use': True})
-    audiogen_base_medium = audiogen_base.bind({'continue_from': '//pretrained/facebook/audiogen-medium'})
-    audiogen_base_medium.bind_({'model/lm/model_scale': 'medium'})
-    eval(audiogen_base_medium, batch_size=128)

audiocraft/grids_/compression/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""EnCodec grids."""

audiocraft/grids_/compression/_explorers.py DELETED Viewed

@@ -1,55 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import treetable as tt
-from .._base_explorers import BaseExplorer
-class CompressionExplorer(BaseExplorer):
-    eval_metrics = ["sisnr", "visqol"]
-    def stages(self):
-        return ["train", "valid", "evaluate"]
-    def get_grid_meta(self):
-        """Returns the list of Meta information to display for each XP/job.
-        """
-        return [
-            tt.leaf("index", align=">"),
-            tt.leaf("name", wrap=140),
-            tt.leaf("state"),
-            tt.leaf("sig", align=">"),
-        ]
-    def get_grid_metrics(self):
-        """Return the metrics that should be displayed in the tracking table.
-        """
-        return [
-            tt.group(
-                "train",
-                [
-                    tt.leaf("epoch"),
-                    tt.leaf("bandwidth", ".2f"),
-                    tt.leaf("adv", ".4f"),
-                    tt.leaf("d_loss", ".4f"),
-                ],
-                align=">",
-            ),
-            tt.group(
-                "valid",
-                [
-                    tt.leaf("bandwidth", ".2f"),
-                    tt.leaf("adv", ".4f"),
-                    tt.leaf("msspec", ".4f"),
-                    tt.leaf("sisnr", ".2f"),
-                ],
-                align=">",
-            ),
-            tt.group(
-                "evaluate", [tt.leaf(name, ".3f") for name in self.eval_metrics], align=">"
-            ),
-        ]

audiocraft/grids_/compression/debug.py DELETED Viewed

@@ -1,31 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Grid search file, simply list all the exp you want in `explorer`.
-Any new exp added there will be scheduled.
-You can cancel and experiment by commenting its line.
-This grid is a minimal example for debugging compression task
-and how to override parameters directly in a grid.
-Learn more about dora grids: https://github.com/facebookresearch/dora
-"""
-from ._explorers import CompressionExplorer
-from ...environment import AudioCraftEnvironment
-@CompressionExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=2, partition=partitions)
-    launcher.bind_(solver='compression/debug')
-    with launcher.job_array():
-        # base debug task using config from solver=compression/debug
-        launcher()
-        # we can override parameters in the grid to launch additional xps
-        launcher({'rvq.bins': 2048, 'rvq.n_q': 4})

audiocraft/grids_/compression/encodec_audiogen_16khz.py DELETED Viewed

@@ -1,29 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Grid search file, simply list all the exp you want in `explorer`.
-Any new exp added there will be scheduled.
-You can cancel and experiment by commenting its line.
-This grid shows how to train the new AudioGen EnCodec model at 16 kHz.
-"""
-from ._explorers import CompressionExplorer
-from ...environment import AudioCraftEnvironment
-@CompressionExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=8, partition=partitions)
-    # use configuration for AudioGen's EnCodec model trained on monophonic audio sampled at 16 kHz
-    # AudioGen's EnCodec is trained with a total stride of 320 leading to a frame rate of 50 hz
-    launcher.bind_(solver='compression/encodec_audiogen_16khz')
-    # replace this by the desired sound dataset
-    launcher.bind_(dset='internal/sounds_16khz')
-    # launch xp
-    launcher()

audiocraft/grids_/compression/encodec_base_24khz.py DELETED Viewed

@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Grid search file, simply list all the exp you want in `explorer`.
-Any new exp added there will be scheduled.
-You can cancel and experiment by commenting its line.
-This grid shows how to train a base causal EnCodec model at 24 kHz.
-"""
-from ._explorers import CompressionExplorer
-from ...environment import AudioCraftEnvironment
-@CompressionExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=8, partition=partitions)
-    # base causal EnCodec trained on monophonic audio sampled at 24 kHz
-    launcher.bind_(solver='compression/encodec_base_24khz')
-    # replace this by the desired dataset
-    launcher.bind_(dset='audio/example')
-    # launch xp
-    launcher()

audiocraft/grids_/compression/encodec_musicgen_32khz.py DELETED Viewed

@@ -1,34 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Grid search file, simply list all the exp you want in `explorer`.
-Any new exp added there will be scheduled.
-You can cancel and experiment by commenting its line.
-This grid shows how to train a MusicGen EnCodec model at 32 kHz.
-"""
-from ._explorers import CompressionExplorer
-from ...environment import AudioCraftEnvironment
-@CompressionExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=8, partition=partitions)
-    # use configuration for MusicGen's EnCodec model trained on monophonic audio sampled at 32 kHz
-    # MusicGen's EnCodec is trained with a total stride of 640 leading to a frame rate of 50 hz
-    launcher.bind_(solver='compression/encodec_musicgen_32khz')
-    # replace this by the desired music dataset
-    launcher.bind_(dset='internal/music_400k_32khz')
-    # launch xp
-    launcher()
-    launcher({
-        'metrics.visqol.bin': '/data/home/jadecopet/local/usr/opt/visqol',
-        'label': 'visqol',
-        'evaluate.metrics.visqol': True
-    })

audiocraft/grids_/diffusion/4_bands_base_32khz.py DELETED Viewed

@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Training of the 4 diffusion models described in
-"From Discrete Tokens to High-Fidelity Audio Using Multi-Band Diffusion"
-(paper link).
-"""
-from ._explorers import DiffusionExplorer
-@DiffusionExplorer
-def explorer(launcher):
-    launcher.slurm_(gpus=4, partition='learnfair')
-    launcher.bind_({'solver': 'diffusion/default',
-                    'dset': 'internal/music_10k_32khz'})
-    with launcher.job_array():
-        launcher({'filter.use': True, 'filter.idx_band': 0, "processor.use": False, 'processor.power_std': 0.4})
-        launcher({'filter.use': True, 'filter.idx_band': 1, "processor.use": False, 'processor.power_std': 0.4})
-        launcher({'filter.use': True, 'filter.idx_band': 2, "processor.use": True, 'processor.power_std': 0.4})
-        launcher({'filter.use': True, 'filter.idx_band': 3, "processor.use": True, 'processor.power_std': 0.75})

audiocraft/grids_/diffusion/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""Diffusion grids."""

audiocraft/grids_/diffusion/_explorers.py DELETED Viewed

@@ -1,66 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import treetable as tt
-from .._base_explorers import BaseExplorer
-class DiffusionExplorer(BaseExplorer):
-    eval_metrics = ["sisnr", "visqol"]
-    def stages(self):
-        return ["train", "valid", "valid_ema", "evaluate", "evaluate_ema"]
-    def get_grid_meta(self):
-        """Returns the list of Meta information to display for each XP/job.
-        """
-        return [
-            tt.leaf("index", align=">"),
-            tt.leaf("name", wrap=140),
-            tt.leaf("state"),
-            tt.leaf("sig", align=">"),
-        ]
-    def get_grid_metrics(self):
-        """Return the metrics that should be displayed in the tracking table.
-        """
-        return [
-            tt.group(
-                "train",
-                [
-                    tt.leaf("epoch"),
-                    tt.leaf("loss", ".3%"),
-                ],
-                align=">",
-            ),
-            tt.group(
-                "valid",
-                [
-                    tt.leaf("loss", ".3%"),
-                    # tt.leaf("loss_0", ".3%"),
-                ],
-                align=">",
-            ),
-            tt.group(
-                "valid_ema",
-                [
-                    tt.leaf("loss", ".3%"),
-                    # tt.leaf("loss_0", ".3%"),
-                ],
-                align=">",
-            ),
-            tt.group(
-                "evaluate", [tt.leaf("rvm", ".4f"), tt.leaf("rvm_0", ".4f"),
-                             tt.leaf("rvm_1", ".4f"), tt.leaf("rvm_2", ".4f"),
-                             tt.leaf("rvm_3", ".4f"), ], align=">"
-            ),
-            tt.group(
-                "evaluate_ema", [tt.leaf("rvm", ".4f"), tt.leaf("rvm_0", ".4f"),
-                                 tt.leaf("rvm_1", ".4f"), tt.leaf("rvm_2", ".4f"),
-                                 tt.leaf("rvm_3", ".4f")], align=">"
-            ),
-        ]

audiocraft/grids_/musicgen/__init__.py DELETED Viewed

@@ -1,6 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""MusicGen grids."""

audiocraft/grids_/musicgen/_explorers.py DELETED Viewed

@@ -1,93 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import typing as tp
-import treetable as tt
-from .._base_explorers import BaseExplorer
-class LMExplorer(BaseExplorer):
-    eval_metrics: tp.List[str] = []
-    def stages(self) -> tp.List[str]:
-        return ['train', 'valid']
-    def get_grid_metrics(self):
-        """Return the metrics that should be displayed in the tracking table."""
-        return [
-            tt.group(
-                'train',
-                [
-                    tt.leaf('epoch'),
-                    tt.leaf('duration', '.1f'),  # duration in minutes
-                    tt.leaf('ping'),
-                    tt.leaf('ce', '.4f'),  # cross entropy
-                    tt.leaf("ppl", '.3f'),  # perplexity
-                ],
-                align='>',
-            ),
-            tt.group(
-                'valid',
-                [
-                    tt.leaf('ce', '.4f'),
-                    tt.leaf('ppl', '.3f'),
-                    tt.leaf('best_ppl', '.3f'),
-                ],
-                align='>',
-            ),
-        ]
-    def process_sheep(self, sheep, history):
-        parts = super().process_sheep(sheep, history)
-        track_by = {'ppl': 'lower'}  # values should be in ['lower', 'higher']
-        best_metrics = {k: (1 if v == 'lower' else -1) * float('inf') for k, v in track_by.items()}
-        def comparator(mode, a, b):
-            return a < b if mode == 'lower' else a > b
-        for metrics in history:
-            for key, sub in metrics.items():
-                for metric in track_by:
-                    # for the validation set, keep track of best metrics (ppl in this example)
-                    # this is so we can conveniently compare metrics between runs in the grid
-                    if key == 'valid' and metric in sub and comparator(
-                        track_by[metric], sub[metric], best_metrics[metric]
-                    ):
-                        best_metrics[metric] = sub[metric]
-        if 'valid' in parts:
-            parts['valid'].update({f'best_{k}': v for k, v in best_metrics.items()})
-        return parts
-class GenerationEvalExplorer(BaseExplorer):
-    eval_metrics: tp.List[str] = []
-    def stages(self) -> tp.List[str]:
-        return ['evaluate']
-    def get_grid_metrics(self):
-        """Return the metrics that should be displayed in the tracking table."""
-        return [
-            tt.group(
-                'evaluate',
-                [
-                    tt.leaf('epoch', '.3f'),
-                    tt.leaf('duration', '.1f'),
-                    tt.leaf('ping'),
-                    tt.leaf('ce', '.4f'),
-                    tt.leaf('ppl', '.3f'),
-                    tt.leaf('fad', '.3f'),
-                    tt.leaf('kld', '.3f'),
-                    tt.leaf('text_consistency', '.3f'),
-                    tt.leaf('chroma_cosine', '.3f'),
-                ],
-                align='>',
-            ),
-        ]

audiocraft/grids_/musicgen/musicgen_base_32khz.py DELETED Viewed

@@ -1,44 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from ._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=32, partition=partitions)
-    launcher.bind_(solver='musicgen/musicgen_base_32khz')
-    # replace this by the desired music dataset
-    launcher.bind_(dset='internal/music_400k_32khz')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    small = {'model/lm/model_scale': 'small'}
-    medium = {'model/lm/model_scale': 'medium'}
-    large = {'model/lm/model_scale': 'large'}
-    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
-    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
-    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}
-    launcher.bind_(fsdp)
-    launcher.slurm_(gpus=32).bind_(label='32gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub()
-    launcher.slurm_(gpus=64).bind_(label='64gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(medium, adam)
-    launcher.slurm_(gpus=96).bind_(label='96gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids_/musicgen/musicgen_base_cached_32khz.py DELETED Viewed

@@ -1,67 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from ._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=32, partition=partitions)
-    launcher.bind_(solver='musicgen/musicgen_base_32khz')
-    # replace this by the desired music dataset
-    launcher.bind_(dset='internal/music_400k_32khz')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    medium = {'model/lm/model_scale': 'medium'}
-    large = {'model/lm/model_scale': 'large'}
-    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
-    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
-    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-2}
-    # BEGINNING OF CACHE WRITING JOBS.
-    cache_write = {
-        'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
-        'cache.write': True,
-        'generate.every': 500,
-        'evaluate.every': 500,
-        'logging.log_updates': 50,
-    }
-    cache_sub = launcher.bind({'model/lm/model_scale': 'xsmall', 'conditioner': 'none'})
-    cache_sub.bind_({'deadlock.use': True})
-    cache_sub.slurm_(gpus=8)
-    with launcher.job_array():
-        num_shards = 10  # total number of jobs running in parallel.
-        for shard in range(0, num_shards):
-            launcher(cache_write, {'cache.write_num_shards': num_shards, 'cache.write_shard': shard})
-    # REMOVE THE FOLLOWING RETURN STATEMENT ONCE THE ABOVE JOBS ARE DONE,
-    # OR SUFFICIENTLY AHEAD.
-    return
-    cache = {
-        'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
-    }
-    launcher.bind_(fsdp, cache)
-    launcher.slurm_(gpus=32).bind_(label='32gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub()
-    launcher.slurm_(gpus=64).bind_(label='64gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(medium, adam)
-    launcher.slurm_(gpus=96).bind_(label='96gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids_/musicgen/musicgen_clapemb_32khz.py DELETED Viewed

@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from ._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=32, partition=partitions)
-    launcher.bind_(solver='musicgen/musicgen_base_32khz')
-    # replace this by the desired music dataset
-    launcher.bind_(dset='internal/music_400k_32khz')
-    launcher.bind_(conditioner='clapemb2music')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    cache_path = {'conditioners.description.clap.cache_path':
-                  '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/clap_embed_music'}
-    text_wav_training_opt = {'conditioners.description.clap.text_p': 0.5}
-    launcher.bind_(fsdp)
-    launcher.slurm_(gpus=32).bind_(label='32gpus')
-    with launcher.job_array():
-        launcher()
-        launcher(text_wav_training_opt)
-        launcher(cache_path)
-        launcher(cache_path, text_wav_training_opt)

audiocraft/grids_/musicgen/musicgen_melody_32khz.py DELETED Viewed

@@ -1,65 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from ._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=32, partition=partitions)
-    launcher.bind_(solver='musicgen/musicgen_melody_32khz')
-    # replace this by the desired music dataset
-    launcher.bind_(dset='internal/music_400k_32khz')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    medium = {'model/lm/model_scale': 'medium'}
-    large = {'model/lm/model_scale': 'large'}
-    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
-    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
-    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
-    cache_path = {'conditioners.self_wav.chroma_stem.cache_path':
-                  '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/chroma_stem'}
-    # CACHE GENERATION JOBS
-    n_cache_gen_jobs = 4
-    gen_sub = launcher.slurm(gpus=1)
-    gen_sub.bind_(
-        cache_path, {
-            # the cache is always computed over the whole file, so duration doesn't matter here.
-            'dataset.segment_duration': 2.,
-            'dataset.batch_size': 8,
-            'dataset.train.permutation_on_files': True,  # try to not repeat files.
-            'optim.epochs': 10,
-            'model/lm/model_scale': 'xsmall',
-        })
-    with gen_sub.job_array():
-        for gen_job in range(n_cache_gen_jobs):
-            gen_sub({'dataset.train.shuffle_seed': gen_job})
-    # ACTUAL TRAINING JOBS.
-    launcher.bind_(fsdp)
-    launcher.slurm_(gpus=32).bind_(label='32gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub()
-        sub(cache_path)
-    launcher.slurm_(gpus=64).bind_(label='64gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(medium, adam)
-    launcher.slurm_(gpus=96).bind_(label='96gpus')
-    with launcher.job_array():
-        sub = launcher.bind()
-        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids_/musicgen/musicgen_pretrained_32khz_eval.py DELETED Viewed

@@ -1,99 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Evaluation with objective metrics for the pretrained MusicGen models.
-This grid takes signature from the training grid and runs evaluation-only stage.
-When running the grid for the first time, please use:
-REGEN=1 dora grid musicgen.musicgen_pretrained_32khz_eval
-and re-use the REGEN=1 option when the grid is changed to force regenerating it.
-Note that you need the proper metrics external libraries setup to use all
-the objective metrics activated in this grid. Refer to the README for more information.
-"""
-import os
-from ._explorers import GenerationEvalExplorer
-from ...environment import AudioCraftEnvironment
-from ... import train
-def eval(launcher, batch_size: int = 32, eval_melody: bool = False):
-    opts = {
-        'dset': 'audio/musiccaps_32khz',
-        'solver/musicgen/evaluation': 'objective_eval',
-        'execute_only': 'evaluate',
-        '+dataset.evaluate.batch_size': batch_size,
-        '+metrics.fad.tf.batch_size': 16,
-    }
-    # chroma-specific evaluation
-    chroma_opts = {
-        'dset': 'internal/music_400k_32khz',
-        'dataset.evaluate.segment_duration': 30,
-        'dataset.evaluate.num_samples': 1000,
-        'evaluate.metrics.chroma_cosine': True,
-        'evaluate.metrics.fad': False,
-        'evaluate.metrics.kld': False,
-        'evaluate.metrics.text_consistency': False,
-    }
-    # binary for FAD computation: replace this path with your own path
-    metrics_opts = {
-        'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
-    }
-    opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
-    opt2 = {'transformer_lm.two_step_cfg': True}
-    sub = launcher.bind(opts)
-    sub.bind_(metrics_opts)
-    # base objective metrics
-    sub(opt1, opt2)
-    if eval_melody:
-        # chroma-specific metrics
-        sub(opt1, opt2, chroma_opts)
-@GenerationEvalExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=4, partition=partitions)
-    if 'REGEN' not in os.environ:
-        folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
-        with launcher.job_array():
-            for sig in folder.iterdir():
-                if not sig.is_symlink():
-                    continue
-                xp = train.main.get_xp_from_sig(sig.name)
-                launcher(xp.argv)
-        return
-    with launcher.job_array():
-        musicgen_base = launcher.bind(solver="musicgen/musicgen_base_32khz")
-        musicgen_base.bind_({'autocast': False, 'fsdp.use': True})
-        # base musicgen models
-        musicgen_base_small = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-small'})
-        eval(musicgen_base_small, batch_size=128)
-        musicgen_base_medium = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-medium'})
-        musicgen_base_medium.bind_({'model/lm/model_scale': 'medium'})
-        eval(musicgen_base_medium, batch_size=128)
-        musicgen_base_large = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-large'})
-        musicgen_base_large.bind_({'model/lm/model_scale': 'large'})
-        eval(musicgen_base_large, batch_size=128)
-        # melody musicgen model
-        musicgen_melody = launcher.bind(solver="musicgen/musicgen_melody_32khz")
-        musicgen_melody.bind_({'autocast': False, 'fsdp.use': True})
-        musicgen_melody_medium = musicgen_melody.bind({'continue_from': '//pretrained/facebook/musicgen-melody'})
-        musicgen_melody_medium.bind_({'model/lm/model_scale': 'medium'})
-        eval(musicgen_melody_medium, batch_size=128, eval_melody=True)

audiocraft/grids_/musicgen/musicgen_stereo_finetune_32khz.py DELETED Viewed

@@ -1,57 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from pathlib import Path
-from ._explorers import LMExplorer
-from ...environment import AudioCraftEnvironment
-@LMExplorer
-def explorer(launcher):
-    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
-    launcher.slurm_(gpus=32, partition=partitions)
-    launcher.bind_(solver='musicgen/musicgen_base_32khz')
-    # replace this by the desired music dataset, which needs to be stereo
-    launcher.bind_(dset='audio/example')
-    fsdp = {'autocast': False, 'fsdp.use': True}
-    medium = {'model/lm/model_scale': 'medium'}
-    large = {'model/lm/model_scale': 'large'}
-    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
-    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
-    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
-    stereo = {
-        'codebooks_pattern.delay.delays': [0, 0, 1, 1, 2, 2, 3, 3],
-        'transformer_lm.n_q': 8,
-        'interleave_stereo_codebooks.use': True,
-        'channels': 2,
-    }
-    # You must follow the instructions in docs/MUSICGEN.md about the creation
-    # of the proper fine tuning checkpoints. We will assume they are stored under
-    # ~/checkpoints/{mode_name}.
-    checkpoints = Path.home() / 'checkpoints'
-    launcher.bind_(fsdp, stereo, {'optim.epochs': 100})
-    launcher.slurm_(gpus=32).bind_(label='32gpus')
-    with launcher.job_array():
-        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-small.th')})
-        sub()
-    launcher.slurm_(gpus=64).bind_(label='64gpus')
-    with launcher.job_array():
-        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-medium.th')})
-        sub(medium, adam)
-    launcher.slurm_(gpus=96).bind_(label='96gpus')
-    with launcher.job_array():
-        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-large.th')})
-        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/models/__init__.py CHANGED Viewed

@@ -14,5 +14,5 @@ from .encodec import (
 from .audiogen import AudioGen
 from .lm import LMModel
 from .multibanddiffusion import MultiBandDiffusion
-from .musicgen import VidMuse
 from .unet import DiffusionUnet

 from .audiogen import AudioGen
 from .lm import LMModel
 from .multibanddiffusion import MultiBandDiffusion
+from .vidmuse import VidMuse
 from .unet import DiffusionUnet

audiocraft/models/{musicgen.py → vidmuse.py} RENAMED Viewed

File without changes