Commit a7dc8e9 · committed by Ubuntu · 1 parent: 434855f

update training code

This view is limited to 50 files because it contains too many changes. See raw diff.
- speech/cosyvoice/utils/executor.py +9 -94
- speech/cosyvoice/utils/train_utils.py +150 -114
- speech/cosyvoice2.yaml +217 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/app.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/cli.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/LICENSE +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/README.md +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/config.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/denoiser.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/env.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/meldataset.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/models.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/xutils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/baselightningmodule.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/decoder.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/flow_matching.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/text_encoder.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/transformer.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/matcha_tts.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/export.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/infer.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/cleaners.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/numbers.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/symbols.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/audio.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/generate_data_statistics.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/instantiators.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/logging_utils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/model.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/core.pyx +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/setup.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/pylogger.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/rich_utils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/utils.py +0 -0
- speech/third_party/Matcha-TTS/.env.example +0 -6
- speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md +0 -22
- speech/third_party/Matcha-TTS/.github/codecov.yml +0 -15
- speech/third_party/Matcha-TTS/.github/dependabot.yml +0 -17
- speech/third_party/Matcha-TTS/.github/release-drafter.yml +0 -44
- speech/third_party/Matcha-TTS/.gitignore +0 -163
- speech/third_party/Matcha-TTS/.pre-commit-config.yaml +0 -59
- speech/third_party/Matcha-TTS/.project-root +0 -2
speech/cosyvoice/utils/executor.py
CHANGED

@@ -49,10 +49,11 @@ class Executor:
         scheduler,
         train_data_loader,
         cv_data_loader,
-        …
+        experiment,
         info_dict,
         scaler,
         group_join,
+        model_type
     ):
         """Train one epoch"""

@@ -101,10 +102,10 @@ class Executor:
                 info_dict = batch_backward(model, scaler, info_dict)

                 info_dict = update_parameter_and_lr(
-                    model, optimizer, scheduler, scaler, info_dict
+                    model, optimizer, scheduler, scaler, info_dict, model_type=model_type
                 )
-                log_per_step(…
-                …
+                log_per_step(experiment, info_dict)
+
                 if (
                     info_dict["save_per_step"] > 0
                     and (self.step + 1) % info_dict["save_per_step"] == 0
@@ -112,102 +113,16 @@ class Executor:
                 ):
                     dist.barrier()
                     self.cv(
-                        model, cv_data_loader, …
+                        model, cv_data_loader, experiment, info_dict, on_batch_end=False
                     )
                     model.train()
                 if (batch_idx + 1) % info_dict["accum_grad"] == 0:
                     self.step += 1
         dist.barrier()
-        self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
-
-    def train_one_epoc_gan(
-        self,
-        model,
-        optimizer,
-        scheduler,
-        optimizer_d,
-        scheduler_d,
-        train_data_loader,
-        cv_data_loader,
-        writer,
-        info_dict,
-        scaler,
-        group_join,
-    ):
-        """Train one epoch"""
-
-        lr = optimizer.param_groups[0]["lr"]
-        logger.info(
-            f"Epoch {self.epoch} TRAIN info lr {lr} rank {self.rank}"
-        )
-        logger.info(
-            f"using accumulate grad, new batch size is {info_dict['accum_grad']} times larger than before"
-        )
-        # A context manager to be used in conjunction with an instance of
-        # torch.nn.parallel.DistributedDataParallel to be able to train
-        # with uneven inputs across participating processes.
-        model.train()
-        model_context = (
-            model.join if info_dict["train_engine"] == "torch_ddp" else nullcontext
-        )
-        with model_context():
-            for batch_idx, batch_dict in enumerate(train_data_loader):
-                info_dict["tag"] = "TRAIN"
-                info_dict["step"] = self.step
-                info_dict["epoch"] = self.epoch
-                info_dict["batch_idx"] = batch_idx
-                if cosyvoice_join(group_join, info_dict):
-                    break
-
-                # Disable gradient synchronizations across DDP processes.
-                # Within this context, gradients will be accumulated on module
-                # variables, which will later be synchronized.
-                if (
-                    info_dict["train_engine"] == "torch_ddp"
-                    and (batch_idx + 1) % info_dict["accum_grad"] != 0
-                ):
-                    context = model.no_sync
-                # Used for single gpu training and DDP gradient synchronization
-                # processes.
-                else:
-                    context = nullcontext
-
-                with context():
-                    batch_dict["turn"] = "discriminator"
-                    info_dict = batch_forward(model, batch_dict, scaler, info_dict)
-                    info_dict = batch_backward(model, scaler, info_dict)
-                    info_dict = update_parameter_and_lr(
-                        model, optimizer_d, scheduler_d, scaler, info_dict
-                    )
-                optimizer.zero_grad()
-                log_per_step(writer, info_dict)
-                with context():
-                    batch_dict["turn"] = "generator"
-                    info_dict = batch_forward(model, batch_dict, scaler, info_dict)
-                    info_dict = batch_backward(model, scaler, info_dict)
-                    info_dict = update_parameter_and_lr(
-                        model, optimizer, scheduler, scaler, info_dict
-                    )
-                optimizer_d.zero_grad()
-                log_per_step(writer, info_dict)
-                # NOTE specify save_per_step in cosyvoice.yaml if you want to enable step save
-                if (
-                    info_dict["save_per_step"] > 0
-                    and (self.step + 1) % info_dict["save_per_step"] == 0
-                    and (batch_idx + 1) % info_dict["accum_grad"] == 0
-                ):
-                    dist.barrier()
-                    self.cv(
-                        model, cv_data_loader, writer, info_dict, on_batch_end=False
-                    )
-                    model.train()
-                if (batch_idx + 1) % info_dict["accum_grad"] == 0:
-                    self.step += 1
-        dist.barrier()
-        # self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
+        #self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)

     @torch.inference_mode()
-    def cv(self, model, cv_data_loader, …
+    def cv(self, model, cv_data_loader, experiment, info_dict, on_batch_end=True):
         """Cross validation on"""
         logger.info(f"Epoch {self.epoch} Step {self.step + 1} on_batch_end {on_batch_end} CV rank {self.rank}")
         model.eval()
@@ -233,7 +148,7 @@ class Executor:
         for k, v in total_loss_dict.items():
             total_loss_dict[k] = sum(v) / total_num_utts
         info_dict["loss_dict"] = total_loss_dict
-        log_per_save(…
+        log_per_save(experiment, info_dict)
         model_name = (
             f"epoch_{self.epoch}_whole"
             if on_batch_end
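Across these hunks the old TensorBoard writer argument is replaced by an experiment object plus a model_type flag. A minimal sketch, not part of the commit, of how such an experiment could be created and passed to the executor; it assumes the Comet ML SDK (implied by the log_metric calls added in train_utils.py below) and uses a placeholder project name:

import os
from comet_ml import Experiment  # assumption: Comet ML is the tracking backend

rank = int(os.environ.get("RANK", 0))
# Only rank 0 creates a real experiment; other ranks pass None, matching the
# rank checks added in log_per_step / log_per_save.
experiment = Experiment(project_name="cosyvoice2-training") if rank == 0 else None
# The object is then passed in place of the old TensorBoard `writer` wherever
# the executor's train/cv methods are called, together with model_type='llm' or 'flow'.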
speech/cosyvoice/utils/train_utils.py
CHANGED

@@ -26,15 +26,14 @@ import deepspeed
 import torch.optim as optim
 import torch.distributed as dist

-from torch.utils.tensorboard import SummaryWriter
 from torch.utils.data import DataLoader
 from torch.nn.utils import clip_grad_norm_
 from loguru import logger
 from deepspeed.runtime.zero.stage_1_and_2 import estimate_zero2_model_states_mem_needs_all_live

 from cosyvoice.dataset.dataset import Dataset
-from cosyvoice.utils.scheduler import WarmupLR, NoamHoldAnnealing, ConstantLR

+from torch.optim.lr_scheduler import LinearLR, ConstantLR, SequentialLR

 def init_distributed(args):
     world_size = int(os.environ.get('WORLD_SIZE', 1))
@@ -49,10 +48,10 @@ def init_distributed(args):
     return world_size, local_rank, rank


-def init_dataset_and_dataloader(args, configs, …
-    data_pipeline = configs['…
-    train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=…
-    cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=…
+def init_dataset_and_dataloader(args, configs, dpo):
+    data_pipeline = configs['data_pipeline']
+    train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=True, partition=True)
+    cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=False, partition=False)

     # do not use persistent_workers=True, as whisper tokenizer opens tiktoken file each time when the for loop starts
     train_data_loader = DataLoader(train_dataset,
@@ -109,90 +108,38 @@ def wrap_cuda_model(args, model):
     return model


-def init_optimizer_and_scheduler(…
+def init_optimizer_and_scheduler(configs, model):
     """Init optimizer and scheduler"""
-    if …
-    …
-    …
-    …
-        optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
-    else:
-        raise ValueError("unknown optimizer: " + configs['train_conf'])
-
-    if configs['train_conf']['scheduler'] == 'warmuplr':
-        scheduler_type = WarmupLR
-        scheduler = WarmupLR(optimizer, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'NoamHoldAnnealing':
-        scheduler_type = NoamHoldAnnealing
-        scheduler = NoamHoldAnnealing(optimizer, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'constantlr':
-        scheduler_type = ConstantLR
-        scheduler = ConstantLR(optimizer)
-    else:
-        raise ValueError("unknown scheduler: " + configs['train_conf'])
-
-    # use deepspeed optimizer for speedup
-    if args.train_engine == "deepspeed":
-        def scheduler(opt):
-            return scheduler_type(opt, **configs['train_conf']['scheduler_conf'])
-        model, optimizer, _, scheduler = deepspeed.initialize(
-            args=args,
-            model=model,
-            optimizer=None,
-            lr_scheduler=scheduler,
-            model_parameters=model.parameters())
-
-        optimizer_d, scheduler_d = None, None
-
+    if configs['train_conf']['optim'] == 'adam':
+        optimizer = optim.Adam(model.parameters(), **configs['train_conf']['optim_conf'])
+    elif configs['train_conf']['optim'] == 'adamw':
+        optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
     else:
-        … (removed lines are truncated in this view)
+        raise ValueError("unknown optimizer: " + configs['train_conf'])
+
+    # Create schedulers
+    warmup_scheduler = LinearLR(
+        optimizer,
+        start_factor=1e-9,  # Start at nearly 0
+        end_factor=1.0,     # End at base learning rate
+        total_iters=5000    # 5k warmup steps
+    )
+
+    constant_scheduler = ConstantLR(
+        optimizer,
+        factor=1.0,               # Keep learning rate constant
+        total_iters=float('inf')  # Run indefinitely
+    )
+
+    # Combine schedulers: warmup for 5k steps, then constant
+    scheduler = SequentialLR(
+        optimizer,
+        schedulers=[warmup_scheduler, constant_scheduler],
+        milestones=[5000]  # Switch after 5k steps
+    )
+
+    return model, optimizer, scheduler

-    if configs['train_conf']['optim_d'] == 'adam':
-        optimizer_d = optim.Adam(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
-    elif configs['train_conf']['optim_d'] == 'adamw':
-        optimizer_d = optim.AdamW(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
-    else:
-        raise ValueError("unknown optimizer: " + configs['train_conf'])
-
-    if configs['train_conf']['scheduler_d'] == 'warmuplr':
-        scheduler_type = WarmupLR
-        scheduler_d = WarmupLR(optimizer_d, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler_d'] == 'NoamHoldAnnealing':
-        scheduler_type = NoamHoldAnnealing
-        scheduler_d = NoamHoldAnnealing(optimizer_d, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'constantlr':
-        scheduler_type = ConstantLR
-        scheduler_d = ConstantLR(optimizer_d)
-    else:
-        raise ValueError("unknown scheduler: " + configs['train_conf'])
-    return model, optimizer, scheduler, optimizer_d, scheduler_d
-
-
-def init_summarywriter(args):
-    """Init summary writer"""
-    writer = None
-    if int(os.environ.get('RANK', 0)) == 0:
-        os.makedirs(args.model_dir, exist_ok=True)
-        writer = SummaryWriter(args.tensorboard_dir)
-    return writer


 def save_model(model, model_name, info_dict):
@@ -295,21 +242,87 @@ def batch_backward(model, scaler, info_dict):
     return info_dict


-def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
+def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict, model_type='llm'):
     """Update parameters and learning rate"""
+
+    #Define key components based on model type
+    if model_type == 'llm':
+        key_components = {
+            # Text processing components
+            'text_embedding': [],
+            'text_encoder': [],
+            'text_encoder_affine': [],
+
+            # LLM core components
+            'llm_embedding': [],
+            'llm.model': [],  # Qwen2 model layers
+            'llm_decoder': [],
+
+            # Speech components
+            'speech_embedding': [],
+            'spk_embed_affine': [],
+
+            # Other components
+            'other': []
+        }
+    elif model_type == 'flow':
+        key_components = {
+            # Input processing
+            'input_embedding': [],
+            'spk_embed_affine': [],
+
+            # Encoder components
+            'encoder': [],
+            'encoder_proj': [],
+
+            # Flow/Diffusion components
+            'decoder.cfm': [],  # Conditional Flow Matching
+            'decoder.unet': [],  # UNet backbone
+            'decoder.estimator': [],  # Score/velocity estimator
+            'decoder.time_embedding': [],  # Time embeddings
+            'decoder.conv': [],  # Convolutional layers
+            'decoder.attention': [],  # Attention layers
+
+            # Length regulation
+            'length_regulator': [],
+
+            # Other components
+            'other': []
+        }
+
     grad_norm = 0.0
-    …
-    …
-    …
-    …
-    …
+    layer_grad_norms = {}
+
+    if (info_dict['batch_idx'] + 1) % info_dict["accum_grad"] == 0:
+
+        for name, param in model.named_parameters():
+            if param.grad is not None:
+                # Calculate gradient norm for this parameter
+                param_grad_norm = param.grad.data.norm(2).item()
+                layer_grad_norms[name] = param_grad_norm
+
+                # Categorize into key components
+                categorized = False
+                for component_key in key_components:
+                    if component_key != 'other':
+                        # Special handling for decoder sub-components in flow models
+                        if model_type == 'flow' and component_key.startswith('decoder.'):
+                            component_pattern = component_key.replace('decoder.', '')
+                            if 'decoder' in name and component_pattern in name:
+                                key_components[component_key].append((name, param_grad_norm))
+                                categorized = True
+                                break
+                        elif component_key in name:
+                            key_components[component_key].append((name, param_grad_norm))
+                            categorized = True
+                            break
+                if not categorized:
+                    key_components['other'].append((name, param_grad_norm))
+
     # Use mixed precision training
     if scaler is not None:
         scaler.unscale_(optimizer)
         grad_norm = clip_grad_norm_(model.parameters(), info_dict['grad_clip'])
-        # We don't check grad here since that if the gradient
-        # has inf/nan values, scaler.step will skip
-        # optimizer.step().
         if torch.isfinite(grad_norm):
             scaler.step(optimizer)
         else:
@@ -325,11 +338,12 @@ def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
         scheduler.step()
     info_dict["lr"] = optimizer.param_groups[0]['lr']
     info_dict["grad_norm"] = grad_norm
+    info_dict["layer_grad_norms"] = layer_grad_norms
+    info_dict["key_component_grads"] = key_components
     return info_dict

-
-def log_per_step(writer, info_dict):
-    """Log per step"""
+def log_per_step(experiment, info_dict):
+    """Log per step using Comet ML"""
     tag = info_dict["tag"]
     epoch = info_dict.get('epoch', 0)
     step = info_dict["step"]
@@ -337,39 +351,61 @@ def log_per_step(writer, info_dict):
     loss_dict = info_dict['loss_dict']
     rank = int(os.environ.get('RANK', 0))

-    # …
-    if …
+    # Only rank 0 writes to Comet ML to avoid multi-process write
+    if experiment is not None and rank == 0:
         if (info_dict['train_engine'] == 'deepspeed' and info_dict['is_gradient_accumulation_boundary'] is True) or \
            (info_dict['train_engine'] == 'torch_ddp' and (info_dict['batch_idx'] + 1) % info_dict['accum_grad'] == 0):
-            …
-            …
+            # Log metrics to Comet ML
+            experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
+            experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
+            experiment.log_metric(f'{tag}_grad_norm', info_dict['grad_norm'], step=step + 1)
+
+            # Log all losses
             for k, v in loss_dict.items():
-                …
+                if isinstance(v, torch.Tensor):
+                    v = v.item()
+                experiment.log_metric(f'{tag}_{k}', v, step=step + 1)

     # TRAIN & CV, Shell log (stdout)
     if (info_dict['batch_idx'] + 1) % info_dict['log_interval'] == 0:
         log_str = f'{tag} Batch {epoch}/{batch_idx + 1} '
         for name, value in loss_dict.items():
+            if isinstance(value, torch.Tensor):
+                value = value.item()
             log_str += f'{name} {value:.6f} '
         if tag == "TRAIN":
             log_str += f'lr {info_dict["lr"]:.8f} grad_norm {info_dict["grad_norm"]:.6f}'
         log_str += f' rank {rank}'
         logging.debug(log_str)

-
-def log_per_save(…
-    """Log per save"""
+def log_per_save(experiment, info_dict):
+    """Log per save using Comet ML"""
     tag = info_dict["tag"]
     epoch = info_dict["epoch"]
     step = info_dict["step"]
     loss_dict = info_dict["loss_dict"]
     lr = info_dict['lr']
     rank = int(os.environ.get('RANK', 0))
-
-    …
-    …
-    …
-    …
-    …
+
+    # Create loss string for logging
+    loss_str = ' '.join([f"{k} {v.item() if isinstance(v, torch.Tensor) else v}" for k, v in loss_dict.items()])
+    logger.info(f'Epoch {epoch} Step {step + 1} CV info lr {lr} {rank} {loss_str}')
+
+    if experiment is not None and rank == 0:
+        # Log metrics to Comet ML
+        experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
+        experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
+
+        # Log all losses
         for k, v in loss_dict.items():
-            …
+            if isinstance(v, torch.Tensor):
+                v = v.item()
+            experiment.log_metric(f'{tag}_{k}', v, step=step + 1)
+
+        # Log additional validation info
+        if tag == "CV":
+            # Calculate average CV loss for the epoch
+            avg_loss = loss_dict.get('loss', 0)
+            if isinstance(avg_loss, torch.Tensor):
+                avg_loss = avg_loss.item()
+            experiment.log_metric('cv_avg_loss_per_epoch', avg_loss, epoch=epoch)
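The new init_optimizer_and_scheduler drops the WarmupLR/NoamHoldAnnealing options in favour of a fixed warmup-then-constant schedule built from stock PyTorch schedulers. A minimal sketch, not part of the commit, showing the resulting learning-rate trajectory on a dummy parameter (warmup shortened to 5 steps for readability; the commit hard-codes 5000):

import torch
from torch.optim.lr_scheduler import LinearLR, ConstantLR, SequentialLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=1e-5)  # same base lr as train_conf.optim_conf.lr

warmup_steps = 5  # illustrative; the commit uses 5000
warmup = LinearLR(optimizer, start_factor=1e-9, end_factor=1.0, total_iters=warmup_steps)
constant = ConstantLR(optimizer, factor=1.0, total_iters=warmup_steps)  # factor=1.0 keeps the base lr
scheduler = SequentialLR(optimizer, schedulers=[warmup, constant], milestones=[warmup_steps])

for step in range(10):
    print(step, optimizer.param_groups[0]["lr"])  # ramps from ~0 up to 1e-5, then stays constant
    optimizer.step()
    scheduler.step()

Because the constant stage multiplies the base rate by 1.0, its total_iters value never changes the result, which is why the committed code can pass float('inf') there without affecting behaviour.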
speech/cosyvoice2.yaml
ADDED
@@ -0,0 +1,217 @@
# set random seed, so that you may reproduce your result.
__set_seed1: !apply:random.seed [1986]
__set_seed2: !apply:numpy.random.seed [1986]
__set_seed3: !apply:torch.manual_seed [1986]
__set_seed4: !apply:torch.cuda.manual_seed_all [1986]

# fixed params
sample_rate: 24000
llm_input_size: 896
llm_output_size: 896
spk_embed_dim: 192
qwen_pretrain_path: ''
token_frame_rate: 25
token_mel_ratio: 2

# stream related params
chunk_size: 25 # streaming inference chunk size, in token
num_decoding_left_chunks: -1 # streaming inference flow decoder left chunk size, <0 means use all left chunks

# model params
# for all class/function included in this repo, we use !<name> or !<new> for intialization, so that user may find all corresponding class/function according to one single yaml.
# for system/third_party class/function, we do not require this.
llm: !new:cosyvoice.llm.llm.Qwen2LM
    llm_input_size: !ref <llm_input_size>
    llm_output_size: !ref <llm_output_size>
    speech_token_size: 6561
    length_normalized_loss: True
    lsm_weight: 0
    mix_ratio: [5, 15]
    llm: !new:cosyvoice.llm.llm.Qwen2Encoder
        pretrain_path: !ref <qwen_pretrain_path>
    sampling: !name:cosyvoice.utils.common.ras_sampling
        top_p: 0.8
        top_k: 25
        win_size: 10
        tau_r: 0.1

flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
    input_size: 512
    output_size: 80
    spk_embed_dim: !ref <spk_embed_dim>
    output_type: 'mel'
    vocab_size: 6561
    input_frame_rate: !ref <token_frame_rate>
    only_mask_loss: True
    token_mel_ratio: !ref <token_mel_ratio>
    pre_lookahead_len: 3
    encoder: !new:cosyvoice.transformer.upsample_encoder.UpsampleConformerEncoder
        output_size: 512
        attention_heads: 8
        linear_units: 2048
        num_blocks: 6
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        attention_dropout_rate: 0.1
        normalize_before: True
        input_layer: 'linear'
        pos_enc_layer_type: 'rel_pos_espnet'
        selfattention_layer_type: 'rel_selfattn'
        input_size: 512
        use_cnn_module: False
        macaron_style: False
        static_chunk_size: !ref <chunk_size>
    decoder: !new:cosyvoice.flow.flow_matching.CausalConditionalCFM
        in_channels: 240
        n_spks: 1
        spk_emb_dim: 80
        cfm_params: !new:omegaconf.DictConfig
            content:
                sigma_min: 1e-06
                solver: 'euler'
                t_scheduler: 'cosine'
                training_cfg_rate: 0.2
                inference_cfg_rate: 0.7
                reg_loss_type: 'l1'
        estimator: !new:cosyvoice.flow.decoder.CausalConditionalDecoder
            in_channels: 320
            out_channels: 80
            channels: [256]
            dropout: 0.0
            attention_head_dim: 64
            n_blocks: 4
            num_mid_blocks: 12
            num_heads: 8
            act_fn: 'gelu'
            static_chunk_size: !ref <chunk_size> * <token_mel_ratio>
            num_decoding_left_chunks: !ref <num_decoding_left_chunks>

hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
    in_channels: 80
    base_channels: 512
    nb_harmonics: 8
    sampling_rate: !ref <sample_rate>
    nsf_alpha: 0.1
    nsf_sigma: 0.003
    nsf_voiced_threshold: 10
    upsample_rates: [8, 5, 3]
    upsample_kernel_sizes: [16, 11, 7]
    istft_params:
        n_fft: 16
        hop_len: 4
    resblock_kernel_sizes: [3, 7, 11]
    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    source_resblock_kernel_sizes: [7, 7, 11]
    source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    lrelu_slope: 0.1
    audio_limit: 0.99
    f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
        num_class: 1
        in_channels: 80
        cond_channels: 512

# gan related module
mel_spec_transform1: !name:matcha.utils.audio.mel_spectrogram
    n_fft: 1920
    num_mels: 80
    sampling_rate: !ref <sample_rate>
    hop_size: 480
    win_size: 1920
    fmin: 0
    fmax: null
    center: False
hifigan: !new:cosyvoice.hifigan.hifigan.HiFiGan
    generator: !ref <hift>
    discriminator: !new:cosyvoice.hifigan.discriminator.MultipleDiscriminator
        mpd: !new:matcha.hifigan.models.MultiPeriodDiscriminator
        mrd: !new:cosyvoice.hifigan.discriminator.MultiResSpecDiscriminator
    mel_spec_transform: [
        !ref <mel_spec_transform1>
    ]

# processor functions
parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
get_tokenizer: !name:cosyvoice.tokenizer.tokenizer.get_qwen_tokenizer
    token_path: !ref <qwen_pretrain_path>
    skip_special_tokens: True
allowed_special: 'all'
tokenize: !name:cosyvoice.dataset.processor.tokenize
    get_tokenizer: !ref <get_tokenizer>
    allowed_special: !ref <allowed_special>
filter: !name:cosyvoice.dataset.processor.filter
    max_length: 40960
    min_length: 100
    token_max_length: 200
    token_min_length: 1
resample: !name:cosyvoice.dataset.processor.resample
    resample_rate: !ref <sample_rate>
truncate: !name:cosyvoice.dataset.processor.truncate
    truncate_length: 24480 # must be a multiplier of hop_size
feat_extractor: !name:matcha.utils.audio.mel_spectrogram
    n_fft: 1920
    num_mels: 80
    sampling_rate: !ref <sample_rate>
    hop_size: 480
    win_size: 1920
    fmin: 0
    fmax: 8000
    center: False
compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
    feat_extractor: !ref <feat_extractor>
compute_f0: !name:cosyvoice.dataset.processor.compute_f0
    sample_rate: !ref <sample_rate>
    hop_size: 480
parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
    normalize: True
shuffle: !name:cosyvoice.dataset.processor.shuffle
    shuffle_size: 1000
sort: !name:cosyvoice.dataset.processor.sort
    sort_size: 500 # sort_size should be less than shuffle_size
batch: !name:cosyvoice.dataset.processor.batch
    batch_type: 'dynamic'
    max_frames_in_batch: 2000
padding: !name:cosyvoice.dataset.processor.padding
    use_spk_embedding: False # change to True during sft


# dataset processor pipeline
data_pipeline: [
    !ref <parquet_opener>,
    !ref <tokenize>,
    !ref <filter>,
    !ref <resample>,
    !ref <compute_fbank>,
    !ref <parse_embedding>,
    !ref <shuffle>,
    !ref <sort>,
    !ref <batch>,
    !ref <padding>,
]
data_pipeline_gan: [
    !ref <parquet_opener>,
    !ref <tokenize>,
    !ref <filter>,
    !ref <resample>,
    !ref <truncate>,
    !ref <compute_fbank>,
    !ref <compute_f0>,
    !ref <parse_embedding>,
    !ref <shuffle>,
    !ref <sort>,
    !ref <batch>,
    !ref <padding>,
]

# llm flow train conf
train_conf:
    optim: adamw
    optim_conf:
        lr: 1e-5 # change to 1e-5 during sft
    scheduler: constantlr # change to constantlr during sft
    scheduler_conf:
        warmup_steps: 2500
    max_epoch: 200
    grad_clip: 1
    accum_grad: 1
    log_interval: 100
    save_per_step: -1
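The !new:, !name:, !ref and !apply: tags in this file appear to follow the HyperPyYAML convention noted in the file's own comments, so loading the config also instantiates the model graph. A minimal sketch, not part of the commit, of loading it in a training script; the override path is a placeholder and the hyperpyyaml package is an assumption:

from hyperpyyaml import load_hyperpyyaml

with open("speech/cosyvoice2.yaml", "r") as f:
    # qwen_pretrain_path is empty in the file, so it would typically be overridden at load time
    configs = load_hyperpyyaml(f, overrides={"qwen_pretrain_path": "/path/to/pretrained_qwen"})

llm = configs["llm"]                      # Qwen2LM module
flow = configs["flow"]                    # CausalMaskedDiffWithXvec module
data_pipeline = configs["data_pipeline"]  # processor list consumed by init_dataset_and_dataloader
train_conf = configs["train_conf"]        # optimizer / scheduler hyperparameters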
speech/{third_party/Matcha-TTS/matcha → matcha}/
RENAMED
39 files (listed in the changed-file list above) were moved from speech/third_party/Matcha-TTS/matcha/ to speech/matcha/ with no content changes.
speech/third_party/Matcha-TTS/.env.example
DELETED
@@ -1,6 +0,0 @@
# example of file for storing private and user specific environment variables, like keys or system paths
# rename it to ".env" (excluded from version control by default)
# .env is loaded by train.py automatically
# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}

MY_VAR="/home/user/my/system/path"
speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md
DELETED
@@ -1,22 +0,0 @@
## What does this PR do?

<!--
Please include a summary of the change and which issue is fixed.
Please also include relevant motivation and context.
List any dependencies that are required for this change.
List all the breaking changes introduced by this pull request.
-->

Fixes #\<issue_number>

## Before submitting

- [ ] Did you make sure **title is self-explanatory** and **the description concisely explains the PR**?
- [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
- [ ] Did you list all the **breaking changes** introduced by this pull request?
- [ ] Did you **test your PR locally** with `pytest` command?
- [ ] Did you **run pre-commit hooks** with `pre-commit run -a` command?

## Did you have fun?

Make sure you had fun coding 🙃
speech/third_party/Matcha-TTS/.github/codecov.yml
DELETED
@@ -1,15 +0,0 @@
coverage:
  status:
    # measures overall project coverage
    project:
      default:
        threshold: 100% # how much decrease in coverage is needed to not consider success

    # measures PR or single commit coverage
    patch:
      default:
        threshold: 100% # how much decrease in coverage is needed to not consider success


# project: off
# patch: off
speech/third_party/Matcha-TTS/.github/dependabot.yml
DELETED
@@ -1,17 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    target-branch: "dev"
    schedule:
      interval: "daily"
    ignore:
      - dependency-name: "pytorch-lightning"
        update-types: ["version-update:semver-patch"]
      - dependency-name: "torchmetrics"
        update-types: ["version-update:semver-patch"]
speech/third_party/Matcha-TTS/.github/release-drafter.yml
DELETED
@@ -1,44 +0,0 @@
name-template: "v$RESOLVED_VERSION"
tag-template: "v$RESOLVED_VERSION"

categories:
  - title: "🚀 Features"
    labels:
      - "feature"
      - "enhancement"
  - title: "🐛 Bug Fixes"
    labels:
      - "fix"
      - "bugfix"
      - "bug"
  - title: "🧹 Maintenance"
    labels:
      - "maintenance"
      - "dependencies"
      - "refactoring"
      - "cosmetic"
      - "chore"
  - title: "📝️ Documentation"
    labels:
      - "documentation"
      - "docs"

change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions

version-resolver:
  major:
    labels:
      - "major"
  minor:
    labels:
      - "minor"
  patch:
    labels:
      - "patch"
  default: patch

template: |
  ## Changes

  $CHANGES
speech/third_party/Matcha-TTS/.gitignore
DELETED
@@ -1,163 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

### VisualStudioCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode

# JetBrains
.idea/

# Data & Models
*.h5
*.tar
*.tar.gz

# Lightning-Hydra-Template
configs/local/default.yaml
/data/
/logs/
.env

# Aim logging
.aim

# Cython complied files
matcha/utils/monotonic_align/core.c

# Ignoring hifigan checkpoint
generator_v1
g_02500000
gradio_cached_examples/
synth_output/
speech/third_party/Matcha-TTS/.pre-commit-config.yaml
DELETED
@@ -1,59 +0,0 @@
default_language_version:
  python: python3.10

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      # list of supported hooks: https://pre-commit.com/hooks.html
      - id: trailing-whitespace
      - id: end-of-file-fixer
      # - id: check-docstring-first
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
      - id: check-toml
      - id: check-case-conflict
      - id: check-added-large-files

  # python code formatting
  - repo: https://github.com/psf/black
    rev: 23.12.1
    hooks:
      - id: black
        args: [--line-length, "120"]

  # python import sorting
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
        args: ["--profile", "black", "--filter-files"]

  # python upgrading syntax to newer version
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]

  # python check (PEP8), programming errors and code complexity
  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args:
          [
            "--max-line-length", "120",
            "--extend-ignore",
            "E203,E402,E501,F401,F841,RST2,RST301",
            "--exclude",
            "logs/*,data/*,matcha/hifigan/*",
          ]
        additional_dependencies: [flake8-rst-docstrings==0.3.0]

  # pylint
  - repo: https://github.com/pycqa/pylint
    rev: v3.0.3
    hooks:
      - id: pylint
speech/third_party/Matcha-TTS/.project-root
DELETED
@@ -1,2 +0,0 @@
# this file is required for inferring the project root directory
# do not delete