Ubuntu committed on
Commit
a7dc8e9
·
1 Parent(s): 434855f

update training code

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. speech/cosyvoice/utils/executor.py +9 -94
  2. speech/cosyvoice/utils/train_utils.py +150 -114
  3. speech/cosyvoice2.yaml +217 -0
  4. speech/{third_party/Matcha-TTS/matcha → matcha}/__init__.py +0 -0
  5. speech/{third_party/Matcha-TTS/matcha → matcha}/app.py +0 -0
  6. speech/{third_party/Matcha-TTS/matcha → matcha}/cli.py +0 -0
  7. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/LICENSE +0 -0
  8. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/README.md +0 -0
  9. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/__init__.py +0 -0
  10. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/config.py +0 -0
  11. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/denoiser.py +0 -0
  12. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/env.py +0 -0
  13. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/meldataset.py +0 -0
  14. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/models.py +0 -0
  15. speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/xutils.py +0 -0
  16. speech/{third_party/Matcha-TTS/matcha → matcha}/models/__init__.py +0 -0
  17. speech/{third_party/Matcha-TTS/matcha → matcha}/models/baselightningmodule.py +0 -0
  18. speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/__init__.py +0 -0
  19. speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/decoder.py +0 -0
  20. speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/flow_matching.py +0 -0
  21. speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/text_encoder.py +0 -0
  22. speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/transformer.py +0 -0
  23. speech/{third_party/Matcha-TTS/matcha → matcha}/models/matcha_tts.py +0 -0
  24. speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/__init__.py +0 -0
  25. speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/export.py +0 -0
  26. speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/infer.py +0 -0
  27. speech/{third_party/Matcha-TTS/matcha → matcha}/text/__init__.py +0 -0
  28. speech/{third_party/Matcha-TTS/matcha → matcha}/text/cleaners.py +0 -0
  29. speech/{third_party/Matcha-TTS/matcha → matcha}/text/numbers.py +0 -0
  30. speech/{third_party/Matcha-TTS/matcha → matcha}/text/symbols.py +0 -0
  31. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/__init__.py +0 -0
  32. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/audio.py +0 -0
  33. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/generate_data_statistics.py +0 -0
  34. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/instantiators.py +0 -0
  35. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/logging_utils.py +0 -0
  36. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/model.py +0 -0
  37. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/__init__.py +0 -0
  38. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/core.pyx +0 -0
  39. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/setup.py +0 -0
  40. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/pylogger.py +0 -0
  41. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/rich_utils.py +0 -0
  42. speech/{third_party/Matcha-TTS/matcha → matcha}/utils/utils.py +0 -0
  43. speech/third_party/Matcha-TTS/.env.example +0 -6
  44. speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md +0 -22
  45. speech/third_party/Matcha-TTS/.github/codecov.yml +0 -15
  46. speech/third_party/Matcha-TTS/.github/dependabot.yml +0 -17
  47. speech/third_party/Matcha-TTS/.github/release-drafter.yml +0 -44
  48. speech/third_party/Matcha-TTS/.gitignore +0 -163
  49. speech/third_party/Matcha-TTS/.pre-commit-config.yaml +0 -59
  50. speech/third_party/Matcha-TTS/.project-root +0 -2
speech/cosyvoice/utils/executor.py CHANGED
@@ -49,10 +49,11 @@ class Executor:
49
  scheduler,
50
  train_data_loader,
51
  cv_data_loader,
52
- writer,
53
  info_dict,
54
  scaler,
55
  group_join,
 
56
  ):
57
  """Train one epoch"""
58
 
@@ -101,10 +102,10 @@ class Executor:
101
  info_dict = batch_backward(model, scaler, info_dict)
102
 
103
  info_dict = update_parameter_and_lr(
104
- model, optimizer, scheduler, scaler, info_dict
105
  )
106
- log_per_step(writer, info_dict)
107
- # NOTE specify save_per_step in cosyvoice.yaml if you want to enable step save
108
  if (
109
  info_dict["save_per_step"] > 0
110
  and (self.step + 1) % info_dict["save_per_step"] == 0
@@ -112,102 +113,16 @@ class Executor:
112
  ):
113
  dist.barrier()
114
  self.cv(
115
- model, cv_data_loader, writer, info_dict, on_batch_end=False
116
  )
117
  model.train()
118
  if (batch_idx + 1) % info_dict["accum_grad"] == 0:
119
  self.step += 1
120
  dist.barrier()
121
- self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
122
-
123
- def train_one_epoc_gan(
124
- self,
125
- model,
126
- optimizer,
127
- scheduler,
128
- optimizer_d,
129
- scheduler_d,
130
- train_data_loader,
131
- cv_data_loader,
132
- writer,
133
- info_dict,
134
- scaler,
135
- group_join,
136
- ):
137
- """Train one epoch"""
138
-
139
- lr = optimizer.param_groups[0]["lr"]
140
- logger.info(
141
- f"Epoch {self.epoch} TRAIN info lr {lr} rank {self.rank}"
142
- )
143
- logger.info(
144
- f"using accumulate grad, new batch size is {info_dict['accum_grad']} times larger than before"
145
- )
146
- # A context manager to be used in conjunction with an instance of
147
- # torch.nn.parallel.DistributedDataParallel to be able to train
148
- # with uneven inputs across participating processes.
149
- model.train()
150
- model_context = (
151
- model.join if info_dict["train_engine"] == "torch_ddp" else nullcontext
152
- )
153
- with model_context():
154
- for batch_idx, batch_dict in enumerate(train_data_loader):
155
- info_dict["tag"] = "TRAIN"
156
- info_dict["step"] = self.step
157
- info_dict["epoch"] = self.epoch
158
- info_dict["batch_idx"] = batch_idx
159
- if cosyvoice_join(group_join, info_dict):
160
- break
161
-
162
- # Disable gradient synchronizations across DDP processes.
163
- # Within this context, gradients will be accumulated on module
164
- # variables, which will later be synchronized.
165
- if (
166
- info_dict["train_engine"] == "torch_ddp"
167
- and (batch_idx + 1) % info_dict["accum_grad"] != 0
168
- ):
169
- context = model.no_sync
170
- # Used for single gpu training and DDP gradient synchronization
171
- # processes.
172
- else:
173
- context = nullcontext
174
-
175
- with context():
176
- batch_dict["turn"] = "discriminator"
177
- info_dict = batch_forward(model, batch_dict, scaler, info_dict)
178
- info_dict = batch_backward(model, scaler, info_dict)
179
- info_dict = update_parameter_and_lr(
180
- model, optimizer_d, scheduler_d, scaler, info_dict
181
- )
182
- optimizer.zero_grad()
183
- log_per_step(writer, info_dict)
184
- with context():
185
- batch_dict["turn"] = "generator"
186
- info_dict = batch_forward(model, batch_dict, scaler, info_dict)
187
- info_dict = batch_backward(model, scaler, info_dict)
188
- info_dict = update_parameter_and_lr(
189
- model, optimizer, scheduler, scaler, info_dict
190
- )
191
- optimizer_d.zero_grad()
192
- log_per_step(writer, info_dict)
193
- # NOTE specify save_per_step in cosyvoice.yaml if you want to enable step save
194
- if (
195
- info_dict["save_per_step"] > 0
196
- and (self.step + 1) % info_dict["save_per_step"] == 0
197
- and (batch_idx + 1) % info_dict["accum_grad"] == 0
198
- ):
199
- dist.barrier()
200
- self.cv(
201
- model, cv_data_loader, writer, info_dict, on_batch_end=False
202
- )
203
- model.train()
204
- if (batch_idx + 1) % info_dict["accum_grad"] == 0:
205
- self.step += 1
206
- dist.barrier()
207
- # self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
208
 
209
  @torch.inference_mode()
210
- def cv(self, model, cv_data_loader, writer, info_dict, on_batch_end=True):
211
  """Cross validation on"""
212
  logger.info(f"Epoch {self.epoch} Step {self.step + 1} on_batch_end {on_batch_end} CV rank {self.rank}")
213
  model.eval()
@@ -233,7 +148,7 @@ class Executor:
233
  for k, v in total_loss_dict.items():
234
  total_loss_dict[k] = sum(v) / total_num_utts
235
  info_dict["loss_dict"] = total_loss_dict
236
- log_per_save(writer, info_dict)
237
  model_name = (
238
  f"epoch_{self.epoch}_whole"
239
  if on_batch_end
 
49
  scheduler,
50
  train_data_loader,
51
  cv_data_loader,
52
+ experiment,
53
  info_dict,
54
  scaler,
55
  group_join,
56
+ model_type
57
  ):
58
  """Train one epoch"""
59
 
 
102
  info_dict = batch_backward(model, scaler, info_dict)
103
 
104
  info_dict = update_parameter_and_lr(
105
+ model, optimizer, scheduler, scaler, info_dict, model_type=model_type
106
  )
107
+ log_per_step(experiment, info_dict)
108
+
109
  if (
110
  info_dict["save_per_step"] > 0
111
  and (self.step + 1) % info_dict["save_per_step"] == 0
 
113
  ):
114
  dist.barrier()
115
  self.cv(
116
+ model, cv_data_loader, experiment, info_dict, on_batch_end=False
117
  )
118
  model.train()
119
  if (batch_idx + 1) % info_dict["accum_grad"] == 0:
120
  self.step += 1
121
  dist.barrier()
122
+ #self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  @torch.inference_mode()
125
+ def cv(self, model, cv_data_loader, experiment, info_dict, on_batch_end=True):
126
  """Cross validation on"""
127
  logger.info(f"Epoch {self.epoch} Step {self.step + 1} on_batch_end {on_batch_end} CV rank {self.rank}")
128
  model.eval()
 
148
  for k, v in total_loss_dict.items():
149
  total_loss_dict[k] = sum(v) / total_num_utts
150
  info_dict["loss_dict"] = total_loss_dict
151
+ log_per_save(experiment, info_dict)
152
  model_name = (
153
  f"epoch_{self.epoch}_whole"
154
  if on_batch_end
speech/cosyvoice/utils/train_utils.py CHANGED
@@ -26,15 +26,14 @@ import deepspeed
26
  import torch.optim as optim
27
  import torch.distributed as dist
28
 
29
- from torch.utils.tensorboard import SummaryWriter
30
  from torch.utils.data import DataLoader
31
  from torch.nn.utils import clip_grad_norm_
32
  from loguru import logger
33
  from deepspeed.runtime.zero.stage_1_and_2 import estimate_zero2_model_states_mem_needs_all_live
34
 
35
  from cosyvoice.dataset.dataset import Dataset
36
- from cosyvoice.utils.scheduler import WarmupLR, NoamHoldAnnealing, ConstantLR
37
 
 
38
 
39
  def init_distributed(args):
40
  world_size = int(os.environ.get('WORLD_SIZE', 1))
@@ -49,10 +48,10 @@ def init_distributed(args):
49
  return world_size, local_rank, rank
50
 
51
 
52
- def init_dataset_and_dataloader(args, configs, gan, dpo):
53
- data_pipeline = configs['data_pipeline_gan'] if gan is True else configs['data_pipeline']
54
- train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=gan, dpo=dpo, shuffle=True, partition=True)
55
- cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=gan, dpo=dpo, shuffle=False, partition=False)
56
 
57
  # do not use persistent_workers=True, as whisper tokenizer opens tiktoken file each time when the for loop starts
58
  train_data_loader = DataLoader(train_dataset,
@@ -109,90 +108,38 @@ def wrap_cuda_model(args, model):
109
  return model
110
 
111
 
112
- def init_optimizer_and_scheduler(args, configs, model, gan):
113
  """Init optimizer and scheduler"""
114
- if gan is False:
115
- if configs['train_conf']['optim'] == 'adam':
116
- optimizer = optim.Adam(model.parameters(), **configs['train_conf']['optim_conf'])
117
- elif configs['train_conf']['optim'] == 'adamw':
118
- optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
119
- else:
120
- raise ValueError("unknown optimizer: " + configs['train_conf'])
121
-
122
- if configs['train_conf']['scheduler'] == 'warmuplr':
123
- scheduler_type = WarmupLR
124
- scheduler = WarmupLR(optimizer, **configs['train_conf']['scheduler_conf'])
125
- elif configs['train_conf']['scheduler'] == 'NoamHoldAnnealing':
126
- scheduler_type = NoamHoldAnnealing
127
- scheduler = NoamHoldAnnealing(optimizer, **configs['train_conf']['scheduler_conf'])
128
- elif configs['train_conf']['scheduler'] == 'constantlr':
129
- scheduler_type = ConstantLR
130
- scheduler = ConstantLR(optimizer)
131
- else:
132
- raise ValueError("unknown scheduler: " + configs['train_conf'])
133
-
134
- # use deepspeed optimizer for speedup
135
- if args.train_engine == "deepspeed":
136
- def scheduler(opt):
137
- return scheduler_type(opt, **configs['train_conf']['scheduler_conf'])
138
- model, optimizer, _, scheduler = deepspeed.initialize(
139
- args=args,
140
- model=model,
141
- optimizer=None,
142
- lr_scheduler=scheduler,
143
- model_parameters=model.parameters())
144
-
145
- optimizer_d, scheduler_d = None, None
146
-
147
  else:
148
- # currently we wrap generator and discriminator in one model, so we cannot use deepspeed
149
- if configs['train_conf']['optim'] == 'adam':
150
- optimizer = optim.Adam(model.module.generator.parameters(), **configs['train_conf']['optim_conf'])
151
- elif configs['train_conf']['optim'] == 'adamw':
152
- optimizer = optim.AdamW(model.module.generator.parameters(), **configs['train_conf']['optim_conf'])
153
- else:
154
- raise ValueError("unknown optimizer: " + configs['train_conf'])
155
-
156
- if configs['train_conf']['scheduler'] == 'warmuplr':
157
- scheduler_type = WarmupLR
158
- scheduler = WarmupLR(optimizer, **configs['train_conf']['scheduler_conf'])
159
- elif configs['train_conf']['scheduler'] == 'NoamHoldAnnealing':
160
- scheduler_type = NoamHoldAnnealing
161
- scheduler = NoamHoldAnnealing(optimizer, **configs['train_conf']['scheduler_conf'])
162
- elif configs['train_conf']['scheduler'] == 'constantlr':
163
- scheduler_type = ConstantLR
164
- scheduler = ConstantLR(optimizer)
165
- else:
166
- raise ValueError("unknown scheduler: " + configs['train_conf'])
 
 
 
 
 
167
 
168
- if configs['train_conf']['optim_d'] == 'adam':
169
- optimizer_d = optim.Adam(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
170
- elif configs['train_conf']['optim_d'] == 'adamw':
171
- optimizer_d = optim.AdamW(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
172
- else:
173
- raise ValueError("unknown optimizer: " + configs['train_conf'])
174
-
175
- if configs['train_conf']['scheduler_d'] == 'warmuplr':
176
- scheduler_type = WarmupLR
177
- scheduler_d = WarmupLR(optimizer_d, **configs['train_conf']['scheduler_conf'])
178
- elif configs['train_conf']['scheduler_d'] == 'NoamHoldAnnealing':
179
- scheduler_type = NoamHoldAnnealing
180
- scheduler_d = NoamHoldAnnealing(optimizer_d, **configs['train_conf']['scheduler_conf'])
181
- elif configs['train_conf']['scheduler'] == 'constantlr':
182
- scheduler_type = ConstantLR
183
- scheduler_d = ConstantLR(optimizer_d)
184
- else:
185
- raise ValueError("unknown scheduler: " + configs['train_conf'])
186
- return model, optimizer, scheduler, optimizer_d, scheduler_d
187
-
188
-
189
- def init_summarywriter(args):
190
- """Init summary writer"""
191
- writer = None
192
- if int(os.environ.get('RANK', 0)) == 0:
193
- os.makedirs(args.model_dir, exist_ok=True)
194
- writer = SummaryWriter(args.tensorboard_dir)
195
- return writer
196
 
197
 
198
  def save_model(model, model_name, info_dict):
@@ -295,21 +242,87 @@ def batch_backward(model, scaler, info_dict):
295
  return info_dict
296
 
297
 
298
- def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
299
  """Update parameters and learning rate"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  grad_norm = 0.0
301
- if info_dict['train_engine'] == "deepspeed":
302
- info_dict["is_gradient_accumulation_boundary"] = model.is_gradient_accumulation_boundary()
303
- model.step()
304
- grad_norm = model.get_global_grad_norm()
305
- elif (info_dict['batch_idx'] + 1) % info_dict["accum_grad"] == 0:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  # Use mixed precision training
307
  if scaler is not None:
308
  scaler.unscale_(optimizer)
309
  grad_norm = clip_grad_norm_(model.parameters(), info_dict['grad_clip'])
310
- # We don't check grad here since that if the gradient
311
- # has inf/nan values, scaler.step will skip
312
- # optimizer.step().
313
  if torch.isfinite(grad_norm):
314
  scaler.step(optimizer)
315
  else:
@@ -325,11 +338,12 @@ def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
325
  scheduler.step()
326
  info_dict["lr"] = optimizer.param_groups[0]['lr']
327
  info_dict["grad_norm"] = grad_norm
 
 
328
  return info_dict
329
 
330
-
331
- def log_per_step(writer, info_dict):
332
- """Log per step"""
333
  tag = info_dict["tag"]
334
  epoch = info_dict.get('epoch', 0)
335
  step = info_dict["step"]
@@ -337,39 +351,61 @@ def log_per_step(writer, info_dict):
337
  loss_dict = info_dict['loss_dict']
338
  rank = int(os.environ.get('RANK', 0))
339
 
340
- # only rank 0 write to tensorboard to avoid multi-process write
341
- if writer is not None:
342
  if (info_dict['train_engine'] == 'deepspeed' and info_dict['is_gradient_accumulation_boundary'] is True) or \
343
  (info_dict['train_engine'] == 'torch_ddp' and (info_dict['batch_idx'] + 1) % info_dict['accum_grad'] == 0):
344
- for k in ['epoch', 'lr', 'grad_norm']:
345
- writer.add_scalar(f'{tag}/{k}', info_dict[k], step + 1)
 
 
 
 
346
  for k, v in loss_dict.items():
347
- writer.add_scalar(f'{tag}/{k}', v, step + 1)
 
 
348
 
349
  # TRAIN & CV, Shell log (stdout)
350
  if (info_dict['batch_idx'] + 1) % info_dict['log_interval'] == 0:
351
  log_str = f'{tag} Batch {epoch}/{batch_idx + 1} '
352
  for name, value in loss_dict.items():
 
 
353
  log_str += f'{name} {value:.6f} '
354
  if tag == "TRAIN":
355
  log_str += f'lr {info_dict["lr"]:.8f} grad_norm {info_dict["grad_norm"]:.6f}'
356
  log_str += f' rank {rank}'
357
  logging.debug(log_str)
358
 
359
-
360
- def log_per_save(writer, info_dict):
361
- """Log per save"""
362
  tag = info_dict["tag"]
363
  epoch = info_dict["epoch"]
364
  step = info_dict["step"]
365
  loss_dict = info_dict["loss_dict"]
366
  lr = info_dict['lr']
367
  rank = int(os.environ.get('RANK', 0))
368
- logger.info(
369
- f'Epoch {epoch} Step {step + 1} CV info lr {lr} {rank} {''.join([f"{k} {v}" for k, v in loss_dict.items()])}')
370
-
371
- if writer is not None:
372
- for k in ['epoch', 'lr']:
373
- writer.add_scalar(f'{tag}/{k}', info_dict[k], step + 1)
 
 
 
 
 
374
  for k, v in loss_dict.items():
375
- writer.add_scalar(f'{tag}/{k}', v, step + 1)
 
 
 
 
 
 
 
 
 
 
 
26
  import torch.optim as optim
27
  import torch.distributed as dist
28
 
 
29
  from torch.utils.data import DataLoader
30
  from torch.nn.utils import clip_grad_norm_
31
  from loguru import logger
32
  from deepspeed.runtime.zero.stage_1_and_2 import estimate_zero2_model_states_mem_needs_all_live
33
 
34
  from cosyvoice.dataset.dataset import Dataset
 
35
 
36
+ from torch.optim.lr_scheduler import LinearLR, ConstantLR, SequentialLR
37
 
38
  def init_distributed(args):
39
  world_size = int(os.environ.get('WORLD_SIZE', 1))
 
48
  return world_size, local_rank, rank
49
 
50
 
51
+ def init_dataset_and_dataloader(args, configs, dpo):
52
+ data_pipeline = configs['data_pipeline']
53
+ train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=True, partition=True)
54
+ cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=False, partition=False)
55
 
56
  # do not use persistent_workers=True, as whisper tokenizer opens tiktoken file each time when the for loop starts
57
  train_data_loader = DataLoader(train_dataset,
 
108
  return model
109
 
110
 
111
+ def init_optimizer_and_scheduler(configs, model):
112
  """Init optimizer and scheduler"""
113
+ if configs['train_conf']['optim'] == 'adam':
114
+ optimizer = optim.Adam(model.parameters(), **configs['train_conf']['optim_conf'])
115
+ elif configs['train_conf']['optim'] == 'adamw':
116
+ optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  else:
118
+ raise ValueError("unknown optimizer: " + configs['train_conf'])
119
+
120
+ # Create schedulers
121
+ warmup_scheduler = LinearLR(
122
+ optimizer,
123
+ start_factor=1e-9, # Start at nearly 0
124
+ end_factor=1.0, # End at base learning rate
125
+ total_iters=5000 # 5k warmup steps
126
+ )
127
+
128
+ constant_scheduler = ConstantLR(
129
+ optimizer,
130
+ factor=1.0, # Keep learning rate constant
131
+ total_iters=float('inf') # Run indefinitely
132
+ )
133
+
134
+ # Combine schedulers: warmup for 5k steps, then constant
135
+ scheduler = SequentialLR(
136
+ optimizer,
137
+ schedulers=[warmup_scheduler, constant_scheduler],
138
+ milestones=[5000] # Switch after 5k steps
139
+ )
140
+
141
+ return model, optimizer, scheduler
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
 
145
  def save_model(model, model_name, info_dict):
 
242
  return info_dict
243
 
244
 
245
+ def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict, model_type='llm'):
246
  """Update parameters and learning rate"""
247
+
248
+ #Define key components based on model type
249
+ if model_type == 'llm':
250
+ key_components = {
251
+ # Text processing components
252
+ 'text_embedding': [],
253
+ 'text_encoder': [],
254
+ 'text_encoder_affine': [],
255
+
256
+ # LLM core components
257
+ 'llm_embedding': [],
258
+ 'llm.model': [], # Qwen2 model layers
259
+ 'llm_decoder': [],
260
+
261
+ # Speech components
262
+ 'speech_embedding': [],
263
+ 'spk_embed_affine': [],
264
+
265
+ # Other components
266
+ 'other': []
267
+ }
268
+ elif model_type == 'flow':
269
+ key_components = {
270
+ # Input processing
271
+ 'input_embedding': [],
272
+ 'spk_embed_affine': [],
273
+
274
+ # Encoder components
275
+ 'encoder': [],
276
+ 'encoder_proj': [],
277
+
278
+ # Flow/Diffusion components
279
+ 'decoder.cfm': [], # Conditional Flow Matching
280
+ 'decoder.unet': [], # UNet backbone
281
+ 'decoder.estimator': [], # Score/velocity estimator
282
+ 'decoder.time_embedding': [], # Time embeddings
283
+ 'decoder.conv': [], # Convolutional layers
284
+ 'decoder.attention': [], # Attention layers
285
+
286
+ # Length regulation
287
+ 'length_regulator': [],
288
+
289
+ # Other components
290
+ 'other': []
291
+ }
292
+
293
  grad_norm = 0.0
294
+ layer_grad_norms = {}
295
+
296
+ if (info_dict['batch_idx'] + 1) % info_dict["accum_grad"] == 0:
297
+
298
+ for name, param in model.named_parameters():
299
+ if param.grad is not None:
300
+ # Calculate gradient norm for this parameter
301
+ param_grad_norm = param.grad.data.norm(2).item()
302
+ layer_grad_norms[name] = param_grad_norm
303
+
304
+ # Categorize into key components
305
+ categorized = False
306
+ for component_key in key_components:
307
+ if component_key != 'other':
308
+ # Special handling for decoder sub-components in flow models
309
+ if model_type == 'flow' and component_key.startswith('decoder.'):
310
+ component_pattern = component_key.replace('decoder.', '')
311
+ if 'decoder' in name and component_pattern in name:
312
+ key_components[component_key].append((name, param_grad_norm))
313
+ categorized = True
314
+ break
315
+ elif component_key in name:
316
+ key_components[component_key].append((name, param_grad_norm))
317
+ categorized = True
318
+ break
319
+ if not categorized:
320
+ key_components['other'].append((name, param_grad_norm))
321
+
322
  # Use mixed precision training
323
  if scaler is not None:
324
  scaler.unscale_(optimizer)
325
  grad_norm = clip_grad_norm_(model.parameters(), info_dict['grad_clip'])
 
 
 
326
  if torch.isfinite(grad_norm):
327
  scaler.step(optimizer)
328
  else:
 
338
  scheduler.step()
339
  info_dict["lr"] = optimizer.param_groups[0]['lr']
340
  info_dict["grad_norm"] = grad_norm
341
+ info_dict["layer_grad_norms"] = layer_grad_norms
342
+ info_dict["key_component_grads"] = key_components
343
  return info_dict
344
 
345
+ def log_per_step(experiment, info_dict):
346
+ """Log per step using Comet ML"""
 
347
  tag = info_dict["tag"]
348
  epoch = info_dict.get('epoch', 0)
349
  step = info_dict["step"]
 
351
  loss_dict = info_dict['loss_dict']
352
  rank = int(os.environ.get('RANK', 0))
353
 
354
+ # Only rank 0 writes to Comet ML to avoid multi-process write
355
+ if experiment is not None and rank == 0:
356
  if (info_dict['train_engine'] == 'deepspeed' and info_dict['is_gradient_accumulation_boundary'] is True) or \
357
  (info_dict['train_engine'] == 'torch_ddp' and (info_dict['batch_idx'] + 1) % info_dict['accum_grad'] == 0):
358
+ # Log metrics to Comet ML
359
+ experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
360
+ experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
361
+ experiment.log_metric(f'{tag}_grad_norm', info_dict['grad_norm'], step=step + 1)
362
+
363
+ # Log all losses
364
  for k, v in loss_dict.items():
365
+ if isinstance(v, torch.Tensor):
366
+ v = v.item()
367
+ experiment.log_metric(f'{tag}_{k}', v, step=step + 1)
368
 
369
  # TRAIN & CV, Shell log (stdout)
370
  if (info_dict['batch_idx'] + 1) % info_dict['log_interval'] == 0:
371
  log_str = f'{tag} Batch {epoch}/{batch_idx + 1} '
372
  for name, value in loss_dict.items():
373
+ if isinstance(value, torch.Tensor):
374
+ value = value.item()
375
  log_str += f'{name} {value:.6f} '
376
  if tag == "TRAIN":
377
  log_str += f'lr {info_dict["lr"]:.8f} grad_norm {info_dict["grad_norm"]:.6f}'
378
  log_str += f' rank {rank}'
379
  logging.debug(log_str)
380
 
381
+ def log_per_save(experiment, info_dict):
382
+ """Log per save using Comet ML"""
 
383
  tag = info_dict["tag"]
384
  epoch = info_dict["epoch"]
385
  step = info_dict["step"]
386
  loss_dict = info_dict["loss_dict"]
387
  lr = info_dict['lr']
388
  rank = int(os.environ.get('RANK', 0))
389
+
390
+ # Create loss string for logging
391
+ loss_str = ' '.join([f"{k} {v.item() if isinstance(v, torch.Tensor) else v}" for k, v in loss_dict.items()])
392
+ logger.info(f'Epoch {epoch} Step {step + 1} CV info lr {lr} {rank} {loss_str}')
393
+
394
+ if experiment is not None and rank == 0:
395
+ # Log metrics to Comet ML
396
+ experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
397
+ experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
398
+
399
+ # Log all losses
400
  for k, v in loss_dict.items():
401
+ if isinstance(v, torch.Tensor):
402
+ v = v.item()
403
+ experiment.log_metric(f'{tag}_{k}', v, step=step + 1)
404
+
405
+ # Log additional validation info
406
+ if tag == "CV":
407
+ # Calculate average CV loss for the epoch
408
+ avg_loss = loss_dict.get('loss', 0)
409
+ if isinstance(avg_loss, torch.Tensor):
410
+ avg_loss = avg_loss.item()
411
+ experiment.log_metric('cv_avg_loss_per_epoch', avg_loss, epoch=epoch)
speech/cosyvoice2.yaml ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set random seed, so that you may reproduce your result.
2
+ __set_seed1: !apply:random.seed [1986]
3
+ __set_seed2: !apply:numpy.random.seed [1986]
4
+ __set_seed3: !apply:torch.manual_seed [1986]
5
+ __set_seed4: !apply:torch.cuda.manual_seed_all [1986]
6
+
7
+ # fixed params
8
+ sample_rate: 24000
9
+ llm_input_size: 896
10
+ llm_output_size: 896
11
+ spk_embed_dim: 192
12
+ qwen_pretrain_path: ''
13
+ token_frame_rate: 25
14
+ token_mel_ratio: 2
15
+
16
+ # stream related params
17
+ chunk_size: 25 # streaming inference chunk size, in token
18
+ num_decoding_left_chunks: -1 # streaming inference flow decoder left chunk size, <0 means use all left chunks
19
+
20
+ # model params
21
+ # for all class/function included in this repo, we use !<name> or !<new> for initialization, so that users may find all corresponding class/function according to one single yaml.
22
+ # for system/third_party class/function, we do not require this.
23
+ llm: !new:cosyvoice.llm.llm.Qwen2LM
24
+ llm_input_size: !ref <llm_input_size>
25
+ llm_output_size: !ref <llm_output_size>
26
+ speech_token_size: 6561
27
+ length_normalized_loss: True
28
+ lsm_weight: 0
29
+ mix_ratio: [5, 15]
30
+ llm: !new:cosyvoice.llm.llm.Qwen2Encoder
31
+ pretrain_path: !ref <qwen_pretrain_path>
32
+ sampling: !name:cosyvoice.utils.common.ras_sampling
33
+ top_p: 0.8
34
+ top_k: 25
35
+ win_size: 10
36
+ tau_r: 0.1
37
+
38
+ flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
39
+ input_size: 512
40
+ output_size: 80
41
+ spk_embed_dim: !ref <spk_embed_dim>
42
+ output_type: 'mel'
43
+ vocab_size: 6561
44
+ input_frame_rate: !ref <token_frame_rate>
45
+ only_mask_loss: True
46
+ token_mel_ratio: !ref <token_mel_ratio>
47
+ pre_lookahead_len: 3
48
+ encoder: !new:cosyvoice.transformer.upsample_encoder.UpsampleConformerEncoder
49
+ output_size: 512
50
+ attention_heads: 8
51
+ linear_units: 2048
52
+ num_blocks: 6
53
+ dropout_rate: 0.1
54
+ positional_dropout_rate: 0.1
55
+ attention_dropout_rate: 0.1
56
+ normalize_before: True
57
+ input_layer: 'linear'
58
+ pos_enc_layer_type: 'rel_pos_espnet'
59
+ selfattention_layer_type: 'rel_selfattn'
60
+ input_size: 512
61
+ use_cnn_module: False
62
+ macaron_style: False
63
+ static_chunk_size: !ref <chunk_size>
64
+ decoder: !new:cosyvoice.flow.flow_matching.CausalConditionalCFM
65
+ in_channels: 240
66
+ n_spks: 1
67
+ spk_emb_dim: 80
68
+ cfm_params: !new:omegaconf.DictConfig
69
+ content:
70
+ sigma_min: 1e-06
71
+ solver: 'euler'
72
+ t_scheduler: 'cosine'
73
+ training_cfg_rate: 0.2
74
+ inference_cfg_rate: 0.7
75
+ reg_loss_type: 'l1'
76
+ estimator: !new:cosyvoice.flow.decoder.CausalConditionalDecoder
77
+ in_channels: 320
78
+ out_channels: 80
79
+ channels: [256]
80
+ dropout: 0.0
81
+ attention_head_dim: 64
82
+ n_blocks: 4
83
+ num_mid_blocks: 12
84
+ num_heads: 8
85
+ act_fn: 'gelu'
86
+ static_chunk_size: !ref <chunk_size> * <token_mel_ratio>
87
+ num_decoding_left_chunks: !ref <num_decoding_left_chunks>
88
+
89
+ hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
90
+ in_channels: 80
91
+ base_channels: 512
92
+ nb_harmonics: 8
93
+ sampling_rate: !ref <sample_rate>
94
+ nsf_alpha: 0.1
95
+ nsf_sigma: 0.003
96
+ nsf_voiced_threshold: 10
97
+ upsample_rates: [8, 5, 3]
98
+ upsample_kernel_sizes: [16, 11, 7]
99
+ istft_params:
100
+ n_fft: 16
101
+ hop_len: 4
102
+ resblock_kernel_sizes: [3, 7, 11]
103
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
104
+ source_resblock_kernel_sizes: [7, 7, 11]
105
+ source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
106
+ lrelu_slope: 0.1
107
+ audio_limit: 0.99
108
+ f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
109
+ num_class: 1
110
+ in_channels: 80
111
+ cond_channels: 512
112
+
113
+ # gan related module
114
+ mel_spec_transform1: !name:matcha.utils.audio.mel_spectrogram
115
+ n_fft: 1920
116
+ num_mels: 80
117
+ sampling_rate: !ref <sample_rate>
118
+ hop_size: 480
119
+ win_size: 1920
120
+ fmin: 0
121
+ fmax: null
122
+ center: False
123
+ hifigan: !new:cosyvoice.hifigan.hifigan.HiFiGan
124
+ generator: !ref <hift>
125
+ discriminator: !new:cosyvoice.hifigan.discriminator.MultipleDiscriminator
126
+ mpd: !new:matcha.hifigan.models.MultiPeriodDiscriminator
127
+ mrd: !new:cosyvoice.hifigan.discriminator.MultiResSpecDiscriminator
128
+ mel_spec_transform: [
129
+ !ref <mel_spec_transform1>
130
+ ]
131
+
132
+ # processor functions
133
+ parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
134
+ get_tokenizer: !name:cosyvoice.tokenizer.tokenizer.get_qwen_tokenizer
135
+ token_path: !ref <qwen_pretrain_path>
136
+ skip_special_tokens: True
137
+ allowed_special: 'all'
138
+ tokenize: !name:cosyvoice.dataset.processor.tokenize
139
+ get_tokenizer: !ref <get_tokenizer>
140
+ allowed_special: !ref <allowed_special>
141
+ filter: !name:cosyvoice.dataset.processor.filter
142
+ max_length: 40960
143
+ min_length: 100
144
+ token_max_length: 200
145
+ token_min_length: 1
146
+ resample: !name:cosyvoice.dataset.processor.resample
147
+ resample_rate: !ref <sample_rate>
148
+ truncate: !name:cosyvoice.dataset.processor.truncate
149
+ truncate_length: 24480 # must be a multiple of hop_size
150
+ feat_extractor: !name:matcha.utils.audio.mel_spectrogram
151
+ n_fft: 1920
152
+ num_mels: 80
153
+ sampling_rate: !ref <sample_rate>
154
+ hop_size: 480
155
+ win_size: 1920
156
+ fmin: 0
157
+ fmax: 8000
158
+ center: False
159
+ compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
160
+ feat_extractor: !ref <feat_extractor>
161
+ compute_f0: !name:cosyvoice.dataset.processor.compute_f0
162
+ sample_rate: !ref <sample_rate>
163
+ hop_size: 480
164
+ parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
165
+ normalize: True
166
+ shuffle: !name:cosyvoice.dataset.processor.shuffle
167
+ shuffle_size: 1000
168
+ sort: !name:cosyvoice.dataset.processor.sort
169
+ sort_size: 500 # sort_size should be less than shuffle_size
170
+ batch: !name:cosyvoice.dataset.processor.batch
171
+ batch_type: 'dynamic'
172
+ max_frames_in_batch: 2000
173
+ padding: !name:cosyvoice.dataset.processor.padding
174
+ use_spk_embedding: False # change to True during sft
175
+
176
+
177
+ # dataset processor pipeline
178
+ data_pipeline: [
179
+ !ref <parquet_opener>,
180
+ !ref <tokenize>,
181
+ !ref <filter>,
182
+ !ref <resample>,
183
+ !ref <compute_fbank>,
184
+ !ref <parse_embedding>,
185
+ !ref <shuffle>,
186
+ !ref <sort>,
187
+ !ref <batch>,
188
+ !ref <padding>,
189
+ ]
190
+ data_pipeline_gan: [
191
+ !ref <parquet_opener>,
192
+ !ref <tokenize>,
193
+ !ref <filter>,
194
+ !ref <resample>,
195
+ !ref <truncate>,
196
+ !ref <compute_fbank>,
197
+ !ref <compute_f0>,
198
+ !ref <parse_embedding>,
199
+ !ref <shuffle>,
200
+ !ref <sort>,
201
+ !ref <batch>,
202
+ !ref <padding>,
203
+ ]
204
+
205
+ # llm flow train conf
206
+ train_conf:
207
+ optim: adamw
208
+ optim_conf:
209
+ lr: 1e-5 # 1e-5 is the value to use during sft
210
+ scheduler: constantlr # constantlr is the scheduler to use during sft
211
+ scheduler_conf:
212
+ warmup_steps: 2500
213
+ max_epoch: 200
214
+ grad_clip: 1
215
+ accum_grad: 1
216
+ log_interval: 100
217
+ save_per_step: -1
speech/{third_party/Matcha-TTS/matcha → matcha}/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/app.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/cli.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/LICENSE RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/README.md RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/config.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/denoiser.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/env.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/meldataset.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/models.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/xutils.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/baselightningmodule.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/decoder.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/flow_matching.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/text_encoder.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/transformer.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/models/matcha_tts.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/export.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/infer.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/text/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/text/cleaners.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/text/numbers.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/text/symbols.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/audio.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/generate_data_statistics.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/instantiators.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/logging_utils.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/model.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/__init__.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/core.pyx RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/setup.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/pylogger.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/rich_utils.py RENAMED
File without changes
speech/{third_party/Matcha-TTS/matcha → matcha}/utils/utils.py RENAMED
File without changes
speech/third_party/Matcha-TTS/.env.example DELETED
@@ -1,6 +0,0 @@
1
- # example of file for storing private and user specific environment variables, like keys or system paths
2
- # rename it to ".env" (excluded from version control by default)
3
- # .env is loaded by train.py automatically
4
- # hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}
5
-
6
- MY_VAR="/home/user/my/system/path"
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md DELETED
@@ -1,22 +0,0 @@
1
- ## What does this PR do?
2
-
3
- <!--
4
- Please include a summary of the change and which issue is fixed.
5
- Please also include relevant motivation and context.
6
- List any dependencies that are required for this change.
7
- List all the breaking changes introduced by this pull request.
8
- -->
9
-
10
- Fixes #\<issue_number>
11
-
12
- ## Before submitting
13
-
14
- - [ ] Did you make sure **title is self-explanatory** and **the description concisely explains the PR**?
15
- - [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
16
- - [ ] Did you list all the **breaking changes** introduced by this pull request?
17
- - [ ] Did you **test your PR locally** with `pytest` command?
18
- - [ ] Did you **run pre-commit hooks** with `pre-commit run -a` command?
19
-
20
- ## Did you have fun?
21
-
22
- Make sure you had fun coding 🙃
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.github/codecov.yml DELETED
@@ -1,15 +0,0 @@
1
- coverage:
2
- status:
3
- # measures overall project coverage
4
- project:
5
- default:
6
- threshold: 100% # how much decrease in coverage is needed to not consider success
7
-
8
- # measures PR or single commit coverage
9
- patch:
10
- default:
11
- threshold: 100% # how much decrease in coverage is needed to not consider success
12
-
13
-
14
- # project: off
15
- # patch: off
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.github/dependabot.yml DELETED
@@ -1,17 +0,0 @@
1
- # To get started with Dependabot version updates, you'll need to specify which
2
- # package ecosystems to update and where the package manifests are located.
3
- # Please see the documentation for all configuration options:
4
- # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
-
6
- version: 2
7
- updates:
8
- - package-ecosystem: "pip" # See documentation for possible values
9
- directory: "/" # Location of package manifests
10
- target-branch: "dev"
11
- schedule:
12
- interval: "daily"
13
- ignore:
14
- - dependency-name: "pytorch-lightning"
15
- update-types: ["version-update:semver-patch"]
16
- - dependency-name: "torchmetrics"
17
- update-types: ["version-update:semver-patch"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.github/release-drafter.yml DELETED
@@ -1,44 +0,0 @@
1
- name-template: "v$RESOLVED_VERSION"
2
- tag-template: "v$RESOLVED_VERSION"
3
-
4
- categories:
5
- - title: "🚀 Features"
6
- labels:
7
- - "feature"
8
- - "enhancement"
9
- - title: "🐛 Bug Fixes"
10
- labels:
11
- - "fix"
12
- - "bugfix"
13
- - "bug"
14
- - title: "🧹 Maintenance"
15
- labels:
16
- - "maintenance"
17
- - "dependencies"
18
- - "refactoring"
19
- - "cosmetic"
20
- - "chore"
21
- - title: "📝️ Documentation"
22
- labels:
23
- - "documentation"
24
- - "docs"
25
-
26
- change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
27
- change-title-escapes: '\<*_&' # You can add # and @ to disable mentions
28
-
29
- version-resolver:
30
- major:
31
- labels:
32
- - "major"
33
- minor:
34
- labels:
35
- - "minor"
36
- patch:
37
- labels:
38
- - "patch"
39
- default: patch
40
-
41
- template: |
42
- ## Changes
43
-
44
- $CHANGES
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.gitignore DELETED
@@ -1,163 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- pip-wheel-metadata/
24
- share/python-wheels/
25
- *.egg-info/
26
- .installed.cfg
27
- *.egg
28
- MANIFEST
29
-
30
- # PyInstaller
31
- # Usually these files are written by a python script from a template
32
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
- *.manifest
34
- *.spec
35
-
36
- # Installer logs
37
- pip-log.txt
38
- pip-delete-this-directory.txt
39
-
40
- # Unit test / coverage reports
41
- htmlcov/
42
- .tox/
43
- .nox/
44
- .coverage
45
- .coverage.*
46
- .cache
47
- nosetests.xml
48
- coverage.xml
49
- *.cover
50
- *.py,cover
51
- .hypothesis/
52
- .pytest_cache/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- target/
76
-
77
- # Jupyter Notebook
78
- .ipynb_checkpoints
79
-
80
- # IPython
81
- profile_default/
82
- ipython_config.py
83
-
84
- # pyenv
85
- .python-version
86
-
87
- # pipenv
88
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
- # install all needed dependencies.
92
- #Pipfile.lock
93
-
94
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
- __pypackages__/
96
-
97
- # Celery stuff
98
- celerybeat-schedule
99
- celerybeat.pid
100
-
101
- # SageMath parsed files
102
- *.sage.py
103
-
104
- # Environments
105
- .venv
106
- env/
107
- venv/
108
- ENV/
109
- env.bak/
110
- venv.bak/
111
-
112
- # Spyder project settings
113
- .spyderproject
114
- .spyproject
115
-
116
- # Rope project settings
117
- .ropeproject
118
-
119
- # mkdocs documentation
120
- /site
121
-
122
- # mypy
123
- .mypy_cache/
124
- .dmypy.json
125
- dmypy.json
126
-
127
- # Pyre type checker
128
- .pyre/
129
-
130
- ### VisualStudioCode
131
- .vscode/*
132
- !.vscode/settings.json
133
- !.vscode/tasks.json
134
- !.vscode/launch.json
135
- !.vscode/extensions.json
136
- *.code-workspace
137
- **/.vscode
138
-
139
- # JetBrains
140
- .idea/
141
-
142
- # Data & Models
143
- *.h5
144
- *.tar
145
- *.tar.gz
146
-
147
- # Lightning-Hydra-Template
148
- configs/local/default.yaml
149
- /data/
150
- /logs/
151
- .env
152
-
153
- # Aim logging
154
- .aim
155
-
156
- # Cython complied files
157
- matcha/utils/monotonic_align/core.c
158
-
159
- # Ignoring hifigan checkpoint
160
- generator_v1
161
- g_02500000
162
- gradio_cached_examples/
163
- synth_output/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.pre-commit-config.yaml DELETED
@@ -1,59 +0,0 @@
1
- default_language_version:
2
- python: python3.10
3
-
4
- repos:
5
- - repo: https://github.com/pre-commit/pre-commit-hooks
6
- rev: v4.5.0
7
- hooks:
8
- # list of supported hooks: https://pre-commit.com/hooks.html
9
- - id: trailing-whitespace
10
- - id: end-of-file-fixer
11
- # - id: check-docstring-first
12
- - id: check-yaml
13
- - id: debug-statements
14
- - id: detect-private-key
15
- - id: check-toml
16
- - id: check-case-conflict
17
- - id: check-added-large-files
18
-
19
- # python code formatting
20
- - repo: https://github.com/psf/black
21
- rev: 23.12.1
22
- hooks:
23
- - id: black
24
- args: [--line-length, "120"]
25
-
26
- # python import sorting
27
- - repo: https://github.com/PyCQA/isort
28
- rev: 5.13.2
29
- hooks:
30
- - id: isort
31
- args: ["--profile", "black", "--filter-files"]
32
-
33
- # python upgrading syntax to newer version
34
- - repo: https://github.com/asottile/pyupgrade
35
- rev: v3.15.0
36
- hooks:
37
- - id: pyupgrade
38
- args: [--py38-plus]
39
-
40
- # python check (PEP8), programming errors and code complexity
41
- - repo: https://github.com/PyCQA/flake8
42
- rev: 7.0.0
43
- hooks:
44
- - id: flake8
45
- args:
46
- [
47
- "--max-line-length", "120",
48
- "--extend-ignore",
49
- "E203,E402,E501,F401,F841,RST2,RST301",
50
- "--exclude",
51
- "logs/*,data/*,matcha/hifigan/*",
52
- ]
53
- additional_dependencies: [flake8-rst-docstrings==0.3.0]
54
-
55
- # pylint
56
- - repo: https://github.com/pycqa/pylint
57
- rev: v3.0.3
58
- hooks:
59
- - id: pylint
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
speech/third_party/Matcha-TTS/.project-root DELETED
@@ -1,2 +0,0 @@
1
- # this file is required for inferring the project root directory
2
- # do not delete