Commit a7dc8e9 · committed by Ubuntu · 1 parent: 434855f

update training code

This view is limited to 50 files because it contains too many changes. See raw diff.
- speech/cosyvoice/utils/executor.py +9 -94
- speech/cosyvoice/utils/train_utils.py +150 -114
- speech/cosyvoice2.yaml +217 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/app.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/cli.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/LICENSE +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/README.md +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/config.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/denoiser.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/env.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/meldataset.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/models.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/hifigan/xutils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/baselightningmodule.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/decoder.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/flow_matching.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/text_encoder.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/components/transformer.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/models/matcha_tts.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/export.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/onnx/infer.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/cleaners.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/numbers.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/text/symbols.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/audio.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/generate_data_statistics.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/instantiators.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/logging_utils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/model.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/__init__.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/core.pyx +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/monotonic_align/setup.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/pylogger.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/rich_utils.py +0 -0
- speech/{third_party/Matcha-TTS/matcha → matcha}/utils/utils.py +0 -0
- speech/third_party/Matcha-TTS/.env.example +0 -6
- speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md +0 -22
- speech/third_party/Matcha-TTS/.github/codecov.yml +0 -15
- speech/third_party/Matcha-TTS/.github/dependabot.yml +0 -17
- speech/third_party/Matcha-TTS/.github/release-drafter.yml +0 -44
- speech/third_party/Matcha-TTS/.gitignore +0 -163
- speech/third_party/Matcha-TTS/.pre-commit-config.yaml +0 -59
- speech/third_party/Matcha-TTS/.project-root +0 -2
speech/cosyvoice/utils/executor.py
CHANGED

@@ -49,10 +49,11 @@ class Executor:
         scheduler,
         train_data_loader,
         cv_data_loader,
-        …
+        experiment,
         info_dict,
         scaler,
         group_join,
+        model_type
     ):
         """Train one epoch"""

@@ -101,10 +102,10 @@ class Executor:
                 info_dict = batch_backward(model, scaler, info_dict)

                 info_dict = update_parameter_and_lr(
-                    model, optimizer, scheduler, scaler, info_dict
+                    model, optimizer, scheduler, scaler, info_dict, model_type=model_type
                 )
-                log_per_step(…
-                …
+                log_per_step(experiment, info_dict)
+
                 if (
                     info_dict["save_per_step"] > 0
                     and (self.step + 1) % info_dict["save_per_step"] == 0
@@ -112,102 +113,16 @@ class Executor:
                 ):
                     dist.barrier()
                     self.cv(
-                        model, cv_data_loader, …
+                        model, cv_data_loader, experiment, info_dict, on_batch_end=False
                     )
                     model.train()
                 if (batch_idx + 1) % info_dict["accum_grad"] == 0:
                     self.step += 1
         dist.barrier()
-        self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
-
-    def train_one_epoc_gan(
-        self,
-        model,
-        optimizer,
-        scheduler,
-        optimizer_d,
-        scheduler_d,
-        train_data_loader,
-        cv_data_loader,
-        writer,
-        info_dict,
-        scaler,
-        group_join,
-    ):
-        """Train one epoch"""
-
-        lr = optimizer.param_groups[0]["lr"]
-        logger.info(
-            f"Epoch {self.epoch} TRAIN info lr {lr} rank {self.rank}"
-        )
-        logger.info(
-            f"using accumulate grad, new batch size is {info_dict['accum_grad']} times larger than before"
-        )
-        # A context manager to be used in conjunction with an instance of
-        # torch.nn.parallel.DistributedDataParallel to be able to train
-        # with uneven inputs across participating processes.
-        model.train()
-        model_context = (
-            model.join if info_dict["train_engine"] == "torch_ddp" else nullcontext
-        )
-        with model_context():
-            for batch_idx, batch_dict in enumerate(train_data_loader):
-                info_dict["tag"] = "TRAIN"
-                info_dict["step"] = self.step
-                info_dict["epoch"] = self.epoch
-                info_dict["batch_idx"] = batch_idx
-                if cosyvoice_join(group_join, info_dict):
-                    break
-
-                # Disable gradient synchronizations across DDP processes.
-                # Within this context, gradients will be accumulated on module
-                # variables, which will later be synchronized.
-                if (
-                    info_dict["train_engine"] == "torch_ddp"
-                    and (batch_idx + 1) % info_dict["accum_grad"] != 0
-                ):
-                    context = model.no_sync
-                # Used for single gpu training and DDP gradient synchronization
-                # processes.
-                else:
-                    context = nullcontext
-
-                with context():
-                    batch_dict["turn"] = "discriminator"
-                    info_dict = batch_forward(model, batch_dict, scaler, info_dict)
-                    info_dict = batch_backward(model, scaler, info_dict)
-                    info_dict = update_parameter_and_lr(
-                        model, optimizer_d, scheduler_d, scaler, info_dict
-                    )
-                optimizer.zero_grad()
-                log_per_step(writer, info_dict)
-                with context():
-                    batch_dict["turn"] = "generator"
-                    info_dict = batch_forward(model, batch_dict, scaler, info_dict)
-                    info_dict = batch_backward(model, scaler, info_dict)
-                    info_dict = update_parameter_and_lr(
-                        model, optimizer, scheduler, scaler, info_dict
-                    )
-                optimizer_d.zero_grad()
-                log_per_step(writer, info_dict)
-                # NOTE specify save_per_step in cosyvoice.yaml if you want to enable step save
-                if (
-                    info_dict["save_per_step"] > 0
-                    and (self.step + 1) % info_dict["save_per_step"] == 0
-                    and (batch_idx + 1) % info_dict["accum_grad"] == 0
-                ):
-                    dist.barrier()
-                    self.cv(
-                        model, cv_data_loader, writer, info_dict, on_batch_end=False
-                    )
-                    model.train()
-                if (batch_idx + 1) % info_dict["accum_grad"] == 0:
-                    self.step += 1
-        dist.barrier()
-        # self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)
+        #self.cv(model, cv_data_loader, writer, info_dict, on_batch_end=True)

     @torch.inference_mode()
-    def cv(self, model, cv_data_loader, …
+    def cv(self, model, cv_data_loader, experiment, info_dict, on_batch_end=True):
         """Cross validation on"""
         logger.info(f"Epoch {self.epoch} Step {self.step + 1} on_batch_end {on_batch_end} CV rank {self.rank}")
         model.eval()
@@ -233,7 +148,7 @@ class Executor:
         for k, v in total_loss_dict.items():
             total_loss_dict[k] = sum(v) / total_num_utts
         info_dict["loss_dict"] = total_loss_dict
-        log_per_save(…
+        log_per_save(experiment, info_dict)
         model_name = (
             f"epoch_{self.epoch}_whole"
             if on_batch_end
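Across these hunks the old TensorBoard writer argument is replaced by an experiment object plus a model_type flag. A minimal sketch, not part of the commit, of how such an experiment could be created and passed to the executor; it assumes the Comet ML SDK (implied by the log_metric calls added in train_utils.py below) and uses a placeholder project name:

import os
from comet_ml import Experiment  # assumption: Comet ML is the tracking backend

rank = int(os.environ.get("RANK", 0))
# Only rank 0 creates a real experiment; other ranks pass None, matching the
# rank checks added in log_per_step / log_per_save.
experiment = Experiment(project_name="cosyvoice2-training") if rank == 0 else None
# The object is then passed in place of the old TensorBoard `writer` wherever
# the executor's train/cv methods are called, together with model_type='llm' or 'flow'.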
speech/cosyvoice/utils/train_utils.py
CHANGED

@@ -26,15 +26,14 @@ import deepspeed
 import torch.optim as optim
 import torch.distributed as dist

-from torch.utils.tensorboard import SummaryWriter
 from torch.utils.data import DataLoader
 from torch.nn.utils import clip_grad_norm_
 from loguru import logger
 from deepspeed.runtime.zero.stage_1_and_2 import estimate_zero2_model_states_mem_needs_all_live

 from cosyvoice.dataset.dataset import Dataset
-from cosyvoice.utils.scheduler import WarmupLR, NoamHoldAnnealing, ConstantLR

+from torch.optim.lr_scheduler import LinearLR, ConstantLR, SequentialLR

 def init_distributed(args):
     world_size = int(os.environ.get('WORLD_SIZE', 1))
@@ -49,10 +48,10 @@ def init_distributed(args):
     return world_size, local_rank, rank


-def init_dataset_and_dataloader(args, configs, …
-    data_pipeline = configs['…
-    train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=…
-    cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=…
+def init_dataset_and_dataloader(args, configs, dpo):
+    data_pipeline = configs['data_pipeline']
+    train_dataset = Dataset(args.train_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=True, partition=True)
+    cv_dataset = Dataset(args.cv_data, data_pipeline=data_pipeline, mode='train', gan=False, dpo=dpo, shuffle=False, partition=False)

     # do not use persistent_workers=True, as whisper tokenizer opens tiktoken file each time when the for loop starts
     train_data_loader = DataLoader(train_dataset,
@@ -109,90 +108,38 @@ def wrap_cuda_model(args, model):
     return model


-def init_optimizer_and_scheduler(…
+def init_optimizer_and_scheduler(configs, model):
     """Init optimizer and scheduler"""
-    if …
-    …
-    …
-    …
-        optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
-    else:
-        raise ValueError("unknown optimizer: " + configs['train_conf'])
-
-    if configs['train_conf']['scheduler'] == 'warmuplr':
-        scheduler_type = WarmupLR
-        scheduler = WarmupLR(optimizer, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'NoamHoldAnnealing':
-        scheduler_type = NoamHoldAnnealing
-        scheduler = NoamHoldAnnealing(optimizer, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'constantlr':
-        scheduler_type = ConstantLR
-        scheduler = ConstantLR(optimizer)
-    else:
-        raise ValueError("unknown scheduler: " + configs['train_conf'])
-
-    # use deepspeed optimizer for speedup
-    if args.train_engine == "deepspeed":
-        def scheduler(opt):
-            return scheduler_type(opt, **configs['train_conf']['scheduler_conf'])
-        model, optimizer, _, scheduler = deepspeed.initialize(
-            args=args,
-            model=model,
-            optimizer=None,
-            lr_scheduler=scheduler,
-            model_parameters=model.parameters())
-
-        optimizer_d, scheduler_d = None, None
-
+    if configs['train_conf']['optim'] == 'adam':
+        optimizer = optim.Adam(model.parameters(), **configs['train_conf']['optim_conf'])
+    elif configs['train_conf']['optim'] == 'adamw':
+        optimizer = optim.AdamW(model.parameters(), **configs['train_conf']['optim_conf'])
     else:
-        … (removed lines are truncated in this view)
+        raise ValueError("unknown optimizer: " + configs['train_conf'])
+
+    # Create schedulers
+    warmup_scheduler = LinearLR(
+        optimizer,
+        start_factor=1e-9,  # Start at nearly 0
+        end_factor=1.0,     # End at base learning rate
+        total_iters=5000    # 5k warmup steps
+    )
+
+    constant_scheduler = ConstantLR(
+        optimizer,
+        factor=1.0,               # Keep learning rate constant
+        total_iters=float('inf')  # Run indefinitely
+    )
+
+    # Combine schedulers: warmup for 5k steps, then constant
+    scheduler = SequentialLR(
+        optimizer,
+        schedulers=[warmup_scheduler, constant_scheduler],
+        milestones=[5000]  # Switch after 5k steps
+    )
+
+    return model, optimizer, scheduler

-    if configs['train_conf']['optim_d'] == 'adam':
-        optimizer_d = optim.Adam(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
-    elif configs['train_conf']['optim_d'] == 'adamw':
-        optimizer_d = optim.AdamW(model.module.discriminator.parameters(), **configs['train_conf']['optim_conf'])
-    else:
-        raise ValueError("unknown optimizer: " + configs['train_conf'])
-
-    if configs['train_conf']['scheduler_d'] == 'warmuplr':
-        scheduler_type = WarmupLR
-        scheduler_d = WarmupLR(optimizer_d, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler_d'] == 'NoamHoldAnnealing':
-        scheduler_type = NoamHoldAnnealing
-        scheduler_d = NoamHoldAnnealing(optimizer_d, **configs['train_conf']['scheduler_conf'])
-    elif configs['train_conf']['scheduler'] == 'constantlr':
-        scheduler_type = ConstantLR
-        scheduler_d = ConstantLR(optimizer_d)
-    else:
-        raise ValueError("unknown scheduler: " + configs['train_conf'])
-    return model, optimizer, scheduler, optimizer_d, scheduler_d
-
-
-def init_summarywriter(args):
-    """Init summary writer"""
-    writer = None
-    if int(os.environ.get('RANK', 0)) == 0:
-        os.makedirs(args.model_dir, exist_ok=True)
-        writer = SummaryWriter(args.tensorboard_dir)
-    return writer


 def save_model(model, model_name, info_dict):
@@ -295,21 +242,87 @@ def batch_backward(model, scaler, info_dict):
     return info_dict


-def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
+def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict, model_type='llm'):
     """Update parameters and learning rate"""
+
+    #Define key components based on model type
+    if model_type == 'llm':
+        key_components = {
+            # Text processing components
+            'text_embedding': [],
+            'text_encoder': [],
+            'text_encoder_affine': [],
+
+            # LLM core components
+            'llm_embedding': [],
+            'llm.model': [],  # Qwen2 model layers
+            'llm_decoder': [],
+
+            # Speech components
+            'speech_embedding': [],
+            'spk_embed_affine': [],
+
+            # Other components
+            'other': []
+        }
+    elif model_type == 'flow':
+        key_components = {
+            # Input processing
+            'input_embedding': [],
+            'spk_embed_affine': [],
+
+            # Encoder components
+            'encoder': [],
+            'encoder_proj': [],
+
+            # Flow/Diffusion components
+            'decoder.cfm': [],  # Conditional Flow Matching
+            'decoder.unet': [],  # UNet backbone
+            'decoder.estimator': [],  # Score/velocity estimator
+            'decoder.time_embedding': [],  # Time embeddings
+            'decoder.conv': [],  # Convolutional layers
+            'decoder.attention': [],  # Attention layers
+
+            # Length regulation
+            'length_regulator': [],
+
+            # Other components
+            'other': []
+        }
+
     grad_norm = 0.0
-    …
-    …
-    …
-    …
-    …
+    layer_grad_norms = {}
+
+    if (info_dict['batch_idx'] + 1) % info_dict["accum_grad"] == 0:
+
+        for name, param in model.named_parameters():
+            if param.grad is not None:
+                # Calculate gradient norm for this parameter
+                param_grad_norm = param.grad.data.norm(2).item()
+                layer_grad_norms[name] = param_grad_norm
+
+                # Categorize into key components
+                categorized = False
+                for component_key in key_components:
+                    if component_key != 'other':
+                        # Special handling for decoder sub-components in flow models
+                        if model_type == 'flow' and component_key.startswith('decoder.'):
+                            component_pattern = component_key.replace('decoder.', '')
+                            if 'decoder' in name and component_pattern in name:
+                                key_components[component_key].append((name, param_grad_norm))
+                                categorized = True
+                                break
+                        elif component_key in name:
+                            key_components[component_key].append((name, param_grad_norm))
+                            categorized = True
+                            break
+                if not categorized:
+                    key_components['other'].append((name, param_grad_norm))
+
     # Use mixed precision training
     if scaler is not None:
         scaler.unscale_(optimizer)
         grad_norm = clip_grad_norm_(model.parameters(), info_dict['grad_clip'])
-        # We don't check grad here since that if the gradient
-        # has inf/nan values, scaler.step will skip
-        # optimizer.step().
         if torch.isfinite(grad_norm):
             scaler.step(optimizer)
         else:
@@ -325,11 +338,12 @@ def update_parameter_and_lr(model, optimizer, scheduler, scaler, info_dict):
         scheduler.step()
     info_dict["lr"] = optimizer.param_groups[0]['lr']
     info_dict["grad_norm"] = grad_norm
+    info_dict["layer_grad_norms"] = layer_grad_norms
+    info_dict["key_component_grads"] = key_components
     return info_dict

-
-def log_per_step(writer, info_dict):
-    """Log per step"""
+def log_per_step(experiment, info_dict):
+    """Log per step using Comet ML"""
     tag = info_dict["tag"]
     epoch = info_dict.get('epoch', 0)
     step = info_dict["step"]
@@ -337,39 +351,61 @@ def log_per_step(writer, info_dict):
     loss_dict = info_dict['loss_dict']
     rank = int(os.environ.get('RANK', 0))

-    # …
-    if …
+    # Only rank 0 writes to Comet ML to avoid multi-process write
+    if experiment is not None and rank == 0:
         if (info_dict['train_engine'] == 'deepspeed' and info_dict['is_gradient_accumulation_boundary'] is True) or \
            (info_dict['train_engine'] == 'torch_ddp' and (info_dict['batch_idx'] + 1) % info_dict['accum_grad'] == 0):
-            …
-            …
+            # Log metrics to Comet ML
+            experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
+            experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
+            experiment.log_metric(f'{tag}_grad_norm', info_dict['grad_norm'], step=step + 1)
+
+            # Log all losses
             for k, v in loss_dict.items():
-                …
+                if isinstance(v, torch.Tensor):
+                    v = v.item()
+                experiment.log_metric(f'{tag}_{k}', v, step=step + 1)

     # TRAIN & CV, Shell log (stdout)
     if (info_dict['batch_idx'] + 1) % info_dict['log_interval'] == 0:
         log_str = f'{tag} Batch {epoch}/{batch_idx + 1} '
         for name, value in loss_dict.items():
+            if isinstance(value, torch.Tensor):
+                value = value.item()
             log_str += f'{name} {value:.6f} '
         if tag == "TRAIN":
             log_str += f'lr {info_dict["lr"]:.8f} grad_norm {info_dict["grad_norm"]:.6f}'
         log_str += f' rank {rank}'
         logging.debug(log_str)

-
-def log_per_save(…
-    """Log per save"""
+def log_per_save(experiment, info_dict):
+    """Log per save using Comet ML"""
     tag = info_dict["tag"]
     epoch = info_dict["epoch"]
     step = info_dict["step"]
     loss_dict = info_dict["loss_dict"]
     lr = info_dict['lr']
     rank = int(os.environ.get('RANK', 0))
-
-    …
-    …
-    …
-    …
-    …
+
+    # Create loss string for logging
+    loss_str = ' '.join([f"{k} {v.item() if isinstance(v, torch.Tensor) else v}" for k, v in loss_dict.items()])
+    logger.info(f'Epoch {epoch} Step {step + 1} CV info lr {lr} {rank} {loss_str}')
+
+    if experiment is not None and rank == 0:
+        # Log metrics to Comet ML
+        experiment.log_metric(f'{tag}_epoch', info_dict['epoch'], step=step + 1)
+        experiment.log_metric(f'{tag}_lr', info_dict['lr'], step=step + 1)
+
+        # Log all losses
         for k, v in loss_dict.items():
-            …
+            if isinstance(v, torch.Tensor):
+                v = v.item()
+            experiment.log_metric(f'{tag}_{k}', v, step=step + 1)
+
+        # Log additional validation info
+        if tag == "CV":
+            # Calculate average CV loss for the epoch
+            avg_loss = loss_dict.get('loss', 0)
+            if isinstance(avg_loss, torch.Tensor):
+                avg_loss = avg_loss.item()
+            experiment.log_metric('cv_avg_loss_per_epoch', avg_loss, epoch=epoch)
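The new init_optimizer_and_scheduler drops the WarmupLR/NoamHoldAnnealing options in favour of a fixed warmup-then-constant schedule built from stock PyTorch schedulers. A minimal sketch, not part of the commit, showing the resulting learning-rate trajectory on a dummy parameter (warmup shortened to 5 steps for readability; the commit hard-codes 5000):

import torch
from torch.optim.lr_scheduler import LinearLR, ConstantLR, SequentialLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=1e-5)  # same base lr as train_conf.optim_conf.lr

warmup_steps = 5  # illustrative; the commit uses 5000
warmup = LinearLR(optimizer, start_factor=1e-9, end_factor=1.0, total_iters=warmup_steps)
constant = ConstantLR(optimizer, factor=1.0, total_iters=warmup_steps)  # factor=1.0 keeps the base lr
scheduler = SequentialLR(optimizer, schedulers=[warmup, constant], milestones=[warmup_steps])

for step in range(10):
    print(step, optimizer.param_groups[0]["lr"])  # ramps from ~0 up to 1e-5, then stays constant
    optimizer.step()
    scheduler.step()

Because the constant stage multiplies the base rate by 1.0, its total_iters value never changes the result, which is why the committed code can pass float('inf') there without affecting behaviour.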
speech/cosyvoice2.yaml
ADDED
@@ -0,0 +1,217 @@
# set random seed, so that you may reproduce your result.
__set_seed1: !apply:random.seed [1986]
__set_seed2: !apply:numpy.random.seed [1986]
__set_seed3: !apply:torch.manual_seed [1986]
__set_seed4: !apply:torch.cuda.manual_seed_all [1986]

# fixed params
sample_rate: 24000
llm_input_size: 896
llm_output_size: 896
spk_embed_dim: 192
qwen_pretrain_path: ''
token_frame_rate: 25
token_mel_ratio: 2

# stream related params
chunk_size: 25 # streaming inference chunk size, in token
num_decoding_left_chunks: -1 # streaming inference flow decoder left chunk size, <0 means use all left chunks

# model params
# for all class/function included in this repo, we use !<name> or !<new> for intialization, so that user may find all corresponding class/function according to one single yaml.
# for system/third_party class/function, we do not require this.
llm: !new:cosyvoice.llm.llm.Qwen2LM
    llm_input_size: !ref <llm_input_size>
    llm_output_size: !ref <llm_output_size>
    speech_token_size: 6561
    length_normalized_loss: True
    lsm_weight: 0
    mix_ratio: [5, 15]
    llm: !new:cosyvoice.llm.llm.Qwen2Encoder
        pretrain_path: !ref <qwen_pretrain_path>
    sampling: !name:cosyvoice.utils.common.ras_sampling
        top_p: 0.8
        top_k: 25
        win_size: 10
        tau_r: 0.1

flow: !new:cosyvoice.flow.flow.CausalMaskedDiffWithXvec
    input_size: 512
    output_size: 80
    spk_embed_dim: !ref <spk_embed_dim>
    output_type: 'mel'
    vocab_size: 6561
    input_frame_rate: !ref <token_frame_rate>
    only_mask_loss: True
    token_mel_ratio: !ref <token_mel_ratio>
    pre_lookahead_len: 3
    encoder: !new:cosyvoice.transformer.upsample_encoder.UpsampleConformerEncoder
        output_size: 512
        attention_heads: 8
        linear_units: 2048
        num_blocks: 6
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        attention_dropout_rate: 0.1
        normalize_before: True
        input_layer: 'linear'
        pos_enc_layer_type: 'rel_pos_espnet'
        selfattention_layer_type: 'rel_selfattn'
        input_size: 512
        use_cnn_module: False
        macaron_style: False
        static_chunk_size: !ref <chunk_size>
    decoder: !new:cosyvoice.flow.flow_matching.CausalConditionalCFM
        in_channels: 240
        n_spks: 1
        spk_emb_dim: 80
        cfm_params: !new:omegaconf.DictConfig
            content:
                sigma_min: 1e-06
                solver: 'euler'
                t_scheduler: 'cosine'
                training_cfg_rate: 0.2
                inference_cfg_rate: 0.7
                reg_loss_type: 'l1'
        estimator: !new:cosyvoice.flow.decoder.CausalConditionalDecoder
            in_channels: 320
            out_channels: 80
            channels: [256]
            dropout: 0.0
            attention_head_dim: 64
            n_blocks: 4
            num_mid_blocks: 12
            num_heads: 8
            act_fn: 'gelu'
            static_chunk_size: !ref <chunk_size> * <token_mel_ratio>
            num_decoding_left_chunks: !ref <num_decoding_left_chunks>

hift: !new:cosyvoice.hifigan.generator.HiFTGenerator
    in_channels: 80
    base_channels: 512
    nb_harmonics: 8
    sampling_rate: !ref <sample_rate>
    nsf_alpha: 0.1
    nsf_sigma: 0.003
    nsf_voiced_threshold: 10
    upsample_rates: [8, 5, 3]
    upsample_kernel_sizes: [16, 11, 7]
    istft_params:
        n_fft: 16
        hop_len: 4
    resblock_kernel_sizes: [3, 7, 11]
    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    source_resblock_kernel_sizes: [7, 7, 11]
    source_resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    lrelu_slope: 0.1
    audio_limit: 0.99
    f0_predictor: !new:cosyvoice.hifigan.f0_predictor.ConvRNNF0Predictor
        num_class: 1
        in_channels: 80
        cond_channels: 512

# gan related module
mel_spec_transform1: !name:matcha.utils.audio.mel_spectrogram
    n_fft: 1920
    num_mels: 80
    sampling_rate: !ref <sample_rate>
    hop_size: 480
    win_size: 1920
    fmin: 0
    fmax: null
    center: False
hifigan: !new:cosyvoice.hifigan.hifigan.HiFiGan
    generator: !ref <hift>
    discriminator: !new:cosyvoice.hifigan.discriminator.MultipleDiscriminator
        mpd: !new:matcha.hifigan.models.MultiPeriodDiscriminator
        mrd: !new:cosyvoice.hifigan.discriminator.MultiResSpecDiscriminator
    mel_spec_transform: [
        !ref <mel_spec_transform1>
    ]

# processor functions
parquet_opener: !name:cosyvoice.dataset.processor.parquet_opener
get_tokenizer: !name:cosyvoice.tokenizer.tokenizer.get_qwen_tokenizer
    token_path: !ref <qwen_pretrain_path>
    skip_special_tokens: True
allowed_special: 'all'
tokenize: !name:cosyvoice.dataset.processor.tokenize
    get_tokenizer: !ref <get_tokenizer>
    allowed_special: !ref <allowed_special>
filter: !name:cosyvoice.dataset.processor.filter
    max_length: 40960
    min_length: 100
    token_max_length: 200
    token_min_length: 1
resample: !name:cosyvoice.dataset.processor.resample
    resample_rate: !ref <sample_rate>
truncate: !name:cosyvoice.dataset.processor.truncate
    truncate_length: 24480 # must be a multiplier of hop_size
feat_extractor: !name:matcha.utils.audio.mel_spectrogram
    n_fft: 1920
    num_mels: 80
    sampling_rate: !ref <sample_rate>
    hop_size: 480
    win_size: 1920
    fmin: 0
    fmax: 8000
    center: False
compute_fbank: !name:cosyvoice.dataset.processor.compute_fbank
    feat_extractor: !ref <feat_extractor>
compute_f0: !name:cosyvoice.dataset.processor.compute_f0
    sample_rate: !ref <sample_rate>
    hop_size: 480
parse_embedding: !name:cosyvoice.dataset.processor.parse_embedding
    normalize: True
shuffle: !name:cosyvoice.dataset.processor.shuffle
    shuffle_size: 1000
sort: !name:cosyvoice.dataset.processor.sort
    sort_size: 500 # sort_size should be less than shuffle_size
batch: !name:cosyvoice.dataset.processor.batch
    batch_type: 'dynamic'
    max_frames_in_batch: 2000
padding: !name:cosyvoice.dataset.processor.padding
    use_spk_embedding: False # change to True during sft


# dataset processor pipeline
data_pipeline: [
    !ref <parquet_opener>,
    !ref <tokenize>,
    !ref <filter>,
    !ref <resample>,
    !ref <compute_fbank>,
    !ref <parse_embedding>,
    !ref <shuffle>,
    !ref <sort>,
    !ref <batch>,
    !ref <padding>,
]
data_pipeline_gan: [
    !ref <parquet_opener>,
    !ref <tokenize>,
    !ref <filter>,
    !ref <resample>,
    !ref <truncate>,
    !ref <compute_fbank>,
    !ref <compute_f0>,
    !ref <parse_embedding>,
    !ref <shuffle>,
    !ref <sort>,
    !ref <batch>,
    !ref <padding>,
]

# llm flow train conf
train_conf:
    optim: adamw
    optim_conf:
        lr: 1e-5 # change to 1e-5 during sft
    scheduler: constantlr # change to constantlr during sft
    scheduler_conf:
        warmup_steps: 2500
    max_epoch: 200
    grad_clip: 1
    accum_grad: 1
    log_interval: 100
    save_per_step: -1
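The !new:, !name:, !ref and !apply: tags in this file appear to follow the HyperPyYAML convention noted in the file's own comments, so loading the config also instantiates the model graph. A minimal sketch, not part of the commit, of loading it in a training script; the override path is a placeholder and the hyperpyyaml package is an assumption:

from hyperpyyaml import load_hyperpyyaml

with open("speech/cosyvoice2.yaml", "r") as f:
    # qwen_pretrain_path is empty in the file, so it would typically be overridden at load time
    configs = load_hyperpyyaml(f, overrides={"qwen_pretrain_path": "/path/to/pretrained_qwen"})

llm = configs["llm"]                      # Qwen2LM module
flow = configs["flow"]                    # CausalMaskedDiffWithXvec module
data_pipeline = configs["data_pipeline"]  # processor list consumed by init_dataset_and_dataloader
train_conf = configs["train_conf"]        # optimizer / scheduler hyperparameters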
speech/{third_party/Matcha-TTS/matcha → matcha}/
RENAMED
39 files (listed in the changed-file list above) were moved from speech/third_party/Matcha-TTS/matcha/ to speech/matcha/ with no content changes.
speech/third_party/Matcha-TTS/.env.example
DELETED
@@ -1,6 +0,0 @@
# example of file for storing private and user specific environment variables, like keys or system paths
# rename it to ".env" (excluded from version control by default)
# .env is loaded by train.py automatically
# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}

MY_VAR="/home/user/my/system/path"
speech/third_party/Matcha-TTS/.github/PULL_REQUEST_TEMPLATE.md
DELETED
@@ -1,22 +0,0 @@
## What does this PR do?

<!--
Please include a summary of the change and which issue is fixed.
Please also include relevant motivation and context.
List any dependencies that are required for this change.
List all the breaking changes introduced by this pull request.
-->

Fixes #\<issue_number>

## Before submitting

- [ ] Did you make sure **title is self-explanatory** and **the description concisely explains the PR**?
- [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
- [ ] Did you list all the **breaking changes** introduced by this pull request?
- [ ] Did you **test your PR locally** with `pytest` command?
- [ ] Did you **run pre-commit hooks** with `pre-commit run -a` command?

## Did you have fun?

Make sure you had fun coding 🙃
speech/third_party/Matcha-TTS/.github/codecov.yml
DELETED
@@ -1,15 +0,0 @@
coverage:
  status:
    # measures overall project coverage
    project:
      default:
        threshold: 100% # how much decrease in coverage is needed to not consider success

    # measures PR or single commit coverage
    patch:
      default:
        threshold: 100% # how much decrease in coverage is needed to not consider success


# project: off
# patch: off
speech/third_party/Matcha-TTS/.github/dependabot.yml
DELETED
@@ -1,17 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates

version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    target-branch: "dev"
    schedule:
      interval: "daily"
    ignore:
      - dependency-name: "pytorch-lightning"
        update-types: ["version-update:semver-patch"]
      - dependency-name: "torchmetrics"
        update-types: ["version-update:semver-patch"]
speech/third_party/Matcha-TTS/.github/release-drafter.yml
DELETED
@@ -1,44 +0,0 @@
name-template: "v$RESOLVED_VERSION"
tag-template: "v$RESOLVED_VERSION"

categories:
  - title: "🚀 Features"
    labels:
      - "feature"
      - "enhancement"
  - title: "🐛 Bug Fixes"
    labels:
      - "fix"
      - "bugfix"
      - "bug"
  - title: "🧹 Maintenance"
    labels:
      - "maintenance"
      - "dependencies"
      - "refactoring"
      - "cosmetic"
      - "chore"
  - title: "📝️ Documentation"
    labels:
      - "documentation"
      - "docs"

change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions

version-resolver:
  major:
    labels:
      - "major"
  minor:
    labels:
      - "minor"
  patch:
    labels:
      - "patch"
  default: patch

template: |
  ## Changes

  $CHANGES
speech/third_party/Matcha-TTS/.gitignore
DELETED
@@ -1,163 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

### VisualStudioCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode

# JetBrains
.idea/

# Data & Models
*.h5
*.tar
*.tar.gz

# Lightning-Hydra-Template
configs/local/default.yaml
/data/
/logs/
.env

# Aim logging
.aim

# Cython complied files
matcha/utils/monotonic_align/core.c

# Ignoring hifigan checkpoint
generator_v1
g_02500000
gradio_cached_examples/
synth_output/
speech/third_party/Matcha-TTS/.pre-commit-config.yaml
DELETED
@@ -1,59 +0,0 @@
default_language_version:
  python: python3.10

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      # list of supported hooks: https://pre-commit.com/hooks.html
      - id: trailing-whitespace
      - id: end-of-file-fixer
      # - id: check-docstring-first
      - id: check-yaml
      - id: debug-statements
      - id: detect-private-key
      - id: check-toml
      - id: check-case-conflict
      - id: check-added-large-files

  # python code formatting
  - repo: https://github.com/psf/black
    rev: 23.12.1
    hooks:
      - id: black
        args: [--line-length, "120"]

  # python import sorting
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
        args: ["--profile", "black", "--filter-files"]

  # python upgrading syntax to newer version
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]

  # python check (PEP8), programming errors and code complexity
  - repo: https://github.com/PyCQA/flake8
    rev: 7.0.0
    hooks:
      - id: flake8
        args:
          [
            "--max-line-length", "120",
            "--extend-ignore",
            "E203,E402,E501,F401,F841,RST2,RST301",
            "--exclude",
            "logs/*,data/*,matcha/hifigan/*",
          ]
        additional_dependencies: [flake8-rst-docstrings==0.3.0]

  # pylint
  - repo: https://github.com/pycqa/pylint
    rev: v3.0.3
    hooks:
      - id: pylint
speech/third_party/Matcha-TTS/.project-root
DELETED
@@ -1,2 +0,0 @@
# this file is required for inferring the project root directory
# do not delete