Respair
/

NeMo_Canary

Model card Files Files and versions

Metrics Training metrics Community

NeMo_Canary / examples /speechlm2 /salm_train.py

Respair's picture

Upload folder using huggingface_hub

b386992 verified 9 months ago

history blame contribute delete

1.67 kB

	# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import os

	import torch
	from lightning.pytorch import Trainer
	from omegaconf import OmegaConf

	from nemo.collections.speechlm2 import SALM, DataModule, SALMDataset
	from nemo.core.config import hydra_runner
	from nemo.utils.exp_manager import exp_manager
	from nemo.utils.trainer_utils import resolve_trainer_cfg

	# Select the CUDA device for this process from the LOCAL_RANK environment
	# variable. The lookup is a plain mapping access, so a missing variable
	# raises KeyError as soon as the module is imported.
	_local_rank = int(os.environ["LOCAL_RANK"])
	torch.cuda.set_device(_local_rank)


	@hydra_runner(config_path="conf", config_name="salm")
	def train(cfg) -> None:
	    """Train a SALM model from the configuration ``cfg``.

	    The config is resolved in place, the NCCL process group is initialised,
	    and a Lightning ``Trainer`` is built from ``cfg.trainer``. The model is
	    constructed from ``cfg.model`` inside ``trainer.init_module()``; a
	    ``SALMDataset`` and ``DataModule`` are then created around the model's
	    tokenizer and ``cfg.data``, and ``trainer.fit`` runs the training loop.

	    Args:
	        cfg: Configuration object supplied by ``hydra_runner``
	            (``config_path="conf"``, ``config_name="salm"``). It is read for
	            the ``trainer``, ``model`` and ``data`` sections and, optionally,
	            an ``exp_manager`` section.
	    """
	    OmegaConf.resolve(cfg)
	    torch.distributed.init_process_group(backend="nccl")
	    torch.set_float32_matmul_precision("medium")

	    trainer = Trainer(**resolve_trainer_cfg(cfg.trainer))
	    log_dir = exp_manager(trainer, cfg.get("exp_manager", None))
	    # The "exp_manager" section is optional (None is passed when it is
	    # absent), so only snapshot the resolved config when a log directory was
	    # actually returned instead of failing on ``None / "exp_config.yaml"``.
	    if log_dir is not None:
	        OmegaConf.save(cfg, log_dir / "exp_config.yaml")

	    # Build the model under the trainer's module-initialisation context.
	    with trainer.init_module():
	        model = SALM(OmegaConf.to_container(cfg.model, resolve=True))

	    # The dataset and the data module share the model's tokenizer.
	    dataset = SALMDataset(tokenizer=model.tokenizer)
	    datamodule = DataModule(cfg.data, tokenizer=model.tokenizer, dataset=dataset)

	    trainer.fit(model, datamodule)


	# Run training only when this file is executed as a script; ``train`` is
	# called without arguments and the ``hydra_runner`` decorator supplies cfg.
	if __name__ == "__main__":
	    train()