"""Training entry point.

Builds the model/tokenizer pair, the dataset and the data collator from the
project modules ``model`` and ``preprocess``, configures
``Seq2SeqTrainingArguments`` and runs ``InvertTextNormalizationTrainer.train()``.
"""

import os

from transformers import Seq2SeqTrainingArguments

import model
import preprocess
from my_trainer import InvertTextNormalizationTrainer

# Make only CUDA device 0 visible to this process. This runs at import time
# (outside the __main__ guard), exactly as in the original script.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main() -> None:
    """Configure the trainer and run training.

    Checkpoints are written under ``./checkpoints`` and logs under
    ``./checkpoints/log``.
    """
    roberta, tokenizer = model.init_model()
    dataset = preprocess.init_data()
    data_collator = preprocess.DataCollatorInvertTextNormalization(
        tokenizer, model=roberta
    )

    num_epochs = 15
    checkpoint_path = "./checkpoints"
    batch_size = 8

    training_args = Seq2SeqTrainingArguments(
        output_dir=checkpoint_path,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        evaluation_strategy="steps",
        save_strategy="steps",
        learning_rate=2e-5,
        gradient_accumulation_steps=1,
        predict_with_generate=True,
        do_train=True,
        do_eval=True,
        logging_steps=5000,
        # save_steps: number of update steps between two checkpoint saves
        # when save_strategy="steps" (library default is 500).
        save_steps=18740,
        eval_steps=5000,
        num_train_epochs=num_epochs,
        # Warm up over 1/num_epochs of all training steps, i.e. roughly the
        # first epoch.
        warmup_ratio=1 / num_epochs,
        logging_dir=os.path.join(checkpoint_path, "log"),
        overwrite_output_dir=True,
        # eval_accumulation_steps: number of prediction steps whose output
        # tensors are accumulated before being moved to the CPU. If left
        # unset, all predictions are accumulated on GPU/TPU first (faster but
        # requires more memory).
        eval_accumulation_steps=10,
        dataloader_num_workers=0,
        generation_max_length=50,
        fp16=True,
        ignore_data_skip=True,
    )

    trainer = InvertTextNormalizationTrainer(
        model=roberta,
        args=training_args,
        # NOTE(review): presumably a `datasets.Dataset`, in which case
        # shard(50, 0) keeps only the first of 50 shards (~2% of the training
        # data) - confirm this subsampling is intended.
        train_dataset=dataset["train"].shard(50, 0),
        eval_dataset=dataset["valid"],
        data_collator=data_collator,
        tokenizer=tokenizer,
    )
    trainer.train()


if __name__ == "__main__":
    main()