# itn_vi / main.py
# MinhHan1009's picture
# Upload folder using huggingface_hub
# 1e9c44c verified
from transformers import Seq2SeqTrainingArguments
import os
import model
import preprocess
from my_trainer import InvertTextNormalizationTrainer
# Expose only CUDA device 0 to this process. This is a module-level side
# effect: it happens on import as well as when the file is run as a script.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def main():
    """Build the model, data and trainer, then run training.

    The model/tokenizer pair comes from ``model.init_model()`` and the
    dataset splits from ``preprocess.init_data()``. Training is driven by
    ``InvertTextNormalizationTrainer`` configured with
    ``Seq2SeqTrainingArguments``; checkpoints and logs go under
    ``./checkpoints``.
    """
    network, tokenizer = model.init_model()
    splits = preprocess.init_data()
    collator = preprocess.DataCollatorInvertTextNormalization(
        tokenizer, model=network
    )

    epochs = 15
    output_dir = "./checkpoints"
    per_device_batch = 8

    # Where results are written.
    bookkeeping = {
        "output_dir": output_dir,
        "logging_dir": os.path.join(output_dir, "log"),
        "overwrite_output_dir": True,
        # save_total_limit=2 was left disabled by the original author.
    }

    # Batch sizes and data loading.
    batching = {
        "per_device_train_batch_size": per_device_batch,
        "per_device_eval_batch_size": per_device_batch,
        "gradient_accumulation_steps": 1,
        "dataloader_num_workers": 0,
        "ignore_data_skip": True,
    }

    # Optimisation schedule.
    optimisation = {
        "learning_rate": 2e-5,
        "num_train_epochs": epochs,
        # Warmup fraction of 1/epochs -- presumably meant as roughly one
        # epoch's worth of warmup; confirm.
        "warmup_ratio": 1 / epochs,
        "fp16": True,
    }

    # Step-based evaluation, logging and checkpointing.
    cadence = {
        "do_train": True,
        "do_eval": True,
        "evaluation_strategy": "steps",
        "eval_steps": 5000,
        "logging_steps": 5000,
        "save_strategy": "steps",
        # save_steps: number of update steps between two checkpoint saves
        # when save_strategy="steps" (the library default is 500).
        # NOTE(review): if the run finishes in fewer than 18740 update
        # steps, no checkpoint would be written, and the final
        # save_model() call is disabled below -- verify.
        "save_steps": 18740,
        # eval_accumulation_steps: number of prediction steps whose output
        # tensors are accumulated before being moved to the CPU. If unset,
        # all predictions are accumulated on GPU/TPU first (faster but
        # requires more memory).
        "eval_accumulation_steps": 10,
    }

    # Evaluation runs generation rather than a plain forward pass.
    generation = {
        "predict_with_generate": True,
        "generation_max_length": 50,
    }

    training_args = Seq2SeqTrainingArguments(
        **bookkeeping,
        **batching,
        **optimisation,
        **cadence,
        **generation,
    )

    # NOTE(review): only shard index 0 out of 50 shards of the training split
    # is used -- confirm this subsampling is intentional.
    train_subset = splits["train"].shard(50, 0)
    validation_set = splits["valid"]

    trainer = InvertTextNormalizationTrainer(
        model=network,
        args=training_args,
        train_dataset=train_subset,
        eval_dataset=validation_set,
        data_collator=collator,
        tokenizer=tokenizer,
    )
    trainer.train()
    # Follow-up steps left disabled by the original author:
    #   trainer.evaluate()
    #   trainer.save_model(checkpoint_path)
    #   trainer.evaluate()


if __name__ == "__main__":
    main()