from transformers import Seq2SeqTrainingArguments
import os
import model
import preprocess
from my_trainer import InvertTextNormalizationTrainer

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

if __name__ == "__main__":
    # Model and tokenizer are built by the project-local `model` module.
    seq2seq_model, tokenizer = model.init_model()

    # Indexed below with the "train" and "valid" keys.
    splits = preprocess.init_data()

    collator = preprocess.DataCollatorInvertTextNormalization(
        tokenizer,
        model=seq2seq_model,
    )

    epoch_count = 15
    output_root = "./checkpoints"
    per_device_batch = 8

    # Reference notes for two of the step-based settings used below:
    #
    # eval_accumulation_steps (int, optional):
    #     Number of prediction steps to accumulate the output tensors for
    #     before moving the results to the CPU. If left unset, all
    #     predictions are accumulated on GPU/TPU before being moved to the
    #     CPU (faster, but requires more memory).
    #
    # save_steps (int, optional, defaults to 500):
    #     Number of update steps between two checkpoint saves when
    #     save_strategy="steps".
    train_config = Seq2SeqTrainingArguments(
        # --- output locations ---
        output_dir=output_root,
        logging_dir=os.path.join(output_root, "log"),
        overwrite_output_dir=True,
        # --- run mode ---
        do_train=True,
        do_eval=True,
        # --- optimisation ---
        num_train_epochs=epoch_count,
        learning_rate=2e-5,
        # Warm up for 1/epoch_count of the total training steps.
        warmup_ratio=1 / epoch_count,
        gradient_accumulation_steps=1,
        per_device_train_batch_size=per_device_batch,
        fp16=True,
        # --- evaluation ---
        evaluation_strategy="steps",
        eval_steps=5000,
        per_device_eval_batch_size=per_device_batch,
        eval_accumulation_steps=10,
        predict_with_generate=True,
        generation_max_length=50,
        # --- logging and checkpointing ---
        logging_steps=5000,
        save_strategy="steps",
        save_steps=18740,
        # save_total_limit=2,
        # --- data loading ---
        dataloader_num_workers=0,
        ignore_data_skip=True,
    )

    # Only shard 0 out of 50 of the training split is used for training.
    # NOTE(review): save_steps/eval_steps may have been sized for the full
    # training set -- confirm they still fire on this reduced subset.
    train_subset = splits["train"].shard(50, 0)
    valid_split = splits["valid"]

    trainer = InvertTextNormalizationTrainer(
        model=seq2seq_model,
        args=train_config,
        train_dataset=train_subset,
        eval_dataset=valid_split,
        data_collator=collator,
        tokenizer=tokenizer,
    )
    trainer.train()
    # Optional follow-up steps, currently disabled:
    #   trainer.evaluate()
    #   trainer.save_model(output_root)