# Load the tokenizer and the model from the same pretrained checkpoint so
# that the vocabulary and the model weights match.
checkpoint = "google/mt5-small"
# legacy=False is passed through to the tokenizer loader; see the Transformers
# tokenizer documentation for the behavior this flag selects.
tokenizer = MT5Tokenizer.from_pretrained(checkpoint, legacy=False)
# The loader classmethod is `from_pretrained` (previously misspelled as
# `from_pretarined`, which raises AttributeError).
model = MT5ForConditionalGeneration.from_pretrained(checkpoint)
Training
- Epochs: 200
- Optimizer: AdamW
- Learning Rate: 2e-5
- Weight decay: 0.01
- Warm-up steps: 0.05 * total_steps (5% of the total training steps)
- Scheduler: cosine