"""Central configuration constants: loss, training, dataset, logging, compute.

This module only defines module-level values; it contains no logic beyond
selecting the torch device at import time.
"""
import torch

# Set device to CUDA (GPU) if it is available, otherwise run on the CPU.
# NOTE(review): `accelerator` below is fixed to "gpu" and does not follow this
# fallback -- confirm both settings agree on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loss hyperparameters
# NOTE(review): 0.25 lies outside the range recorded as optimal (1/8 ~ 1/32)
# -- confirm this is intentional.
t_p = 0.25  # optimal: 1/8 ~ 1/32
zeta = 3  # optimal: 2 ~ 5
# m = 0.2

# Training hyperparameters
min_epochs = 3
max_epochs = 30
learning_rate = 5e-5
unfreeze_ratio = 1  # presumably the fraction of the model left trainable -- TODO confirm
mlm_weight = 0.5  # optimal: 0.5 ~ 0.75

# Dataset
batch_size = 100
split_ratio = 0.2  # presumably the held-out fraction of the data -- TODO confirm

# Logger
log_every_n_steps = 50
# NOTE(review): "ckcpt" looks like a typo for "ckpt"; the name is kept as-is
# because other modules may import it.
ckcpt_every_n_steps = 5000

# Compute related
# NOTE(review): these look like trainer arguments (accelerator / devices /
# precision) -- verify against the code that consumes them.
accelerator = "gpu"
devices = 1  # number of gpus
precision = "16-mixed"