| { | |
| "epoch": 2, | |
| "global_step": 164919, | |
| "pytorch-lightning_version": "1.8.2", | |
| "loops": { | |
| "fit_loop": { | |
| "state_dict": {}, | |
| "epoch_loop.state_dict": { | |
| "_batches_that_stepped": 164919 | |
| }, | |
| "epoch_loop.batch_progress": { | |
| "total": { | |
| "ready": 659676, | |
| "completed": 659676, | |
| "started": 659676, | |
| "processed": 659676 | |
| }, | |
| "current": { | |
| "ready": 219892, | |
| "completed": 219892, | |
| "started": 219892, | |
| "processed": 219892 | |
| }, | |
| "is_last_batch": true | |
| }, | |
| "epoch_loop.scheduler_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0 | |
| } | |
| }, | |
| "epoch_loop.batch_loop.state_dict": {}, | |
| "epoch_loop.batch_loop.optimizer_loop.state_dict": {}, | |
| "epoch_loop.batch_loop.optimizer_loop.optim_progress": { | |
| "optimizer": { | |
| "step": { | |
| "total": { | |
| "ready": 164919, | |
| "completed": 164919 | |
| }, | |
| "current": { | |
| "ready": 54973, | |
| "completed": 54973 | |
| } | |
| }, | |
| "zero_grad": { | |
| "total": { | |
| "ready": 164919, | |
| "completed": 164919, | |
| "started": 164919 | |
| }, | |
| "current": { | |
| "ready": 54973, | |
| "completed": 54973, | |
| "started": 54973 | |
| } | |
| } | |
| }, | |
| "optimizer_position": 1 | |
| }, | |
| "epoch_loop.batch_loop.manual_loop.state_dict": {}, | |
| "epoch_loop.batch_loop.manual_loop.optim_step_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0 | |
| } | |
| }, | |
| "epoch_loop.val_loop.state_dict": {}, | |
| "epoch_loop.val_loop.dataloader_progress": { | |
| "total": { | |
| "ready": 12, | |
| "completed": 12 | |
| }, | |
| "current": { | |
| "ready": 4, | |
| "completed": 4 | |
| } | |
| }, | |
| "epoch_loop.val_loop.epoch_loop.state_dict": {}, | |
| "epoch_loop.val_loop.epoch_loop.batch_progress": { | |
| "total": { | |
| "ready": 7036, | |
| "completed": 7036, | |
| "started": 7036, | |
| "processed": 7036 | |
| }, | |
| "current": { | |
| "ready": 2438, | |
| "completed": 2438, | |
| "started": 2438, | |
| "processed": 2438 | |
| }, | |
| "is_last_batch": true | |
| }, | |
| "epoch_progress": { | |
| "total": { | |
| "ready": 3, | |
| "completed": 2, | |
| "started": 3, | |
| "processed": 3 | |
| }, | |
| "current": { | |
| "ready": 3, | |
| "completed": 2, | |
| "started": 3, | |
| "processed": 3 | |
| } | |
| } | |
| }, | |
| "validate_loop": { | |
| "state_dict": {}, | |
| "dataloader_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0 | |
| } | |
| }, | |
| "epoch_loop.state_dict": {}, | |
| "epoch_loop.batch_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| }, | |
| "is_last_batch": false | |
| } | |
| }, | |
| "test_loop": { | |
| "state_dict": {}, | |
| "dataloader_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0 | |
| } | |
| }, | |
| "epoch_loop.state_dict": {}, | |
| "epoch_loop.batch_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| }, | |
| "is_last_batch": false | |
| } | |
| }, | |
| "predict_loop": { | |
| "state_dict": {}, | |
| "dataloader_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0 | |
| } | |
| }, | |
| "epoch_loop.state_dict": {}, | |
| "epoch_loop.batch_progress": { | |
| "total": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| }, | |
| "current": { | |
| "ready": 0, | |
| "completed": 0, | |
| "started": 0, | |
| "processed": 0 | |
| } | |
| } | |
| } | |
| }, | |
| "hparams_name": "kwargs", | |
| "hyper_parameters": { | |
| "nr_frozen_epochs": 0.3, | |
| "keep_embeddings_frozen": true, | |
| "optimizer": "AdamW", | |
| "encoder_learning_rate": 1e-06, | |
| "learning_rate": 1.5e-05, | |
| "layerwise_decay": 0.95, | |
| "encoder_model": "XLM-RoBERTa", | |
| "pretrained_model": "microsoft/infoxlm-large", | |
| "pool": "avg", | |
| "layer": "mix", | |
| "layer_transformation": "sparsemax", | |
| "layer_norm": false, | |
| "loss": "mse", | |
| "dropout": 0.1, | |
| "batch_size": 4, | |
| "train_data": [ | |
| "data/1720-da.mlqe-src.csv" | |
| ], | |
| "validation_data": [ | |
| "data/wmt-ende-newstest2021.csv", | |
| "data/wmt-enru-newstest2021.csv", | |
| "data/wmt-zhen-newstest2021.csv" | |
| ], | |
| "class_identifier": "unified_metric", | |
| "sent_layer": "mix", | |
| "word_layer": 24, | |
| "hidden_sizes": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "activations": "Tanh", | |
| "final_activation": null, | |
| "input_segments": [ | |
| "mt", | |
| "src" | |
| ], | |
| "word_level_training": false, | |
| "word_weights": [ | |
| 0.15, | |
| 0.85 | |
| ], | |
| "loss_lambda": 0.65 | |
| } | |
| } |