| { |
| "best_metric": 2.767585039138794, |
| "best_model_checkpoint": "narrativesAnalogues-allMiniLM/checkpoint-396", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 396, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12626262626262627, |
| "grad_norm": 0.0, |
| "learning_rate": 1.25e-05, |
| "loss": 1.2194, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.25252525252525254, |
| "grad_norm": 104.04041290283203, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6345, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3787878787878788, |
| "grad_norm": 18.921247482299805, |
| "learning_rate": 2.915730337078652e-05, |
| "loss": 1.438, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 29.835416793823242, |
| "learning_rate": 2.7752808988764048e-05, |
| "loss": 0.8182, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6313131313131313, |
| "grad_norm": 183.91859436035156, |
| "learning_rate": 2.6348314606741574e-05, |
| "loss": 0.2745, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 11.979473114013672, |
| "learning_rate": 2.4943820224719103e-05, |
| "loss": 0.2943, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8838383838383839, |
| "grad_norm": 0.0606352724134922, |
| "learning_rate": 2.353932584269663e-05, |
| "loss": 0.3982, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 3.425248146057129, |
| "eval_runtime": 20.2363, |
| "eval_samples_per_second": 9.488, |
| "eval_steps_per_second": 0.593, |
| "eval_validation_pearson_cosine": 0.8060792568084406, |
| "eval_validation_spearman_cosine": 0.6571353246584904, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0101010101010102, |
| "grad_norm": 0.5485407114028931, |
| "learning_rate": 2.2134831460674157e-05, |
| "loss": 0.1918, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 0.06130551919341087, |
| "learning_rate": 2.0730337078651686e-05, |
| "loss": 0.2697, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2626262626262625, |
| "grad_norm": 13.252527236938477, |
| "learning_rate": 1.9325842696629215e-05, |
| "loss": 0.1762, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 1.0573351383209229, |
| "learning_rate": 1.792134831460674e-05, |
| "loss": 0.0707, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 0.13493157923221588, |
| "learning_rate": 1.651685393258427e-05, |
| "loss": 0.0129, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6414141414141414, |
| "grad_norm": 0.03943366929888725, |
| "learning_rate": 1.51123595505618e-05, |
| "loss": 0.1397, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7676767676767677, |
| "grad_norm": 0.6590216159820557, |
| "learning_rate": 1.3707865168539327e-05, |
| "loss": 0.0937, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.893939393939394, |
| "grad_norm": 111.35533905029297, |
| "learning_rate": 1.2303370786516854e-05, |
| "loss": 0.2818, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 2.767585039138794, |
| "eval_runtime": 23.6772, |
| "eval_samples_per_second": 8.109, |
| "eval_steps_per_second": 0.507, |
| "eval_validation_pearson_cosine": 0.8085373520361815, |
| "eval_validation_spearman_cosine": 0.6588712021140699, |
| "step": 396 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 594, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.01 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|