| { |
| "best_global_step": 987, |
| "best_metric": 6.014463901519775, |
| "best_model_checkpoint": "printing_press/dialogue-context-learning/notebooks/results/fact-ranker/intfloat/multilingual-e5-base/checkpoint-987", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1974, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10131712259371833, |
| "grad_norm": 13.125, |
| "learning_rate": 4.8996960486322194e-05, |
| "loss": 2.4265, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.20263424518743667, |
| "grad_norm": 12.1875, |
| "learning_rate": 4.798378926038501e-05, |
| "loss": 2.1867, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.303951367781155, |
| "grad_norm": 13.875, |
| "learning_rate": 4.6970618034447823e-05, |
| "loss": 2.1658, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.40526849037487334, |
| "grad_norm": 11.375, |
| "learning_rate": 4.595744680851064e-05, |
| "loss": 2.0992, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5065856129685917, |
| "grad_norm": 11.875, |
| "learning_rate": 4.494427558257346e-05, |
| "loss": 2.0569, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.60790273556231, |
| "grad_norm": 11.8125, |
| "learning_rate": 4.393110435663627e-05, |
| "loss": 2.0444, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 12.0625, |
| "learning_rate": 4.291793313069909e-05, |
| "loss": 2.0274, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8105369807497467, |
| "grad_norm": 9.9375, |
| "learning_rate": 4.190476190476191e-05, |
| "loss": 2.0095, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9118541033434651, |
| "grad_norm": 13.0625, |
| "learning_rate": 4.0891590678824726e-05, |
| "loss": 2.0072, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_cosine_accuracy": 0.8960528665833184, |
| "eval_cosine_accuracy_threshold": 0.7867648005485535, |
| "eval_cosine_ap": 0.7222417311266273, |
| "eval_cosine_f1": 0.6442883895131085, |
| "eval_cosine_f1_threshold": 0.7727111577987671, |
| "eval_cosine_mcc": 0.5782083436360893, |
| "eval_cosine_precision": 0.6771304861247786, |
| "eval_cosine_recall": 0.6144847294159671, |
| "eval_loss": 6.014463901519775, |
| "eval_runtime": 98.4941, |
| "eval_samples_per_second": 682.153, |
| "eval_steps_per_second": 6.823, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.0131712259371835, |
| "grad_norm": 9.5625, |
| "learning_rate": 3.987841945288754e-05, |
| "loss": 1.9768, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1144883485309016, |
| "grad_norm": 10.875, |
| "learning_rate": 3.8865248226950355e-05, |
| "loss": 1.932, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.21580547112462, |
| "grad_norm": 12.625, |
| "learning_rate": 3.7852077001013173e-05, |
| "loss": 1.9352, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3171225937183384, |
| "grad_norm": 14.4375, |
| "learning_rate": 3.6838905775075985e-05, |
| "loss": 1.8966, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "grad_norm": 11.9375, |
| "learning_rate": 3.58257345491388e-05, |
| "loss": 1.9461, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5197568389057752, |
| "grad_norm": 10.0625, |
| "learning_rate": 3.481256332320163e-05, |
| "loss": 1.8999, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6210739614994933, |
| "grad_norm": 10.5625, |
| "learning_rate": 3.379939209726444e-05, |
| "loss": 1.9098, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7223910840932117, |
| "grad_norm": 10.5625, |
| "learning_rate": 3.278622087132726e-05, |
| "loss": 1.9049, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8237082066869301, |
| "grad_norm": 10.1875, |
| "learning_rate": 3.1773049645390076e-05, |
| "loss": 1.9113, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9250253292806483, |
| "grad_norm": 13.0, |
| "learning_rate": 3.075987841945289e-05, |
| "loss": 1.9211, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_cosine_accuracy": 0.8977793653628624, |
| "eval_cosine_accuracy_threshold": 0.7719855308532715, |
| "eval_cosine_ap": 0.7302000254958843, |
| "eval_cosine_f1": 0.6526667569886646, |
| "eval_cosine_f1_threshold": 0.7546476721763611, |
| "eval_cosine_mcc": 0.5842050499797425, |
| "eval_cosine_precision": 0.6602101416171768, |
| "eval_cosine_recall": 0.6452938024647259, |
| "eval_loss": 6.201447486877441, |
| "eval_runtime": 98.5542, |
| "eval_samples_per_second": 681.736, |
| "eval_steps_per_second": 6.819, |
| "step": 1974 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4935, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 100, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|