| { |
| "best_global_step": 450, |
| "best_metric": 0.8513853904282116, |
| "best_model_checkpoint": "../models/retriever_trainer_bge_base/checkpoint-450", |
| "epoch": 1.9148936170212765, |
| "eval_steps": 50, |
| "global_step": 450, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 0.8533166646957397, |
| "learning_rate": 1.5254237288135596e-06, |
| "loss": 0.2054, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 0.7839147448539734, |
| "learning_rate": 3.2203389830508473e-06, |
| "loss": 0.2017, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 0.9631144404411316, |
| "learning_rate": 4.915254237288136e-06, |
| "loss": 0.197, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 1.077710747718811, |
| "learning_rate": 6.610169491525424e-06, |
| "loss": 0.1913, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 0.8926519155502319, |
| "learning_rate": 8.305084745762712e-06, |
| "loss": 0.1827, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "eval_accuracy_at_0.5_thresh": 0.7808510638297872, |
| "eval_f1_at_0.5_thresh": 0.7444168734491315, |
| "eval_loss": 0.18353447318077087, |
| "eval_precision_at_0.5_thresh": 0.7389162561576355, |
| "eval_recall_at_0.5_thresh": 0.75, |
| "eval_runtime": 1.2282, |
| "eval_samples_per_second": 382.661, |
| "eval_steps_per_second": 12.213, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 1.0001214742660522, |
| "learning_rate": 1e-05, |
| "loss": 0.1897, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 0.8536041378974915, |
| "learning_rate": 1.169491525423729e-05, |
| "loss": 0.1811, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.9402114748954773, |
| "learning_rate": 1.3389830508474577e-05, |
| "loss": 0.1777, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 0.9630193710327148, |
| "learning_rate": 1.5084745762711865e-05, |
| "loss": 0.1697, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.079608678817749, |
| "learning_rate": 1.6779661016949154e-05, |
| "loss": 0.166, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_accuracy_at_0.5_thresh": 0.8063829787234043, |
| "eval_f1_at_0.5_thresh": 0.723404255319149, |
| "eval_loss": 0.15719293057918549, |
| "eval_precision_at_0.5_thresh": 0.9224806201550387, |
| "eval_recall_at_0.5_thresh": 0.595, |
| "eval_runtime": 1.1886, |
| "eval_samples_per_second": 395.432, |
| "eval_steps_per_second": 12.62, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 1.0034838914871216, |
| "learning_rate": 1.8474576271186443e-05, |
| "loss": 0.1685, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.9138811230659485, |
| "learning_rate": 1.9981078524124883e-05, |
| "loss": 0.1708, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.9457395076751709, |
| "learning_rate": 1.97918637653737e-05, |
| "loss": 0.1524, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.1439883708953857, |
| "learning_rate": 1.960264900662252e-05, |
| "loss": 0.1642, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.900199830532074, |
| "learning_rate": 1.9413434247871333e-05, |
| "loss": 0.1612, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "eval_accuracy_at_0.5_thresh": 0.8553191489361702, |
| "eval_f1_at_0.5_thresh": 0.8308457711442786, |
| "eval_loss": 0.14183281362056732, |
| "eval_precision_at_0.5_thresh": 0.8267326732673267, |
| "eval_recall_at_0.5_thresh": 0.835, |
| "eval_runtime": 1.1664, |
| "eval_samples_per_second": 402.933, |
| "eval_steps_per_second": 12.86, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.2067737579345703, |
| "learning_rate": 1.922421948912015e-05, |
| "loss": 0.1553, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.0066120624542236, |
| "learning_rate": 1.9035004730368973e-05, |
| "loss": 0.1495, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.0830472707748413, |
| "learning_rate": 1.8845789971617787e-05, |
| "loss": 0.1364, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.9800769686698914, |
| "learning_rate": 1.8656575212866605e-05, |
| "loss": 0.1295, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.9936037659645081, |
| "learning_rate": 1.8467360454115423e-05, |
| "loss": 0.1545, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_accuracy_at_0.5_thresh": 0.8382978723404255, |
| "eval_f1_at_0.5_thresh": 0.8020833333333334, |
| "eval_loss": 0.13116958737373352, |
| "eval_precision_at_0.5_thresh": 0.8369565217391305, |
| "eval_recall_at_0.5_thresh": 0.77, |
| "eval_runtime": 1.1719, |
| "eval_samples_per_second": 401.047, |
| "eval_steps_per_second": 12.799, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 0.9179720878601074, |
| "learning_rate": 1.827814569536424e-05, |
| "loss": 0.1328, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 1.5345299243927002, |
| "learning_rate": 1.808893093661306e-05, |
| "loss": 0.1355, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 1.037996530532837, |
| "learning_rate": 1.7899716177861873e-05, |
| "loss": 0.1289, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 1.2900911569595337, |
| "learning_rate": 1.771050141911069e-05, |
| "loss": 0.1245, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 0.7172983884811401, |
| "learning_rate": 1.752128666035951e-05, |
| "loss": 0.1067, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "eval_accuracy_at_0.5_thresh": 0.8425531914893617, |
| "eval_f1_at_0.5_thresh": 0.8131313131313131, |
| "eval_loss": 0.12659965455532074, |
| "eval_precision_at_0.5_thresh": 0.8214285714285714, |
| "eval_recall_at_0.5_thresh": 0.805, |
| "eval_runtime": 1.1658, |
| "eval_samples_per_second": 403.141, |
| "eval_steps_per_second": 12.866, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1063829787234043, |
| "grad_norm": 1.0125339031219482, |
| "learning_rate": 1.7332071901608327e-05, |
| "loss": 0.1177, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.148936170212766, |
| "grad_norm": 1.0160564184188843, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 0.1034, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1914893617021276, |
| "grad_norm": 1.3702187538146973, |
| "learning_rate": 1.6953642384105963e-05, |
| "loss": 0.1117, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2340425531914894, |
| "grad_norm": 1.317497730255127, |
| "learning_rate": 1.676442762535478e-05, |
| "loss": 0.1235, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.8722613453865051, |
| "learning_rate": 1.6575212866603596e-05, |
| "loss": 0.0933, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_accuracy_at_0.5_thresh": 0.8489361702127659, |
| "eval_f1_at_0.5_thresh": 0.8202531645569621, |
| "eval_loss": 0.1204555556178093, |
| "eval_precision_at_0.5_thresh": 0.8307692307692308, |
| "eval_recall_at_0.5_thresh": 0.81, |
| "eval_runtime": 1.1714, |
| "eval_samples_per_second": 401.236, |
| "eval_steps_per_second": 12.805, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3191489361702127, |
| "grad_norm": 1.0289169549942017, |
| "learning_rate": 1.6385998107852414e-05, |
| "loss": 0.1039, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 0.9575484991073608, |
| "learning_rate": 1.619678334910123e-05, |
| "loss": 0.1161, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4042553191489362, |
| "grad_norm": 1.3465416431427002, |
| "learning_rate": 1.600756859035005e-05, |
| "loss": 0.0912, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4468085106382977, |
| "grad_norm": 0.9446049332618713, |
| "learning_rate": 1.5818353831598864e-05, |
| "loss": 0.0868, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 1.3670283555984497, |
| "learning_rate": 1.5629139072847682e-05, |
| "loss": 0.0984, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "eval_accuracy_at_0.5_thresh": 0.8617021276595744, |
| "eval_f1_at_0.5_thresh": 0.8387096774193549, |
| "eval_loss": 0.12064016610383987, |
| "eval_precision_at_0.5_thresh": 0.8325123152709359, |
| "eval_recall_at_0.5_thresh": 0.845, |
| "eval_runtime": 1.1704, |
| "eval_samples_per_second": 401.579, |
| "eval_steps_per_second": 12.816, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.5319148936170213, |
| "grad_norm": 1.2213770151138306, |
| "learning_rate": 1.54399243140965e-05, |
| "loss": 0.109, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.574468085106383, |
| "grad_norm": 0.8301506638526917, |
| "learning_rate": 1.5250709555345318e-05, |
| "loss": 0.1014, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6170212765957448, |
| "grad_norm": 1.2703402042388916, |
| "learning_rate": 1.5061494796594136e-05, |
| "loss": 0.1054, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6595744680851063, |
| "grad_norm": 1.1871310472488403, |
| "learning_rate": 1.4872280037842952e-05, |
| "loss": 0.1043, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 0.7756622433662415, |
| "learning_rate": 1.468306527909177e-05, |
| "loss": 0.1052, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "eval_accuracy_at_0.5_thresh": 0.8659574468085106, |
| "eval_f1_at_0.5_thresh": 0.8372093023255814, |
| "eval_loss": 0.1149349957704544, |
| "eval_precision_at_0.5_thresh": 0.8663101604278075, |
| "eval_recall_at_0.5_thresh": 0.81, |
| "eval_runtime": 1.1721, |
| "eval_samples_per_second": 400.999, |
| "eval_steps_per_second": 12.798, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7446808510638299, |
| "grad_norm": 1.2976441383361816, |
| "learning_rate": 1.4493850520340588e-05, |
| "loss": 0.1295, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.7872340425531914, |
| "grad_norm": 1.139825463294983, |
| "learning_rate": 1.4304635761589404e-05, |
| "loss": 0.1032, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8297872340425532, |
| "grad_norm": 1.3335371017456055, |
| "learning_rate": 1.4115421002838224e-05, |
| "loss": 0.1001, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.872340425531915, |
| "grad_norm": 1.33684241771698, |
| "learning_rate": 1.3926206244087039e-05, |
| "loss": 0.1052, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 1.1832659244537354, |
| "learning_rate": 1.3736991485335858e-05, |
| "loss": 0.1064, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "eval_accuracy_at_0.5_thresh": 0.874468085106383, |
| "eval_f1_at_0.5_thresh": 0.8513853904282116, |
| "eval_loss": 0.11494793742895126, |
| "eval_precision_at_0.5_thresh": 0.8578680203045685, |
| "eval_recall_at_0.5_thresh": 0.845, |
| "eval_runtime": 1.1713, |
| "eval_samples_per_second": 401.267, |
| "eval_steps_per_second": 12.806, |
| "step": 450 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1175, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|