{
  "best_metric": 0.8903650641441345,
  "best_model_checkpoint": "./qlora/checkpoint-90",
  "epoch": 1.8941798941798942,
  "eval_steps": 30,
  "global_step": 90,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 5e-05,
      "loss": 1.5699,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001,
      "loss": 1.5336,
      "step": 2
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00015,
      "loss": 1.4836,
      "step": 3
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 1.4998,
      "step": 4
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00025,
      "loss": 1.3778,
      "step": 5
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0003,
      "loss": 1.4062,
      "step": 6
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00035,
      "loss": 1.3872,
      "step": 7
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0004,
      "loss": 1.2851,
      "step": 8
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00045000000000000004,
      "loss": 1.2633,
      "step": 9
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0005,
      "loss": 1.3413,
      "step": 10
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0004999281136632892,
      "loss": 1.295,
      "step": 11
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0004997124959943201,
      "loss": 1.3508,
      "step": 12
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0004993532709928075,
      "loss": 1.2912,
      "step": 13
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0004988506452457066,
      "loss": 1.3399,
      "step": 14
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0004982049078084071,
      "loss": 1.2839,
      "step": 15
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0004974164300384998,
      "loss": 1.2581,
      "step": 16
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0004964856653822122,
      "loss": 1.2758,
      "step": 17
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0004954131491136361,
      "loss": 1.257,
      "step": 18
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0004941994980268967,
      "loss": 1.2517,
      "step": 19
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.000492845410081439,
      "loss": 1.2877,
      "step": 20
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0004913516640006391,
      "loss": 1.2128,
      "step": 21
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0004897191188239667,
      "loss": 1.2197,
      "step": 22
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00048794871341296,
      "loss": 1.2126,
      "step": 23
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00048604146591129483,
      "loss": 1.182,
      "step": 24
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00048399847315926,
      "loss": 1.2644,
      "step": 25
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00048182091006297446,
      "loss": 1.2948,
      "step": 26
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.00047951002891870987,
      "loss": 1.2336,
      "step": 27
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00047706715869270635,
      "loss": 1.2592,
      "step": 28
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.000474493704256897,
      "loss": 1.2353,
      "step": 29
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0004717911455809782,
      "loss": 1.2198,
      "step": 30
    },
    {
      "epoch": 0.63,
      "eval_loss": 0.9055019021034241,
      "eval_runtime": 78.8597,
      "eval_samples_per_second": 27.974,
      "eval_steps_per_second": 2.802,
      "step": 30
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0004689610368812938,
      "loss": 1.2092,
      "step": 31
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0004660050057270191,
      "loss": 1.2702,
      "step": 32
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0004629247521041611,
      "loss": 1.2143,
      "step": 33
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0004597220474379125,
      "loss": 1.2066,
      "step": 34
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0004563987335739216,
      "loss": 1.1781,
      "step": 35
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00045295672171906365,
      "loss": 1.1901,
      "step": 36
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00044939799134232397,
      "loss": 1.194,
      "step": 37
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0004457245890364235,
      "loss": 1.2206,
      "step": 38
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0004419386273408428,
      "loss": 1.2398,
      "step": 39
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0004380422835269193,
      "loss": 1.1949,
      "step": 40
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00043403779834572,
      "loss": 1.1929,
      "step": 41
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00042992747473940553,
      "loss": 1.1823,
      "step": 42
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00042571367651682995,
      "loss": 1.2005,
      "step": 43
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00042139882699413617,
      "loss": 1.1782,
      "step": 44
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00041698540760112926,
      "loss": 1.2222,
      "step": 45
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.00041247595645422955,
      "loss": 1.2339,
      "step": 46
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.0004078730668968252,
      "loss": 1.1552,
      "step": 47
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.00040317938600786487,
      "loss": 1.2564,
      "step": 48
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.00039839761307954675,
      "loss": 1.1184,
      "step": 49
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.0003935304980649813,
      "loss": 1.0166,
      "step": 50
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0003885808399967186,
      "loss": 1.097,
      "step": 51
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0003835514853770505,
      "loss": 1.1164,
      "step": 52
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.00037844532654101405,
      "loss": 1.1085,
      "step": 53
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.0003732652999930364,
      "loss": 1.149,
      "step": 54
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.00036801438471817827,
      "loss": 1.1033,
      "step": 55
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.00036269560046894763,
      "loss": 1.1575,
      "step": 56
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0003573120060286679,
      "loss": 1.0901,
      "step": 57
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.00035186669745240024,
      "loss": 1.1321,
      "step": 58
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0003463628062864312,
      "loss": 1.061,
      "step": 59
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.00034080349776734924,
      "loss": 1.1206,
      "step": 60
    },
    {
      "epoch": 1.26,
      "eval_loss": 0.8950722217559814,
      "eval_runtime": 78.8097,
      "eval_samples_per_second": 27.991,
      "eval_steps_per_second": 2.804,
      "step": 60
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.00033519196900174726,
      "loss": 1.0955,
      "step": 61
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0003295314471275954,
      "loss": 1.0754,
      "step": 62
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.00032382518745834515,
      "loss": 1.1503,
      "step": 63
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.00031807647161082795,
      "loss": 1.1396,
      "step": 64
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.0003122886056180284,
      "loss": 1.1725,
      "step": 65
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.00030646491802781517,
      "loss": 1.0785,
      "step": 66
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.00030060875798872436,
      "loss": 1.1007,
      "step": 67
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.00029472349332389523,
      "loss": 1.0888,
      "step": 68
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0002888125085942664,
      "loss": 1.1254,
      "step": 69
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.00028287920315214646,
      "loss": 1.1497,
      "step": 70
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.0002769269891862778,
      "loss": 1.1557,
      "step": 71
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.0002709592897595191,
      "loss": 1.0843,
      "step": 72
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002649795368402735,
      "loss": 1.0626,
      "step": 73
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.00025899116932879537,
      "loss": 1.0813,
      "step": 74
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.0002529976310795108,
      "loss": 1.1033,
      "step": 75
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002470023689204893,
      "loss": 1.0719,
      "step": 76
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.00024100883067120475,
      "loss": 1.1511,
      "step": 77
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.00023502046315972656,
      "loss": 1.1419,
      "step": 78
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.0002290407102404809,
      "loss": 1.1272,
      "step": 79
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00022307301081372224,
      "loss": 1.1337,
      "step": 80
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.00021712079684785363,
      "loss": 1.1324,
      "step": 81
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.0002111874914057336,
      "loss": 1.0984,
      "step": 82
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00020527650667610475,
      "loss": 1.124,
      "step": 83
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0001993912420112756,
      "loss": 1.1362,
      "step": 84
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.00019353508197218492,
      "loss": 1.1449,
      "step": 85
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00018771139438197168,
      "loss": 1.078,
      "step": 86
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00018192352838917208,
      "loss": 1.0174,
      "step": 87
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0001761748125416549,
      "loss": 1.1056,
      "step": 88
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.00017046855287240463,
      "loss": 1.0898,
      "step": 89
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00016480803099825278,
      "loss": 1.1319,
      "step": 90
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.8903650641441345,
      "eval_runtime": 78.9054,
      "eval_samples_per_second": 27.958,
      "eval_steps_per_second": 2.801,
      "step": 90
    }
  ],
  "logging_steps": 1,
  "max_steps": 141,
  "num_train_epochs": 3,
  "save_steps": 90,
  "total_flos": 9.823903794855936e+17,
  "trial_name": null,
  "trial_params": null
}