| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.19080659150043366, |
| "eval_steps": 10, |
| "global_step": 550, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003469210754553339, |
| "grad_norm": 31.211658477783203, |
| "learning_rate": 0.00019636363636363636, |
| "loss": 0.6808, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003469210754553339, |
| "eval_accuracy": 0.5609713792800903, |
| "eval_loss": 0.6768143773078918, |
| "eval_runtime": 673.3248, |
| "eval_samples_per_second": 8.562, |
| "eval_steps_per_second": 2.142, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006938421509106678, |
| "grad_norm": 2.6544265747070312, |
| "learning_rate": 0.00019272727272727274, |
| "loss": 0.6238, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006938421509106678, |
| "eval_accuracy": 0.6926279067993164, |
| "eval_loss": 0.6432190537452698, |
| "eval_runtime": 674.7679, |
| "eval_samples_per_second": 8.544, |
| "eval_steps_per_second": 2.137, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010407632263660017, |
| "grad_norm": 28.516557693481445, |
| "learning_rate": 0.0001890909090909091, |
| "loss": 0.6604, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.010407632263660017, |
| "eval_accuracy": 0.7094535827636719, |
| "eval_loss": 0.5888805985450745, |
| "eval_runtime": 675.7811, |
| "eval_samples_per_second": 8.531, |
| "eval_steps_per_second": 2.134, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013876843018213356, |
| "grad_norm": 21.07656478881836, |
| "learning_rate": 0.00018545454545454545, |
| "loss": 0.6838, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.013876843018213356, |
| "eval_accuracy": 0.7339115142822266, |
| "eval_loss": 0.5620893239974976, |
| "eval_runtime": 673.4164, |
| "eval_samples_per_second": 8.561, |
| "eval_steps_per_second": 2.141, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.017346053772766695, |
| "grad_norm": 5.250032901763916, |
| "learning_rate": 0.00018181818181818183, |
| "loss": 0.5994, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017346053772766695, |
| "eval_accuracy": 0.6922810077667236, |
| "eval_loss": 0.6344946026802063, |
| "eval_runtime": 680.1572, |
| "eval_samples_per_second": 8.476, |
| "eval_steps_per_second": 2.12, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.020815264527320035, |
| "grad_norm": 45.266639709472656, |
| "learning_rate": 0.0001781818181818182, |
| "loss": 0.6421, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.020815264527320035, |
| "eval_accuracy": 0.7212489247322083, |
| "eval_loss": 0.6085450649261475, |
| "eval_runtime": 679.9324, |
| "eval_samples_per_second": 8.479, |
| "eval_steps_per_second": 2.121, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.024284475281873375, |
| "grad_norm": 18.615516662597656, |
| "learning_rate": 0.00017454545454545454, |
| "loss": 0.549, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.024284475281873375, |
| "eval_accuracy": 0.7897658348083496, |
| "eval_loss": 0.4875045418739319, |
| "eval_runtime": 685.8032, |
| "eval_samples_per_second": 8.406, |
| "eval_steps_per_second": 2.103, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.027753686036426712, |
| "grad_norm": 8.090106010437012, |
| "learning_rate": 0.0001709090909090909, |
| "loss": 0.5293, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.027753686036426712, |
| "eval_accuracy": 0.8057242035865784, |
| "eval_loss": 0.4444285035133362, |
| "eval_runtime": 679.7237, |
| "eval_samples_per_second": 8.481, |
| "eval_steps_per_second": 2.121, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.031222896790980052, |
| "grad_norm": 12.921670913696289, |
| "learning_rate": 0.00016727272727272728, |
| "loss": 0.6783, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.031222896790980052, |
| "eval_accuracy": 0.49470946192741394, |
| "eval_loss": 0.8580413460731506, |
| "eval_runtime": 681.5405, |
| "eval_samples_per_second": 8.459, |
| "eval_steps_per_second": 2.116, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03469210754553339, |
| "grad_norm": 3.922827959060669, |
| "learning_rate": 0.00016363636363636366, |
| "loss": 0.7702, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03469210754553339, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.6616266369819641, |
| "eval_runtime": 688.564, |
| "eval_samples_per_second": 8.372, |
| "eval_steps_per_second": 2.094, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03816131830008673, |
| "grad_norm": 2.154066801071167, |
| "learning_rate": 0.00016, |
| "loss": 0.7213, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03816131830008673, |
| "eval_accuracy": 0.6563746929168701, |
| "eval_loss": 0.6373852491378784, |
| "eval_runtime": 684.8723, |
| "eval_samples_per_second": 8.418, |
| "eval_steps_per_second": 2.106, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04163052905464007, |
| "grad_norm": 1.061826467514038, |
| "learning_rate": 0.00015636363636363637, |
| "loss": 0.6699, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04163052905464007, |
| "eval_accuracy": 0.6971378922462463, |
| "eval_loss": 0.6286903619766235, |
| "eval_runtime": 681.0538, |
| "eval_samples_per_second": 8.465, |
| "eval_steps_per_second": 2.117, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.045099739809193407, |
| "grad_norm": 10.424234390258789, |
| "learning_rate": 0.00015272727272727275, |
| "loss": 0.5399, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.045099739809193407, |
| "eval_accuracy": 0.8036426901817322, |
| "eval_loss": 0.4253140985965729, |
| "eval_runtime": 690.0787, |
| "eval_samples_per_second": 8.354, |
| "eval_steps_per_second": 2.09, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04856895056374675, |
| "grad_norm": 2.499521017074585, |
| "learning_rate": 0.0001490909090909091, |
| "loss": 0.5553, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04856895056374675, |
| "eval_accuracy": 0.7542064189910889, |
| "eval_loss": 0.44444340467453003, |
| "eval_runtime": 685.7103, |
| "eval_samples_per_second": 8.407, |
| "eval_steps_per_second": 2.103, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05203816131830009, |
| "grad_norm": 14.523048400878906, |
| "learning_rate": 0.00014545454545454546, |
| "loss": 0.6979, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05203816131830009, |
| "eval_accuracy": 0.7439722418785095, |
| "eval_loss": 0.5539606213569641, |
| "eval_runtime": 691.0552, |
| "eval_samples_per_second": 8.342, |
| "eval_steps_per_second": 2.087, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.055507372072853424, |
| "grad_norm": 12.651830673217773, |
| "learning_rate": 0.00014181818181818184, |
| "loss": 0.4658, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.055507372072853424, |
| "eval_accuracy": 0.8102341890335083, |
| "eval_loss": 0.41497698426246643, |
| "eval_runtime": 683.4853, |
| "eval_samples_per_second": 8.435, |
| "eval_steps_per_second": 2.11, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05897658282740677, |
| "grad_norm": 1.7211685180664062, |
| "learning_rate": 0.0001381818181818182, |
| "loss": 0.3741, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05897658282740677, |
| "eval_accuracy": 0.8556808233261108, |
| "eval_loss": 0.36343446373939514, |
| "eval_runtime": 684.3542, |
| "eval_samples_per_second": 8.424, |
| "eval_steps_per_second": 2.107, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.062445793581960105, |
| "grad_norm": 0.5337616801261902, |
| "learning_rate": 0.00013454545454545455, |
| "loss": 0.2849, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.062445793581960105, |
| "eval_accuracy": 0.877883791923523, |
| "eval_loss": 0.2661490738391876, |
| "eval_runtime": 687.6402, |
| "eval_samples_per_second": 8.384, |
| "eval_steps_per_second": 2.097, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06591500433651344, |
| "grad_norm": 240.42689514160156, |
| "learning_rate": 0.00013090909090909093, |
| "loss": 0.3898, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06591500433651344, |
| "eval_accuracy": 0.9306157827377319, |
| "eval_loss": 0.19569338858127594, |
| "eval_runtime": 686.2526, |
| "eval_samples_per_second": 8.401, |
| "eval_steps_per_second": 2.101, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06938421509106678, |
| "grad_norm": 7.064624786376953, |
| "learning_rate": 0.00012727272727272728, |
| "loss": 0.3303, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06938421509106678, |
| "eval_accuracy": 0.9614917635917664, |
| "eval_loss": 0.13166119158267975, |
| "eval_runtime": 691.1196, |
| "eval_samples_per_second": 8.342, |
| "eval_steps_per_second": 2.086, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07285342584562012, |
| "grad_norm": 9.704263687133789, |
| "learning_rate": 0.00012363636363636364, |
| "loss": 0.1237, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07285342584562012, |
| "eval_accuracy": 0.9715524911880493, |
| "eval_loss": 0.0882689580321312, |
| "eval_runtime": 689.479, |
| "eval_samples_per_second": 8.361, |
| "eval_steps_per_second": 2.091, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07632263660017347, |
| "grad_norm": 1.1022824048995972, |
| "learning_rate": 0.00012, |
| "loss": 0.4275, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07632263660017347, |
| "eval_accuracy": 0.7349522709846497, |
| "eval_loss": 0.7713552117347717, |
| "eval_runtime": 684.6118, |
| "eval_samples_per_second": 8.421, |
| "eval_steps_per_second": 2.106, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0797918473547268, |
| "grad_norm": 3.139575481414795, |
| "learning_rate": 0.00011636363636363636, |
| "loss": 0.3942, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0797918473547268, |
| "eval_accuracy": 0.9280138611793518, |
| "eval_loss": 0.19574595987796783, |
| "eval_runtime": 690.4355, |
| "eval_samples_per_second": 8.35, |
| "eval_steps_per_second": 2.089, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08326105810928014, |
| "grad_norm": 1.0898246765136719, |
| "learning_rate": 0.00011272727272727272, |
| "loss": 0.4053, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08326105810928014, |
| "eval_accuracy": 0.9436253309249878, |
| "eval_loss": 0.1560826450586319, |
| "eval_runtime": 683.6485, |
| "eval_samples_per_second": 8.433, |
| "eval_steps_per_second": 2.109, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08673026886383348, |
| "grad_norm": 6.503047943115234, |
| "learning_rate": 0.00010909090909090909, |
| "loss": 0.2822, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08673026886383348, |
| "eval_accuracy": 0.9448395371437073, |
| "eval_loss": 0.15691135823726654, |
| "eval_runtime": 686.5667, |
| "eval_samples_per_second": 8.397, |
| "eval_steps_per_second": 2.1, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09019947961838681, |
| "grad_norm": 0.2402292639017105, |
| "learning_rate": 0.00010545454545454545, |
| "loss": 0.112, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09019947961838681, |
| "eval_accuracy": 0.9762359261512756, |
| "eval_loss": 0.10326409339904785, |
| "eval_runtime": 691.0661, |
| "eval_samples_per_second": 8.342, |
| "eval_steps_per_second": 2.087, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09366869037294015, |
| "grad_norm": 72.20112609863281, |
| "learning_rate": 0.00010181818181818181, |
| "loss": 0.1256, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09366869037294015, |
| "eval_accuracy": 0.9831743240356445, |
| "eval_loss": 0.07919599115848541, |
| "eval_runtime": 689.3527, |
| "eval_samples_per_second": 8.363, |
| "eval_steps_per_second": 2.092, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0971379011274935, |
| "grad_norm": 4.924927711486816, |
| "learning_rate": 9.818181818181818e-05, |
| "loss": 0.2008, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0971379011274935, |
| "eval_accuracy": 0.9845620393753052, |
| "eval_loss": 0.05440772697329521, |
| "eval_runtime": 694.2453, |
| "eval_samples_per_second": 8.304, |
| "eval_steps_per_second": 2.077, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10060711188204684, |
| "grad_norm": 0.23780618607997894, |
| "learning_rate": 9.454545454545455e-05, |
| "loss": 0.096, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10060711188204684, |
| "eval_accuracy": 0.984908938407898, |
| "eval_loss": 0.05138614773750305, |
| "eval_runtime": 684.3622, |
| "eval_samples_per_second": 8.424, |
| "eval_steps_per_second": 2.107, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10407632263660017, |
| "grad_norm": 0.08319728076457977, |
| "learning_rate": 9.090909090909092e-05, |
| "loss": 0.0246, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10407632263660017, |
| "eval_accuracy": 0.9760624170303345, |
| "eval_loss": 0.1299581676721573, |
| "eval_runtime": 685.1071, |
| "eval_samples_per_second": 8.415, |
| "eval_steps_per_second": 2.105, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10754553339115351, |
| "grad_norm": 0.021385882049798965, |
| "learning_rate": 8.727272727272727e-05, |
| "loss": 0.121, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10754553339115351, |
| "eval_accuracy": 0.9847354888916016, |
| "eval_loss": 0.06069515272974968, |
| "eval_runtime": 691.1118, |
| "eval_samples_per_second": 8.342, |
| "eval_steps_per_second": 2.086, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11101474414570685, |
| "grad_norm": 0.015313231386244297, |
| "learning_rate": 8.363636363636364e-05, |
| "loss": 0.1667, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11101474414570685, |
| "eval_accuracy": 0.9810928106307983, |
| "eval_loss": 0.10495974123477936, |
| "eval_runtime": 687.1574, |
| "eval_samples_per_second": 8.39, |
| "eval_steps_per_second": 2.099, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11448395490026018, |
| "grad_norm": 0.036152616143226624, |
| "learning_rate": 8e-05, |
| "loss": 0.2541, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11448395490026018, |
| "eval_accuracy": 0.9830008745193481, |
| "eval_loss": 0.09200656414031982, |
| "eval_runtime": 684.8484, |
| "eval_samples_per_second": 8.418, |
| "eval_steps_per_second": 2.106, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11795316565481354, |
| "grad_norm": 0.0553424209356308, |
| "learning_rate": 7.636363636363637e-05, |
| "loss": 0.0719, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11795316565481354, |
| "eval_accuracy": 0.9921942949295044, |
| "eval_loss": 0.03378007933497429, |
| "eval_runtime": 689.3401, |
| "eval_samples_per_second": 8.363, |
| "eval_steps_per_second": 2.092, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12142237640936687, |
| "grad_norm": 0.08452742546796799, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 0.1828, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12142237640936687, |
| "eval_accuracy": 0.9882046580314636, |
| "eval_loss": 0.043387919664382935, |
| "eval_runtime": 683.8045, |
| "eval_samples_per_second": 8.431, |
| "eval_steps_per_second": 2.109, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12489158716392021, |
| "grad_norm": 0.02259010076522827, |
| "learning_rate": 6.90909090909091e-05, |
| "loss": 0.0743, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12489158716392021, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.025819096714258194, |
| "eval_runtime": 686.7645, |
| "eval_samples_per_second": 8.394, |
| "eval_steps_per_second": 2.1, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12836079791847355, |
| "grad_norm": 0.03690154105424881, |
| "learning_rate": 6.545454545454546e-05, |
| "loss": 0.0055, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12836079791847355, |
| "eval_accuracy": 0.9946227073669434, |
| "eval_loss": 0.026782656088471413, |
| "eval_runtime": 685.8688, |
| "eval_samples_per_second": 8.405, |
| "eval_steps_per_second": 2.102, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.13183000867302688, |
| "grad_norm": 0.4121657609939575, |
| "learning_rate": 6.181818181818182e-05, |
| "loss": 0.1603, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13183000867302688, |
| "eval_accuracy": 0.9916738867759705, |
| "eval_loss": 0.0345265194773674, |
| "eval_runtime": 683.3279, |
| "eval_samples_per_second": 8.437, |
| "eval_steps_per_second": 2.11, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13529921942758022, |
| "grad_norm": 0.11380640417337418, |
| "learning_rate": 5.818181818181818e-05, |
| "loss": 0.1733, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13529921942758022, |
| "eval_accuracy": 0.9868170022964478, |
| "eval_loss": 0.0511435903608799, |
| "eval_runtime": 689.7162, |
| "eval_samples_per_second": 8.359, |
| "eval_steps_per_second": 2.091, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13876843018213356, |
| "grad_norm": 0.6289365887641907, |
| "learning_rate": 5.4545454545454546e-05, |
| "loss": 0.2929, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13876843018213356, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.033962786197662354, |
| "eval_runtime": 686.7558, |
| "eval_samples_per_second": 8.395, |
| "eval_steps_per_second": 2.1, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1422376409366869, |
| "grad_norm": 0.1024908572435379, |
| "learning_rate": 5.090909090909091e-05, |
| "loss": 0.0709, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1422376409366869, |
| "eval_accuracy": 0.9871639013290405, |
| "eval_loss": 0.08034045249223709, |
| "eval_runtime": 693.8335, |
| "eval_samples_per_second": 8.309, |
| "eval_steps_per_second": 2.078, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14570685169124023, |
| "grad_norm": 0.05307464674115181, |
| "learning_rate": 4.7272727272727275e-05, |
| "loss": 0.0871, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.14570685169124023, |
| "eval_accuracy": 0.9918473362922668, |
| "eval_loss": 0.04723769426345825, |
| "eval_runtime": 695.8072, |
| "eval_samples_per_second": 8.285, |
| "eval_steps_per_second": 2.072, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1491760624457936, |
| "grad_norm": 0.051310401409864426, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 0.0112, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1491760624457936, |
| "eval_accuracy": 0.9928880929946899, |
| "eval_loss": 0.02936358004808426, |
| "eval_runtime": 687.912, |
| "eval_samples_per_second": 8.38, |
| "eval_steps_per_second": 2.096, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.15264527320034693, |
| "grad_norm": 0.13214071094989777, |
| "learning_rate": 4e-05, |
| "loss": 0.0048, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15264527320034693, |
| "eval_accuracy": 0.9921942949295044, |
| "eval_loss": 0.03213657811284065, |
| "eval_runtime": 687.4824, |
| "eval_samples_per_second": 8.386, |
| "eval_steps_per_second": 2.098, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15611448395490027, |
| "grad_norm": 0.04072779417037964, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.1144, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15611448395490027, |
| "eval_accuracy": 0.9935819506645203, |
| "eval_loss": 0.031503621488809586, |
| "eval_runtime": 687.6764, |
| "eval_samples_per_second": 8.383, |
| "eval_steps_per_second": 2.097, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1595836947094536, |
| "grad_norm": 0.043403998017311096, |
| "learning_rate": 3.272727272727273e-05, |
| "loss": 0.1474, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1595836947094536, |
| "eval_accuracy": 0.9935819506645203, |
| "eval_loss": 0.031036239117383957, |
| "eval_runtime": 686.2379, |
| "eval_samples_per_second": 8.401, |
| "eval_steps_per_second": 2.101, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.16305290546400694, |
| "grad_norm": 1.3192791938781738, |
| "learning_rate": 2.909090909090909e-05, |
| "loss": 0.1232, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16305290546400694, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.028218073770403862, |
| "eval_runtime": 688.1062, |
| "eval_samples_per_second": 8.378, |
| "eval_steps_per_second": 2.096, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16652211621856028, |
| "grad_norm": 2.083038330078125, |
| "learning_rate": 2.5454545454545454e-05, |
| "loss": 0.0174, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16652211621856028, |
| "eval_accuracy": 0.9937554001808167, |
| "eval_loss": 0.025646191090345383, |
| "eval_runtime": 683.4068, |
| "eval_samples_per_second": 8.436, |
| "eval_steps_per_second": 2.11, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16999132697311362, |
| "grad_norm": 0.05482853576540947, |
| "learning_rate": 2.1818181818181818e-05, |
| "loss": 0.038, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16999132697311362, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.028373345732688904, |
| "eval_runtime": 691.4177, |
| "eval_samples_per_second": 8.338, |
| "eval_steps_per_second": 2.086, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.17346053772766695, |
| "grad_norm": 0.018828364089131355, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.0172, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17346053772766695, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.031070245429873466, |
| "eval_runtime": 690.1128, |
| "eval_samples_per_second": 8.354, |
| "eval_steps_per_second": 2.09, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1769297484822203, |
| "grad_norm": 0.012764820829033852, |
| "learning_rate": 1.4545454545454545e-05, |
| "loss": 0.0046, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1769297484822203, |
| "eval_accuracy": 0.9937554001808167, |
| "eval_loss": 0.03232884034514427, |
| "eval_runtime": 683.4107, |
| "eval_samples_per_second": 8.436, |
| "eval_steps_per_second": 2.11, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.18039895923677363, |
| "grad_norm": 0.02437267266213894, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 0.0023, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18039895923677363, |
| "eval_accuracy": 0.9930616021156311, |
| "eval_loss": 0.0331372506916523, |
| "eval_runtime": 687.1986, |
| "eval_samples_per_second": 8.389, |
| "eval_steps_per_second": 2.098, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18386816999132696, |
| "grad_norm": 0.023814663290977478, |
| "learning_rate": 7.272727272727272e-06, |
| "loss": 0.0024, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.18386816999132696, |
| "eval_accuracy": 0.9928880929946899, |
| "eval_loss": 0.033249419182538986, |
| "eval_runtime": 687.1764, |
| "eval_samples_per_second": 8.389, |
| "eval_steps_per_second": 2.098, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1873373807458803, |
| "grad_norm": 0.037307947874069214, |
| "learning_rate": 3.636363636363636e-06, |
| "loss": 0.1262, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1873373807458803, |
| "eval_accuracy": 0.9939289093017578, |
| "eval_loss": 0.033335860818624496, |
| "eval_runtime": 685.229, |
| "eval_samples_per_second": 8.413, |
| "eval_steps_per_second": 2.104, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19080659150043366, |
| "grad_norm": 0.11771216243505478, |
| "learning_rate": 0.0, |
| "loss": 0.003, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.19080659150043366, |
| "eval_accuracy": 0.9937554001808167, |
| "eval_loss": 0.03361057490110397, |
| "eval_runtime": 685.0012, |
| "eval_samples_per_second": 8.416, |
| "eval_steps_per_second": 2.105, |
| "step": 550 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.214558191809199e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|