{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.0,
  "eval_steps": 200,
  "global_step": 2925,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03076923076923077,
      "grad_norm": 24.726886749267578,
      "learning_rate": 1.8e-06,
      "loss": 11.4221,
      "step": 10
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 17.795185089111328,
      "learning_rate": 3.8e-06,
      "loss": 10.4941,
      "step": 20
    },
    {
      "epoch": 0.09230769230769231,
      "grad_norm": 14.8720703125,
      "learning_rate": 5.8e-06,
      "loss": 10.943,
      "step": 30
    },
    {
      "epoch": 0.12307692307692308,
      "grad_norm": 26.370025634765625,
      "learning_rate": 7.8e-06,
      "loss": 9.7676,
      "step": 40
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 25.46526527404785,
      "learning_rate": 9.800000000000001e-06,
      "loss": 8.0724,
      "step": 50
    },
    {
      "epoch": 0.18461538461538463,
      "grad_norm": 31.001745223999023,
      "learning_rate": 1.18e-05,
      "loss": 6.9668,
      "step": 60
    },
    {
      "epoch": 0.2153846153846154,
      "grad_norm": 9.090025901794434,
      "learning_rate": 1.3800000000000002e-05,
      "loss": 5.0298,
      "step": 70
    },
    {
      "epoch": 0.24615384615384617,
      "grad_norm": 4.529256820678711,
      "learning_rate": 1.58e-05,
      "loss": 3.5286,
      "step": 80
    },
    {
      "epoch": 0.27692307692307694,
      "grad_norm": 2.912689685821533,
      "learning_rate": 1.78e-05,
      "loss": 2.9783,
      "step": 90
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 2.04130220413208,
      "learning_rate": 1.9800000000000004e-05,
      "loss": 2.5693,
      "step": 100
    },
    {
      "epoch": 0.3384615384615385,
      "grad_norm": 1.7645025253295898,
      "learning_rate": 2.18e-05,
      "loss": 2.3032,
      "step": 110
    },
    {
      "epoch": 0.36923076923076925,
      "grad_norm": 1.4153923988342285,
      "learning_rate": 2.38e-05,
      "loss": 2.1182,
      "step": 120
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1566500663757324,
      "learning_rate": 2.58e-05,
      "loss": 2.2848,
      "step": 130
    },
    {
      "epoch": 0.4307692307692308,
      "grad_norm": 1.7678470611572266,
      "learning_rate": 2.7800000000000005e-05,
      "loss": 2.2928,
      "step": 140
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 1.717806100845337,
      "learning_rate": 2.98e-05,
      "loss": 2.0866,
      "step": 150
    },
    {
      "epoch": 0.49230769230769234,
      "grad_norm": 1.8488136529922485,
      "learning_rate": 3.18e-05,
      "loss": 2.2323,
      "step": 160
    },
    {
      "epoch": 0.5230769230769231,
      "grad_norm": 1.8968263864517212,
      "learning_rate": 3.38e-05,
      "loss": 2.2183,
      "step": 170
    },
    {
      "epoch": 0.5538461538461539,
      "grad_norm": 1.398100733757019,
      "learning_rate": 3.58e-05,
      "loss": 1.8395,
      "step": 180
    },
    {
      "epoch": 0.5846153846153846,
      "grad_norm": 1.5084631443023682,
      "learning_rate": 3.7800000000000004e-05,
      "loss": 1.9694,
      "step": 190
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 1.1019172668457031,
      "learning_rate": 3.9800000000000005e-05,
      "loss": 1.9702,
      "step": 200
    },
    {
      "epoch": 0.6153846153846154,
      "eval_gen_len": 186.9088,
      "eval_loss": 1.8701356649398804,
      "eval_rouge1": 0.5735,
      "eval_rouge2": 0.2231,
      "eval_rougeL": 0.4346,
      "eval_runtime": 33.3119,
      "eval_samples_per_second": 8.225,
      "eval_steps_per_second": 2.071,
      "step": 200
    },
    {
      "epoch": 0.6461538461538462,
      "grad_norm": 1.5155857801437378,
      "learning_rate": 4.18e-05,
      "loss": 2.0705,
      "step": 210
    },
    {
      "epoch": 0.676923076923077,
      "grad_norm": 1.3338611125946045,
      "learning_rate": 4.38e-05,
      "loss": 2.051,
      "step": 220
    },
    {
      "epoch": 0.7076923076923077,
      "grad_norm": 1.2012193202972412,
      "learning_rate": 4.58e-05,
      "loss": 1.9834,
      "step": 230
    },
    {
      "epoch": 0.7384615384615385,
      "grad_norm": 1.527007818222046,
      "learning_rate": 4.78e-05,
      "loss": 2.0451,
      "step": 240
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 1.2146987915039062,
      "learning_rate": 4.9800000000000004e-05,
      "loss": 1.974,
      "step": 250
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7576699256896973,
      "learning_rate": 5.1800000000000005e-05,
      "loss": 2.0347,
      "step": 260
    },
    {
      "epoch": 0.8307692307692308,
      "grad_norm": 1.17750084400177,
      "learning_rate": 5.380000000000001e-05,
      "loss": 1.9763,
      "step": 270
    },
    {
      "epoch": 0.8615384615384616,
      "grad_norm": 1.3267815113067627,
      "learning_rate": 5.580000000000001e-05,
      "loss": 1.842,
      "step": 280
    },
    {
      "epoch": 0.8923076923076924,
      "grad_norm": 1.0520875453948975,
      "learning_rate": 5.7799999999999995e-05,
      "loss": 2.0525,
      "step": 290
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 1.4701600074768066,
      "learning_rate": 5.9800000000000003e-05,
      "loss": 1.7418,
      "step": 300
    },
    {
      "epoch": 0.9538461538461539,
      "grad_norm": 1.062267780303955,
      "learning_rate": 6.18e-05,
      "loss": 1.9685,
      "step": 310
    },
    {
      "epoch": 0.9846153846153847,
      "grad_norm": 1.2724727392196655,
      "learning_rate": 6.38e-05,
      "loss": 1.7972,
      "step": 320
    },
    {
      "epoch": 1.0153846153846153,
      "grad_norm": 1.2834393978118896,
      "learning_rate": 6.58e-05,
      "loss": 1.8395,
      "step": 330
    },
    {
      "epoch": 1.0461538461538462,
      "grad_norm": 0.9657095074653625,
      "learning_rate": 6.780000000000001e-05,
      "loss": 1.836,
      "step": 340
    },
    {
      "epoch": 1.0769230769230769,
      "grad_norm": 1.0390011072158813,
      "learning_rate": 6.98e-05,
      "loss": 1.9328,
      "step": 350
    },
    {
      "epoch": 1.1076923076923078,
      "grad_norm": 1.2896322011947632,
      "learning_rate": 7.18e-05,
      "loss": 1.8227,
      "step": 360
    },
    {
      "epoch": 1.1384615384615384,
      "grad_norm": 1.53290593624115,
      "learning_rate": 7.38e-05,
      "loss": 1.9214,
      "step": 370
    },
    {
      "epoch": 1.1692307692307693,
      "grad_norm": 1.0145893096923828,
      "learning_rate": 7.58e-05,
      "loss": 1.8295,
      "step": 380
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.2127511501312256,
      "learning_rate": 7.780000000000001e-05,
      "loss": 1.7774,
      "step": 390
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 1.1971853971481323,
      "learning_rate": 7.98e-05,
      "loss": 1.9926,
      "step": 400
    },
    {
      "epoch": 1.2307692307692308,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.7412512302398682,
      "eval_rouge1": 0.5961,
      "eval_rouge2": 0.2459,
      "eval_rougeL": 0.4577,
      "eval_runtime": 33.1213,
      "eval_samples_per_second": 8.273,
      "eval_steps_per_second": 2.083,
      "step": 400
    },
    {
      "epoch": 1.2615384615384615,
      "grad_norm": 1.107351303100586,
      "learning_rate": 8.18e-05,
      "loss": 1.8515,
      "step": 410
    },
    {
      "epoch": 1.2923076923076924,
      "grad_norm": 1.1407504081726074,
      "learning_rate": 8.38e-05,
      "loss": 1.7011,
      "step": 420
    },
    {
      "epoch": 1.323076923076923,
      "grad_norm": 1.418338656425476,
      "learning_rate": 8.58e-05,
      "loss": 1.6384,
      "step": 430
    },
    {
      "epoch": 1.353846153846154,
      "grad_norm": 1.3715286254882812,
      "learning_rate": 8.78e-05,
      "loss": 1.8502,
      "step": 440
    },
    {
      "epoch": 1.3846153846153846,
      "grad_norm": 0.9774390459060669,
      "learning_rate": 8.98e-05,
      "loss": 1.8264,
      "step": 450
    },
    {
      "epoch": 1.4153846153846155,
      "grad_norm": 1.4778176546096802,
      "learning_rate": 9.180000000000001e-05,
      "loss": 1.694,
      "step": 460
    },
    {
      "epoch": 1.4461538461538461,
      "grad_norm": 1.2721563577651978,
      "learning_rate": 9.38e-05,
      "loss": 1.8213,
      "step": 470
    },
    {
      "epoch": 1.476923076923077,
      "grad_norm": 0.94813472032547,
      "learning_rate": 9.58e-05,
      "loss": 1.6636,
      "step": 480
    },
    {
      "epoch": 1.5076923076923077,
      "grad_norm": 1.0905983448028564,
      "learning_rate": 9.78e-05,
      "loss": 1.7712,
      "step": 490
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 1.1593286991119385,
      "learning_rate": 9.98e-05,
      "loss": 1.808,
      "step": 500
    },
    {
      "epoch": 1.5692307692307692,
      "grad_norm": 1.0593713521957397,
      "learning_rate": 9.967272727272727e-05,
      "loss": 1.82,
      "step": 510
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.941973865032196,
      "learning_rate": 9.930909090909092e-05,
      "loss": 1.7341,
      "step": 520
    },
    {
      "epoch": 1.6307692307692307,
      "grad_norm": 0.80891352891922,
      "learning_rate": 9.894545454545455e-05,
      "loss": 1.6166,
      "step": 530
    },
    {
      "epoch": 1.6615384615384614,
      "grad_norm": 1.0325396060943604,
      "learning_rate": 9.858181818181819e-05,
      "loss": 1.8333,
      "step": 540
    },
    {
      "epoch": 1.6923076923076923,
      "grad_norm": 1.3042590618133545,
      "learning_rate": 9.821818181818182e-05,
      "loss": 1.6287,
      "step": 550
    },
    {
      "epoch": 1.7230769230769232,
      "grad_norm": 1.475900650024414,
      "learning_rate": 9.785454545454545e-05,
      "loss": 1.6019,
      "step": 560
    },
    {
      "epoch": 1.7538461538461538,
      "grad_norm": 1.1589939594268799,
      "learning_rate": 9.74909090909091e-05,
      "loss": 1.6904,
      "step": 570
    },
    {
      "epoch": 1.7846153846153845,
      "grad_norm": 1.2714788913726807,
      "learning_rate": 9.712727272727274e-05,
      "loss": 1.7928,
      "step": 580
    },
    {
      "epoch": 1.8153846153846154,
      "grad_norm": 1.2037074565887451,
      "learning_rate": 9.676363636363637e-05,
      "loss": 1.8325,
      "step": 590
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 1.1115801334381104,
      "learning_rate": 9.64e-05,
      "loss": 1.7673,
      "step": 600
    },
    {
      "epoch": 1.8461538461538463,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.69492506980896,
      "eval_rouge1": 0.6004,
      "eval_rouge2": 0.2505,
      "eval_rougeL": 0.4658,
      "eval_runtime": 33.2205,
      "eval_samples_per_second": 8.248,
      "eval_steps_per_second": 2.077,
      "step": 600
    },
    {
      "epoch": 1.876923076923077,
      "grad_norm": 1.2008461952209473,
      "learning_rate": 9.603636363636364e-05,
      "loss": 1.7674,
      "step": 610
    },
    {
      "epoch": 1.9076923076923076,
      "grad_norm": 1.1482900381088257,
      "learning_rate": 9.567272727272729e-05,
      "loss": 1.7932,
      "step": 620
    },
    {
      "epoch": 1.9384615384615385,
      "grad_norm": 1.0144352912902832,
      "learning_rate": 9.530909090909092e-05,
      "loss": 1.6315,
      "step": 630
    },
    {
      "epoch": 1.9692307692307693,
      "grad_norm": 0.9276631474494934,
      "learning_rate": 9.494545454545455e-05,
      "loss": 1.8373,
      "step": 640
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.0593888759613037,
      "learning_rate": 9.458181818181819e-05,
      "loss": 1.7967,
      "step": 650
    },
    {
      "epoch": 2.0307692307692307,
      "grad_norm": 1.1406164169311523,
      "learning_rate": 9.421818181818183e-05,
      "loss": 1.7668,
      "step": 660
    },
    {
      "epoch": 2.0615384615384613,
      "grad_norm": 0.9809508919715881,
      "learning_rate": 9.385454545454546e-05,
      "loss": 1.6602,
      "step": 670
    },
    {
      "epoch": 2.0923076923076924,
      "grad_norm": 1.1698426008224487,
      "learning_rate": 9.349090909090909e-05,
      "loss": 1.6775,
      "step": 680
    },
    {
      "epoch": 2.123076923076923,
      "grad_norm": 1.1539372205734253,
      "learning_rate": 9.312727272727274e-05,
      "loss": 1.533,
      "step": 690
    },
    {
      "epoch": 2.1538461538461537,
      "grad_norm": 1.0783981084823608,
      "learning_rate": 9.276363636363637e-05,
      "loss": 1.5243,
      "step": 700
    },
    {
      "epoch": 2.184615384615385,
      "grad_norm": 1.262705683708191,
      "learning_rate": 9.240000000000001e-05,
      "loss": 1.6625,
      "step": 710
    },
    {
      "epoch": 2.2153846153846155,
      "grad_norm": 1.1545718908309937,
      "learning_rate": 9.203636363636364e-05,
      "loss": 1.9172,
      "step": 720
    },
    {
      "epoch": 2.246153846153846,
      "grad_norm": 0.9896947741508484,
      "learning_rate": 9.167272727272728e-05,
      "loss": 1.5449,
      "step": 730
    },
    {
      "epoch": 2.276923076923077,
      "grad_norm": 1.063262701034546,
      "learning_rate": 9.130909090909091e-05,
      "loss": 1.5318,
      "step": 740
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 1.0846728086471558,
      "learning_rate": 9.094545454545454e-05,
      "loss": 1.5875,
      "step": 750
    },
    {
      "epoch": 2.3384615384615386,
      "grad_norm": 1.0437549352645874,
      "learning_rate": 9.058181818181819e-05,
      "loss": 1.5724,
      "step": 760
    },
    {
      "epoch": 2.3692307692307693,
      "grad_norm": 1.050115942955017,
      "learning_rate": 9.021818181818183e-05,
      "loss": 1.662,
      "step": 770
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.9663347601890564,
      "learning_rate": 8.985454545454546e-05,
      "loss": 1.6284,
      "step": 780
    },
    {
      "epoch": 2.430769230769231,
      "grad_norm": 1.1656932830810547,
      "learning_rate": 8.949090909090909e-05,
      "loss": 1.5995,
      "step": 790
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 1.073716402053833,
      "learning_rate": 8.912727272727273e-05,
      "loss": 1.4811,
      "step": 800
    },
    {
      "epoch": 2.4615384615384617,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6769312620162964,
      "eval_rouge1": 0.6042,
      "eval_rouge2": 0.2561,
      "eval_rougeL": 0.4686,
      "eval_runtime": 33.5273,
      "eval_samples_per_second": 8.172,
      "eval_steps_per_second": 2.058,
      "step": 800
    },
    {
      "epoch": 2.4923076923076923,
      "grad_norm": 0.9764583110809326,
      "learning_rate": 8.876363636363638e-05,
      "loss": 1.5478,
      "step": 810
    },
    {
      "epoch": 2.523076923076923,
      "grad_norm": 0.9336417317390442,
      "learning_rate": 8.840000000000001e-05,
      "loss": 1.5138,
      "step": 820
    },
    {
      "epoch": 2.5538461538461537,
      "grad_norm": 0.9714758992195129,
      "learning_rate": 8.803636363636364e-05,
      "loss": 1.5506,
      "step": 830
    },
    {
      "epoch": 2.5846153846153848,
      "grad_norm": 0.9208464622497559,
      "learning_rate": 8.767272727272727e-05,
      "loss": 1.4944,
      "step": 840
    },
    {
      "epoch": 2.6153846153846154,
      "grad_norm": 1.0252026319503784,
      "learning_rate": 8.730909090909092e-05,
      "loss": 1.6991,
      "step": 850
    },
    {
      "epoch": 2.646153846153846,
      "grad_norm": 1.0464015007019043,
      "learning_rate": 8.694545454545455e-05,
      "loss": 1.679,
      "step": 860
    },
    {
      "epoch": 2.676923076923077,
      "grad_norm": 1.3673149347305298,
      "learning_rate": 8.658181818181818e-05,
      "loss": 1.5021,
      "step": 870
    },
    {
      "epoch": 2.707692307692308,
      "grad_norm": 1.1350778341293335,
      "learning_rate": 8.621818181818181e-05,
      "loss": 1.5898,
      "step": 880
    },
    {
      "epoch": 2.7384615384615385,
      "grad_norm": 0.9916401505470276,
      "learning_rate": 8.585454545454546e-05,
      "loss": 1.6542,
      "step": 890
    },
    {
      "epoch": 2.769230769230769,
      "grad_norm": 0.9967766404151917,
      "learning_rate": 8.54909090909091e-05,
      "loss": 1.7056,
      "step": 900
    },
    {
      "epoch": 2.8,
      "grad_norm": 1.2031991481781006,
      "learning_rate": 8.512727272727273e-05,
      "loss": 1.6856,
      "step": 910
    },
    {
      "epoch": 2.830769230769231,
      "grad_norm": 1.0159794092178345,
      "learning_rate": 8.476363636363636e-05,
      "loss": 1.5293,
      "step": 920
    },
    {
      "epoch": 2.8615384615384616,
      "grad_norm": 1.3572866916656494,
      "learning_rate": 8.44e-05,
      "loss": 1.6191,
      "step": 930
    },
    {
      "epoch": 2.8923076923076922,
      "grad_norm": 1.2567291259765625,
      "learning_rate": 8.403636363636364e-05,
      "loss": 1.7504,
      "step": 940
    },
    {
      "epoch": 2.9230769230769234,
      "grad_norm": 1.2280553579330444,
      "learning_rate": 8.367272727272728e-05,
      "loss": 1.6523,
      "step": 950
    },
    {
      "epoch": 2.953846153846154,
      "grad_norm": 1.0409953594207764,
      "learning_rate": 8.330909090909091e-05,
      "loss": 1.5903,
      "step": 960
    },
    {
      "epoch": 2.9846153846153847,
      "grad_norm": 1.10386061668396,
      "learning_rate": 8.294545454545455e-05,
      "loss": 1.5235,
      "step": 970
    },
    {
      "epoch": 3.0153846153846153,
      "grad_norm": 1.0341882705688477,
      "learning_rate": 8.258181818181818e-05,
      "loss": 1.6025,
      "step": 980
    },
    {
      "epoch": 3.046153846153846,
      "grad_norm": 1.3020343780517578,
      "learning_rate": 8.221818181818183e-05,
      "loss": 1.4696,
      "step": 990
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 1.104643702507019,
      "learning_rate": 8.185454545454546e-05,
      "loss": 1.4009,
      "step": 1000
    },
    {
      "epoch": 3.076923076923077,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6721168756484985,
      "eval_rouge1": 0.6044,
      "eval_rouge2": 0.2558,
      "eval_rougeL": 0.4692,
      "eval_runtime": 34.4765,
      "eval_samples_per_second": 7.947,
      "eval_steps_per_second": 2.001,
      "step": 1000
    },
    {
      "epoch": 3.1076923076923078,
      "grad_norm": 1.1327263116836548,
      "learning_rate": 8.14909090909091e-05,
      "loss": 1.5893,
      "step": 1010
    },
    {
      "epoch": 3.1384615384615384,
      "grad_norm": 1.168095350265503,
      "learning_rate": 8.112727272727273e-05,
      "loss": 1.4248,
      "step": 1020
    },
    {
      "epoch": 3.169230769230769,
      "grad_norm": 0.9978489279747009,
      "learning_rate": 8.076363636363636e-05,
      "loss": 1.5407,
      "step": 1030
    },
    {
      "epoch": 3.2,
      "grad_norm": 1.0370062589645386,
      "learning_rate": 8.04e-05,
      "loss": 1.4867,
      "step": 1040
    },
    {
      "epoch": 3.230769230769231,
      "grad_norm": 0.9647369384765625,
      "learning_rate": 8.003636363636365e-05,
      "loss": 1.4806,
      "step": 1050
    },
    {
      "epoch": 3.2615384615384615,
      "grad_norm": 1.3316948413848877,
      "learning_rate": 7.967272727272728e-05,
      "loss": 1.4612,
      "step": 1060
    },
    {
      "epoch": 3.292307692307692,
      "grad_norm": 1.37971830368042,
      "learning_rate": 7.93090909090909e-05,
      "loss": 1.5745,
      "step": 1070
    },
    {
      "epoch": 3.3230769230769233,
      "grad_norm": 1.1220242977142334,
      "learning_rate": 7.894545454545455e-05,
      "loss": 1.3228,
      "step": 1080
    },
    {
      "epoch": 3.353846153846154,
      "grad_norm": 1.0595531463623047,
      "learning_rate": 7.85818181818182e-05,
      "loss": 1.4618,
      "step": 1090
    },
    {
      "epoch": 3.3846153846153846,
      "grad_norm": 1.3739666938781738,
      "learning_rate": 7.821818181818182e-05,
      "loss": 1.4973,
      "step": 1100
    },
    {
      "epoch": 3.4153846153846152,
      "grad_norm": 1.2643866539001465,
      "learning_rate": 7.785454545454545e-05,
      "loss": 1.533,
      "step": 1110
    },
    {
      "epoch": 3.4461538461538463,
      "grad_norm": 1.232230544090271,
      "learning_rate": 7.74909090909091e-05,
      "loss": 1.4867,
      "step": 1120
    },
    {
      "epoch": 3.476923076923077,
      "grad_norm": 0.9712868332862854,
      "learning_rate": 7.712727272727273e-05,
      "loss": 1.4916,
      "step": 1130
    },
    {
      "epoch": 3.5076923076923077,
      "grad_norm": 1.3414063453674316,
      "learning_rate": 7.676363636363637e-05,
      "loss": 1.5992,
      "step": 1140
    },
    {
      "epoch": 3.5384615384615383,
      "grad_norm": 1.0222588777542114,
      "learning_rate": 7.64e-05,
      "loss": 1.5378,
      "step": 1150
    },
    {
      "epoch": 3.569230769230769,
      "grad_norm": 1.1905276775360107,
      "learning_rate": 7.603636363636364e-05,
      "loss": 1.4324,
      "step": 1160
    },
    {
      "epoch": 3.6,
      "grad_norm": 1.2846956253051758,
      "learning_rate": 7.567272727272727e-05,
      "loss": 1.4927,
      "step": 1170
    },
    {
      "epoch": 3.6307692307692307,
      "grad_norm": 1.165310025215149,
      "learning_rate": 7.530909090909092e-05,
      "loss": 1.4277,
      "step": 1180
    },
    {
      "epoch": 3.6615384615384614,
      "grad_norm": 1.063883900642395,
      "learning_rate": 7.494545454545455e-05,
      "loss": 1.5226,
      "step": 1190
    },
    {
      "epoch": 3.6923076923076925,
      "grad_norm": 1.3430577516555786,
      "learning_rate": 7.458181818181819e-05,
      "loss": 1.5315,
      "step": 1200
    },
    {
      "epoch": 3.6923076923076925,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6592342853546143,
      "eval_rouge1": 0.6057,
      "eval_rouge2": 0.2572,
      "eval_rougeL": 0.4709,
      "eval_runtime": 33.3732,
      "eval_samples_per_second": 8.21,
      "eval_steps_per_second": 2.068,
      "step": 1200
    },
    {
      "epoch": 3.723076923076923,
      "grad_norm": 1.2028673887252808,
      "learning_rate": 7.421818181818182e-05,
      "loss": 1.5135,
      "step": 1210
    },
    {
      "epoch": 3.753846153846154,
      "grad_norm": 0.9091282486915588,
      "learning_rate": 7.385454545454545e-05,
      "loss": 1.3876,
      "step": 1220
    },
    {
      "epoch": 3.7846153846153845,
      "grad_norm": 0.9549902677536011,
      "learning_rate": 7.34909090909091e-05,
      "loss": 1.7408,
      "step": 1230
    },
    {
      "epoch": 3.815384615384615,
      "grad_norm": 1.109423279762268,
      "learning_rate": 7.312727272727274e-05,
      "loss": 1.3897,
      "step": 1240
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 1.1412984132766724,
      "learning_rate": 7.276363636363637e-05,
      "loss": 1.5034,
      "step": 1250
    },
    {
      "epoch": 3.876923076923077,
      "grad_norm": 1.176283359527588,
      "learning_rate": 7.24e-05,
      "loss": 1.5426,
      "step": 1260
    },
    {
      "epoch": 3.9076923076923076,
      "grad_norm": 1.3558485507965088,
      "learning_rate": 7.203636363636364e-05,
      "loss": 1.5287,
      "step": 1270
    },
    {
      "epoch": 3.9384615384615387,
      "grad_norm": 1.23688805103302,
      "learning_rate": 7.167272727272729e-05,
      "loss": 1.5794,
      "step": 1280
    },
    {
      "epoch": 3.9692307692307693,
      "grad_norm": 1.248528242111206,
      "learning_rate": 7.130909090909092e-05,
      "loss": 1.5166,
      "step": 1290
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.8396208882331848,
      "learning_rate": 7.094545454545455e-05,
      "loss": 1.4648,
      "step": 1300
    },
    {
      "epoch": 4.030769230769231,
      "grad_norm": 1.0756162405014038,
      "learning_rate": 7.058181818181819e-05,
      "loss": 1.4147,
      "step": 1310
    },
    {
      "epoch": 4.061538461538461,
      "grad_norm": 1.1731723546981812,
      "learning_rate": 7.021818181818182e-05,
      "loss": 1.3732,
      "step": 1320
    },
    {
      "epoch": 4.092307692307692,
      "grad_norm": 1.2993149757385254,
      "learning_rate": 6.985454545454546e-05,
      "loss": 1.4118,
      "step": 1330
    },
    {
      "epoch": 4.123076923076923,
      "grad_norm": 1.1648204326629639,
      "learning_rate": 6.949090909090909e-05,
      "loss": 1.4258,
      "step": 1340
    },
    {
      "epoch": 4.153846153846154,
      "grad_norm": 1.1242045164108276,
      "learning_rate": 6.912727272727274e-05,
      "loss": 1.3598,
      "step": 1350
    },
    {
      "epoch": 4.184615384615385,
      "grad_norm": 1.53397536277771,
      "learning_rate": 6.876363636363637e-05,
      "loss": 1.3431,
      "step": 1360
    },
    {
      "epoch": 4.2153846153846155,
      "grad_norm": 1.3859331607818604,
      "learning_rate": 6.840000000000001e-05,
      "loss": 1.5014,
      "step": 1370
    },
    {
      "epoch": 4.246153846153846,
      "grad_norm": 1.0821737051010132,
      "learning_rate": 6.803636363636364e-05,
      "loss": 1.2638,
      "step": 1380
    },
    {
      "epoch": 4.276923076923077,
      "grad_norm": 1.2648742198944092,
      "learning_rate": 6.767272727272728e-05,
      "loss": 1.4567,
      "step": 1390
    },
    {
      "epoch": 4.3076923076923075,
      "grad_norm": 1.1357372999191284,
      "learning_rate": 6.730909090909091e-05,
      "loss": 1.4706,
      "step": 1400
    },
    {
      "epoch": 4.3076923076923075,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6691502332687378,
      "eval_rouge1": 0.6061,
      "eval_rouge2": 0.2593,
      "eval_rougeL": 0.4719,
      "eval_runtime": 33.439,
      "eval_samples_per_second": 8.194,
      "eval_steps_per_second": 2.063,
      "step": 1400
    },
    {
      "epoch": 4.338461538461538,
      "grad_norm": 1.4324264526367188,
      "learning_rate": 6.694545454545454e-05,
      "loss": 1.4032,
      "step": 1410
    },
    {
      "epoch": 4.36923076923077,
      "grad_norm": 1.3646095991134644,
      "learning_rate": 6.658181818181819e-05,
      "loss": 1.3943,
      "step": 1420
    },
    {
      "epoch": 4.4,
      "grad_norm": 0.9991398453712463,
      "learning_rate": 6.621818181818183e-05,
      "loss": 1.5292,
      "step": 1430
    },
    {
      "epoch": 4.430769230769231,
      "grad_norm": 1.1873986721038818,
      "learning_rate": 6.585454545454546e-05,
      "loss": 1.4813,
      "step": 1440
    },
    {
      "epoch": 4.461538461538462,
      "grad_norm": 1.0080267190933228,
      "learning_rate": 6.549090909090909e-05,
      "loss": 1.4951,
      "step": 1450
    },
    {
      "epoch": 4.492307692307692,
      "grad_norm": 1.5542734861373901,
      "learning_rate": 6.512727272727272e-05,
      "loss": 1.5603,
      "step": 1460
    },
    {
      "epoch": 4.523076923076923,
      "grad_norm": 1.2610498666763306,
      "learning_rate": 6.476363636363638e-05,
      "loss": 1.3286,
      "step": 1470
    },
    {
      "epoch": 4.553846153846154,
      "grad_norm": 1.0882760286331177,
      "learning_rate": 6.440000000000001e-05,
      "loss": 1.309,
      "step": 1480
    },
    {
      "epoch": 4.584615384615384,
      "grad_norm": 1.1589834690093994,
      "learning_rate": 6.403636363636364e-05,
      "loss": 1.414,
      "step": 1490
    },
    {
      "epoch": 4.615384615384615,
      "grad_norm": 1.3731642961502075,
      "learning_rate": 6.367272727272727e-05,
      "loss": 1.4529,
      "step": 1500
    },
    {
      "epoch": 4.6461538461538465,
      "grad_norm": 1.3053221702575684,
      "learning_rate": 6.330909090909091e-05,
      "loss": 1.4623,
      "step": 1510
    },
    {
      "epoch": 4.676923076923077,
      "grad_norm": 1.2154396772384644,
      "learning_rate": 6.294545454545455e-05,
      "loss": 1.4766,
      "step": 1520
    },
    {
      "epoch": 4.707692307692308,
      "grad_norm": 1.0947812795639038,
      "learning_rate": 6.258181818181818e-05,
      "loss": 1.3212,
      "step": 1530
    },
    {
      "epoch": 4.7384615384615385,
      "grad_norm": 1.005462646484375,
      "learning_rate": 6.221818181818181e-05,
      "loss": 1.3956,
      "step": 1540
    },
    {
      "epoch": 4.769230769230769,
      "grad_norm": 1.196108341217041,
      "learning_rate": 6.185454545454546e-05,
      "loss": 1.4048,
      "step": 1550
    },
    {
      "epoch": 4.8,
      "grad_norm": 1.355747103691101,
      "learning_rate": 6.14909090909091e-05,
      "loss": 1.3474,
      "step": 1560
    },
    {
      "epoch": 4.8307692307692305,
      "grad_norm": 1.177310585975647,
      "learning_rate": 6.112727272727273e-05,
      "loss": 1.3038,
      "step": 1570
    },
    {
      "epoch": 4.861538461538462,
      "grad_norm": 1.273474097251892,
      "learning_rate": 6.076363636363637e-05,
      "loss": 1.3858,
      "step": 1580
    },
    {
      "epoch": 4.892307692307693,
      "grad_norm": 1.2601613998413086,
      "learning_rate": 6.04e-05,
      "loss": 1.2742,
      "step": 1590
    },
    {
      "epoch": 4.923076923076923,
      "grad_norm": 1.052040696144104,
      "learning_rate": 6.0036363636363634e-05,
      "loss": 1.5551,
      "step": 1600
    },
    {
      "epoch": 4.923076923076923,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6678508520126343,
      "eval_rouge1": 0.6061,
      "eval_rouge2": 0.2582,
      "eval_rougeL": 0.4724,
      "eval_runtime": 33.608,
      "eval_samples_per_second": 8.153,
      "eval_steps_per_second": 2.053,
      "step": 1600
    },
    {
      "epoch": 4.953846153846154,
      "grad_norm": 1.4437050819396973,
      "learning_rate": 5.967272727272728e-05,
      "loss": 1.3613,
      "step": 1610
    },
    {
      "epoch": 4.984615384615385,
      "grad_norm": 1.398398518562317,
      "learning_rate": 5.9309090909090915e-05,
      "loss": 1.3861,
      "step": 1620
    },
    {
      "epoch": 5.015384615384615,
      "grad_norm": 1.1901689767837524,
      "learning_rate": 5.894545454545455e-05,
      "loss": 1.4525,
      "step": 1630
    },
    {
      "epoch": 5.046153846153846,
      "grad_norm": 1.2631349563598633,
      "learning_rate": 5.858181818181818e-05,
      "loss": 1.3698,
      "step": 1640
    },
    {
      "epoch": 5.076923076923077,
      "grad_norm": 1.3628337383270264,
      "learning_rate": 5.821818181818182e-05,
      "loss": 1.1917,
      "step": 1650
    },
    {
      "epoch": 5.107692307692307,
      "grad_norm": 1.2330440282821655,
      "learning_rate": 5.785454545454546e-05,
      "loss": 1.3692,
      "step": 1660
    },
    {
      "epoch": 5.138461538461539,
      "grad_norm": 1.069877028465271,
      "learning_rate": 5.74909090909091e-05,
      "loss": 1.2439,
      "step": 1670
    },
    {
      "epoch": 5.1692307692307695,
      "grad_norm": 1.1694751977920532,
      "learning_rate": 5.712727272727273e-05,
      "loss": 1.1194,
      "step": 1680
    },
    {
      "epoch": 5.2,
      "grad_norm": 1.424668312072754,
      "learning_rate": 5.6763636363636365e-05,
      "loss": 1.456,
      "step": 1690
    },
    {
      "epoch": 5.230769230769231,
      "grad_norm": 1.1166226863861084,
      "learning_rate": 5.6399999999999995e-05,
      "loss": 1.3011,
      "step": 1700
    },
    {
      "epoch": 5.2615384615384615,
      "grad_norm": 1.196712851524353,
      "learning_rate": 5.6036363636363646e-05,
      "loss": 1.3272,
      "step": 1710
    },
    {
      "epoch": 5.292307692307692,
      "grad_norm": 1.105592131614685,
      "learning_rate": 5.5672727272727276e-05,
      "loss": 1.3831,
      "step": 1720
    },
    {
      "epoch": 5.323076923076923,
      "grad_norm": 1.3789408206939697,
      "learning_rate": 5.530909090909091e-05,
      "loss": 1.3924,
      "step": 1730
    },
    {
      "epoch": 5.3538461538461535,
      "grad_norm": 1.1058343648910522,
      "learning_rate": 5.494545454545454e-05,
      "loss": 1.1278,
      "step": 1740
    },
    {
      "epoch": 5.384615384615385,
      "grad_norm": 1.0470004081726074,
      "learning_rate": 5.458181818181819e-05,
      "loss": 1.2675,
      "step": 1750
    },
    {
      "epoch": 5.415384615384616,
      "grad_norm": 1.5735996961593628,
      "learning_rate": 5.421818181818182e-05,
      "loss": 1.368,
      "step": 1760
    },
    {
      "epoch": 5.446153846153846,
      "grad_norm": 1.053110122680664,
      "learning_rate": 5.385454545454546e-05,
      "loss": 1.4409,
      "step": 1770
    },
    {
      "epoch": 5.476923076923077,
      "grad_norm": 1.2032136917114258,
      "learning_rate": 5.349090909090909e-05,
      "loss": 1.3919,
      "step": 1780
    },
    {
      "epoch": 5.507692307692308,
      "grad_norm": 1.3398489952087402,
      "learning_rate": 5.3127272727272726e-05,
      "loss": 1.3891,
      "step": 1790
    },
    {
      "epoch": 5.538461538461538,
      "grad_norm": 1.1674134731292725,
      "learning_rate": 5.276363636363637e-05,
      "loss": 1.4167,
      "step": 1800
    },
    {
      "epoch": 5.538461538461538,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6820106506347656,
      "eval_rouge1": 0.6051,
      "eval_rouge2": 0.256,
      "eval_rougeL": 0.4705,
      "eval_runtime": 34.3835,
      "eval_samples_per_second": 7.969,
      "eval_steps_per_second": 2.007,
      "step": 1800
    },
    {
      "epoch": 5.569230769230769,
      "grad_norm": 1.3142492771148682,
      "learning_rate": 5.2400000000000007e-05,
      "loss": 1.2869,
      "step": 1810
    },
    {
      "epoch": 5.6,
      "grad_norm": 1.3149932622909546,
      "learning_rate": 5.2036363636363637e-05,
      "loss": 1.3653,
      "step": 1820
    },
    {
      "epoch": 5.63076923076923,
      "grad_norm": 1.1139847040176392,
      "learning_rate": 5.167272727272727e-05,
      "loss": 1.3807,
      "step": 1830
    },
    {
      "epoch": 5.661538461538462,
      "grad_norm": 1.302495002746582,
      "learning_rate": 5.130909090909091e-05,
      "loss": 1.3826,
      "step": 1840
    },
    {
      "epoch": 5.6923076923076925,
      "grad_norm": 1.2617419958114624,
      "learning_rate": 5.0945454545454554e-05,
      "loss": 1.283,
      "step": 1850
    },
    {
      "epoch": 5.723076923076923,
      "grad_norm": 1.252189040184021,
      "learning_rate": 5.0581818181818184e-05,
      "loss": 1.3425,
      "step": 1860
    },
    {
      "epoch": 5.753846153846154,
      "grad_norm": 1.0823620557785034,
      "learning_rate": 5.021818181818182e-05,
      "loss": 1.4646,
      "step": 1870
    },
    {
      "epoch": 5.7846153846153845,
      "grad_norm": 1.39573335647583,
      "learning_rate": 4.985454545454546e-05,
      "loss": 1.2957,
      "step": 1880
    },
    {
      "epoch": 5.815384615384615,
      "grad_norm": 1.217499852180481,
      "learning_rate": 4.9490909090909094e-05,
      "loss": 1.3232,
      "step": 1890
    },
    {
      "epoch": 5.846153846153846,
      "grad_norm": 1.3049825429916382,
      "learning_rate": 4.912727272727273e-05,
      "loss": 1.333,
      "step": 1900
    },
    {
      "epoch": 5.876923076923077,
      "grad_norm": 1.265807032585144,
      "learning_rate": 4.876363636363637e-05,
      "loss": 1.2635,
      "step": 1910
    },
    {
      "epoch": 5.907692307692308,
      "grad_norm": 1.4045813083648682,
      "learning_rate": 4.8400000000000004e-05,
      "loss": 1.3453,
      "step": 1920
    },
    {
      "epoch": 5.938461538461539,
      "grad_norm": 1.4151256084442139,
      "learning_rate": 4.803636363636364e-05,
      "loss": 1.2274,
      "step": 1930
    },
    {
      "epoch": 5.969230769230769,
      "grad_norm": 1.30918288230896,
      "learning_rate": 4.767272727272728e-05,
      "loss": 1.3835,
      "step": 1940
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.1394106149673462,
      "learning_rate": 4.7309090909090914e-05,
      "loss": 1.4773,
      "step": 1950
    },
    {
      "epoch": 6.030769230769231,
      "grad_norm": 1.0991594791412354,
      "learning_rate": 4.694545454545455e-05,
      "loss": 1.1885,
      "step": 1960
    },
    {
      "epoch": 6.061538461538461,
      "grad_norm": 1.2676807641983032,
      "learning_rate": 4.658181818181818e-05,
      "loss": 1.2931,
      "step": 1970
    },
    {
      "epoch": 6.092307692307692,
      "grad_norm": 1.2844555377960205,
      "learning_rate": 4.6218181818181825e-05,
      "loss": 1.2758,
      "step": 1980
    },
    {
      "epoch": 6.123076923076923,
      "grad_norm": 1.2364481687545776,
      "learning_rate": 4.5854545454545455e-05,
      "loss": 1.1871,
      "step": 1990
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 1.4142909049987793,
      "learning_rate": 4.54909090909091e-05,
      "loss": 1.2976,
      "step": 2000
    },
    {
      "epoch": 6.153846153846154,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6960315704345703,
      "eval_rouge1": 0.6074,
      "eval_rouge2": 0.2564,
      "eval_rougeL": 0.4698,
      "eval_runtime": 33.4283,
      "eval_samples_per_second": 8.197,
      "eval_steps_per_second": 2.064,
      "step": 2000
    },
    {
      "epoch": 6.184615384615385,
      "grad_norm": 1.234192132949829,
      "learning_rate": 4.512727272727273e-05,
      "loss": 1.3096,
      "step": 2010
    },
    {
      "epoch": 6.2153846153846155,
      "grad_norm": 1.1222543716430664,
      "learning_rate": 4.4763636363636365e-05,
      "loss": 1.2937,
      "step": 2020
    },
    {
      "epoch": 6.246153846153846,
      "grad_norm": 1.0864425897598267,
      "learning_rate": 4.44e-05,
      "loss": 1.278,
      "step": 2030
    },
    {
      "epoch": 6.276923076923077,
      "grad_norm": 1.3431516885757446,
      "learning_rate": 4.403636363636364e-05,
      "loss": 1.2601,
      "step": 2040
    },
    {
      "epoch": 6.3076923076923075,
      "grad_norm": 1.2604031562805176,
      "learning_rate": 4.3672727272727275e-05,
      "loss": 1.3587,
      "step": 2050
    },
    {
      "epoch": 6.338461538461538,
      "grad_norm": 1.4237326383590698,
      "learning_rate": 4.330909090909091e-05,
      "loss": 1.3936,
      "step": 2060
    },
    {
      "epoch": 6.36923076923077,
      "grad_norm": 1.2190274000167847,
      "learning_rate": 4.294545454545455e-05,
      "loss": 1.2805,
      "step": 2070
    },
    {
      "epoch": 6.4,
      "grad_norm": 1.3023786544799805,
      "learning_rate": 4.2581818181818186e-05,
      "loss": 1.2676,
      "step": 2080
    },
    {
      "epoch": 6.430769230769231,
      "grad_norm": 1.2170274257659912,
      "learning_rate": 4.2218181818181816e-05,
      "loss": 1.3696,
      "step": 2090
    },
    {
      "epoch": 6.461538461538462,
      "grad_norm": 1.613784670829773,
      "learning_rate": 4.185454545454546e-05,
      "loss": 1.28,
      "step": 2100
    },
    {
      "epoch": 6.492307692307692,
      "grad_norm": 1.2165530920028687,
      "learning_rate": 4.149090909090909e-05,
      "loss": 1.2933,
      "step": 2110
    },
    {
      "epoch": 6.523076923076923,
      "grad_norm": 1.2213079929351807,
      "learning_rate": 4.112727272727273e-05,
      "loss": 1.2062,
      "step": 2120
    },
    {
      "epoch": 6.553846153846154,
      "grad_norm": 1.5889174938201904,
      "learning_rate": 4.076363636363636e-05,
      "loss": 1.2481,
      "step": 2130
    },
    {
      "epoch": 6.584615384615384,
      "grad_norm": 1.2638423442840576,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 1.29,
      "step": 2140
    },
    {
      "epoch": 6.615384615384615,
      "grad_norm": 1.0796576738357544,
      "learning_rate": 4.0036363636363636e-05,
      "loss": 1.1189,
      "step": 2150
    },
    {
      "epoch": 6.6461538461538465,
      "grad_norm": 1.5910948514938354,
      "learning_rate": 3.967272727272727e-05,
      "loss": 1.2767,
      "step": 2160
    },
    {
      "epoch": 6.676923076923077,
      "grad_norm": 1.3346668481826782,
      "learning_rate": 3.930909090909091e-05,
      "loss": 1.2212,
      "step": 2170
    },
    {
      "epoch": 6.707692307692308,
      "grad_norm": 1.1277836561203003,
      "learning_rate": 3.8945454545454547e-05,
      "loss": 1.2392,
      "step": 2180
    },
    {
      "epoch": 6.7384615384615385,
      "grad_norm": 1.1212108135223389,
      "learning_rate": 3.858181818181818e-05,
      "loss": 1.3223,
      "step": 2190
    },
    {
      "epoch": 6.769230769230769,
      "grad_norm": 1.2175902128219604,
      "learning_rate": 3.821818181818182e-05,
      "loss": 1.3124,
      "step": 2200
    },
    {
      "epoch": 6.769230769230769,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6920864582061768,
      "eval_rouge1": 0.606,
      "eval_rouge2": 0.2554,
      "eval_rougeL": 0.4694,
      "eval_runtime": 34.3853,
      "eval_samples_per_second": 7.969,
      "eval_steps_per_second": 2.007,
      "step": 2200
    },
    {
      "epoch": 6.8,
      "grad_norm": 1.163404107093811,
      "learning_rate": 3.785454545454546e-05,
      "loss": 1.4557,
      "step": 2210
    },
    {
      "epoch": 6.8307692307692305,
      "grad_norm": 1.5031542778015137,
      "learning_rate": 3.7490909090909094e-05,
      "loss": 1.1773,
      "step": 2220
    },
    {
      "epoch": 6.861538461538462,
      "grad_norm": 1.0126748085021973,
      "learning_rate": 3.712727272727273e-05,
      "loss": 1.3922,
      "step": 2230
    },
    {
      "epoch": 6.892307692307693,
      "grad_norm": 1.0135952234268188,
      "learning_rate": 3.676363636363637e-05,
      "loss": 1.2028,
      "step": 2240
    },
    {
      "epoch": 6.923076923076923,
      "grad_norm": 1.16098153591156,
      "learning_rate": 3.6400000000000004e-05,
      "loss": 1.4111,
      "step": 2250
    },
    {
      "epoch": 6.953846153846154,
      "grad_norm": 1.488234519958496,
      "learning_rate": 3.603636363636364e-05,
      "loss": 1.3131,
      "step": 2260
    },
    {
      "epoch": 6.984615384615385,
      "grad_norm": 1.129989743232727,
      "learning_rate": 3.567272727272728e-05,
      "loss": 1.1701,
      "step": 2270
    },
    {
      "epoch": 7.015384615384615,
      "grad_norm": 1.218468189239502,
      "learning_rate": 3.530909090909091e-05,
      "loss": 1.2604,
      "step": 2280
    },
    {
      "epoch": 7.046153846153846,
      "grad_norm": 1.2339926958084106,
      "learning_rate": 3.494545454545455e-05,
      "loss": 1.0932,
      "step": 2290
    },
    {
      "epoch": 7.076923076923077,
      "grad_norm": 1.4972765445709229,
      "learning_rate": 3.458181818181818e-05,
      "loss": 1.1137,
      "step": 2300
    },
    {
      "epoch": 7.107692307692307,
      "grad_norm": 1.1884584426879883,
      "learning_rate": 3.4218181818181824e-05,
      "loss": 1.1522,
      "step": 2310
    },
    {
      "epoch": 7.138461538461539,
      "grad_norm": 1.4934840202331543,
      "learning_rate": 3.3854545454545454e-05,
      "loss": 1.3121,
      "step": 2320
    },
    {
      "epoch": 7.1692307692307695,
      "grad_norm": 1.1432678699493408,
      "learning_rate": 3.34909090909091e-05,
      "loss": 1.1549,
      "step": 2330
    },
    {
      "epoch": 7.2,
      "grad_norm": 1.1708807945251465,
      "learning_rate": 3.312727272727273e-05,
      "loss": 1.1692,
      "step": 2340
    },
    {
      "epoch": 7.230769230769231,
      "grad_norm": 1.3824517726898193,
      "learning_rate": 3.2763636363636365e-05,
      "loss": 1.208,
      "step": 2350
    },
    {
      "epoch": 7.2615384615384615,
      "grad_norm": 1.1225407123565674,
      "learning_rate": 3.24e-05,
      "loss": 1.1542,
      "step": 2360
    },
    {
      "epoch": 7.292307692307692,
      "grad_norm": 1.2445507049560547,
      "learning_rate": 3.203636363636364e-05,
      "loss": 1.2265,
      "step": 2370
    },
    {
      "epoch": 7.323076923076923,
      "grad_norm": 1.256062626838684,
      "learning_rate": 3.1672727272727275e-05,
      "loss": 1.1822,
      "step": 2380
    },
    {
      "epoch": 7.3538461538461535,
      "grad_norm": 1.3986501693725586,
      "learning_rate": 3.130909090909091e-05,
      "loss": 1.262,
      "step": 2390
    },
    {
      "epoch": 7.384615384615385,
      "grad_norm": 1.1086236238479614,
      "learning_rate": 3.094545454545455e-05,
      "loss": 1.2275,
      "step": 2400
    },
    {
      "epoch": 7.384615384615385,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6998823881149292,
      "eval_rouge1": 0.6055,
      "eval_rouge2": 0.2541,
      "eval_rougeL": 0.4684,
      "eval_runtime": 33.5145,
      "eval_samples_per_second": 8.176,
      "eval_steps_per_second": 2.059,
      "step": 2400
    },
    {
      "epoch": 7.415384615384616,
      "grad_norm": 1.5682780742645264,
      "learning_rate": 3.0581818181818185e-05,
      "loss": 1.3442,
      "step": 2410
    },
    {
      "epoch": 7.446153846153846,
      "grad_norm": 1.034818410873413,
      "learning_rate": 3.021818181818182e-05,
      "loss": 1.2158,
      "step": 2420
    },
    {
      "epoch": 7.476923076923077,
      "grad_norm": 1.2816352844238281,
      "learning_rate": 2.985454545454546e-05,
      "loss": 1.1646,
      "step": 2430
    },
    {
      "epoch": 7.507692307692308,
      "grad_norm": 1.252765417098999,
      "learning_rate": 2.9490909090909092e-05,
      "loss": 1.1985,
      "step": 2440
    },
    {
      "epoch": 7.538461538461538,
      "grad_norm": 1.4074809551239014,
      "learning_rate": 2.9127272727272732e-05,
      "loss": 1.3245,
      "step": 2450
    },
    {
      "epoch": 7.569230769230769,
      "grad_norm": 1.3757801055908203,
      "learning_rate": 2.8763636363636366e-05,
      "loss": 1.2856,
      "step": 2460
    },
    {
      "epoch": 7.6,
      "grad_norm": 1.355635643005371,
      "learning_rate": 2.84e-05,
      "loss": 1.229,
      "step": 2470
    },
    {
      "epoch": 7.63076923076923,
      "grad_norm": 1.185659646987915,
      "learning_rate": 2.803636363636364e-05,
      "loss": 1.2444,
      "step": 2480
    },
    {
      "epoch": 7.661538461538462,
      "grad_norm": 1.4726060628890991,
      "learning_rate": 2.7672727272727273e-05,
      "loss": 1.2877,
      "step": 2490
    },
    {
      "epoch": 7.6923076923076925,
      "grad_norm": 1.525718092918396,
      "learning_rate": 2.7309090909090913e-05,
      "loss": 1.2993,
      "step": 2500
    },
    {
      "epoch": 7.723076923076923,
      "grad_norm": 1.2432451248168945,
      "learning_rate": 2.6945454545454546e-05,
      "loss": 1.2803,
      "step": 2510
    },
    {
      "epoch": 7.753846153846154,
      "grad_norm": 1.2237839698791504,
      "learning_rate": 2.6581818181818186e-05,
      "loss": 1.2785,
      "step": 2520
    },
    {
      "epoch": 7.7846153846153845,
      "grad_norm": 1.5360924005508423,
      "learning_rate": 2.621818181818182e-05,
      "loss": 1.3164,
      "step": 2530
    },
    {
      "epoch": 7.815384615384615,
      "grad_norm": 1.4242442846298218,
      "learning_rate": 2.5854545454545453e-05,
      "loss": 1.1864,
      "step": 2540
    },
    {
      "epoch": 7.846153846153846,
      "grad_norm": 1.0664770603179932,
      "learning_rate": 2.5490909090909093e-05,
      "loss": 1.2769,
      "step": 2550
    },
    {
      "epoch": 7.876923076923077,
      "grad_norm": 1.3427962064743042,
      "learning_rate": 2.5127272727272727e-05,
      "loss": 1.183,
      "step": 2560
    },
    {
      "epoch": 7.907692307692308,
      "grad_norm": 1.2692787647247314,
      "learning_rate": 2.4763636363636363e-05,
      "loss": 1.3775,
      "step": 2570
    },
    {
      "epoch": 7.938461538461539,
      "grad_norm": 1.3220490217208862,
      "learning_rate": 2.44e-05,
      "loss": 1.1557,
      "step": 2580
    },
    {
      "epoch": 7.969230769230769,
      "grad_norm": 1.31517493724823,
      "learning_rate": 2.4036363636363637e-05,
      "loss": 1.3144,
      "step": 2590
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.145848274230957,
      "learning_rate": 2.3672727272727274e-05,
      "loss": 1.4194,
      "step": 2600
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.6980103254318237,
      "eval_rouge1": 0.6066,
      "eval_rouge2": 0.2565,
      "eval_rougeL": 0.4699,
      "eval_runtime": 33.5275,
      "eval_samples_per_second": 8.172,
      "eval_steps_per_second": 2.058,
      "step": 2600
    },
    {
      "epoch": 8.03076923076923,
      "grad_norm": 1.3528770208358765,
      "learning_rate": 2.330909090909091e-05,
      "loss": 1.2504,
      "step": 2610
    },
    {
      "epoch": 8.061538461538461,
      "grad_norm": 1.1651642322540283,
      "learning_rate": 2.2945454545454547e-05,
      "loss": 1.0993,
      "step": 2620
    },
    {
      "epoch": 8.092307692307692,
      "grad_norm": 1.1845202445983887,
      "learning_rate": 2.258181818181818e-05,
      "loss": 1.1356,
      "step": 2630
    },
    {
      "epoch": 8.123076923076923,
      "grad_norm": 1.2000699043273926,
      "learning_rate": 2.2218181818181817e-05,
      "loss": 1.206,
      "step": 2640
    },
    {
      "epoch": 8.153846153846153,
      "grad_norm": 1.449044108390808,
      "learning_rate": 2.1854545454545454e-05,
      "loss": 1.2059,
      "step": 2650
    },
    {
      "epoch": 8.184615384615384,
      "grad_norm": 1.2176152467727661,
      "learning_rate": 2.149090909090909e-05,
      "loss": 1.1849,
      "step": 2660
    },
    {
      "epoch": 8.215384615384615,
      "grad_norm": 1.4765113592147827,
      "learning_rate": 2.1127272727272728e-05,
      "loss": 1.3351,
      "step": 2670
    },
    {
      "epoch": 8.246153846153845,
      "grad_norm": 1.5038341283798218,
      "learning_rate": 2.0763636363636364e-05,
      "loss": 1.2766,
      "step": 2680
    },
    {
      "epoch": 8.276923076923078,
      "grad_norm": 1.3483731746673584,
      "learning_rate": 2.04e-05,
      "loss": 1.1067,
      "step": 2690
    },
    {
      "epoch": 8.307692307692308,
      "grad_norm": 1.025032639503479,
      "learning_rate": 2.0036363636363638e-05,
      "loss": 1.2155,
      "step": 2700
    },
    {
      "epoch": 8.338461538461539,
      "grad_norm": 1.3824971914291382,
      "learning_rate": 1.9672727272727275e-05,
      "loss": 1.1767,
      "step": 2710
    },
    {
      "epoch": 8.36923076923077,
      "grad_norm": 1.2280523777008057,
      "learning_rate": 1.930909090909091e-05,
      "loss": 1.2881,
      "step": 2720
    },
    {
      "epoch": 8.4,
      "grad_norm": 1.2223644256591797,
      "learning_rate": 1.8945454545454548e-05,
      "loss": 1.1898,
      "step": 2730
    },
    {
      "epoch": 8.430769230769231,
      "grad_norm": 1.349334955215454,
      "learning_rate": 1.8581818181818185e-05,
      "loss": 1.2984,
      "step": 2740
    },
    {
      "epoch": 8.461538461538462,
      "grad_norm": 1.2894556522369385,
      "learning_rate": 1.8218181818181822e-05,
      "loss": 1.3458,
      "step": 2750
    },
    {
      "epoch": 8.492307692307692,
      "grad_norm": 1.6086795330047607,
      "learning_rate": 1.7854545454545455e-05,
      "loss": 1.2394,
      "step": 2760
    },
    {
      "epoch": 8.523076923076923,
      "grad_norm": 1.4955778121948242,
      "learning_rate": 1.7490909090909092e-05,
      "loss": 1.1506,
      "step": 2770
    },
    {
      "epoch": 8.553846153846154,
      "grad_norm": 1.4156781435012817,
      "learning_rate": 1.712727272727273e-05,
      "loss": 1.1746,
      "step": 2780
    },
    {
      "epoch": 8.584615384615384,
      "grad_norm": 1.2073826789855957,
      "learning_rate": 1.6763636363636365e-05,
      "loss": 1.1301,
      "step": 2790
    },
    {
      "epoch": 8.615384615384615,
      "grad_norm": 1.574342966079712,
      "learning_rate": 1.6400000000000002e-05,
      "loss": 1.3976,
      "step": 2800
    },
    {
      "epoch": 8.615384615384615,
      "eval_gen_len": 187.0146,
      "eval_loss": 1.7040081024169922,
      "eval_rouge1": 0.6049,
      "eval_rouge2": 0.2551,
      "eval_rougeL": 0.469,
      "eval_runtime": 34.1499,
      "eval_samples_per_second": 8.023,
      "eval_steps_per_second": 2.021,
      "step": 2800
    },
    {
      "epoch": 8.646153846153846,
      "grad_norm": 1.207323670387268,
      "learning_rate": 1.603636363636364e-05,
      "loss": 1.2123,
      "step": 2810
    },
    {
      "epoch": 8.676923076923076,
      "grad_norm": 1.346170425415039,
      "learning_rate": 1.5672727272727272e-05,
      "loss": 1.1284,
      "step": 2820
    },
    {
      "epoch": 8.707692307692307,
      "grad_norm": 1.3920952081680298,
      "learning_rate": 1.530909090909091e-05,
      "loss": 1.1961,
      "step": 2830
    },
    {
      "epoch": 8.73846153846154,
      "grad_norm": 1.4912207126617432,
      "learning_rate": 1.4945454545454546e-05,
      "loss": 1.2558,
      "step": 2840
    },
    {
      "epoch": 8.76923076923077,
      "grad_norm": 0.9997207522392273,
      "learning_rate": 1.4581818181818183e-05,
      "loss": 1.1842,
      "step": 2850
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.207138180732727,
      "learning_rate": 1.421818181818182e-05,
      "loss": 1.2588,
      "step": 2860
    },
    {
      "epoch": 8.830769230769231,
      "grad_norm": 1.398917555809021,
      "learning_rate": 1.3854545454545456e-05,
      "loss": 1.2445,
      "step": 2870
    },
    {
      "epoch": 8.861538461538462,
      "grad_norm": 1.3793071508407593,
      "learning_rate": 1.3490909090909093e-05,
      "loss": 1.164,
      "step": 2880
    },
    {
      "epoch": 8.892307692307693,
      "grad_norm": 1.2650920152664185,
      "learning_rate": 1.3127272727272726e-05,
      "loss": 1.2235,
      "step": 2890
    },
    {
      "epoch": 8.923076923076923,
      "grad_norm": 1.3319740295410156,
      "learning_rate": 1.2763636363636363e-05,
      "loss": 1.1818,
      "step": 2900
    },
    {
      "epoch": 8.953846153846154,
      "grad_norm": 1.395668387413025,
      "learning_rate": 1.24e-05,
      "loss": 1.2237,
      "step": 2910
    },
    {
      "epoch": 8.984615384615385,
      "grad_norm": 1.1730422973632812,
      "learning_rate": 1.2036363636363637e-05,
      "loss": 1.0141,
      "step": 2920
    }
  ],
  "logging_steps": 10,
  "max_steps": 3250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7124807319552000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}