| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.967105263157895, |
| "eval_steps": 500, |
| "global_step": 755, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06578947368421052, |
| "grad_norm": 12741.283203125, |
| "learning_rate": 2.368421052631579e-06, |
| "loss": 1343.5036, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13157894736842105, |
| "grad_norm": 3765.042724609375, |
| "learning_rate": 5e-06, |
| "loss": 1025.8134, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19736842105263158, |
| "grad_norm": 3857.6552734375, |
| "learning_rate": 7.631578947368423e-06, |
| "loss": 931.7692, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 3910.090087890625, |
| "learning_rate": 9.986052998605302e-06, |
| "loss": 828.1059, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32894736842105265, |
| "grad_norm": 3312.494140625, |
| "learning_rate": 9.846582984658299e-06, |
| "loss": 701.0779, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39473684210526316, |
| "grad_norm": 7742.6982421875, |
| "learning_rate": 9.707112970711298e-06, |
| "loss": 575.7785, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4605263157894737, |
| "grad_norm": 4383.2861328125, |
| "learning_rate": 9.567642956764297e-06, |
| "loss": 683.3667, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 4326.119140625, |
| "learning_rate": 9.428172942817295e-06, |
| "loss": 689.7952, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5921052631578947, |
| "grad_norm": 3050.6357421875, |
| "learning_rate": 9.288702928870293e-06, |
| "loss": 529.22, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6578947368421053, |
| "grad_norm": 8133.0595703125, |
| "learning_rate": 9.149232914923292e-06, |
| "loss": 493.8726, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7236842105263158, |
| "grad_norm": 4629.41162109375, |
| "learning_rate": 9.00976290097629e-06, |
| "loss": 480.3524, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 3343.38232421875, |
| "learning_rate": 8.87029288702929e-06, |
| "loss": 519.6425, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8552631578947368, |
| "grad_norm": 3955.08447265625, |
| "learning_rate": 8.730822873082288e-06, |
| "loss": 580.8739, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9210526315789473, |
| "grad_norm": 4120.26123046875, |
| "learning_rate": 8.591352859135287e-06, |
| "loss": 566.2863, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9868421052631579, |
| "grad_norm": 2743.97119140625, |
| "learning_rate": 8.451882845188284e-06, |
| "loss": 498.7171, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 3119.446044921875, |
| "learning_rate": 8.312412831241283e-06, |
| "loss": 448.7976, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.118421052631579, |
| "grad_norm": 3619.334228515625, |
| "learning_rate": 8.172942817294282e-06, |
| "loss": 490.538, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.1842105263157894, |
| "grad_norm": 3563.162353515625, |
| "learning_rate": 8.033472803347281e-06, |
| "loss": 550.9566, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 3006.31884765625, |
| "learning_rate": 7.89400278940028e-06, |
| "loss": 496.8316, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 3445.16650390625, |
| "learning_rate": 7.754532775453279e-06, |
| "loss": 468.8848, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.381578947368421, |
| "grad_norm": 2652.3056640625, |
| "learning_rate": 7.615062761506277e-06, |
| "loss": 386.9485, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.4473684210526316, |
| "grad_norm": 7152.8544921875, |
| "learning_rate": 7.475592747559275e-06, |
| "loss": 406.5552, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.513157894736842, |
| "grad_norm": 2007.9990234375, |
| "learning_rate": 7.3361227336122745e-06, |
| "loss": 453.7567, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 3840.892578125, |
| "learning_rate": 7.1966527196652726e-06, |
| "loss": 379.3861, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.6447368421052633, |
| "grad_norm": 3999.458740234375, |
| "learning_rate": 7.057182705718271e-06, |
| "loss": 477.9281, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7105263157894737, |
| "grad_norm": 2132.456787109375, |
| "learning_rate": 6.91771269177127e-06, |
| "loss": 410.921, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.776315789473684, |
| "grad_norm": 3105.357421875, |
| "learning_rate": 6.778242677824268e-06, |
| "loss": 442.1194, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 2635.864501953125, |
| "learning_rate": 6.6387726638772664e-06, |
| "loss": 389.496, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.9078947368421053, |
| "grad_norm": 2923.394287109375, |
| "learning_rate": 6.499302649930266e-06, |
| "loss": 439.763, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.973684210526316, |
| "grad_norm": 3647.169921875, |
| "learning_rate": 6.359832635983264e-06, |
| "loss": 406.6518, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.039473684210526, |
| "grad_norm": 2935.630126953125, |
| "learning_rate": 6.220362622036262e-06, |
| "loss": 331.8123, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 2370.527099609375, |
| "learning_rate": 6.080892608089262e-06, |
| "loss": 343.2197, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.1710526315789473, |
| "grad_norm": 2738.644287109375, |
| "learning_rate": 5.94142259414226e-06, |
| "loss": 369.9035, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.236842105263158, |
| "grad_norm": 2592.207275390625, |
| "learning_rate": 5.801952580195258e-06, |
| "loss": 402.548, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.3026315789473686, |
| "grad_norm": 4248.5478515625, |
| "learning_rate": 5.662482566248258e-06, |
| "loss": 457.1924, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.3684210526315788, |
| "grad_norm": 2736.706298828125, |
| "learning_rate": 5.523012552301256e-06, |
| "loss": 381.4614, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.4342105263157894, |
| "grad_norm": 2056.372802734375, |
| "learning_rate": 5.383542538354254e-06, |
| "loss": 363.8397, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 2987.6806640625, |
| "learning_rate": 5.244072524407254e-06, |
| "loss": 432.6885, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.5657894736842106, |
| "grad_norm": 4919.654296875, |
| "learning_rate": 5.104602510460252e-06, |
| "loss": 395.5194, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 2719.568115234375, |
| "learning_rate": 4.9651324965132506e-06, |
| "loss": 377.963, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.6973684210526314, |
| "grad_norm": 3378.98876953125, |
| "learning_rate": 4.825662482566249e-06, |
| "loss": 454.2221, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.763157894736842, |
| "grad_norm": 941.3067016601562, |
| "learning_rate": 4.6861924686192475e-06, |
| "loss": 334.1994, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.8289473684210527, |
| "grad_norm": 3209.615478515625, |
| "learning_rate": 4.546722454672246e-06, |
| "loss": 386.7458, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.8947368421052633, |
| "grad_norm": 2738.612060546875, |
| "learning_rate": 4.407252440725244e-06, |
| "loss": 313.3205, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.9605263157894735, |
| "grad_norm": 2505.005126953125, |
| "learning_rate": 4.267782426778243e-06, |
| "loss": 318.3042, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.026315789473684, |
| "grad_norm": 2280.536865234375, |
| "learning_rate": 4.128312412831242e-06, |
| "loss": 355.1427, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.0921052631578947, |
| "grad_norm": 3746.776123046875, |
| "learning_rate": 3.98884239888424e-06, |
| "loss": 321.6535, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.1578947368421053, |
| "grad_norm": 2580.090087890625, |
| "learning_rate": 3.849372384937239e-06, |
| "loss": 389.3218, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.223684210526316, |
| "grad_norm": 2343.642578125, |
| "learning_rate": 3.7099023709902376e-06, |
| "loss": 359.2708, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.2894736842105265, |
| "grad_norm": 2266.267822265625, |
| "learning_rate": 3.570432357043236e-06, |
| "loss": 435.1246, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.3552631578947367, |
| "grad_norm": 3043.51318359375, |
| "learning_rate": 3.4309623430962345e-06, |
| "loss": 321.2599, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.4210526315789473, |
| "grad_norm": 2253.009765625, |
| "learning_rate": 3.2914923291492334e-06, |
| "loss": 310.0588, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.486842105263158, |
| "grad_norm": 4041.337890625, |
| "learning_rate": 3.152022315202232e-06, |
| "loss": 302.3955, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.5526315789473686, |
| "grad_norm": 3892.867431640625, |
| "learning_rate": 3.0125523012552303e-06, |
| "loss": 372.87, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.6184210526315788, |
| "grad_norm": 1721.0771484375, |
| "learning_rate": 2.873082287308229e-06, |
| "loss": 339.0192, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.6842105263157894, |
| "grad_norm": 2730.7734375, |
| "learning_rate": 2.7336122733612273e-06, |
| "loss": 335.406, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 2706.309814453125, |
| "learning_rate": 2.594142259414226e-06, |
| "loss": 357.9787, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.8157894736842106, |
| "grad_norm": 5227.97705078125, |
| "learning_rate": 2.4546722454672246e-06, |
| "loss": 362.4296, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.8815789473684212, |
| "grad_norm": 2932.572998046875, |
| "learning_rate": 2.315202231520223e-06, |
| "loss": 329.5911, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.9473684210526314, |
| "grad_norm": 2948.697265625, |
| "learning_rate": 2.175732217573222e-06, |
| "loss": 366.91, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.0131578947368425, |
| "grad_norm": 2344.489013671875, |
| "learning_rate": 2.0362622036262205e-06, |
| "loss": 314.6336, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.078947368421052, |
| "grad_norm": 3181.4521484375, |
| "learning_rate": 1.8967921896792191e-06, |
| "loss": 309.1377, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.144736842105263, |
| "grad_norm": 2467.346435546875, |
| "learning_rate": 1.7573221757322176e-06, |
| "loss": 349.3991, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.2105263157894735, |
| "grad_norm": 4259.5654296875, |
| "learning_rate": 1.6178521617852163e-06, |
| "loss": 318.5878, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.276315789473684, |
| "grad_norm": 2608.833984375, |
| "learning_rate": 1.478382147838215e-06, |
| "loss": 329.6976, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.342105263157895, |
| "grad_norm": 4429.38671875, |
| "learning_rate": 1.3389121338912134e-06, |
| "loss": 308.7365, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.407894736842105, |
| "grad_norm": 3620.8515625, |
| "learning_rate": 1.199442119944212e-06, |
| "loss": 353.2343, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.473684210526316, |
| "grad_norm": 3577.33203125, |
| "learning_rate": 1.0599721059972108e-06, |
| "loss": 304.5744, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.5394736842105265, |
| "grad_norm": 2387.5029296875, |
| "learning_rate": 9.205020920502093e-07, |
| "loss": 388.6656, |
| "step": 690 |
| }, |
| { |
| "epoch": 4.605263157894737, |
| "grad_norm": 2653.211669921875, |
| "learning_rate": 7.810320781032078e-07, |
| "loss": 360.9954, |
| "step": 700 |
| }, |
| { |
| "epoch": 4.671052631578947, |
| "grad_norm": 3058.907470703125, |
| "learning_rate": 6.415620641562065e-07, |
| "loss": 417.5069, |
| "step": 710 |
| }, |
| { |
| "epoch": 4.7368421052631575, |
| "grad_norm": 3122.1025390625, |
| "learning_rate": 5.020920502092051e-07, |
| "loss": 286.6377, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.802631578947368, |
| "grad_norm": 2571.658935546875, |
| "learning_rate": 3.6262203626220363e-07, |
| "loss": 289.6024, |
| "step": 730 |
| }, |
| { |
| "epoch": 4.868421052631579, |
| "grad_norm": 4391.81591796875, |
| "learning_rate": 2.2315202231520225e-07, |
| "loss": 301.8846, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.934210526315789, |
| "grad_norm": 2866.98486328125, |
| "learning_rate": 8.368200836820084e-08, |
| "loss": 348.0844, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 755, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|