{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.967105263157895,
"eval_steps": 500,
"global_step": 755,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06578947368421052,
"grad_norm": 12741.283203125,
"learning_rate": 2.368421052631579e-06,
"loss": 1343.5036,
"step": 10
},
{
"epoch": 0.13157894736842105,
"grad_norm": 3765.042724609375,
"learning_rate": 5e-06,
"loss": 1025.8134,
"step": 20
},
{
"epoch": 0.19736842105263158,
"grad_norm": 3857.6552734375,
"learning_rate": 7.631578947368423e-06,
"loss": 931.7692,
"step": 30
},
{
"epoch": 0.2631578947368421,
"grad_norm": 3910.090087890625,
"learning_rate": 9.986052998605302e-06,
"loss": 828.1059,
"step": 40
},
{
"epoch": 0.32894736842105265,
"grad_norm": 3312.494140625,
"learning_rate": 9.846582984658299e-06,
"loss": 701.0779,
"step": 50
},
{
"epoch": 0.39473684210526316,
"grad_norm": 7742.6982421875,
"learning_rate": 9.707112970711298e-06,
"loss": 575.7785,
"step": 60
},
{
"epoch": 0.4605263157894737,
"grad_norm": 4383.2861328125,
"learning_rate": 9.567642956764297e-06,
"loss": 683.3667,
"step": 70
},
{
"epoch": 0.5263157894736842,
"grad_norm": 4326.119140625,
"learning_rate": 9.428172942817295e-06,
"loss": 689.7952,
"step": 80
},
{
"epoch": 0.5921052631578947,
"grad_norm": 3050.6357421875,
"learning_rate": 9.288702928870293e-06,
"loss": 529.22,
"step": 90
},
{
"epoch": 0.6578947368421053,
"grad_norm": 8133.0595703125,
"learning_rate": 9.149232914923292e-06,
"loss": 493.8726,
"step": 100
},
{
"epoch": 0.7236842105263158,
"grad_norm": 4629.41162109375,
"learning_rate": 9.00976290097629e-06,
"loss": 480.3524,
"step": 110
},
{
"epoch": 0.7894736842105263,
"grad_norm": 3343.38232421875,
"learning_rate": 8.87029288702929e-06,
"loss": 519.6425,
"step": 120
},
{
"epoch": 0.8552631578947368,
"grad_norm": 3955.08447265625,
"learning_rate": 8.730822873082288e-06,
"loss": 580.8739,
"step": 130
},
{
"epoch": 0.9210526315789473,
"grad_norm": 4120.26123046875,
"learning_rate": 8.591352859135287e-06,
"loss": 566.2863,
"step": 140
},
{
"epoch": 0.9868421052631579,
"grad_norm": 2743.97119140625,
"learning_rate": 8.451882845188284e-06,
"loss": 498.7171,
"step": 150
},
{
"epoch": 1.0526315789473684,
"grad_norm": 3119.446044921875,
"learning_rate": 8.312412831241283e-06,
"loss": 448.7976,
"step": 160
},
{
"epoch": 1.118421052631579,
"grad_norm": 3619.334228515625,
"learning_rate": 8.172942817294282e-06,
"loss": 490.538,
"step": 170
},
{
"epoch": 1.1842105263157894,
"grad_norm": 3563.162353515625,
"learning_rate": 8.033472803347281e-06,
"loss": 550.9566,
"step": 180
},
{
"epoch": 1.25,
"grad_norm": 3006.31884765625,
"learning_rate": 7.89400278940028e-06,
"loss": 496.8316,
"step": 190
},
{
"epoch": 1.3157894736842106,
"grad_norm": 3445.16650390625,
"learning_rate": 7.754532775453279e-06,
"loss": 468.8848,
"step": 200
},
{
"epoch": 1.381578947368421,
"grad_norm": 2652.3056640625,
"learning_rate": 7.615062761506277e-06,
"loss": 386.9485,
"step": 210
},
{
"epoch": 1.4473684210526316,
"grad_norm": 7152.8544921875,
"learning_rate": 7.475592747559275e-06,
"loss": 406.5552,
"step": 220
},
{
"epoch": 1.513157894736842,
"grad_norm": 2007.9990234375,
"learning_rate": 7.3361227336122745e-06,
"loss": 453.7567,
"step": 230
},
{
"epoch": 1.5789473684210527,
"grad_norm": 3840.892578125,
"learning_rate": 7.1966527196652726e-06,
"loss": 379.3861,
"step": 240
},
{
"epoch": 1.6447368421052633,
"grad_norm": 3999.458740234375,
"learning_rate": 7.057182705718271e-06,
"loss": 477.9281,
"step": 250
},
{
"epoch": 1.7105263157894737,
"grad_norm": 2132.456787109375,
"learning_rate": 6.91771269177127e-06,
"loss": 410.921,
"step": 260
},
{
"epoch": 1.776315789473684,
"grad_norm": 3105.357421875,
"learning_rate": 6.778242677824268e-06,
"loss": 442.1194,
"step": 270
},
{
"epoch": 1.8421052631578947,
"grad_norm": 2635.864501953125,
"learning_rate": 6.6387726638772664e-06,
"loss": 389.496,
"step": 280
},
{
"epoch": 1.9078947368421053,
"grad_norm": 2923.394287109375,
"learning_rate": 6.499302649930266e-06,
"loss": 439.763,
"step": 290
},
{
"epoch": 1.973684210526316,
"grad_norm": 3647.169921875,
"learning_rate": 6.359832635983264e-06,
"loss": 406.6518,
"step": 300
},
{
"epoch": 2.039473684210526,
"grad_norm": 2935.630126953125,
"learning_rate": 6.220362622036262e-06,
"loss": 331.8123,
"step": 310
},
{
"epoch": 2.1052631578947367,
"grad_norm": 2370.527099609375,
"learning_rate": 6.080892608089262e-06,
"loss": 343.2197,
"step": 320
},
{
"epoch": 2.1710526315789473,
"grad_norm": 2738.644287109375,
"learning_rate": 5.94142259414226e-06,
"loss": 369.9035,
"step": 330
},
{
"epoch": 2.236842105263158,
"grad_norm": 2592.207275390625,
"learning_rate": 5.801952580195258e-06,
"loss": 402.548,
"step": 340
},
{
"epoch": 2.3026315789473686,
"grad_norm": 4248.5478515625,
"learning_rate": 5.662482566248258e-06,
"loss": 457.1924,
"step": 350
},
{
"epoch": 2.3684210526315788,
"grad_norm": 2736.706298828125,
"learning_rate": 5.523012552301256e-06,
"loss": 381.4614,
"step": 360
},
{
"epoch": 2.4342105263157894,
"grad_norm": 2056.372802734375,
"learning_rate": 5.383542538354254e-06,
"loss": 363.8397,
"step": 370
},
{
"epoch": 2.5,
"grad_norm": 2987.6806640625,
"learning_rate": 5.244072524407254e-06,
"loss": 432.6885,
"step": 380
},
{
"epoch": 2.5657894736842106,
"grad_norm": 4919.654296875,
"learning_rate": 5.104602510460252e-06,
"loss": 395.5194,
"step": 390
},
{
"epoch": 2.6315789473684212,
"grad_norm": 2719.568115234375,
"learning_rate": 4.9651324965132506e-06,
"loss": 377.963,
"step": 400
},
{
"epoch": 2.6973684210526314,
"grad_norm": 3378.98876953125,
"learning_rate": 4.825662482566249e-06,
"loss": 454.2221,
"step": 410
},
{
"epoch": 2.763157894736842,
"grad_norm": 941.3067016601562,
"learning_rate": 4.6861924686192475e-06,
"loss": 334.1994,
"step": 420
},
{
"epoch": 2.8289473684210527,
"grad_norm": 3209.615478515625,
"learning_rate": 4.546722454672246e-06,
"loss": 386.7458,
"step": 430
},
{
"epoch": 2.8947368421052633,
"grad_norm": 2738.612060546875,
"learning_rate": 4.407252440725244e-06,
"loss": 313.3205,
"step": 440
},
{
"epoch": 2.9605263157894735,
"grad_norm": 2505.005126953125,
"learning_rate": 4.267782426778243e-06,
"loss": 318.3042,
"step": 450
},
{
"epoch": 3.026315789473684,
"grad_norm": 2280.536865234375,
"learning_rate": 4.128312412831242e-06,
"loss": 355.1427,
"step": 460
},
{
"epoch": 3.0921052631578947,
"grad_norm": 3746.776123046875,
"learning_rate": 3.98884239888424e-06,
"loss": 321.6535,
"step": 470
},
{
"epoch": 3.1578947368421053,
"grad_norm": 2580.090087890625,
"learning_rate": 3.849372384937239e-06,
"loss": 389.3218,
"step": 480
},
{
"epoch": 3.223684210526316,
"grad_norm": 2343.642578125,
"learning_rate": 3.7099023709902376e-06,
"loss": 359.2708,
"step": 490
},
{
"epoch": 3.2894736842105265,
"grad_norm": 2266.267822265625,
"learning_rate": 3.570432357043236e-06,
"loss": 435.1246,
"step": 500
},
{
"epoch": 3.3552631578947367,
"grad_norm": 3043.51318359375,
"learning_rate": 3.4309623430962345e-06,
"loss": 321.2599,
"step": 510
},
{
"epoch": 3.4210526315789473,
"grad_norm": 2253.009765625,
"learning_rate": 3.2914923291492334e-06,
"loss": 310.0588,
"step": 520
},
{
"epoch": 3.486842105263158,
"grad_norm": 4041.337890625,
"learning_rate": 3.152022315202232e-06,
"loss": 302.3955,
"step": 530
},
{
"epoch": 3.5526315789473686,
"grad_norm": 3892.867431640625,
"learning_rate": 3.0125523012552303e-06,
"loss": 372.87,
"step": 540
},
{
"epoch": 3.6184210526315788,
"grad_norm": 1721.0771484375,
"learning_rate": 2.873082287308229e-06,
"loss": 339.0192,
"step": 550
},
{
"epoch": 3.6842105263157894,
"grad_norm": 2730.7734375,
"learning_rate": 2.7336122733612273e-06,
"loss": 335.406,
"step": 560
},
{
"epoch": 3.75,
"grad_norm": 2706.309814453125,
"learning_rate": 2.594142259414226e-06,
"loss": 357.9787,
"step": 570
},
{
"epoch": 3.8157894736842106,
"grad_norm": 5227.97705078125,
"learning_rate": 2.4546722454672246e-06,
"loss": 362.4296,
"step": 580
},
{
"epoch": 3.8815789473684212,
"grad_norm": 2932.572998046875,
"learning_rate": 2.315202231520223e-06,
"loss": 329.5911,
"step": 590
},
{
"epoch": 3.9473684210526314,
"grad_norm": 2948.697265625,
"learning_rate": 2.175732217573222e-06,
"loss": 366.91,
"step": 600
},
{
"epoch": 4.0131578947368425,
"grad_norm": 2344.489013671875,
"learning_rate": 2.0362622036262205e-06,
"loss": 314.6336,
"step": 610
},
{
"epoch": 4.078947368421052,
"grad_norm": 3181.4521484375,
"learning_rate": 1.8967921896792191e-06,
"loss": 309.1377,
"step": 620
},
{
"epoch": 4.144736842105263,
"grad_norm": 2467.346435546875,
"learning_rate": 1.7573221757322176e-06,
"loss": 349.3991,
"step": 630
},
{
"epoch": 4.2105263157894735,
"grad_norm": 4259.5654296875,
"learning_rate": 1.6178521617852163e-06,
"loss": 318.5878,
"step": 640
},
{
"epoch": 4.276315789473684,
"grad_norm": 2608.833984375,
"learning_rate": 1.478382147838215e-06,
"loss": 329.6976,
"step": 650
},
{
"epoch": 4.342105263157895,
"grad_norm": 4429.38671875,
"learning_rate": 1.3389121338912134e-06,
"loss": 308.7365,
"step": 660
},
{
"epoch": 4.407894736842105,
"grad_norm": 3620.8515625,
"learning_rate": 1.199442119944212e-06,
"loss": 353.2343,
"step": 670
},
{
"epoch": 4.473684210526316,
"grad_norm": 3577.33203125,
"learning_rate": 1.0599721059972108e-06,
"loss": 304.5744,
"step": 680
},
{
"epoch": 4.5394736842105265,
"grad_norm": 2387.5029296875,
"learning_rate": 9.205020920502093e-07,
"loss": 388.6656,
"step": 690
},
{
"epoch": 4.605263157894737,
"grad_norm": 2653.211669921875,
"learning_rate": 7.810320781032078e-07,
"loss": 360.9954,
"step": 700
},
{
"epoch": 4.671052631578947,
"grad_norm": 3058.907470703125,
"learning_rate": 6.415620641562065e-07,
"loss": 417.5069,
"step": 710
},
{
"epoch": 4.7368421052631575,
"grad_norm": 3122.1025390625,
"learning_rate": 5.020920502092051e-07,
"loss": 286.6377,
"step": 720
},
{
"epoch": 4.802631578947368,
"grad_norm": 2571.658935546875,
"learning_rate": 3.6262203626220363e-07,
"loss": 289.6024,
"step": 730
},
{
"epoch": 4.868421052631579,
"grad_norm": 4391.81591796875,
"learning_rate": 2.2315202231520225e-07,
"loss": 301.8846,
"step": 740
},
{
"epoch": 4.934210526315789,
"grad_norm": 2866.98486328125,
"learning_rate": 8.368200836820084e-08,
"loss": 348.0844,
"step": 750
}
],
"logging_steps": 10,
"max_steps": 755,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}