emrwa / trainer_state.json
PHZane's picture
Upload 13 files
d00c3a1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9872122762148337,
"eval_steps": 100,
"global_step": 585,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05115089514066496,
"grad_norm": 12.8125,
"learning_rate": 9.846153846153848e-05,
"loss": 0.9959,
"step": 10
},
{
"epoch": 0.10230179028132992,
"grad_norm": 4.6875,
"learning_rate": 9.675213675213675e-05,
"loss": 0.5833,
"step": 20
},
{
"epoch": 0.1534526854219949,
"grad_norm": 3.9375,
"learning_rate": 9.504273504273504e-05,
"loss": 0.4739,
"step": 30
},
{
"epoch": 0.20460358056265984,
"grad_norm": 3.734375,
"learning_rate": 9.333333333333334e-05,
"loss": 0.4993,
"step": 40
},
{
"epoch": 0.2557544757033248,
"grad_norm": 3.65625,
"learning_rate": 9.162393162393162e-05,
"loss": 0.4797,
"step": 50
},
{
"epoch": 0.3069053708439898,
"grad_norm": 2.875,
"learning_rate": 8.991452991452992e-05,
"loss": 0.389,
"step": 60
},
{
"epoch": 0.35805626598465473,
"grad_norm": 2.8125,
"learning_rate": 8.820512820512821e-05,
"loss": 0.4691,
"step": 70
},
{
"epoch": 0.4092071611253197,
"grad_norm": 2.46875,
"learning_rate": 8.64957264957265e-05,
"loss": 0.3734,
"step": 80
},
{
"epoch": 0.46035805626598464,
"grad_norm": 2.28125,
"learning_rate": 8.478632478632479e-05,
"loss": 0.4401,
"step": 90
},
{
"epoch": 0.5115089514066496,
"grad_norm": 2.734375,
"learning_rate": 8.307692307692309e-05,
"loss": 0.4304,
"step": 100
},
{
"epoch": 0.5115089514066496,
"eval_loss": 0.8407434821128845,
"eval_runtime": 35.229,
"eval_samples_per_second": 11.099,
"eval_steps_per_second": 5.564,
"step": 100
},
{
"epoch": 0.5626598465473146,
"grad_norm": 2.03125,
"learning_rate": 8.136752136752138e-05,
"loss": 0.4492,
"step": 110
},
{
"epoch": 0.6138107416879796,
"grad_norm": 3.53125,
"learning_rate": 7.965811965811965e-05,
"loss": 0.3949,
"step": 120
},
{
"epoch": 0.6649616368286445,
"grad_norm": 2.03125,
"learning_rate": 7.794871794871795e-05,
"loss": 0.3737,
"step": 130
},
{
"epoch": 0.7161125319693095,
"grad_norm": 2.375,
"learning_rate": 7.623931623931624e-05,
"loss": 0.4031,
"step": 140
},
{
"epoch": 0.7672634271099744,
"grad_norm": 2.375,
"learning_rate": 7.452991452991453e-05,
"loss": 0.3672,
"step": 150
},
{
"epoch": 0.8184143222506394,
"grad_norm": 2.6875,
"learning_rate": 7.282051282051282e-05,
"loss": 0.3944,
"step": 160
},
{
"epoch": 0.8695652173913043,
"grad_norm": 2.625,
"learning_rate": 7.111111111111112e-05,
"loss": 0.4473,
"step": 170
},
{
"epoch": 0.9207161125319693,
"grad_norm": 2.21875,
"learning_rate": 6.940170940170941e-05,
"loss": 0.3476,
"step": 180
},
{
"epoch": 0.9718670076726342,
"grad_norm": 3.578125,
"learning_rate": 6.76923076923077e-05,
"loss": 0.3961,
"step": 190
},
{
"epoch": 1.020460358056266,
"grad_norm": 3.34375,
"learning_rate": 6.598290598290599e-05,
"loss": 0.3859,
"step": 200
},
{
"epoch": 1.020460358056266,
"eval_loss": 0.7718821167945862,
"eval_runtime": 33.9574,
"eval_samples_per_second": 11.514,
"eval_steps_per_second": 5.772,
"step": 200
},
{
"epoch": 1.0716112531969308,
"grad_norm": 2.578125,
"learning_rate": 6.427350427350429e-05,
"loss": 0.3484,
"step": 210
},
{
"epoch": 1.1227621483375958,
"grad_norm": 3.296875,
"learning_rate": 6.256410256410256e-05,
"loss": 0.3425,
"step": 220
},
{
"epoch": 1.1739130434782608,
"grad_norm": 2.09375,
"learning_rate": 6.085470085470085e-05,
"loss": 0.3622,
"step": 230
},
{
"epoch": 1.2250639386189257,
"grad_norm": 2.796875,
"learning_rate": 5.9145299145299146e-05,
"loss": 0.3369,
"step": 240
},
{
"epoch": 1.2762148337595907,
"grad_norm": 4.59375,
"learning_rate": 5.7435897435897434e-05,
"loss": 0.3215,
"step": 250
},
{
"epoch": 1.3273657289002558,
"grad_norm": 3.734375,
"learning_rate": 5.572649572649573e-05,
"loss": 0.328,
"step": 260
},
{
"epoch": 1.3785166240409208,
"grad_norm": 2.71875,
"learning_rate": 5.401709401709402e-05,
"loss": 0.3117,
"step": 270
},
{
"epoch": 1.4296675191815857,
"grad_norm": 2.328125,
"learning_rate": 5.230769230769231e-05,
"loss": 0.2767,
"step": 280
},
{
"epoch": 1.4808184143222507,
"grad_norm": 2.125,
"learning_rate": 5.05982905982906e-05,
"loss": 0.2631,
"step": 290
},
{
"epoch": 1.5319693094629157,
"grad_norm": 2.375,
"learning_rate": 4.888888888888889e-05,
"loss": 0.3234,
"step": 300
},
{
"epoch": 1.5319693094629157,
"eval_loss": 0.7180720567703247,
"eval_runtime": 34.1345,
"eval_samples_per_second": 11.455,
"eval_steps_per_second": 5.742,
"step": 300
},
{
"epoch": 1.5831202046035806,
"grad_norm": 2.328125,
"learning_rate": 4.717948717948718e-05,
"loss": 0.2915,
"step": 310
},
{
"epoch": 1.6342710997442456,
"grad_norm": 2.640625,
"learning_rate": 4.5470085470085474e-05,
"loss": 0.3123,
"step": 320
},
{
"epoch": 1.6854219948849105,
"grad_norm": 2.09375,
"learning_rate": 4.376068376068376e-05,
"loss": 0.3252,
"step": 330
},
{
"epoch": 1.7365728900255755,
"grad_norm": 1.7734375,
"learning_rate": 4.205128205128206e-05,
"loss": 0.3095,
"step": 340
},
{
"epoch": 1.7877237851662404,
"grad_norm": 2.28125,
"learning_rate": 4.0341880341880346e-05,
"loss": 0.2539,
"step": 350
},
{
"epoch": 1.8388746803069054,
"grad_norm": 2.5625,
"learning_rate": 3.8632478632478634e-05,
"loss": 0.2742,
"step": 360
},
{
"epoch": 1.8900255754475703,
"grad_norm": 3.078125,
"learning_rate": 3.692307692307693e-05,
"loss": 0.3169,
"step": 370
},
{
"epoch": 1.9411764705882353,
"grad_norm": 1.84375,
"learning_rate": 3.521367521367522e-05,
"loss": 0.2767,
"step": 380
},
{
"epoch": 1.9923273657289002,
"grad_norm": 1.859375,
"learning_rate": 3.3504273504273506e-05,
"loss": 0.3239,
"step": 390
},
{
"epoch": 2.040920716112532,
"grad_norm": 1.8203125,
"learning_rate": 3.1794871794871795e-05,
"loss": 0.269,
"step": 400
},
{
"epoch": 2.040920716112532,
"eval_loss": 0.6828166842460632,
"eval_runtime": 33.8312,
"eval_samples_per_second": 11.557,
"eval_steps_per_second": 5.793,
"step": 400
},
{
"epoch": 2.0920716112531967,
"grad_norm": 1.578125,
"learning_rate": 3.0085470085470086e-05,
"loss": 0.2818,
"step": 410
},
{
"epoch": 2.1432225063938617,
"grad_norm": 2.125,
"learning_rate": 2.8376068376068378e-05,
"loss": 0.2597,
"step": 420
},
{
"epoch": 2.1943734015345266,
"grad_norm": 2.21875,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.2343,
"step": 430
},
{
"epoch": 2.2455242966751916,
"grad_norm": 3.203125,
"learning_rate": 2.495726495726496e-05,
"loss": 0.2483,
"step": 440
},
{
"epoch": 2.296675191815857,
"grad_norm": 2.015625,
"learning_rate": 2.324786324786325e-05,
"loss": 0.2751,
"step": 450
},
{
"epoch": 2.3478260869565215,
"grad_norm": 1.546875,
"learning_rate": 2.1538461538461542e-05,
"loss": 0.2468,
"step": 460
},
{
"epoch": 2.398976982097187,
"grad_norm": 2.09375,
"learning_rate": 1.982905982905983e-05,
"loss": 0.2586,
"step": 470
},
{
"epoch": 2.4501278772378514,
"grad_norm": 2.546875,
"learning_rate": 1.811965811965812e-05,
"loss": 0.249,
"step": 480
},
{
"epoch": 2.501278772378517,
"grad_norm": 1.7265625,
"learning_rate": 1.641025641025641e-05,
"loss": 0.2664,
"step": 490
},
{
"epoch": 2.5524296675191813,
"grad_norm": 1.9921875,
"learning_rate": 1.4700854700854703e-05,
"loss": 0.2642,
"step": 500
},
{
"epoch": 2.5524296675191813,
"eval_loss": 0.6801092028617859,
"eval_runtime": 33.1113,
"eval_samples_per_second": 11.809,
"eval_steps_per_second": 5.919,
"step": 500
},
{
"epoch": 2.6035805626598467,
"grad_norm": 2.8125,
"learning_rate": 1.2991452991452993e-05,
"loss": 0.2889,
"step": 510
},
{
"epoch": 2.6547314578005117,
"grad_norm": 2.171875,
"learning_rate": 1.1282051282051283e-05,
"loss": 0.2604,
"step": 520
},
{
"epoch": 2.7058823529411766,
"grad_norm": 2.5625,
"learning_rate": 9.572649572649575e-06,
"loss": 0.2438,
"step": 530
},
{
"epoch": 2.7570332480818416,
"grad_norm": 1.703125,
"learning_rate": 7.863247863247863e-06,
"loss": 0.2579,
"step": 540
},
{
"epoch": 2.8081841432225065,
"grad_norm": 5.09375,
"learning_rate": 6.153846153846155e-06,
"loss": 0.2926,
"step": 550
},
{
"epoch": 2.8593350383631715,
"grad_norm": 1.84375,
"learning_rate": 4.444444444444445e-06,
"loss": 0.2576,
"step": 560
},
{
"epoch": 2.9104859335038364,
"grad_norm": 1.8046875,
"learning_rate": 2.735042735042735e-06,
"loss": 0.2082,
"step": 570
},
{
"epoch": 2.9616368286445014,
"grad_norm": 1.6875,
"learning_rate": 1.0256410256410257e-06,
"loss": 0.2314,
"step": 580
}
],
"logging_steps": 10,
"max_steps": 585,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1654075985362944.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}