emrwa / trainer_state.json

Upload 13 files

d00c3a1 verified about 2 months ago

11.3 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.9872122762148337,
	"eval_steps": 100,
	"global_step": 585,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.05115089514066496,
	"grad_norm": 12.8125,
	"learning_rate": 9.846153846153848e-05,
	"loss": 0.9959,
	"step": 10
	},
	{
	"epoch": 0.10230179028132992,
	"grad_norm": 4.6875,
	"learning_rate": 9.675213675213675e-05,
	"loss": 0.5833,
	"step": 20
	},
	{
	"epoch": 0.1534526854219949,
	"grad_norm": 3.9375,
	"learning_rate": 9.504273504273504e-05,
	"loss": 0.4739,
	"step": 30
	},
	{
	"epoch": 0.20460358056265984,
	"grad_norm": 3.734375,
	"learning_rate": 9.333333333333334e-05,
	"loss": 0.4993,
	"step": 40
	},
	{
	"epoch": 0.2557544757033248,
	"grad_norm": 3.65625,
	"learning_rate": 9.162393162393162e-05,
	"loss": 0.4797,
	"step": 50
	},
	{
	"epoch": 0.3069053708439898,
	"grad_norm": 2.875,
	"learning_rate": 8.991452991452992e-05,
	"loss": 0.389,
	"step": 60
	},
	{
	"epoch": 0.35805626598465473,
	"grad_norm": 2.8125,
	"learning_rate": 8.820512820512821e-05,
	"loss": 0.4691,
	"step": 70
	},
	{
	"epoch": 0.4092071611253197,
	"grad_norm": 2.46875,
	"learning_rate": 8.64957264957265e-05,
	"loss": 0.3734,
	"step": 80
	},
	{
	"epoch": 0.46035805626598464,
	"grad_norm": 2.28125,
	"learning_rate": 8.478632478632479e-05,
	"loss": 0.4401,
	"step": 90
	},
	{
	"epoch": 0.5115089514066496,
	"grad_norm": 2.734375,
	"learning_rate": 8.307692307692309e-05,
	"loss": 0.4304,
	"step": 100
	},
	{
	"epoch": 0.5115089514066496,
	"eval_loss": 0.8407434821128845,
	"eval_runtime": 35.229,
	"eval_samples_per_second": 11.099,
	"eval_steps_per_second": 5.564,
	"step": 100
	},
	{
	"epoch": 0.5626598465473146,
	"grad_norm": 2.03125,
	"learning_rate": 8.136752136752138e-05,
	"loss": 0.4492,
	"step": 110
	},
	{
	"epoch": 0.6138107416879796,
	"grad_norm": 3.53125,
	"learning_rate": 7.965811965811965e-05,
	"loss": 0.3949,
	"step": 120
	},
	{
	"epoch": 0.6649616368286445,
	"grad_norm": 2.03125,
	"learning_rate": 7.794871794871795e-05,
	"loss": 0.3737,
	"step": 130
	},
	{
	"epoch": 0.7161125319693095,
	"grad_norm": 2.375,
	"learning_rate": 7.623931623931624e-05,
	"loss": 0.4031,
	"step": 140
	},
	{
	"epoch": 0.7672634271099744,
	"grad_norm": 2.375,
	"learning_rate": 7.452991452991453e-05,
	"loss": 0.3672,
	"step": 150
	},
	{
	"epoch": 0.8184143222506394,
	"grad_norm": 2.6875,
	"learning_rate": 7.282051282051282e-05,
	"loss": 0.3944,
	"step": 160
	},
	{
	"epoch": 0.8695652173913043,
	"grad_norm": 2.625,
	"learning_rate": 7.111111111111112e-05,
	"loss": 0.4473,
	"step": 170
	},
	{
	"epoch": 0.9207161125319693,
	"grad_norm": 2.21875,
	"learning_rate": 6.940170940170941e-05,
	"loss": 0.3476,
	"step": 180
	},
	{
	"epoch": 0.9718670076726342,
	"grad_norm": 3.578125,
	"learning_rate": 6.76923076923077e-05,
	"loss": 0.3961,
	"step": 190
	},
	{
	"epoch": 1.020460358056266,
	"grad_norm": 3.34375,
	"learning_rate": 6.598290598290599e-05,
	"loss": 0.3859,
	"step": 200
	},
	{
	"epoch": 1.020460358056266,
	"eval_loss": 0.7718821167945862,
	"eval_runtime": 33.9574,
	"eval_samples_per_second": 11.514,
	"eval_steps_per_second": 5.772,
	"step": 200
	},
	{
	"epoch": 1.0716112531969308,
	"grad_norm": 2.578125,
	"learning_rate": 6.427350427350429e-05,
	"loss": 0.3484,
	"step": 210
	},
	{
	"epoch": 1.1227621483375958,
	"grad_norm": 3.296875,
	"learning_rate": 6.256410256410256e-05,
	"loss": 0.3425,
	"step": 220
	},
	{
	"epoch": 1.1739130434782608,
	"grad_norm": 2.09375,
	"learning_rate": 6.085470085470085e-05,
	"loss": 0.3622,
	"step": 230
	},
	{
	"epoch": 1.2250639386189257,
	"grad_norm": 2.796875,
	"learning_rate": 5.9145299145299146e-05,
	"loss": 0.3369,
	"step": 240
	},
	{
	"epoch": 1.2762148337595907,
	"grad_norm": 4.59375,
	"learning_rate": 5.7435897435897434e-05,
	"loss": 0.3215,
	"step": 250
	},
	{
	"epoch": 1.3273657289002558,
	"grad_norm": 3.734375,
	"learning_rate": 5.572649572649573e-05,
	"loss": 0.328,
	"step": 260
	},
	{
	"epoch": 1.3785166240409208,
	"grad_norm": 2.71875,
	"learning_rate": 5.401709401709402e-05,
	"loss": 0.3117,
	"step": 270
	},
	{
	"epoch": 1.4296675191815857,
	"grad_norm": 2.328125,
	"learning_rate": 5.230769230769231e-05,
	"loss": 0.2767,
	"step": 280
	},
	{
	"epoch": 1.4808184143222507,
	"grad_norm": 2.125,
	"learning_rate": 5.05982905982906e-05,
	"loss": 0.2631,
	"step": 290
	},
	{
	"epoch": 1.5319693094629157,
	"grad_norm": 2.375,
	"learning_rate": 4.888888888888889e-05,
	"loss": 0.3234,
	"step": 300
	},
	{
	"epoch": 1.5319693094629157,
	"eval_loss": 0.7180720567703247,
	"eval_runtime": 34.1345,
	"eval_samples_per_second": 11.455,
	"eval_steps_per_second": 5.742,
	"step": 300
	},
	{
	"epoch": 1.5831202046035806,
	"grad_norm": 2.328125,
	"learning_rate": 4.717948717948718e-05,
	"loss": 0.2915,
	"step": 310
	},
	{
	"epoch": 1.6342710997442456,
	"grad_norm": 2.640625,
	"learning_rate": 4.5470085470085474e-05,
	"loss": 0.3123,
	"step": 320
	},
	{
	"epoch": 1.6854219948849105,
	"grad_norm": 2.09375,
	"learning_rate": 4.376068376068376e-05,
	"loss": 0.3252,
	"step": 330
	},
	{
	"epoch": 1.7365728900255755,
	"grad_norm": 1.7734375,
	"learning_rate": 4.205128205128206e-05,
	"loss": 0.3095,
	"step": 340
	},
	{
	"epoch": 1.7877237851662404,
	"grad_norm": 2.28125,
	"learning_rate": 4.0341880341880346e-05,
	"loss": 0.2539,
	"step": 350
	},
	{
	"epoch": 1.8388746803069054,
	"grad_norm": 2.5625,
	"learning_rate": 3.8632478632478634e-05,
	"loss": 0.2742,
	"step": 360
	},
	{
	"epoch": 1.8900255754475703,
	"grad_norm": 3.078125,
	"learning_rate": 3.692307692307693e-05,
	"loss": 0.3169,
	"step": 370
	},
	{
	"epoch": 1.9411764705882353,
	"grad_norm": 1.84375,
	"learning_rate": 3.521367521367522e-05,
	"loss": 0.2767,
	"step": 380
	},
	{
	"epoch": 1.9923273657289002,
	"grad_norm": 1.859375,
	"learning_rate": 3.3504273504273506e-05,
	"loss": 0.3239,
	"step": 390
	},
	{
	"epoch": 2.040920716112532,
	"grad_norm": 1.8203125,
	"learning_rate": 3.1794871794871795e-05,
	"loss": 0.269,
	"step": 400
	},
	{
	"epoch": 2.040920716112532,
	"eval_loss": 0.6828166842460632,
	"eval_runtime": 33.8312,
	"eval_samples_per_second": 11.557,
	"eval_steps_per_second": 5.793,
	"step": 400
	},
	{
	"epoch": 2.0920716112531967,
	"grad_norm": 1.578125,
	"learning_rate": 3.0085470085470086e-05,
	"loss": 0.2818,
	"step": 410
	},
	{
	"epoch": 2.1432225063938617,
	"grad_norm": 2.125,
	"learning_rate": 2.8376068376068378e-05,
	"loss": 0.2597,
	"step": 420
	},
	{
	"epoch": 2.1943734015345266,
	"grad_norm": 2.21875,
	"learning_rate": 2.6666666666666667e-05,
	"loss": 0.2343,
	"step": 430
	},
	{
	"epoch": 2.2455242966751916,
	"grad_norm": 3.203125,
	"learning_rate": 2.495726495726496e-05,
	"loss": 0.2483,
	"step": 440
	},
	{
	"epoch": 2.296675191815857,
	"grad_norm": 2.015625,
	"learning_rate": 2.324786324786325e-05,
	"loss": 0.2751,
	"step": 450
	},
	{
	"epoch": 2.3478260869565215,
	"grad_norm": 1.546875,
	"learning_rate": 2.1538461538461542e-05,
	"loss": 0.2468,
	"step": 460
	},
	{
	"epoch": 2.398976982097187,
	"grad_norm": 2.09375,
	"learning_rate": 1.982905982905983e-05,
	"loss": 0.2586,
	"step": 470
	},
	{
	"epoch": 2.4501278772378514,
	"grad_norm": 2.546875,
	"learning_rate": 1.811965811965812e-05,
	"loss": 0.249,
	"step": 480
	},
	{
	"epoch": 2.501278772378517,
	"grad_norm": 1.7265625,
	"learning_rate": 1.641025641025641e-05,
	"loss": 0.2664,
	"step": 490
	},
	{
	"epoch": 2.5524296675191813,
	"grad_norm": 1.9921875,
	"learning_rate": 1.4700854700854703e-05,
	"loss": 0.2642,
	"step": 500
	},
	{
	"epoch": 2.5524296675191813,
	"eval_loss": 0.6801092028617859,
	"eval_runtime": 33.1113,
	"eval_samples_per_second": 11.809,
	"eval_steps_per_second": 5.919,
	"step": 500
	},
	{
	"epoch": 2.6035805626598467,
	"grad_norm": 2.8125,
	"learning_rate": 1.2991452991452993e-05,
	"loss": 0.2889,
	"step": 510
	},
	{
	"epoch": 2.6547314578005117,
	"grad_norm": 2.171875,
	"learning_rate": 1.1282051282051283e-05,
	"loss": 0.2604,
	"step": 520
	},
	{
	"epoch": 2.7058823529411766,
	"grad_norm": 2.5625,
	"learning_rate": 9.572649572649575e-06,
	"loss": 0.2438,
	"step": 530
	},
	{
	"epoch": 2.7570332480818416,
	"grad_norm": 1.703125,
	"learning_rate": 7.863247863247863e-06,
	"loss": 0.2579,
	"step": 540
	},
	{
	"epoch": 2.8081841432225065,
	"grad_norm": 5.09375,
	"learning_rate": 6.153846153846155e-06,
	"loss": 0.2926,
	"step": 550
	},
	{
	"epoch": 2.8593350383631715,
	"grad_norm": 1.84375,
	"learning_rate": 4.444444444444445e-06,
	"loss": 0.2576,
	"step": 560
	},
	{
	"epoch": 2.9104859335038364,
	"grad_norm": 1.8046875,
	"learning_rate": 2.735042735042735e-06,
	"loss": 0.2082,
	"step": 570
	},
	{
	"epoch": 2.9616368286445014,
	"grad_norm": 1.6875,
	"learning_rate": 1.0256410256410257e-06,
	"loss": 0.2314,
	"step": 580
	}
	],
	"logging_steps": 10,
	"max_steps": 585,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 1500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1654075985362944.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}