sanity_style / trainer_state.json

Model save

826b3be verified over 1 year ago

8.35 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9975550122249389,
	"eval_steps": 500,
	"global_step": 204,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.004889975550122249,
	"grad_norm": 0.36700921478372245,
	"learning_rate": 9.523809523809523e-06,
	"loss": 1.8349,
	"step": 1
	},
	{
	"epoch": 0.02444987775061125,
	"grad_norm": 0.3892962682403987,
	"learning_rate": 4.761904761904762e-05,
	"loss": 1.7937,
	"step": 5
	},
	{
	"epoch": 0.0488997555012225,
	"grad_norm": 0.21809103204571412,
	"learning_rate": 9.523809523809524e-05,
	"loss": 1.7603,
	"step": 10
	},
	{
	"epoch": 0.07334963325183375,
	"grad_norm": 0.1763235766556296,
	"learning_rate": 0.00014285714285714287,
	"loss": 1.6895,
	"step": 15
	},
	{
	"epoch": 0.097799511002445,
	"grad_norm": 0.1617556242623244,
	"learning_rate": 0.00019047619047619048,
	"loss": 1.6347,
	"step": 20
	},
	{
	"epoch": 0.12224938875305623,
	"grad_norm": 0.15643068252772752,
	"learning_rate": 0.00019976432316860067,
	"loss": 1.556,
	"step": 25
	},
	{
	"epoch": 0.1466992665036675,
	"grad_norm": 0.15134695775897136,
	"learning_rate": 0.00019880878960910772,
	"loss": 1.5133,
	"step": 30
	},
	{
	"epoch": 0.17114914425427874,
	"grad_norm": 0.11939382176274925,
	"learning_rate": 0.00019712569994658315,
	"loss": 1.4775,
	"step": 35
	},
	{
	"epoch": 0.19559902200489,
	"grad_norm": 0.10829777367965486,
	"learning_rate": 0.0001947274472298717,
	"loss": 1.46,
	"step": 40
	},
	{
	"epoch": 0.2200488997555012,
	"grad_norm": 0.13772866457253763,
	"learning_rate": 0.0001916316904487005,
	"loss": 1.4135,
	"step": 45
	},
	{
	"epoch": 0.24449877750611246,
	"grad_norm": 0.09822827366409893,
	"learning_rate": 0.00018786122450571485,
	"loss": 1.4417,
	"step": 50
	},
	{
	"epoch": 0.26894865525672373,
	"grad_norm": 0.11587926176152005,
	"learning_rate": 0.00018344381237138472,
	"loss": 1.3964,
	"step": 55
	},
	{
	"epoch": 0.293398533007335,
	"grad_norm": 0.11240730186157354,
	"learning_rate": 0.00017841198065767107,
	"loss": 1.3837,
	"step": 60
	},
	{
	"epoch": 0.31784841075794623,
	"grad_norm": 0.1128346094848023,
	"learning_rate": 0.00017280278011569847,
	"loss": 1.3633,
	"step": 65
	},
	{
	"epoch": 0.3422982885085575,
	"grad_norm": 0.09015460553796734,
	"learning_rate": 0.00016665751282095634,
	"loss": 1.3806,
	"step": 70
	},
	{
	"epoch": 0.36674816625916873,
	"grad_norm": 0.10347143235019492,
	"learning_rate": 0.00016002142805483685,
	"loss": 1.3788,
	"step": 75
	},
	{
	"epoch": 0.39119804400978,
	"grad_norm": 0.6669561111631297,
	"learning_rate": 0.0001529433891218185,
	"loss": 1.3851,
	"step": 80
	},
	{
	"epoch": 0.4156479217603912,
	"grad_norm": 0.09186122563595991,
	"learning_rate": 0.0001454755135556106,
	"loss": 1.3688,
	"step": 85
	},
	{
	"epoch": 0.4400977995110024,
	"grad_norm": 0.09496253504589665,
	"learning_rate": 0.00013767278936351854,
	"loss": 1.3288,
	"step": 90
	},
	{
	"epoch": 0.46454767726161367,
	"grad_norm": 0.0934552174596641,
	"learning_rate": 0.00012959267013472892,
	"loss": 1.3816,
	"step": 95
	},
	{
	"epoch": 0.4889975550122249,
	"grad_norm": 0.10547587865492095,
	"learning_rate": 0.00012129465199384157,
	"loss": 1.3508,
	"step": 100
	},
	{
	"epoch": 0.5134474327628362,
	"grad_norm": 0.09760937352012419,
	"learning_rate": 0.00011283983551465511,
	"loss": 1.3434,
	"step": 105
	},
	{
	"epoch": 0.5378973105134475,
	"grad_norm": 0.09910476129769094,
	"learning_rate": 0.00010429047581995546,
	"loss": 1.337,
	"step": 110
	},
	{
	"epoch": 0.5623471882640587,
	"grad_norm": 0.09961470132705542,
	"learning_rate": 9.570952418004455e-05,
	"loss": 1.3663,
	"step": 115
	},
	{
	"epoch": 0.58679706601467,
	"grad_norm": 0.09739741126425534,
	"learning_rate": 8.71601644853449e-05,
	"loss": 1.3389,
	"step": 120
	},
	{
	"epoch": 0.6112469437652812,
	"grad_norm": 0.10283288071724549,
	"learning_rate": 7.870534800615845e-05,
	"loss": 1.3369,
	"step": 125
	},
	{
	"epoch": 0.6356968215158925,
	"grad_norm": 0.10097594818776578,
	"learning_rate": 7.040732986527108e-05,
	"loss": 1.3555,
	"step": 130
	},
	{
	"epoch": 0.6601466992665037,
	"grad_norm": 0.09722568422567038,
	"learning_rate": 6.232721063648148e-05,
	"loss": 1.3537,
	"step": 135
	},
	{
	"epoch": 0.684596577017115,
	"grad_norm": 0.09586006955336507,
	"learning_rate": 5.452448644438946e-05,
	"loss": 1.3479,
	"step": 140
	},
	{
	"epoch": 0.7090464547677262,
	"grad_norm": 0.09299592243784432,
	"learning_rate": 4.7056610878181486e-05,
	"loss": 1.3585,
	"step": 145
	},
	{
	"epoch": 0.7334963325183375,
	"grad_norm": 0.10069734955186067,
	"learning_rate": 3.997857194516319e-05,
	"loss": 1.36,
	"step": 150
	},
	{
	"epoch": 0.7579462102689487,
	"grad_norm": 0.09194306954899185,
	"learning_rate": 3.334248717904368e-05,
	"loss": 1.3571,
	"step": 155
	},
	{
	"epoch": 0.78239608801956,
	"grad_norm": 0.0926753184350128,
	"learning_rate": 2.719721988430153e-05,
	"loss": 1.3312,
	"step": 160
	},
	{
	"epoch": 0.8068459657701712,
	"grad_norm": 0.09878832937752359,
	"learning_rate": 2.1588019342328968e-05,
	"loss": 1.3208,
	"step": 165
	},
	{
	"epoch": 0.8312958435207825,
	"grad_norm": 0.0960429927663343,
	"learning_rate": 1.6556187628615273e-05,
	"loss": 1.3149,
	"step": 170
	},
	{
	"epoch": 0.8557457212713936,
	"grad_norm": 0.0899273808312634,
	"learning_rate": 1.2138775494285182e-05,
	"loss": 1.3296,
	"step": 175
	},
	{
	"epoch": 0.8801955990220048,
	"grad_norm": 0.09384612891193284,
	"learning_rate": 8.368309551299536e-06,
	"loss": 1.3378,
	"step": 180
	},
	{
	"epoch": 0.9046454767726161,
	"grad_norm": 0.11142899089067898,
	"learning_rate": 5.272552770128314e-06,
	"loss": 1.3591,
	"step": 185
	},
	{
	"epoch": 0.9290953545232273,
	"grad_norm": 0.09142510348253172,
	"learning_rate": 2.8743000534168675e-06,
	"loss": 1.3623,
	"step": 190
	},
	{
	"epoch": 0.9535452322738386,
	"grad_norm": 0.09320844556768892,
	"learning_rate": 1.1912103908922945e-06,
	"loss": 1.3553,
	"step": 195
	},
	{
	"epoch": 0.9779951100244498,
	"grad_norm": 0.09161534630758739,
	"learning_rate": 2.3567683139936735e-07,
	"loss": 1.3407,
	"step": 200
	},
	{
	"epoch": 0.9975550122249389,
	"eval_loss": 1.3294435739517212,
	"eval_runtime": 1.8191,
	"eval_samples_per_second": 3.848,
	"eval_steps_per_second": 0.55,
	"step": 204
	},
	{
	"epoch": 0.9975550122249389,
	"step": 204,
	"total_flos": 2562962409652224.0,
	"train_loss": 1.4071934585477792,
	"train_runtime": 4285.5681,
	"train_samples_per_second": 3.053,
	"train_steps_per_second": 0.048
	}
	],
	"logging_steps": 5,
	"max_steps": 204,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 25,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2562962409652224.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}