web-sample-filtered-len24k / trainer_state.json

Upload trainer_state.json with huggingface_hub

cf48185 verified 9 months ago

5.84 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 1000,
	"global_step": 287,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03484320557491289,
	"grad_norm": 3.216470119539481,
	"learning_rate": 6e-06,
	"loss": 1.4213,
	"step": 10
	},
	{
	"epoch": 0.06968641114982578,
	"grad_norm": 0.9358414619228234,
	"learning_rate": 9.994664874011864e-06,
	"loss": 0.8739,
	"step": 20
	},
	{
	"epoch": 0.10452961672473868,
	"grad_norm": 0.762921970961328,
	"learning_rate": 9.93477538444123e-06,
	"loss": 0.7614,
	"step": 30
	},
	{
	"epoch": 0.13937282229965156,
	"grad_norm": 0.5825132744489881,
	"learning_rate": 9.809128215864096e-06,
	"loss": 0.6971,
	"step": 40
	},
	{
	"epoch": 0.17421602787456447,
	"grad_norm": 0.5779846846514067,
	"learning_rate": 9.619397662556434e-06,
	"loss": 0.6933,
	"step": 50
	},
	{
	"epoch": 0.20905923344947736,
	"grad_norm": 0.6432877395688604,
	"learning_rate": 9.368111953231849e-06,
	"loss": 0.6534,
	"step": 60
	},
	{
	"epoch": 0.24390243902439024,
	"grad_norm": 0.5161784198014567,
	"learning_rate": 9.058619561473308e-06,
	"loss": 0.6454,
	"step": 70
	},
	{
	"epoch": 0.2787456445993031,
	"grad_norm": 0.5515024392970438,
	"learning_rate": 8.695044586103297e-06,
	"loss": 0.6467,
	"step": 80
	},
	{
	"epoch": 0.313588850174216,
	"grad_norm": 0.5638319890229825,
	"learning_rate": 8.282231796065215e-06,
	"loss": 0.655,
	"step": 90
	},
	{
	"epoch": 0.34843205574912894,
	"grad_norm": 0.5348182598237928,
	"learning_rate": 7.82568207211296e-06,
	"loss": 0.6496,
	"step": 100
	},
	{
	"epoch": 0.3832752613240418,
	"grad_norm": 0.5584260715846722,
	"learning_rate": 7.33147910557174e-06,
	"loss": 0.6559,
	"step": 110
	},
	{
	"epoch": 0.4181184668989547,
	"grad_norm": 0.522415331465173,
	"learning_rate": 6.806208330935766e-06,
	"loss": 0.6036,
	"step": 120
	},
	{
	"epoch": 0.4529616724738676,
	"grad_norm": 0.6016439526998959,
	"learning_rate": 6.2568691725555144e-06,
	"loss": 0.6176,
	"step": 130
	},
	{
	"epoch": 0.4878048780487805,
	"grad_norm": 0.564282507025487,
	"learning_rate": 5.690781774759412e-06,
	"loss": 0.6249,
	"step": 140
	},
	{
	"epoch": 0.5226480836236934,
	"grad_norm": 0.6212522251481895,
	"learning_rate": 5.115489458265006e-06,
	"loss": 0.6282,
	"step": 150
	},
	{
	"epoch": 0.5574912891986062,
	"grad_norm": 0.5367960373375557,
	"learning_rate": 4.53865820268349e-06,
	"loss": 0.6033,
	"step": 160
	},
	{
	"epoch": 0.5923344947735192,
	"grad_norm": 0.5093764149323716,
	"learning_rate": 3.967974494549803e-06,
	"loss": 0.5936,
	"step": 170
	},
	{
	"epoch": 0.627177700348432,
	"grad_norm": 0.552515840679091,
	"learning_rate": 3.4110429020904924e-06,
	"loss": 0.625,
	"step": 180
	},
	{
	"epoch": 0.662020905923345,
	"grad_norm": 0.535450593688453,
	"learning_rate": 2.8752847415828923e-06,
	"loss": 0.6178,
	"step": 190
	},
	{
	"epoch": 0.6968641114982579,
	"grad_norm": 0.5983891293222291,
	"learning_rate": 2.3678391856132203e-06,
	"loss": 0.5937,
	"step": 200
	},
	{
	"epoch": 0.7317073170731707,
	"grad_norm": 0.5614553572127612,
	"learning_rate": 1.8954681310021434e-06,
	"loss": 0.609,
	"step": 210
	},
	{
	"epoch": 0.7665505226480837,
	"grad_norm": 0.4869927011463621,
	"learning_rate": 1.4644660940672628e-06,
	"loss": 0.6036,
	"step": 220
	},
	{
	"epoch": 0.8013937282229965,
	"grad_norm": 0.5359904941043641,
	"learning_rate": 1.0805763339010329e-06,
	"loss": 0.6067,
	"step": 230
	},
	{
	"epoch": 0.8362369337979094,
	"grad_norm": 0.5351035133523918,
	"learning_rate": 7.489143213519301e-07,
	"loss": 0.6201,
	"step": 240
	},
	{
	"epoch": 0.8710801393728222,
	"grad_norm": 0.546788838937359,
	"learning_rate": 4.738995735125895e-07,
	"loss": 0.6082,
	"step": 250
	},
	{
	"epoch": 0.9059233449477352,
	"grad_norm": 0.5760609679486035,
	"learning_rate": 2.5919676204517073e-07,
	"loss": 0.6264,
	"step": 260
	},
	{
	"epoch": 0.9407665505226481,
	"grad_norm": 0.47281891685039446,
	"learning_rate": 1.0766688009695548e-07,
	"loss": 0.5798,
	"step": 270
	},
	{
	"epoch": 0.975609756097561,
	"grad_norm": 0.5289223385943652,
	"learning_rate": 2.1329118524827662e-08,
	"loss": 0.5909,
	"step": 280
	},
	{
	"epoch": 1.0,
	"step": 287,
	"total_flos": 1387071100944384.0,
	"train_loss": 0.6654956722924102,
	"train_runtime": 27782.0837,
	"train_samples_per_second": 1.322,
	"train_steps_per_second": 0.01
	}
	],
	"logging_steps": 10,
	"max_steps": 287,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1387071100944384.0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}