eunyu_test / checkpoint-254 /trainer_state.json

Upload folder using huggingface_hub

5304b60 verified about 1 year ago

5.03 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.9901768172888015,
	"eval_steps": 500,
	"global_step": 254,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.07858546168958742,
	"grad_norm": 0.9010041952133179,
	"learning_rate": 2e-05,
	"loss": 1.3369,
	"step": 10
	},
	{
	"epoch": 0.15717092337917485,
	"grad_norm": 0.9163873791694641,
	"learning_rate": 4e-05,
	"loss": 1.3095,
	"step": 20
	},
	{
	"epoch": 0.2357563850687623,
	"grad_norm": 0.8760176301002502,
	"learning_rate": 6e-05,
	"loss": 1.2597,
	"step": 30
	},
	{
	"epoch": 0.3143418467583497,
	"grad_norm": 0.9276160001754761,
	"learning_rate": 8e-05,
	"loss": 1.1331,
	"step": 40
	},
	{
	"epoch": 0.3929273084479371,
	"grad_norm": 0.9490565061569214,
	"learning_rate": 0.0001,
	"loss": 0.9676,
	"step": 50
	},
	{
	"epoch": 0.4715127701375246,
	"grad_norm": 1.088897943496704,
	"learning_rate": 9.509803921568627e-05,
	"loss": 0.7618,
	"step": 60
	},
	{
	"epoch": 0.550098231827112,
	"grad_norm": 0.7143833041191101,
	"learning_rate": 9.019607843137255e-05,
	"loss": 0.6247,
	"step": 70
	},
	{
	"epoch": 0.6286836935166994,
	"grad_norm": 0.6208414435386658,
	"learning_rate": 8.529411764705883e-05,
	"loss": 0.5592,
	"step": 80
	},
	{
	"epoch": 0.7072691552062869,
	"grad_norm": 0.5866327881813049,
	"learning_rate": 8.039215686274511e-05,
	"loss": 0.5452,
	"step": 90
	},
	{
	"epoch": 0.7858546168958742,
	"grad_norm": 0.48709139227867126,
	"learning_rate": 7.549019607843137e-05,
	"loss": 0.5323,
	"step": 100
	},
	{
	"epoch": 0.8644400785854617,
	"grad_norm": 0.45397838950157166,
	"learning_rate": 7.058823529411765e-05,
	"loss": 0.498,
	"step": 110
	},
	{
	"epoch": 0.9430255402750491,
	"grad_norm": 0.44005146622657776,
	"learning_rate": 6.568627450980392e-05,
	"loss": 0.5066,
	"step": 120
	},
	{
	"epoch": 1.0157170923379175,
	"grad_norm": 0.40322428941726685,
	"learning_rate": 6.078431372549019e-05,
	"loss": 0.4765,
	"step": 130
	},
	{
	"epoch": 1.0943025540275049,
	"grad_norm": 0.3713725805282593,
	"learning_rate": 5.588235294117647e-05,
	"loss": 0.4957,
	"step": 140
	},
	{
	"epoch": 1.1728880157170924,
	"grad_norm": 0.3928331732749939,
	"learning_rate": 5.0980392156862745e-05,
	"loss": 0.4857,
	"step": 150
	},
	{
	"epoch": 1.2514734774066798,
	"grad_norm": 0.5155062675476074,
	"learning_rate": 4.607843137254902e-05,
	"loss": 0.4885,
	"step": 160
	},
	{
	"epoch": 1.3300589390962672,
	"grad_norm": 0.43860283493995667,
	"learning_rate": 4.11764705882353e-05,
	"loss": 0.5031,
	"step": 170
	},
	{
	"epoch": 1.4086444007858545,
	"grad_norm": 0.3964126706123352,
	"learning_rate": 3.627450980392157e-05,
	"loss": 0.5079,
	"step": 180
	},
	{
	"epoch": 1.487229862475442,
	"grad_norm": 0.401035338640213,
	"learning_rate": 3.137254901960784e-05,
	"loss": 0.4938,
	"step": 190
	},
	{
	"epoch": 1.5658153241650294,
	"grad_norm": 0.4307032525539398,
	"learning_rate": 2.647058823529412e-05,
	"loss": 0.4737,
	"step": 200
	},
	{
	"epoch": 1.644400785854617,
	"grad_norm": 0.44220617413520813,
	"learning_rate": 2.1568627450980395e-05,
	"loss": 0.4633,
	"step": 210
	},
	{
	"epoch": 1.7229862475442044,
	"grad_norm": 0.4620322287082672,
	"learning_rate": 1.6666666666666667e-05,
	"loss": 0.4723,
	"step": 220
	},
	{
	"epoch": 1.8015717092337917,
	"grad_norm": 0.41082972288131714,
	"learning_rate": 1.1764705882352942e-05,
	"loss": 0.4767,
	"step": 230
	},
	{
	"epoch": 1.880157170923379,
	"grad_norm": 0.5318537950515747,
	"learning_rate": 6.862745098039216e-06,
	"loss": 0.4725,
	"step": 240
	},
	{
	"epoch": 1.9587426326129664,
	"grad_norm": 0.3858027458190918,
	"learning_rate": 1.96078431372549e-06,
	"loss": 0.4795,
	"step": 250
	}
	],
	"logging_steps": 10,
	"max_steps": 254,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 6444092488679424.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}