Upload folder using huggingface_hub

19f66fb verified over 1 year ago

5.99 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 1000,
	"global_step": 1467,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.10224948875255624,
	"grad_norm": 0.22070330381393433,
	"learning_rate": 9.999694946400538e-05,
	"loss": 1.2022,
	"step": 50
	},
	{
	"epoch": 0.20449897750511248,
	"grad_norm": 0.32263442873954773,
	"learning_rate": 9.963133532962538e-05,
	"loss": 1.0028,
	"step": 100
	},
	{
	"epoch": 0.3067484662576687,
	"grad_norm": 0.2867143452167511,
	"learning_rate": 9.866072190997923e-05,
	"loss": 0.9375,
	"step": 150
	},
	{
	"epoch": 0.40899795501022496,
	"grad_norm": 0.2979172170162201,
	"learning_rate": 9.709694085177272e-05,
	"loss": 0.9275,
	"step": 200
	},
	{
	"epoch": 0.5112474437627812,
	"grad_norm": 0.3326389789581299,
	"learning_rate": 9.495905443524156e-05,
	"loss": 0.9066,
	"step": 250
	},
	{
	"epoch": 0.6134969325153374,
	"grad_norm": 0.3445422649383545,
	"learning_rate": 9.227312320752585e-05,
	"loss": 0.9226,
	"step": 300
	},
	{
	"epoch": 0.7157464212678937,
	"grad_norm": 0.3284899890422821,
	"learning_rate": 8.907188830811434e-05,
	"loss": 0.8938,
	"step": 350
	},
	{
	"epoch": 0.8179959100204499,
	"grad_norm": 0.3464341461658478,
	"learning_rate": 8.539437235876908e-05,
	"loss": 0.9039,
	"step": 400
	},
	{
	"epoch": 0.9202453987730062,
	"grad_norm": 0.35458436608314514,
	"learning_rate": 8.1285403783028e-05,
	"loss": 0.888,
	"step": 450
	},
	{
	"epoch": 1.0224948875255624,
	"grad_norm": 0.3531360626220703,
	"learning_rate": 7.679507035376672e-05,
	"loss": 0.8834,
	"step": 500
	},
	{
	"epoch": 1.1247443762781186,
	"grad_norm": 0.38273200392723083,
	"learning_rate": 7.197810863000116e-05,
	"loss": 0.8308,
	"step": 550
	},
	{
	"epoch": 1.2269938650306749,
	"grad_norm": 0.3767881691455841,
	"learning_rate": 6.689323672561398e-05,
	"loss": 0.8212,
	"step": 600
	},
	{
	"epoch": 1.329243353783231,
	"grad_norm": 0.4009556472301483,
	"learning_rate": 6.160243854346398e-05,
	"loss": 0.8223,
	"step": 650
	},
	{
	"epoch": 1.4314928425357873,
	"grad_norm": 0.430261492729187,
	"learning_rate": 5.617020819996831e-05,
	"loss": 0.8233,
	"step": 700
	},
	{
	"epoch": 1.5337423312883436,
	"grad_norm": 0.39679399132728577,
	"learning_rate": 5.0662763850519936e-05,
	"loss": 0.8224,
	"step": 750
	},
	{
	"epoch": 1.6359918200408998,
	"grad_norm": 0.4061990976333618,
	"learning_rate": 4.514724049910228e-05,
	"loss": 0.8018,
	"step": 800
	},
	{
	"epoch": 1.738241308793456,
	"grad_norm": 0.4112933874130249,
	"learning_rate": 3.969087163164348e-05,
	"loss": 0.8233,
	"step": 850
	},
	{
	"epoch": 1.8404907975460123,
	"grad_norm": 0.41034209728240967,
	"learning_rate": 3.436016964888865e-05,
	"loss": 0.8079,
	"step": 900
	},
	{
	"epoch": 1.9427402862985685,
	"grad_norm": 0.4243488907814026,
	"learning_rate": 2.922011508920362e-05,
	"loss": 0.8134,
	"step": 950
	},
	{
	"epoch": 2.044989775051125,
	"grad_norm": 0.432036817073822,
	"learning_rate": 2.433336452457431e-05,
	"loss": 0.763,
	"step": 1000
	},
	{
	"epoch": 2.044989775051125,
	"eval_loss": 0.8031564950942993,
	"eval_runtime": 238.4795,
	"eval_samples_per_second": 0.923,
	"eval_steps_per_second": 0.923,
	"step": 1000
	},
	{
	"epoch": 2.147239263803681,
	"grad_norm": 0.422557532787323,
	"learning_rate": 1.975948678544301e-05,
	"loss": 0.7697,
	"step": 1050
	},
	{
	"epoch": 2.2494887525562373,
	"grad_norm": 0.44817760586738586,
	"learning_rate": 1.5554236824697687e-05,
	"loss": 0.7843,
	"step": 1100
	},
	{
	"epoch": 2.3517382413087935,
	"grad_norm": 0.4573202133178711,
	"learning_rate": 1.176887607231434e-05,
	"loss": 0.7643,
	"step": 1150
	},
	{
	"epoch": 2.4539877300613497,
	"grad_norm": 0.4587993919849396,
	"learning_rate": 8.449547565437887e-06,
	"loss": 0.7501,
	"step": 1200
	},
	{
	"epoch": 2.556237218813906,
	"grad_norm": 0.4792107045650482,
	"learning_rate": 5.6367134709813644e-06,
	"loss": 0.763,
	"step": 1250
	},
	{
	"epoch": 2.658486707566462,
	"grad_norm": 0.46617749333381653,
	"learning_rate": 3.364661857267265e-06,
	"loss": 0.7504,
	"step": 1300
	},
	{
	"epoch": 2.7607361963190185,
	"grad_norm": 0.45423439145088196,
	"learning_rate": 1.6610887270981423e-06,
	"loss": 0.7539,
	"step": 1350
	},
	{
	"epoch": 2.8629856850715747,
	"grad_norm": 0.4944806694984436,
	"learning_rate": 5.467604072171062e-07,
	"loss": 0.7662,
	"step": 1400
	},
	{
	"epoch": 2.965235173824131,
	"grad_norm": 0.45094653964042664,
	"learning_rate": 3.526040958494181e-08,
	"loss": 0.7672,
	"step": 1450
	}
	],
	"logging_steps": 50,
	"max_steps": 1467,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.441743417518326e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}