Upload folder using huggingface_hub

a1506a1 verified about 1 year ago

8.65 kB

	{
	"best_metric": 0.029243575409054756,
	"best_model_checkpoint": "saves/chess/no_explain/checkpoint-4000",
	"epoch": 3.202643171806167,
	"eval_steps": 1000,
	"global_step": 4000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.08009611533840609,
	"grad_norm": 0.8625897724596373,
	"learning_rate": 4.006410256410257e-07,
	"loss": 1.3897,
	"step": 100
	},
	{
	"epoch": 0.16019223067681218,
	"grad_norm": 0.8895947937892531,
	"learning_rate": 8.012820512820515e-07,
	"loss": 0.0598,
	"step": 200
	},
	{
	"epoch": 0.24028834601521826,
	"grad_norm": 0.5221246844134636,
	"learning_rate": 1.201923076923077e-06,
	"loss": 0.0551,
	"step": 300
	},
	{
	"epoch": 0.32038446135362436,
	"grad_norm": 0.5590357289952654,
	"learning_rate": 1.602564102564103e-06,
	"loss": 0.0516,
	"step": 400
	},
	{
	"epoch": 0.4004805766920304,
	"grad_norm": 0.36991974174438536,
	"learning_rate": 2.0032051282051286e-06,
	"loss": 0.0501,
	"step": 500
	},
	{
	"epoch": 0.4805766920304365,
	"grad_norm": 0.6389443947236714,
	"learning_rate": 2.403846153846154e-06,
	"loss": 0.0486,
	"step": 600
	},
	{
	"epoch": 0.5606728073688426,
	"grad_norm": 0.44563280571067243,
	"learning_rate": 2.8044871794871797e-06,
	"loss": 0.0463,
	"step": 700
	},
	{
	"epoch": 0.6407689227072487,
	"grad_norm": 0.44266380357676305,
	"learning_rate": 3.205128205128206e-06,
	"loss": 0.0447,
	"step": 800
	},
	{
	"epoch": 0.7208650380456548,
	"grad_norm": 0.585654631503778,
	"learning_rate": 3.605769230769231e-06,
	"loss": 0.0441,
	"step": 900
	},
	{
	"epoch": 0.8009611533840608,
	"grad_norm": 0.600751877456253,
	"learning_rate": 4.006410256410257e-06,
	"loss": 0.0429,
	"step": 1000
	},
	{
	"epoch": 0.8009611533840608,
	"eval_loss": 0.042210426181554794,
	"eval_runtime": 97.133,
	"eval_samples_per_second": 1462.17,
	"eval_steps_per_second": 2.862,
	"step": 1000
	},
	{
	"epoch": 0.8810572687224669,
	"grad_norm": 0.2641551118831142,
	"learning_rate": 4.4070512820512826e-06,
	"loss": 0.0414,
	"step": 1100
	},
	{
	"epoch": 0.961153384060873,
	"grad_norm": 0.29049561928975876,
	"learning_rate": 4.807692307692308e-06,
	"loss": 0.0402,
	"step": 1200
	},
	{
	"epoch": 1.0408490188225872,
	"grad_norm": 0.5344113116420023,
	"learning_rate": 4.999735579817769e-06,
	"loss": 0.0386,
	"step": 1300
	},
	{
	"epoch": 1.1209451341609933,
	"grad_norm": 0.31257482202449377,
	"learning_rate": 4.997740994288484e-06,
	"loss": 0.0373,
	"step": 1400
	},
	{
	"epoch": 1.2010412494993994,
	"grad_norm": 0.4593106982622164,
	"learning_rate": 4.993792498360407e-06,
	"loss": 0.0366,
	"step": 1500
	},
	{
	"epoch": 1.2811373648378055,
	"grad_norm": 0.2012883704449717,
	"learning_rate": 4.9878931808274796e-06,
	"loss": 0.0357,
	"step": 1600
	},
	{
	"epoch": 1.3612334801762114,
	"grad_norm": 0.22908626001592647,
	"learning_rate": 4.980047656554856e-06,
	"loss": 0.0352,
	"step": 1700
	},
	{
	"epoch": 1.4413295955146175,
	"grad_norm": 0.3169879320183415,
	"learning_rate": 4.970262062868821e-06,
	"loss": 0.0346,
	"step": 1800
	},
	{
	"epoch": 1.5214257108530236,
	"grad_norm": 0.2078878255601618,
	"learning_rate": 4.958544054755741e-06,
	"loss": 0.0336,
	"step": 1900
	},
	{
	"epoch": 1.6015218261914297,
	"grad_norm": 0.2978110993331312,
	"learning_rate": 4.944902798873794e-06,
	"loss": 0.0329,
	"step": 2000
	},
	{
	"epoch": 1.6015218261914297,
	"eval_loss": 0.03361953794956207,
	"eval_runtime": 97.2876,
	"eval_samples_per_second": 1459.847,
	"eval_steps_per_second": 2.858,
	"step": 2000
	},
	{
	"epoch": 1.6816179415298358,
	"grad_norm": 0.16678424956102253,
	"learning_rate": 4.92934896638215e-06,
	"loss": 0.0328,
	"step": 2100
	},
	{
	"epoch": 1.761714056868242,
	"grad_norm": 0.19029664571581045,
	"learning_rate": 4.91189472459324e-06,
	"loss": 0.0316,
	"step": 2200
	},
	{
	"epoch": 1.841810172206648,
	"grad_norm": 0.2388908631462674,
	"learning_rate": 4.892553727454616e-06,
	"loss": 0.0317,
	"step": 2300
	},
	{
	"epoch": 1.921906287545054,
	"grad_norm": 0.15794270702360638,
	"learning_rate": 4.8713411048678635e-06,
	"loss": 0.0309,
	"step": 2400
	},
	{
	"epoch": 2.0016019223067683,
	"grad_norm": 0.2103115075663395,
	"learning_rate": 4.848273450852921e-06,
	"loss": 0.0305,
	"step": 2500
	},
	{
	"epoch": 2.0816980376451744,
	"grad_norm": 0.28601246983481904,
	"learning_rate": 4.823368810567056e-06,
	"loss": 0.0268,
	"step": 2600
	},
	{
	"epoch": 2.1617941529835805,
	"grad_norm": 0.25522616878445004,
	"learning_rate": 4.796646666188663e-06,
	"loss": 0.0268,
	"step": 2700
	},
	{
	"epoch": 2.2418902683219866,
	"grad_norm": 0.2343538332348778,
	"learning_rate": 4.768127921676916e-06,
	"loss": 0.0272,
	"step": 2800
	},
	{
	"epoch": 2.3219863836603922,
	"grad_norm": 0.22903658893889398,
	"learning_rate": 4.737834886419217e-06,
	"loss": 0.0297,
	"step": 2900
	},
	{
	"epoch": 2.4020824989987988,
	"grad_norm": 0.19855668130980528,
	"learning_rate": 4.705791257779196e-06,
	"loss": 0.0275,
	"step": 3000
	},
	{
	"epoch": 2.4020824989987988,
	"eval_loss": 0.029653793200850487,
	"eval_runtime": 97.2179,
	"eval_samples_per_second": 1460.893,
	"eval_steps_per_second": 2.86,
	"step": 3000
	},
	{
	"epoch": 2.4821786143372044,
	"grad_norm": 0.1868527106405498,
	"learning_rate": 4.672022102558958e-06,
	"loss": 0.0269,
	"step": 3100
	},
	{
	"epoch": 2.562274729675611,
	"grad_norm": 0.1985255713449175,
	"learning_rate": 4.636553837390051e-06,
	"loss": 0.0269,
	"step": 3200
	},
	{
	"epoch": 2.6423708450140166,
	"grad_norm": 0.17528235376425527,
	"learning_rate": 4.5994142080684956e-06,
	"loss": 0.026,
	"step": 3300
	},
	{
	"epoch": 2.7224669603524227,
	"grad_norm": 0.20238382028782428,
	"learning_rate": 4.560632267850054e-06,
	"loss": 0.026,
	"step": 3400
	},
	{
	"epoch": 2.802563075690829,
	"grad_norm": 0.20789525240306345,
	"learning_rate": 4.5202383547227134e-06,
	"loss": 0.0257,
	"step": 3500
	},
	{
	"epoch": 2.882659191029235,
	"grad_norm": 0.2849074845845128,
	"learning_rate": 4.478264067674155e-06,
	"loss": 0.0256,
	"step": 3600
	},
	{
	"epoch": 2.962755306367641,
	"grad_norm": 0.1826392119567578,
	"learning_rate": 4.43474224197278e-06,
	"loss": 0.0255,
	"step": 3700
	},
	{
	"epoch": 3.0424509411293554,
	"grad_norm": 0.3254043272458406,
	"learning_rate": 4.389706923481633e-06,
	"loss": 0.0224,
	"step": 3800
	},
	{
	"epoch": 3.122547056467761,
	"grad_norm": 0.2695456046362865,
	"learning_rate": 4.34319334202531e-06,
	"loss": 0.0198,
	"step": 3900
	},
	{
	"epoch": 3.202643171806167,
	"grad_norm": 0.24345073976828904,
	"learning_rate": 4.2952378838306855e-06,
	"loss": 0.0202,
	"step": 4000
	},
	{
	"epoch": 3.202643171806167,
	"eval_loss": 0.029243575409054756,
	"eval_runtime": 97.6159,
	"eval_samples_per_second": 1454.937,
	"eval_steps_per_second": 2.848,
	"step": 4000
	}
	],
	"logging_steps": 100,
	"max_steps": 12480,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 10,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 892260770119680.0,
	"train_batch_size": 64,
	"trial_name": null,
	"trial_params": null
	}