{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9985211475894705,
"eval_steps": 500,
"global_step": 422,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.023661638568470866,
"grad_norm": 6.501171170453925,
"learning_rate": 5e-06,
"loss": 0.8881,
"step": 10
},
{
"epoch": 0.04732327713694173,
"grad_norm": 5.9691618342357975,
"learning_rate": 5e-06,
"loss": 0.7939,
"step": 20
},
{
"epoch": 0.0709849157054126,
"grad_norm": 3.800672425152988,
"learning_rate": 5e-06,
"loss": 0.7688,
"step": 30
},
{
"epoch": 0.09464655427388347,
"grad_norm": 1.0801121583577953,
"learning_rate": 5e-06,
"loss": 0.7473,
"step": 40
},
{
"epoch": 0.11830819284235433,
"grad_norm": 0.9685892726062331,
"learning_rate": 5e-06,
"loss": 0.7309,
"step": 50
},
{
"epoch": 0.1419698314108252,
"grad_norm": 0.7335129796923299,
"learning_rate": 5e-06,
"loss": 0.7181,
"step": 60
},
{
"epoch": 0.16563146997929606,
"grad_norm": 0.5463303979273281,
"learning_rate": 5e-06,
"loss": 0.7118,
"step": 70
},
{
"epoch": 0.18929310854776693,
"grad_norm": 0.7971470498150557,
"learning_rate": 5e-06,
"loss": 0.7019,
"step": 80
},
{
"epoch": 0.2129547471162378,
"grad_norm": 0.5202406996039837,
"learning_rate": 5e-06,
"loss": 0.6862,
"step": 90
},
{
"epoch": 0.23661638568470866,
"grad_norm": 0.5912783446923657,
"learning_rate": 5e-06,
"loss": 0.6854,
"step": 100
},
{
"epoch": 0.26027802425317953,
"grad_norm": 0.5421067803132418,
"learning_rate": 5e-06,
"loss": 0.6902,
"step": 110
},
{
"epoch": 0.2839396628216504,
"grad_norm": 0.6577266833093017,
"learning_rate": 5e-06,
"loss": 0.6776,
"step": 120
},
{
"epoch": 0.30760130139012126,
"grad_norm": 0.7220884795089095,
"learning_rate": 5e-06,
"loss": 0.6736,
"step": 130
},
{
"epoch": 0.33126293995859213,
"grad_norm": 0.6298167534415481,
"learning_rate": 5e-06,
"loss": 0.6821,
"step": 140
},
{
"epoch": 0.354924578527063,
"grad_norm": 0.6024637677965929,
"learning_rate": 5e-06,
"loss": 0.6647,
"step": 150
},
{
"epoch": 0.37858621709553386,
"grad_norm": 0.5147721195366849,
"learning_rate": 5e-06,
"loss": 0.6691,
"step": 160
},
{
"epoch": 0.4022478556640047,
"grad_norm": 0.7032904919994801,
"learning_rate": 5e-06,
"loss": 0.6672,
"step": 170
},
{
"epoch": 0.4259094942324756,
"grad_norm": 0.5237180402877233,
"learning_rate": 5e-06,
"loss": 0.6739,
"step": 180
},
{
"epoch": 0.44957113280094646,
"grad_norm": 0.5717691530030693,
"learning_rate": 5e-06,
"loss": 0.6698,
"step": 190
},
{
"epoch": 0.4732327713694173,
"grad_norm": 0.4766660835336923,
"learning_rate": 5e-06,
"loss": 0.6688,
"step": 200
},
{
"epoch": 0.4968944099378882,
"grad_norm": 0.6790711796986141,
"learning_rate": 5e-06,
"loss": 0.6695,
"step": 210
},
{
"epoch": 0.5205560485063591,
"grad_norm": 0.5878534288094095,
"learning_rate": 5e-06,
"loss": 0.668,
"step": 220
},
{
"epoch": 0.54421768707483,
"grad_norm": 0.4783030354173372,
"learning_rate": 5e-06,
"loss": 0.6603,
"step": 230
},
{
"epoch": 0.5678793256433008,
"grad_norm": 0.5394816933853074,
"learning_rate": 5e-06,
"loss": 0.6645,
"step": 240
},
{
"epoch": 0.5915409642117717,
"grad_norm": 0.6757841342023195,
"learning_rate": 5e-06,
"loss": 0.6616,
"step": 250
},
{
"epoch": 0.6152026027802425,
"grad_norm": 0.620819409974346,
"learning_rate": 5e-06,
"loss": 0.652,
"step": 260
},
{
"epoch": 0.6388642413487134,
"grad_norm": 0.4646800950253652,
"learning_rate": 5e-06,
"loss": 0.6639,
"step": 270
},
{
"epoch": 0.6625258799171843,
"grad_norm": 0.584735854517762,
"learning_rate": 5e-06,
"loss": 0.6651,
"step": 280
},
{
"epoch": 0.6861875184856552,
"grad_norm": 0.4893625135857586,
"learning_rate": 5e-06,
"loss": 0.6596,
"step": 290
},
{
"epoch": 0.709849157054126,
"grad_norm": 0.6392477367293743,
"learning_rate": 5e-06,
"loss": 0.6535,
"step": 300
},
{
"epoch": 0.7335107956225969,
"grad_norm": 0.5580856537983637,
"learning_rate": 5e-06,
"loss": 0.6619,
"step": 310
},
{
"epoch": 0.7571724341910677,
"grad_norm": 0.6586565023491409,
"learning_rate": 5e-06,
"loss": 0.6556,
"step": 320
},
{
"epoch": 0.7808340727595386,
"grad_norm": 0.7017087820497026,
"learning_rate": 5e-06,
"loss": 0.6541,
"step": 330
},
{
"epoch": 0.8044957113280095,
"grad_norm": 0.6201315444988488,
"learning_rate": 5e-06,
"loss": 0.6538,
"step": 340
},
{
"epoch": 0.8281573498964804,
"grad_norm": 0.5419722366809865,
"learning_rate": 5e-06,
"loss": 0.6602,
"step": 350
},
{
"epoch": 0.8518189884649512,
"grad_norm": 0.47923619949498586,
"learning_rate": 5e-06,
"loss": 0.6537,
"step": 360
},
{
"epoch": 0.8754806270334221,
"grad_norm": 0.48623500055139035,
"learning_rate": 5e-06,
"loss": 0.6565,
"step": 370
},
{
"epoch": 0.8991422656018929,
"grad_norm": 0.4927729455615553,
"learning_rate": 5e-06,
"loss": 0.6477,
"step": 380
},
{
"epoch": 0.9228039041703638,
"grad_norm": 0.47069043398418653,
"learning_rate": 5e-06,
"loss": 0.6552,
"step": 390
},
{
"epoch": 0.9464655427388347,
"grad_norm": 0.5081907383490093,
"learning_rate": 5e-06,
"loss": 0.6464,
"step": 400
},
{
"epoch": 0.9701271813073056,
"grad_norm": 0.4892217314838454,
"learning_rate": 5e-06,
"loss": 0.649,
"step": 410
},
{
"epoch": 0.9937888198757764,
"grad_norm": 0.5410064668246478,
"learning_rate": 5e-06,
"loss": 0.648,
"step": 420
},
{
"epoch": 0.9985211475894705,
"eval_loss": 0.6502951979637146,
"eval_runtime": 446.2763,
"eval_samples_per_second": 25.52,
"eval_steps_per_second": 0.399,
"step": 422
},
{
"epoch": 0.9985211475894705,
"step": 422,
"total_flos": 706656337920000.0,
"train_loss": 0.6818134089781774,
"train_runtime": 24762.6241,
"train_samples_per_second": 8.738,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 422,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 706656337920000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}