llama3_capec / trainer_state.json

Upload 12 files

0145316 verified about 1 month ago

9.1 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 30.0,
	"eval_steps": 500,
	"global_step": 240,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.6666666666666666,
	"grad_norm": 0.8689901232719421,
	"learning_rate": 1.6666666666666667e-05,
	"loss": 2.3654,
	"step": 5
	},
	{
	"epoch": 1.2666666666666666,
	"grad_norm": 0.7568275928497314,
	"learning_rate": 3.7500000000000003e-05,
	"loss": 1.8161,
	"step": 10
	},
	{
	"epoch": 1.9333333333333333,
	"grad_norm": 0.7656272053718567,
	"learning_rate": 5.833333333333334e-05,
	"loss": 1.4996,
	"step": 15
	},
	{
	"epoch": 2.533333333333333,
	"grad_norm": 0.7185885906219482,
	"learning_rate": 7.916666666666666e-05,
	"loss": 1.1473,
	"step": 20
	},
	{
	"epoch": 3.1333333333333333,
	"grad_norm": 1.1223357915878296,
	"learning_rate": 0.0001,
	"loss": 0.8881,
	"step": 25
	},
	{
	"epoch": 3.8,
	"grad_norm": 0.852708637714386,
	"learning_rate": 9.986784583502862e-05,
	"loss": 0.4632,
	"step": 30
	},
	{
	"epoch": 4.4,
	"grad_norm": 0.6442459225654602,
	"learning_rate": 9.947208192904722e-05,
	"loss": 0.2705,
	"step": 35
	},
	{
	"epoch": 5.0,
	"grad_norm": 0.8184784054756165,
	"learning_rate": 9.881480035599667e-05,
	"loss": 0.1617,
	"step": 40
	},
	{
	"epoch": 5.666666666666667,
	"grad_norm": 0.5641859769821167,
	"learning_rate": 9.789947561577445e-05,
	"loss": 0.0784,
	"step": 45
	},
	{
	"epoch": 6.266666666666667,
	"grad_norm": 0.3222081661224365,
	"learning_rate": 9.673094626744942e-05,
	"loss": 0.0533,
	"step": 50
	},
	{
	"epoch": 6.933333333333334,
	"grad_norm": 0.34017637372016907,
	"learning_rate": 9.53153893518325e-05,
	"loss": 0.0341,
	"step": 55
	},
	{
	"epoch": 7.533333333333333,
	"grad_norm": 0.375735342502594,
	"learning_rate": 9.36602877386098e-05,
	"loss": 0.0245,
	"step": 60
	},
	{
	"epoch": 8.133333333333333,
	"grad_norm": 0.14678707718849182,
	"learning_rate": 9.177439057064683e-05,
	"loss": 0.0208,
	"step": 65
	},
	{
	"epoch": 8.8,
	"grad_norm": 0.22148066759109497,
	"learning_rate": 8.966766701456177e-05,
	"loss": 0.0176,
	"step": 70
	},
	{
	"epoch": 9.4,
	"grad_norm": 0.21415655314922333,
	"learning_rate": 8.73512535620498e-05,
	"loss": 0.0102,
	"step": 75
	},
	{
	"epoch": 10.0,
	"grad_norm": 0.2622195780277252,
	"learning_rate": 8.483739516053276e-05,
	"loss": 0.0087,
	"step": 80
	},
	{
	"epoch": 10.666666666666666,
	"grad_norm": 0.16748470067977905,
	"learning_rate": 8.213938048432697e-05,
	"loss": 0.0055,
	"step": 85
	},
	{
	"epoch": 11.266666666666667,
	"grad_norm": 0.16798754036426544,
	"learning_rate": 7.927147168849704e-05,
	"loss": 0.0058,
	"step": 90
	},
	{
	"epoch": 11.933333333333334,
	"grad_norm": 0.14203017950057983,
	"learning_rate": 7.6248829016728e-05,
	"loss": 0.0052,
	"step": 95
	},
	{
	"epoch": 12.533333333333333,
	"grad_norm": 0.18258453905582428,
	"learning_rate": 7.308743066175172e-05,
	"loss": 0.0022,
	"step": 100
	},
	{
	"epoch": 13.133333333333333,
	"grad_norm": 0.015843752771615982,
	"learning_rate": 6.980398830195785e-05,
	"loss": 0.0022,
	"step": 105
	},
	{
	"epoch": 13.8,
	"grad_norm": 0.07418923079967499,
	"learning_rate": 6.641585876067807e-05,
	"loss": 0.0017,
	"step": 110
	},
	{
	"epoch": 14.4,
	"grad_norm": 0.023140624165534973,
	"learning_rate": 6.294095225512603e-05,
	"loss": 0.0012,
	"step": 115
	},
	{
	"epoch": 15.0,
	"grad_norm": 0.25409796833992004,
	"learning_rate": 5.9397637720005595e-05,
	"loss": 0.0032,
	"step": 120
	},
	{
	"epoch": 15.666666666666666,
	"grad_norm": 0.05325435474514961,
	"learning_rate": 5.5804645706261514e-05,
	"loss": 0.0007,
	"step": 125
	},
	{
	"epoch": 16.266666666666666,
	"grad_norm": 0.023507924750447273,
	"learning_rate": 5.218096936826681e-05,
	"loss": 0.0012,
	"step": 130
	},
	{
	"epoch": 16.933333333333334,
	"grad_norm": 0.10688629001379013,
	"learning_rate": 4.854576406284443e-05,
	"loss": 0.0022,
	"step": 135
	},
	{
	"epoch": 17.533333333333335,
	"grad_norm": 0.014077894389629364,
	"learning_rate": 4.491824609085991e-05,
	"loss": 0.0011,
	"step": 140
	},
	{
	"epoch": 18.133333333333333,
	"grad_norm": 0.01724964752793312,
	"learning_rate": 4.131759111665349e-05,
	"loss": 0.0009,
	"step": 145
	},
	{
	"epoch": 18.8,
	"grad_norm": 0.07156306505203247,
	"learning_rate": 3.776283280228381e-05,
	"loss": 0.0012,
	"step": 150
	},
	{
	"epoch": 19.4,
	"grad_norm": 0.0061793080531060696,
	"learning_rate": 3.427276219241933e-05,
	"loss": 0.0005,
	"step": 155
	},
	{
	"epoch": 20.0,
	"grad_norm": 0.007031604181975126,
	"learning_rate": 3.086582838174551e-05,
	"loss": 0.0005,
	"step": 160
	},
	{
	"epoch": 20.666666666666668,
	"grad_norm": 0.006758078932762146,
	"learning_rate": 2.7560040989976892e-05,
	"loss": 0.0003,
	"step": 165
	},
	{
	"epoch": 21.266666666666666,
	"grad_norm": 0.0022400650195777416,
	"learning_rate": 2.4372874960006743e-05,
	"loss": 0.0002,
	"step": 170
	},
	{
	"epoch": 21.933333333333334,
	"grad_norm": 0.005241368897259235,
	"learning_rate": 2.132117818244771e-05,
	"loss": 0.0003,
	"step": 175
	},
	{
	"epoch": 22.533333333333335,
	"grad_norm": 0.005505857989192009,
	"learning_rate": 1.842108243487513e-05,
	"loss": 0.0002,
	"step": 180
	},
	{
	"epoch": 23.133333333333333,
	"grad_norm": 0.0022109781857579947,
	"learning_rate": 1.5687918106563326e-05,
	"loss": 0.0002,
	"step": 185
	},
	{
	"epoch": 23.8,
	"grad_norm": 0.002861538203433156,
	"learning_rate": 1.3136133159493802e-05,
	"loss": 0.0002,
	"step": 190
	},
	{
	"epoch": 24.4,
	"grad_norm": 0.001845506951212883,
	"learning_rate": 1.0779216754021215e-05,
	"loss": 0.0001,
	"step": 195
	},
	{
	"epoch": 25.0,
	"grad_norm": 0.0022635224740952253,
	"learning_rate": 8.629627942924473e-06,
	"loss": 0.0002,
	"step": 200
	},
	{
	"epoch": 25.666666666666668,
	"grad_norm": 0.0018658298067748547,
	"learning_rate": 6.698729810778065e-06,
	"loss": 0.0001,
	"step": 205
	},
	{
	"epoch": 26.266666666666666,
	"grad_norm": 0.002295706421136856,
	"learning_rate": 4.996729406793943e-06,
	"loss": 0.0002,
	"step": 210
	},
	{
	"epoch": 26.933333333333334,
	"grad_norm": 0.00466426694765687,
	"learning_rate": 3.5326237886588732e-06,
	"loss": 0.0001,
	"step": 215
	},
	{
	"epoch": 27.533333333333335,
	"grad_norm": 0.0056126974523067474,
	"learning_rate": 2.314152462588659e-06,
	"loss": 0.0002,
	"step": 220
	},
	{
	"epoch": 28.133333333333333,
	"grad_norm": 0.005923236720263958,
	"learning_rate": 1.3477564710088098e-06,
	"loss": 0.0001,
	"step": 225
	},
	{
	"epoch": 28.8,
	"grad_norm": 0.0023947993759065866,
	"learning_rate": 6.385443441312978e-07,
	"loss": 0.0001,
	"step": 230
	},
	{
	"epoch": 29.4,
	"grad_norm": 0.001765914843417704,
	"learning_rate": 1.9026509541272275e-07,
	"loss": 0.0001,
	"step": 235
	},
	{
	"epoch": 30.0,
	"grad_norm": 0.0020649449434131384,
	"learning_rate": 5.2884036446265714e-09,
	"loss": 0.0001,
	"step": 240
	},
	{
	"epoch": 30.0,
	"step": 240,
	"total_flos": 1.6497175945019392e+17,
	"train_loss": 0.18536781801400745,
	"train_runtime": 7707.7803,
	"train_samples_per_second": 0.93,
	"train_steps_per_second": 0.031
	}
	],
	"logging_steps": 5,
	"max_steps": 240,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 30,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.6497175945019392e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}