initial-colbert / trainer_state.json

Upload folder using huggingface_hub

7a50d08 verified 8 months ago

6.4 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9961439588688946,
	"eval_steps": 500,
	"global_step": 15500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.032133676092544985,
	"grad_norm": 7.696083068847656,
	"learning_rate": 2.9039845758354757e-06,
	"loss": 0.4976,
	"step": 500
	},
	{
	"epoch": 0.06426735218508997,
	"grad_norm": 4.741117000579834,
	"learning_rate": 2.807583547557841e-06,
	"loss": 0.3532,
	"step": 1000
	},
	{
	"epoch": 0.09640102827763496,
	"grad_norm": 6.354885101318359,
	"learning_rate": 2.711182519280206e-06,
	"loss": 0.3195,
	"step": 1500
	},
	{
	"epoch": 0.12853470437017994,
	"grad_norm": 4.841858386993408,
	"learning_rate": 2.614781491002571e-06,
	"loss": 0.3079,
	"step": 2000
	},
	{
	"epoch": 0.16066838046272494,
	"grad_norm": 4.776275634765625,
	"learning_rate": 2.518573264781491e-06,
	"loss": 0.3067,
	"step": 2500
	},
	{
	"epoch": 0.1928020565552699,
	"grad_norm": 5.285233974456787,
	"learning_rate": 2.422172236503856e-06,
	"loss": 0.2957,
	"step": 3000
	},
	{
	"epoch": 0.2249357326478149,
	"grad_norm": 5.628823757171631,
	"learning_rate": 2.3257712082262213e-06,
	"loss": 0.3086,
	"step": 3500
	},
	{
	"epoch": 0.2570694087403599,
	"grad_norm": 4.082389831542969,
	"learning_rate": 2.229370179948586e-06,
	"loss": 0.2927,
	"step": 4000
	},
	{
	"epoch": 0.2892030848329049,
	"grad_norm": 5.4696478843688965,
	"learning_rate": 2.1331619537275066e-06,
	"loss": 0.2922,
	"step": 4500
	},
	{
	"epoch": 0.3213367609254499,
	"grad_norm": 4.862800598144531,
	"learning_rate": 2.0367609254498712e-06,
	"loss": 0.2931,
	"step": 5000
	},
	{
	"epoch": 0.35347043701799485,
	"grad_norm": 4.961813449859619,
	"learning_rate": 1.9403598971722367e-06,
	"loss": 0.2957,
	"step": 5500
	},
	{
	"epoch": 0.3856041131105398,
	"grad_norm": 4.734184741973877,
	"learning_rate": 1.8439588688946016e-06,
	"loss": 0.2809,
	"step": 6000
	},
	{
	"epoch": 0.41773778920308485,
	"grad_norm": 4.716980934143066,
	"learning_rate": 1.7477506426735218e-06,
	"loss": 0.2773,
	"step": 6500
	},
	{
	"epoch": 0.4498714652956298,
	"grad_norm": 4.844335079193115,
	"learning_rate": 1.651349614395887e-06,
	"loss": 0.2728,
	"step": 7000
	},
	{
	"epoch": 0.4820051413881748,
	"grad_norm": 5.491813659667969,
	"learning_rate": 1.554948586118252e-06,
	"loss": 0.2888,
	"step": 7500
	},
	{
	"epoch": 0.5141388174807198,
	"grad_norm": 4.701641082763672,
	"learning_rate": 1.458547557840617e-06,
	"loss": 0.2863,
	"step": 8000
	},
	{
	"epoch": 0.5462724935732648,
	"grad_norm": 5.017972469329834,
	"learning_rate": 1.3623393316195374e-06,
	"loss": 0.2813,
	"step": 8500
	},
	{
	"epoch": 0.5784061696658098,
	"grad_norm": 5.8628764152526855,
	"learning_rate": 1.2659383033419025e-06,
	"loss": 0.2695,
	"step": 9000
	},
	{
	"epoch": 0.6105398457583547,
	"grad_norm": 5.396206378936768,
	"learning_rate": 1.1695372750642673e-06,
	"loss": 0.2834,
	"step": 9500
	},
	{
	"epoch": 0.6426735218508998,
	"grad_norm": 4.796625137329102,
	"learning_rate": 1.0731362467866324e-06,
	"loss": 0.2739,
	"step": 10000
	},
	{
	"epoch": 0.6748071979434447,
	"grad_norm": 3.604219436645508,
	"learning_rate": 9.769280205655526e-07,
	"loss": 0.2744,
	"step": 10500
	},
	{
	"epoch": 0.7069408740359897,
	"grad_norm": 4.8642048835754395,
	"learning_rate": 8.80719794344473e-07,
	"loss": 0.2849,
	"step": 11000
	},
	{
	"epoch": 0.7390745501285347,
	"grad_norm": 4.076746940612793,
	"learning_rate": 7.84318766066838e-07,
	"loss": 0.2808,
	"step": 11500
	},
	{
	"epoch": 0.7712082262210797,
	"grad_norm": 2.8937087059020996,
	"learning_rate": 6.879177377892031e-07,
	"loss": 0.2796,
	"step": 12000
	},
	{
	"epoch": 0.8033419023136247,
	"grad_norm": 4.379210948944092,
	"learning_rate": 5.915167095115681e-07,
	"loss": 0.2772,
	"step": 12500
	},
	{
	"epoch": 0.8354755784061697,
	"grad_norm": 6.368309020996094,
	"learning_rate": 4.951156812339331e-07,
	"loss": 0.2813,
	"step": 13000
	},
	{
	"epoch": 0.8676092544987146,
	"grad_norm": 5.409502029418945,
	"learning_rate": 3.9871465295629823e-07,
	"loss": 0.2756,
	"step": 13500
	},
	{
	"epoch": 0.8997429305912596,
	"grad_norm": 2.8725733757019043,
	"learning_rate": 3.0231362467866326e-07,
	"loss": 0.2771,
	"step": 14000
	},
	{
	"epoch": 0.9318766066838047,
	"grad_norm": 8.13409423828125,
	"learning_rate": 2.059125964010283e-07,
	"loss": 0.283,
	"step": 14500
	},
	{
	"epoch": 0.9640102827763496,
	"grad_norm": 5.422169208526611,
	"learning_rate": 1.0970437017994858e-07,
	"loss": 0.2731,
	"step": 15000
	},
	{
	"epoch": 0.9961439588688946,
	"grad_norm": 4.597813606262207,
	"learning_rate": 1.3303341902313626e-08,
	"loss": 0.2865,
	"step": 15500
	}
	],
	"logging_steps": 500,
	"max_steps": 15560,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 32,
	"trial_name": null,
	"trial_params": null
	}