Model save

30cc802 verified about 1 year ago

6.92 kB

	{
	"best_global_step": 750,
	"best_metric": 1.321813941001892,
	"best_model_checkpoint": "./results_billsum_tfidf_hybrid_FINAL/checkpoint-750",
	"epoch": 4.0,
	"eval_steps": 500,
	"global_step": 1500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.13333333333333333,
	"grad_norm": 895813.875,
	"learning_rate": 1.9477333333333334e-05,
	"loss": 4.0013,
	"step": 50
	},
	{
	"epoch": 0.26666666666666666,
	"grad_norm": 862744.8125,
	"learning_rate": 1.8944000000000004e-05,
	"loss": 2.6667,
	"step": 100
	},
	{
	"epoch": 0.4,
	"grad_norm": 899719.8125,
	"learning_rate": 1.8410666666666666e-05,
	"loss": 2.1126,
	"step": 150
	},
	{
	"epoch": 0.5333333333333333,
	"grad_norm": 320775.78125,
	"learning_rate": 1.7877333333333336e-05,
	"loss": 1.7175,
	"step": 200
	},
	{
	"epoch": 0.6666666666666666,
	"grad_norm": 193230.234375,
	"learning_rate": 1.7344000000000002e-05,
	"loss": 1.4785,
	"step": 250
	},
	{
	"epoch": 0.8,
	"grad_norm": 163820.453125,
	"learning_rate": 1.6810666666666668e-05,
	"loss": 1.3931,
	"step": 300
	},
	{
	"epoch": 0.9333333333333333,
	"grad_norm": 130176.8515625,
	"learning_rate": 1.6277333333333334e-05,
	"loss": 1.4148,
	"step": 350
	},
	{
	"epoch": 1.0,
	"eval_loss": 1.3602980375289917,
	"eval_runtime": 801.5536,
	"eval_samples_per_second": 4.078,
	"eval_steps_per_second": 0.51,
	"step": 375
	},
	{
	"epoch": 1.0666666666666667,
	"grad_norm": 159181.578125,
	"learning_rate": 1.5744e-05,
	"loss": 1.27,
	"step": 400
	},
	{
	"epoch": 1.2,
	"grad_norm": 165699.671875,
	"learning_rate": 1.5210666666666666e-05,
	"loss": 1.215,
	"step": 450
	},
	{
	"epoch": 1.3333333333333333,
	"grad_norm": 161035.640625,
	"learning_rate": 1.4677333333333334e-05,
	"loss": 1.2164,
	"step": 500
	},
	{
	"epoch": 1.4666666666666668,
	"grad_norm": 176868.390625,
	"learning_rate": 1.4144000000000002e-05,
	"loss": 1.1381,
	"step": 550
	},
	{
	"epoch": 1.6,
	"grad_norm": 136688.375,
	"learning_rate": 1.3610666666666668e-05,
	"loss": 1.113,
	"step": 600
	},
	{
	"epoch": 1.7333333333333334,
	"grad_norm": 127716.859375,
	"learning_rate": 1.3077333333333334e-05,
	"loss": 1.1693,
	"step": 650
	},
	{
	"epoch": 1.8666666666666667,
	"grad_norm": 223826.046875,
	"learning_rate": 1.2544e-05,
	"loss": 1.1511,
	"step": 700
	},
	{
	"epoch": 2.0,
	"grad_norm": 201256.53125,
	"learning_rate": 1.2010666666666668e-05,
	"loss": 1.1867,
	"step": 750
	},
	{
	"epoch": 2.0,
	"eval_loss": 1.321813941001892,
	"eval_runtime": 801.0496,
	"eval_samples_per_second": 4.081,
	"eval_steps_per_second": 0.511,
	"step": 750
	},
	{
	"epoch": 2.1333333333333333,
	"grad_norm": 145709.046875,
	"learning_rate": 1.1477333333333334e-05,
	"loss": 0.9941,
	"step": 800
	},
	{
	"epoch": 2.2666666666666666,
	"grad_norm": 150102.78125,
	"learning_rate": 1.0944e-05,
	"loss": 1.0089,
	"step": 850
	},
	{
	"epoch": 2.4,
	"grad_norm": 142779.484375,
	"learning_rate": 1.0410666666666667e-05,
	"loss": 1.0403,
	"step": 900
	},
	{
	"epoch": 2.533333333333333,
	"grad_norm": 133685.625,
	"learning_rate": 9.877333333333335e-06,
	"loss": 0.9613,
	"step": 950
	},
	{
	"epoch": 2.6666666666666665,
	"grad_norm": 140843.96875,
	"learning_rate": 9.344e-06,
	"loss": 1.0238,
	"step": 1000
	},
	{
	"epoch": 2.8,
	"grad_norm": 146774.203125,
	"learning_rate": 8.810666666666667e-06,
	"loss": 1.0013,
	"step": 1050
	},
	{
	"epoch": 2.9333333333333336,
	"grad_norm": 139126.03125,
	"learning_rate": 8.277333333333335e-06,
	"loss": 0.9859,
	"step": 1100
	},
	{
	"epoch": 3.0,
	"eval_loss": 1.3303395509719849,
	"eval_runtime": 801.6476,
	"eval_samples_per_second": 4.078,
	"eval_steps_per_second": 0.51,
	"step": 1125
	},
	{
	"epoch": 3.066666666666667,
	"grad_norm": 133347.453125,
	"learning_rate": 7.744e-06,
	"loss": 0.8982,
	"step": 1150
	},
	{
	"epoch": 3.2,
	"grad_norm": 136616.390625,
	"learning_rate": 7.210666666666667e-06,
	"loss": 0.9191,
	"step": 1200
	},
	{
	"epoch": 3.3333333333333335,
	"grad_norm": 149049.578125,
	"learning_rate": 6.677333333333334e-06,
	"loss": 0.8836,
	"step": 1250
	},
	{
	"epoch": 3.466666666666667,
	"grad_norm": 139382.484375,
	"learning_rate": 6.144e-06,
	"loss": 0.8808,
	"step": 1300
	},
	{
	"epoch": 3.6,
	"grad_norm": 162289.140625,
	"learning_rate": 5.610666666666668e-06,
	"loss": 0.9171,
	"step": 1350
	},
	{
	"epoch": 3.7333333333333334,
	"grad_norm": 154288.703125,
	"learning_rate": 5.077333333333334e-06,
	"loss": 0.8728,
	"step": 1400
	},
	{
	"epoch": 3.8666666666666667,
	"grad_norm": 137111.40625,
	"learning_rate": 4.544000000000001e-06,
	"loss": 0.8802,
	"step": 1450
	},
	{
	"epoch": 4.0,
	"grad_norm": 147018.34375,
	"learning_rate": 4.010666666666667e-06,
	"loss": 0.8782,
	"step": 1500
	},
	{
	"epoch": 4.0,
	"eval_loss": 1.354467749595642,
	"eval_runtime": 800.7032,
	"eval_samples_per_second": 4.083,
	"eval_steps_per_second": 0.511,
	"step": 1500
	},
	{
	"epoch": 4.0,
	"step": 1500,
	"total_flos": 2.8791111942144e+16,
	"train_loss": 1.2796544367472331,
	"train_runtime": 12353.8951,
	"train_samples_per_second": 1.214,
	"train_steps_per_second": 0.152
	}
	],
	"logging_steps": 50,
	"max_steps": 1875,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5,
	"save_steps": 500,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 2,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 2
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2.8791111942144e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}