initial fine-tuning commit

4aa6851 verified over 1 year ago

6.41 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 90.0,
	"eval_steps": 500,
	"global_step": 360,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 2.5,
	"grad_norm": 0.5360991358757019,
	"learning_rate": 0.0009980973490458728,
	"loss": 2.5493,
	"step": 10
	},
	{
	"epoch": 5.0,
	"grad_norm": 0.2548345923423767,
	"learning_rate": 0.000992403876506104,
	"loss": 1.6144,
	"step": 20
	},
	{
	"epoch": 7.5,
	"grad_norm": 0.24080035090446472,
	"learning_rate": 0.0009829629131445341,
	"loss": 1.3919,
	"step": 30
	},
	{
	"epoch": 10.0,
	"grad_norm": 0.2729661166667938,
	"learning_rate": 0.0009698463103929542,
	"loss": 1.254,
	"step": 40
	},
	{
	"epoch": 12.5,
	"grad_norm": 0.30077192187309265,
	"learning_rate": 0.0009531538935183251,
	"loss": 1.138,
	"step": 50
	},
	{
	"epoch": 15.0,
	"grad_norm": 0.3787655234336853,
	"learning_rate": 0.0009330127018922195,
	"loss": 1.0206,
	"step": 60
	},
	{
	"epoch": 17.5,
	"grad_norm": 0.416939377784729,
	"learning_rate": 0.0009095760221444959,
	"loss": 0.91,
	"step": 70
	},
	{
	"epoch": 20.0,
	"grad_norm": 0.5407611727714539,
	"learning_rate": 0.000883022221559489,
	"loss": 0.8329,
	"step": 80
	},
	{
	"epoch": 22.5,
	"grad_norm": 0.5476299524307251,
	"learning_rate": 0.0008535533905932737,
	"loss": 0.7483,
	"step": 90
	},
	{
	"epoch": 25.0,
	"grad_norm": 0.5024413466453552,
	"learning_rate": 0.0008213938048432696,
	"loss": 0.6778,
	"step": 100
	},
	{
	"epoch": 27.5,
	"grad_norm": 0.5071346759796143,
	"learning_rate": 0.0007867882181755231,
	"loss": 0.6175,
	"step": 110
	},
	{
	"epoch": 30.0,
	"grad_norm": 0.5480501055717468,
	"learning_rate": 0.00075,
	"loss": 0.5726,
	"step": 120
	},
	{
	"epoch": 32.5,
	"grad_norm": 0.5539716482162476,
	"learning_rate": 0.0007113091308703497,
	"loss": 0.5276,
	"step": 130
	},
	{
	"epoch": 35.0,
	"grad_norm": 0.5367885231971741,
	"learning_rate": 0.0006710100716628344,
	"loss": 0.4889,
	"step": 140
	},
	{
	"epoch": 37.5,
	"grad_norm": 0.5145406126976013,
	"learning_rate": 0.0006294095225512603,
	"loss": 0.4513,
	"step": 150
	},
	{
	"epoch": 40.0,
	"grad_norm": 0.5838665962219238,
	"learning_rate": 0.0005868240888334653,
	"loss": 0.4196,
	"step": 160
	},
	{
	"epoch": 42.5,
	"grad_norm": 0.49232539534568787,
	"learning_rate": 0.0005435778713738292,
	"loss": 0.3972,
	"step": 170
	},
	{
	"epoch": 45.0,
	"grad_norm": 0.4959801137447357,
	"learning_rate": 0.0005,
	"loss": 0.372,
	"step": 180
	},
	{
	"epoch": 47.5,
	"grad_norm": 0.5067233443260193,
	"learning_rate": 0.00045642212862617086,
	"loss": 0.3527,
	"step": 190
	},
	{
	"epoch": 50.0,
	"grad_norm": 0.5176546573638916,
	"learning_rate": 0.00041317591116653486,
	"loss": 0.3385,
	"step": 200
	},
	{
	"epoch": 52.5,
	"grad_norm": 0.4937039911746979,
	"learning_rate": 0.0003705904774487396,
	"loss": 0.3191,
	"step": 210
	},
	{
	"epoch": 55.0,
	"grad_norm": 0.4794902205467224,
	"learning_rate": 0.0003289899283371657,
	"loss": 0.3082,
	"step": 220
	},
	{
	"epoch": 57.5,
	"grad_norm": 0.4285900294780731,
	"learning_rate": 0.0002886908691296504,
	"loss": 0.295,
	"step": 230
	},
	{
	"epoch": 60.0,
	"grad_norm": 0.45302724838256836,
	"learning_rate": 0.0002500000000000001,
	"loss": 0.2889,
	"step": 240
	},
	{
	"epoch": 62.5,
	"grad_norm": 0.42409127950668335,
	"learning_rate": 0.00021321178182447708,
	"loss": 0.2781,
	"step": 250
	},
	{
	"epoch": 65.0,
	"grad_norm": 0.4453699588775635,
	"learning_rate": 0.0001786061951567303,
	"loss": 0.273,
	"step": 260
	},
	{
	"epoch": 67.5,
	"grad_norm": 0.4217115044593811,
	"learning_rate": 0.00014644660940672628,
	"loss": 0.2646,
	"step": 270
	},
	{
	"epoch": 70.0,
	"grad_norm": 0.43468865752220154,
	"learning_rate": 0.00011697777844051105,
	"loss": 0.2611,
	"step": 280
	},
	{
	"epoch": 72.5,
	"grad_norm": 0.41657349467277527,
	"learning_rate": 9.042397785550405e-05,
	"loss": 0.2534,
	"step": 290
	},
	{
	"epoch": 75.0,
	"grad_norm": 0.40352940559387207,
	"learning_rate": 6.698729810778065e-05,
	"loss": 0.2509,
	"step": 300
	},
	{
	"epoch": 77.5,
	"grad_norm": 0.3772071301937103,
	"learning_rate": 4.684610648167503e-05,
	"loss": 0.2493,
	"step": 310
	},
	{
	"epoch": 80.0,
	"grad_norm": 0.3778958022594452,
	"learning_rate": 3.0153689607045842e-05,
	"loss": 0.249,
	"step": 320
	},
	{
	"epoch": 82.5,
	"grad_norm": 0.38997748494148254,
	"learning_rate": 1.70370868554659e-05,
	"loss": 0.2441,
	"step": 330
	},
	{
	"epoch": 85.0,
	"grad_norm": 0.3619975447654724,
	"learning_rate": 7.59612349389599e-06,
	"loss": 0.2472,
	"step": 340
	},
	{
	"epoch": 87.5,
	"grad_norm": 0.4022546708583832,
	"learning_rate": 1.9026509541272275e-06,
	"loss": 0.2493,
	"step": 350
	},
	{
	"epoch": 90.0,
	"grad_norm": 0.36714521050453186,
	"learning_rate": 0.0,
	"loss": 0.2444,
	"step": 360
	}
	],
	"logging_steps": 10,
	"max_steps": 360,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 90,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.02434468200448e+16,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}