llm_reason / codellama /c /dmcodegen /dmcodegen_base_c /trainer_state.json

update

fe7b4a6 2 months ago

6.54 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.0779220779220777,
	"eval_steps": 500,
	"global_step": 180,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.05772005772005772,
	"grad_norm": 0.018310546875,
	"learning_rate": 0.0001,
	"loss": 0.5558,
	"step": 5
	},
	{
	"epoch": 0.11544011544011544,
	"grad_norm": 0.01544189453125,
	"learning_rate": 0.0001,
	"loss": 0.4953,
	"step": 10
	},
	{
	"epoch": 0.17316017316017315,
	"grad_norm": 0.01611328125,
	"learning_rate": 0.0001,
	"loss": 0.4465,
	"step": 15
	},
	{
	"epoch": 0.23088023088023088,
	"grad_norm": 0.0186767578125,
	"learning_rate": 0.0001,
	"loss": 0.419,
	"step": 20
	},
	{
	"epoch": 0.2886002886002886,
	"grad_norm": 0.022705078125,
	"learning_rate": 0.0001,
	"loss": 0.5274,
	"step": 25
	},
	{
	"epoch": 0.3463203463203463,
	"grad_norm": 0.020751953125,
	"learning_rate": 0.0001,
	"loss": 0.5132,
	"step": 30
	},
	{
	"epoch": 0.40404040404040403,
	"grad_norm": 0.0174560546875,
	"learning_rate": 0.0001,
	"loss": 0.4466,
	"step": 35
	},
	{
	"epoch": 0.46176046176046176,
	"grad_norm": 0.01904296875,
	"learning_rate": 0.0001,
	"loss": 0.4116,
	"step": 40
	},
	{
	"epoch": 0.5194805194805194,
	"grad_norm": 0.026611328125,
	"learning_rate": 0.0001,
	"loss": 0.4913,
	"step": 45
	},
	{
	"epoch": 0.5772005772005772,
	"grad_norm": 0.0208740234375,
	"learning_rate": 0.0001,
	"loss": 0.4982,
	"step": 50
	},
	{
	"epoch": 0.6349206349206349,
	"grad_norm": 0.021484375,
	"learning_rate": 0.0001,
	"loss": 0.4586,
	"step": 55
	},
	{
	"epoch": 0.6926406926406926,
	"grad_norm": 0.0191650390625,
	"learning_rate": 0.0001,
	"loss": 0.4132,
	"step": 60
	},
	{
	"epoch": 0.7503607503607503,
	"grad_norm": 0.03125,
	"learning_rate": 0.0001,
	"loss": 0.445,
	"step": 65
	},
	{
	"epoch": 0.8080808080808081,
	"grad_norm": 0.0242919921875,
	"learning_rate": 0.0001,
	"loss": 0.5186,
	"step": 70
	},
	{
	"epoch": 0.8658008658008658,
	"grad_norm": 0.022216796875,
	"learning_rate": 0.0001,
	"loss": 0.463,
	"step": 75
	},
	{
	"epoch": 0.9235209235209235,
	"grad_norm": 0.02001953125,
	"learning_rate": 0.0001,
	"loss": 0.4233,
	"step": 80
	},
	{
	"epoch": 0.9812409812409812,
	"grad_norm": 0.0299072265625,
	"learning_rate": 0.0001,
	"loss": 0.4396,
	"step": 85
	},
	{
	"epoch": 1.0389610389610389,
	"grad_norm": 0.02685546875,
	"learning_rate": 0.0001,
	"loss": 0.4944,
	"step": 90
	},
	{
	"epoch": 1.0966810966810967,
	"grad_norm": 0.0260009765625,
	"learning_rate": 0.0001,
	"loss": 0.4896,
	"step": 95
	},
	{
	"epoch": 1.1544011544011543,
	"grad_norm": 0.0247802734375,
	"learning_rate": 0.0001,
	"loss": 0.4402,
	"step": 100
	},
	{
	"epoch": 1.2121212121212122,
	"grad_norm": 0.0240478515625,
	"learning_rate": 0.0001,
	"loss": 0.3963,
	"step": 105
	},
	{
	"epoch": 1.2698412698412698,
	"grad_norm": 0.037353515625,
	"learning_rate": 0.0001,
	"loss": 0.4535,
	"step": 110
	},
	{
	"epoch": 1.3275613275613276,
	"grad_norm": 0.032470703125,
	"learning_rate": 0.0001,
	"loss": 0.5045,
	"step": 115
	},
	{
	"epoch": 1.3852813852813852,
	"grad_norm": 0.0301513671875,
	"learning_rate": 0.0001,
	"loss": 0.4466,
	"step": 120
	},
	{
	"epoch": 1.443001443001443,
	"grad_norm": 0.0244140625,
	"learning_rate": 0.0001,
	"loss": 0.4095,
	"step": 125
	},
	{
	"epoch": 1.5007215007215007,
	"grad_norm": 0.046630859375,
	"learning_rate": 0.0001,
	"loss": 0.4346,
	"step": 130
	},
	{
	"epoch": 1.5584415584415585,
	"grad_norm": 0.0299072265625,
	"learning_rate": 0.0001,
	"loss": 0.5046,
	"step": 135
	},
	{
	"epoch": 1.6161616161616161,
	"grad_norm": 0.032958984375,
	"learning_rate": 0.0001,
	"loss": 0.4556,
	"step": 140
	},
	{
	"epoch": 1.6738816738816737,
	"grad_norm": 0.0272216796875,
	"learning_rate": 0.0001,
	"loss": 0.4245,
	"step": 145
	},
	{
	"epoch": 1.7316017316017316,
	"grad_norm": 0.036865234375,
	"learning_rate": 0.0001,
	"loss": 0.3834,
	"step": 150
	},
	{
	"epoch": 1.7893217893217894,
	"grad_norm": 0.03662109375,
	"learning_rate": 0.0001,
	"loss": 0.5163,
	"step": 155
	},
	{
	"epoch": 1.847041847041847,
	"grad_norm": 0.033935546875,
	"learning_rate": 0.0001,
	"loss": 0.4565,
	"step": 160
	},
	{
	"epoch": 1.9047619047619047,
	"grad_norm": 0.02880859375,
	"learning_rate": 0.0001,
	"loss": 0.4164,
	"step": 165
	},
	{
	"epoch": 1.9624819624819625,
	"grad_norm": 0.03271484375,
	"learning_rate": 0.0001,
	"loss": 0.3956,
	"step": 170
	},
	{
	"epoch": 2.0202020202020203,
	"grad_norm": 0.041748046875,
	"learning_rate": 0.0001,
	"loss": 0.4453,
	"step": 175
	},
	{
	"epoch": 2.0779220779220777,
	"grad_norm": 0.03857421875,
	"learning_rate": 0.0001,
	"loss": 0.4681,
	"step": 180
	},
	{
	"epoch": 2.0779220779220777,
	"step": 180,
	"total_flos": 1.8562430640540058e+18,
	"train_loss": 0.45838437411520216,
	"train_runtime": 56927.0701,
	"train_samples_per_second": 0.405,
	"train_steps_per_second": 0.003
	}
	],
	"logging_steps": 5,
	"max_steps": 180,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 180,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.8562430640540058e+18,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}