CodeLlama-7b-Instruct-hf-luau / checkpoint-117 /trainer_state.json

Add files using upload-large-folder tool

fdaaa60 verified about 2 months ago

4.93 kB

	{
	"best_global_step": 100,
	"best_metric": 0.6652931571006775,
	"best_model_checkpoint": "./luau-model/checkpoint-100",
	"epoch": 3.0,
	"eval_steps": 100,
	"global_step": 117,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.1282051282051282,
	"grad_norm": 0.3991023898124695,
	"learning_rate": 1.6000000000000003e-05,
	"loss": 1.1619,
	"step": 5
	},
	{
	"epoch": 0.2564102564102564,
	"grad_norm": 0.5307971239089966,
	"learning_rate": 3.6e-05,
	"loss": 1.1662,
	"step": 10
	},
	{
	"epoch": 0.38461538461538464,
	"grad_norm": 0.6414325833320618,
	"learning_rate": 5.6000000000000006e-05,
	"loss": 1.1415,
	"step": 15
	},
	{
	"epoch": 0.5128205128205128,
	"grad_norm": 4.2611846923828125,
	"learning_rate": 7.6e-05,
	"loss": 0.9051,
	"step": 20
	},
	{
	"epoch": 0.6410256410256411,
	"grad_norm": 0.38137564063072205,
	"learning_rate": 9.6e-05,
	"loss": 0.8326,
	"step": 25
	},
	{
	"epoch": 0.7692307692307693,
	"grad_norm": 0.32487988471984863,
	"learning_rate": 0.000116,
	"loss": 0.7638,
	"step": 30
	},
	{
	"epoch": 0.8974358974358975,
	"grad_norm": 0.39361336827278137,
	"learning_rate": 0.00013600000000000003,
	"loss": 0.7069,
	"step": 35
	},
	{
	"epoch": 1.0256410256410255,
	"grad_norm": 0.3810354471206665,
	"learning_rate": 0.00015600000000000002,
	"loss": 0.652,
	"step": 40
	},
	{
	"epoch": 1.1538461538461537,
	"grad_norm": 0.33373090624809265,
	"learning_rate": 0.00017600000000000002,
	"loss": 0.651,
	"step": 45
	},
	{
	"epoch": 1.282051282051282,
	"grad_norm": 0.46736040711402893,
	"learning_rate": 0.000196,
	"loss": 0.6554,
	"step": 50
	},
	{
	"epoch": 1.4102564102564101,
	"grad_norm": 0.34294310212135315,
	"learning_rate": 0.00018805970149253734,
	"loss": 0.557,
	"step": 55
	},
	{
	"epoch": 1.5384615384615383,
	"grad_norm": 0.36410510540008545,
	"learning_rate": 0.00017313432835820895,
	"loss": 0.5899,
	"step": 60
	},
	{
	"epoch": 1.6666666666666665,
	"grad_norm": 0.8159669041633606,
	"learning_rate": 0.00015820895522388059,
	"loss": 0.5576,
	"step": 65
	},
	{
	"epoch": 1.7948717948717947,
	"grad_norm": 0.41050034761428833,
	"learning_rate": 0.00014328358208955225,
	"loss": 0.5889,
	"step": 70
	},
	{
	"epoch": 1.9230769230769231,
	"grad_norm": 0.3205547034740448,
	"learning_rate": 0.00012835820895522389,
	"loss": 0.5561,
	"step": 75
	},
	{
	"epoch": 2.051282051282051,
	"grad_norm": 0.29004010558128357,
	"learning_rate": 0.00011343283582089552,
	"loss": 0.5052,
	"step": 80
	},
	{
	"epoch": 2.1794871794871793,
	"grad_norm": 0.40997835993766785,
	"learning_rate": 9.850746268656717e-05,
	"loss": 0.4612,
	"step": 85
	},
	{
	"epoch": 2.3076923076923075,
	"grad_norm": 0.3654220700263977,
	"learning_rate": 8.358208955223881e-05,
	"loss": 0.4086,
	"step": 90
	},
	{
	"epoch": 2.435897435897436,
	"grad_norm": 0.34712859988212585,
	"learning_rate": 6.865671641791044e-05,
	"loss": 0.4658,
	"step": 95
	},
	{
	"epoch": 2.564102564102564,
	"grad_norm": 0.4480563700199127,
	"learning_rate": 5.373134328358209e-05,
	"loss": 0.4577,
	"step": 100
	},
	{
	"epoch": 2.564102564102564,
	"eval_loss": 0.6652931571006775,
	"eval_runtime": 5.1656,
	"eval_samples_per_second": 10.067,
	"eval_steps_per_second": 1.742,
	"step": 100
	},
	{
	"epoch": 2.6923076923076925,
	"grad_norm": 0.47177863121032715,
	"learning_rate": 3.8805970149253736e-05,
	"loss": 0.4419,
	"step": 105
	},
	{
	"epoch": 2.8205128205128203,
	"grad_norm": 0.4423898160457611,
	"learning_rate": 2.3880597014925373e-05,
	"loss": 0.3984,
	"step": 110
	},
	{
	"epoch": 2.948717948717949,
	"grad_norm": 0.4147056043148041,
	"learning_rate": 8.955223880597016e-06,
	"loss": 0.3729,
	"step": 115
	}
	],
	"logging_steps": 5,
	"max_steps": 117,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2.859666246520013e+16,
	"train_batch_size": 6,
	"trial_name": null,
	"trial_params": null
	}