starcoder1b-finetune / checkpoint-75 /trainer_state.json

Initial model upload

6ea23d0 verified 7 months ago

3.3 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 100.0,
	"global_step": 75,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.06666666666666667,
	"grad_norm": 0.12692080438137054,
	"learning_rate": 0.00015,
	"loss": 0.673,
	"step": 5
	},
	{
	"epoch": 0.13333333333333333,
	"grad_norm": 0.2591029405593872,
	"learning_rate": 0.00029983513372285666,
	"loss": 0.5109,
	"step": 10
	},
	{
	"epoch": 0.2,
	"grad_norm": 0.1829962283372879,
	"learning_rate": 0.000294102778136526,
	"loss": 0.2893,
	"step": 15
	},
	{
	"epoch": 0.26666666666666666,
	"grad_norm": 0.13255836069583893,
	"learning_rate": 0.0002804859558613185,
	"loss": 0.137,
	"step": 20
	},
	{
	"epoch": 0.3333333333333333,
	"grad_norm": 0.1007906049489975,
	"learning_rate": 0.0002597297002895503,
	"loss": 0.0797,
	"step": 25
	},
	{
	"epoch": 0.4,
	"grad_norm": 0.07617493718862534,
	"learning_rate": 0.00023296967311877987,
	"loss": 0.061,
	"step": 30
	},
	{
	"epoch": 0.4666666666666667,
	"grad_norm": 0.06615506857633591,
	"learning_rate": 0.00020167002754264992,
	"loss": 0.0501,
	"step": 35
	},
	{
	"epoch": 0.5333333333333333,
	"grad_norm": 0.060833219438791275,
	"learning_rate": 0.0001675432982958124,
	"loss": 0.0472,
	"step": 40
	},
	{
	"epoch": 0.6,
	"grad_norm": 0.04784240573644638,
	"learning_rate": 0.00013245670170418763,
	"loss": 0.0434,
	"step": 45
	},
	{
	"epoch": 0.6666666666666666,
	"grad_norm": 0.06088828667998314,
	"learning_rate": 9.832997245735007e-05,
	"loss": 0.0418,
	"step": 50
	},
	{
	"epoch": 0.7333333333333333,
	"grad_norm": 0.043038949370384216,
	"learning_rate": 6.703032688122008e-05,
	"loss": 0.0389,
	"step": 55
	},
	{
	"epoch": 0.8,
	"grad_norm": 0.044740188866853714,
	"learning_rate": 4.027029971044968e-05,
	"loss": 0.0386,
	"step": 60
	},
	{
	"epoch": 0.8666666666666667,
	"grad_norm": 0.04038878157734871,
	"learning_rate": 1.9514044138681466e-05,
	"loss": 0.0359,
	"step": 65
	},
	{
	"epoch": 0.9333333333333333,
	"grad_norm": 0.04306699335575104,
	"learning_rate": 5.897221863473989e-06,
	"loss": 0.0382,
	"step": 70
	},
	{
	"epoch": 1.0,
	"grad_norm": 0.04545782133936882,
	"learning_rate": 1.6486627714329403e-07,
	"loss": 0.0397,
	"step": 75
	}
	],
	"logging_steps": 5,
	"max_steps": 75,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 9223372036854775807,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 6.14586868826112e+16,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}