engels / trainer_state.json

Upload phase 1 epoch 1 - finetuned gemma-4-e4b-it teacher

d2c050c verified 2 days ago

5.02 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 234,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.042735042735042736,
	"grad_norm": 25.421951293945312,
	"learning_rate": 3.888888888888889e-05,
	"loss": 14.74276123046875,
	"step": 10
	},
	{
	"epoch": 0.08547008547008547,
	"grad_norm": 12.622594833374023,
	"learning_rate": 9.444444444444444e-05,
	"loss": 9.163805389404297,
	"step": 20
	},
	{
	"epoch": 0.1282051282051282,
	"grad_norm": 8.006195068359375,
	"learning_rate": 0.00015000000000000001,
	"loss": 5.28853759765625,
	"step": 30
	},
	{
	"epoch": 0.17094017094017094,
	"grad_norm": 3.498350143432617,
	"learning_rate": 0.00019999888744757143,
	"loss": 4.076284027099609,
	"step": 40
	},
	{
	"epoch": 0.21367521367521367,
	"grad_norm": 4.330902576446533,
	"learning_rate": 0.00019986541110764565,
	"loss": 3.210728073120117,
	"step": 50
	},
	{
	"epoch": 0.2564102564102564,
	"grad_norm": 5.267370700836182,
	"learning_rate": 0.0001995097645450266,
	"loss": 2.6838237762451174,
	"step": 60
	},
	{
	"epoch": 0.29914529914529914,
	"grad_norm": 3.2072625160217285,
	"learning_rate": 0.00019893273896534936,
	"loss": 2.4369382858276367,
	"step": 70
	},
	{
	"epoch": 0.3418803418803419,
	"grad_norm": 3.1016528606414795,
	"learning_rate": 0.00019813561807535598,
	"loss": 2.205874443054199,
	"step": 80
	},
	{
	"epoch": 0.38461538461538464,
	"grad_norm": 3.8450214862823486,
	"learning_rate": 0.00019712017522703764,
	"loss": 1.9279813766479492,
	"step": 90
	},
	{
	"epoch": 0.42735042735042733,
	"grad_norm": 2.348071575164795,
	"learning_rate": 0.00019588866947246498,
	"loss": 1.8235645294189453,
	"step": 100
	},
	{
	"epoch": 0.4700854700854701,
	"grad_norm": 3.2652463912963867,
	"learning_rate": 0.00019444384053808288,
	"loss": 1.8220790863037108,
	"step": 110
	},
	{
	"epoch": 0.5128205128205128,
	"grad_norm": 2.6423192024230957,
	"learning_rate": 0.00019278890272965096,
	"loss": 1.7959518432617188,
	"step": 120
	},
	{
	"epoch": 0.5555555555555556,
	"grad_norm": 2.6279354095458984,
	"learning_rate": 0.00019092753778138886,
	"loss": 1.7804344177246094,
	"step": 130
	},
	{
	"epoch": 0.5982905982905983,
	"grad_norm": 2.6313953399658203,
	"learning_rate": 0.0001888638866652356,
	"loss": 1.642679214477539,
	"step": 140
	},
	{
	"epoch": 0.6410256410256411,
	"grad_norm": 2.1009438037872314,
	"learning_rate": 0.00018660254037844388,
	"loss": 1.545415496826172,
	"step": 150
	},
	{
	"epoch": 0.6837606837606838,
	"grad_norm": 2.672374963760376,
	"learning_rate": 0.00018414852973000503,
	"loss": 1.5645628929138184,
	"step": 160
	},
	{
	"epoch": 0.7264957264957265,
	"grad_norm": 2.6783759593963623,
	"learning_rate": 0.00018150731414862622,
	"loss": 1.5343215942382813,
	"step": 170
	},
	{
	"epoch": 0.7692307692307693,
	"grad_norm": 2.3677117824554443,
	"learning_rate": 0.000178684769537159,
	"loss": 1.5453574180603027,
	"step": 180
	},
	{
	"epoch": 0.811965811965812,
	"grad_norm": 2.3082728385925293,
	"learning_rate": 0.0001756871752004992,
	"loss": 1.5324308395385742,
	"step": 190
	},
	{
	"epoch": 0.8547008547008547,
	"grad_norm": 1.969205617904663,
	"learning_rate": 0.00017252119987603973,
	"loss": 1.5409900665283203,
	"step": 200
	},
	{
	"epoch": 0.8974358974358975,
	"grad_norm": 2.5397582054138184,
	"learning_rate": 0.00016919388689775464,
	"loss": 1.4344990730285645,
	"step": 210
	},
	{
	"epoch": 0.9401709401709402,
	"grad_norm": 2.0636305809020996,
	"learning_rate": 0.00016571263852691888,
	"loss": 1.4311028480529786,
	"step": 220
	},
	{
	"epoch": 0.9829059829059829,
	"grad_norm": 2.4687087535858154,
	"learning_rate": 0.0001620851994843244,
	"loss": 1.461498737335205,
	"step": 230
	}
	],
	"logging_steps": 10,
	"max_steps": 702,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.961214772268672e+16,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}