codellama-edugen-v2 / trainer_state.json

Upload folder using huggingface_hub

578bcf6 verified about 2 months ago

5.15 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.9950617283950618,
	"eval_steps": 100,
	"global_step": 202,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.09876543209876543,
	"grad_norm": 0.3113946318626404,
	"learning_rate": 0.00019988322268323268,
	"loss": 1.1897,
	"step": 10
	},
	{
	"epoch": 0.19753086419753085,
	"grad_norm": 0.25871261954307556,
	"learning_rate": 0.00019781476007338058,
	"loss": 0.6655,
	"step": 20
	},
	{
	"epoch": 0.2962962962962963,
	"grad_norm": 0.13192948698997498,
	"learning_rate": 0.0001932129465573568,
	"loss": 0.363,
	"step": 30
	},
	{
	"epoch": 0.3950617283950617,
	"grad_norm": 0.1178111806511879,
	"learning_rate": 0.00018619696668800492,
	"loss": 0.2902,
	"step": 40
	},
	{
	"epoch": 0.49382716049382713,
	"grad_norm": 0.06365109980106354,
	"learning_rate": 0.00017694853065861662,
	"loss": 0.2627,
	"step": 50
	},
	{
	"epoch": 0.5925925925925926,
	"grad_norm": 0.06805580109357834,
	"learning_rate": 0.0001657071681043731,
	"loss": 0.2479,
	"step": 60
	},
	{
	"epoch": 0.691358024691358,
	"grad_norm": 0.05795082449913025,
	"learning_rate": 0.0001527640244106133,
	"loss": 0.248,
	"step": 70
	},
	{
	"epoch": 0.7901234567901234,
	"grad_norm": 0.054490040987730026,
	"learning_rate": 0.0001384543202002851,
	"loss": 0.2374,
	"step": 80
	},
	{
	"epoch": 0.8888888888888888,
	"grad_norm": 0.050945691764354706,
	"learning_rate": 0.00012314866929589432,
	"loss": 0.2364,
	"step": 90
	},
	{
	"epoch": 0.9876543209876543,
	"grad_norm": 0.05336359888315201,
	"learning_rate": 0.00010724348001617625,
	"loss": 0.2314,
	"step": 100
	},
	{
	"epoch": 0.9876543209876543,
	"eval_loss": 0.23581312596797943,
	"eval_runtime": 270.7343,
	"eval_samples_per_second": 0.665,
	"eval_steps_per_second": 0.665,
	"step": 100
	},
	{
	"epoch": 1.0864197530864197,
	"grad_norm": 0.052394308149814606,
	"learning_rate": 9.115068840886417e-05,
	"loss": 0.2339,
	"step": 110
	},
	{
	"epoch": 1.1851851851851851,
	"grad_norm": 0.06545981019735336,
	"learning_rate": 7.528708932343304e-05,
	"loss": 0.2301,
	"step": 120
	},
	{
	"epoch": 1.2839506172839505,
	"grad_norm": 0.06937788426876068,
	"learning_rate": 6.006354164343046e-05,
	"loss": 0.2322,
	"step": 130
	},
	{
	"epoch": 1.382716049382716,
	"grad_norm": 0.07582119852304459,
	"learning_rate": 4.587432725720687e-05,
	"loss": 0.2234,
	"step": 140
	},
	{
	"epoch": 1.4814814814814814,
	"grad_norm": 0.11781789362430573,
	"learning_rate": 3.308693936411421e-05,
	"loss": 0.2164,
	"step": 150
	},
	{
	"epoch": 1.5802469135802468,
	"grad_norm": 0.08647400885820389,
	"learning_rate": 2.2032564593677774e-05,
	"loss": 0.2197,
	"step": 160
	},
	{
	"epoch": 1.6790123456790123,
	"grad_norm": 0.09350328147411346,
	"learning_rate": 1.2997505445856084e-05,
	"loss": 0.2194,
	"step": 170
	},
	{
	"epoch": 1.7777777777777777,
	"grad_norm": 0.14273136854171753,
	"learning_rate": 6.215765206679569e-06,
	"loss": 0.2219,
	"step": 180
	},
	{
	"epoch": 1.876543209876543,
	"grad_norm": 0.10584201663732529,
	"learning_rate": 1.8629873860586566e-06,
	"loss": 0.2298,
	"step": 190
	},
	{
	"epoch": 1.9753086419753085,
	"grad_norm": 0.08690895885229111,
	"learning_rate": 5.190664313851068e-08,
	"loss": 0.2228,
	"step": 200
	},
	{
	"epoch": 1.9753086419753085,
	"eval_loss": 0.2289411574602127,
	"eval_runtime": 273.2063,
	"eval_samples_per_second": 0.659,
	"eval_steps_per_second": 0.659,
	"step": 200
	},
	{
	"epoch": 1.9950617283950618,
	"step": 202,
	"total_flos": 1.2609051312227942e+17,
	"train_loss": 0.3101574260704588,
	"train_runtime": 14633.2305,
	"train_samples_per_second": 0.221,
	"train_steps_per_second": 0.014
	},
	{
	"epoch": 1.9950617283950618,
	"eval_loss": 0.22894252836704254,
	"eval_runtime": 273.8519,
	"eval_samples_per_second": 0.657,
	"eval_steps_per_second": 0.657,
	"step": 202
	}
	],
	"logging_steps": 10,
	"max_steps": 202,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.2609051312227942e+17,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}