codellama-edugen-v2 / trainer_state.json
Pepisxd's picture
Upload folder using huggingface_hub
578bcf6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9950617283950618,
"eval_steps": 100,
"global_step": 202,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09876543209876543,
"grad_norm": 0.3113946318626404,
"learning_rate": 0.00019988322268323268,
"loss": 1.1897,
"step": 10
},
{
"epoch": 0.19753086419753085,
"grad_norm": 0.25871261954307556,
"learning_rate": 0.00019781476007338058,
"loss": 0.6655,
"step": 20
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.13192948698997498,
"learning_rate": 0.0001932129465573568,
"loss": 0.363,
"step": 30
},
{
"epoch": 0.3950617283950617,
"grad_norm": 0.1178111806511879,
"learning_rate": 0.00018619696668800492,
"loss": 0.2902,
"step": 40
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.06365109980106354,
"learning_rate": 0.00017694853065861662,
"loss": 0.2627,
"step": 50
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.06805580109357834,
"learning_rate": 0.0001657071681043731,
"loss": 0.2479,
"step": 60
},
{
"epoch": 0.691358024691358,
"grad_norm": 0.05795082449913025,
"learning_rate": 0.0001527640244106133,
"loss": 0.248,
"step": 70
},
{
"epoch": 0.7901234567901234,
"grad_norm": 0.054490040987730026,
"learning_rate": 0.0001384543202002851,
"loss": 0.2374,
"step": 80
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.050945691764354706,
"learning_rate": 0.00012314866929589432,
"loss": 0.2364,
"step": 90
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.05336359888315201,
"learning_rate": 0.00010724348001617625,
"loss": 0.2314,
"step": 100
},
{
"epoch": 0.9876543209876543,
"eval_loss": 0.23581312596797943,
"eval_runtime": 270.7343,
"eval_samples_per_second": 0.665,
"eval_steps_per_second": 0.665,
"step": 100
},
{
"epoch": 1.0864197530864197,
"grad_norm": 0.052394308149814606,
"learning_rate": 9.115068840886417e-05,
"loss": 0.2339,
"step": 110
},
{
"epoch": 1.1851851851851851,
"grad_norm": 0.06545981019735336,
"learning_rate": 7.528708932343304e-05,
"loss": 0.2301,
"step": 120
},
{
"epoch": 1.2839506172839505,
"grad_norm": 0.06937788426876068,
"learning_rate": 6.006354164343046e-05,
"loss": 0.2322,
"step": 130
},
{
"epoch": 1.382716049382716,
"grad_norm": 0.07582119852304459,
"learning_rate": 4.587432725720687e-05,
"loss": 0.2234,
"step": 140
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.11781789362430573,
"learning_rate": 3.308693936411421e-05,
"loss": 0.2164,
"step": 150
},
{
"epoch": 1.5802469135802468,
"grad_norm": 0.08647400885820389,
"learning_rate": 2.2032564593677774e-05,
"loss": 0.2197,
"step": 160
},
{
"epoch": 1.6790123456790123,
"grad_norm": 0.09350328147411346,
"learning_rate": 1.2997505445856084e-05,
"loss": 0.2194,
"step": 170
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.14273136854171753,
"learning_rate": 6.215765206679569e-06,
"loss": 0.2219,
"step": 180
},
{
"epoch": 1.876543209876543,
"grad_norm": 0.10584201663732529,
"learning_rate": 1.8629873860586566e-06,
"loss": 0.2298,
"step": 190
},
{
"epoch": 1.9753086419753085,
"grad_norm": 0.08690895885229111,
"learning_rate": 5.190664313851068e-08,
"loss": 0.2228,
"step": 200
},
{
"epoch": 1.9753086419753085,
"eval_loss": 0.2289411574602127,
"eval_runtime": 273.2063,
"eval_samples_per_second": 0.659,
"eval_steps_per_second": 0.659,
"step": 200
},
{
"epoch": 1.9950617283950618,
"step": 202,
"total_flos": 1.2609051312227942e+17,
"train_loss": 0.3101574260704588,
"train_runtime": 14633.2305,
"train_samples_per_second": 0.221,
"train_steps_per_second": 0.014
},
{
"epoch": 1.9950617283950618,
"eval_loss": 0.22894252836704254,
"eval_runtime": 273.8519,
"eval_samples_per_second": 0.657,
"eval_steps_per_second": 0.657,
"step": 202
}
],
"logging_steps": 10,
"max_steps": 202,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2609051312227942e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}