sanity_style / trainer_state.json
terry69's picture
Model save
826b3be verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9975550122249389,
"eval_steps": 500,
"global_step": 204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004889975550122249,
"grad_norm": 0.36700921478372245,
"learning_rate": 9.523809523809523e-06,
"loss": 1.8349,
"step": 1
},
{
"epoch": 0.02444987775061125,
"grad_norm": 0.3892962682403987,
"learning_rate": 4.761904761904762e-05,
"loss": 1.7937,
"step": 5
},
{
"epoch": 0.0488997555012225,
"grad_norm": 0.21809103204571412,
"learning_rate": 9.523809523809524e-05,
"loss": 1.7603,
"step": 10
},
{
"epoch": 0.07334963325183375,
"grad_norm": 0.1763235766556296,
"learning_rate": 0.00014285714285714287,
"loss": 1.6895,
"step": 15
},
{
"epoch": 0.097799511002445,
"grad_norm": 0.1617556242623244,
"learning_rate": 0.00019047619047619048,
"loss": 1.6347,
"step": 20
},
{
"epoch": 0.12224938875305623,
"grad_norm": 0.15643068252772752,
"learning_rate": 0.00019976432316860067,
"loss": 1.556,
"step": 25
},
{
"epoch": 0.1466992665036675,
"grad_norm": 0.15134695775897136,
"learning_rate": 0.00019880878960910772,
"loss": 1.5133,
"step": 30
},
{
"epoch": 0.17114914425427874,
"grad_norm": 0.11939382176274925,
"learning_rate": 0.00019712569994658315,
"loss": 1.4775,
"step": 35
},
{
"epoch": 0.19559902200489,
"grad_norm": 0.10829777367965486,
"learning_rate": 0.0001947274472298717,
"loss": 1.46,
"step": 40
},
{
"epoch": 0.2200488997555012,
"grad_norm": 0.13772866457253763,
"learning_rate": 0.0001916316904487005,
"loss": 1.4135,
"step": 45
},
{
"epoch": 0.24449877750611246,
"grad_norm": 0.09822827366409893,
"learning_rate": 0.00018786122450571485,
"loss": 1.4417,
"step": 50
},
{
"epoch": 0.26894865525672373,
"grad_norm": 0.11587926176152005,
"learning_rate": 0.00018344381237138472,
"loss": 1.3964,
"step": 55
},
{
"epoch": 0.293398533007335,
"grad_norm": 0.11240730186157354,
"learning_rate": 0.00017841198065767107,
"loss": 1.3837,
"step": 60
},
{
"epoch": 0.31784841075794623,
"grad_norm": 0.1128346094848023,
"learning_rate": 0.00017280278011569847,
"loss": 1.3633,
"step": 65
},
{
"epoch": 0.3422982885085575,
"grad_norm": 0.09015460553796734,
"learning_rate": 0.00016665751282095634,
"loss": 1.3806,
"step": 70
},
{
"epoch": 0.36674816625916873,
"grad_norm": 0.10347143235019492,
"learning_rate": 0.00016002142805483685,
"loss": 1.3788,
"step": 75
},
{
"epoch": 0.39119804400978,
"grad_norm": 0.6669561111631297,
"learning_rate": 0.0001529433891218185,
"loss": 1.3851,
"step": 80
},
{
"epoch": 0.4156479217603912,
"grad_norm": 0.09186122563595991,
"learning_rate": 0.0001454755135556106,
"loss": 1.3688,
"step": 85
},
{
"epoch": 0.4400977995110024,
"grad_norm": 0.09496253504589665,
"learning_rate": 0.00013767278936351854,
"loss": 1.3288,
"step": 90
},
{
"epoch": 0.46454767726161367,
"grad_norm": 0.0934552174596641,
"learning_rate": 0.00012959267013472892,
"loss": 1.3816,
"step": 95
},
{
"epoch": 0.4889975550122249,
"grad_norm": 0.10547587865492095,
"learning_rate": 0.00012129465199384157,
"loss": 1.3508,
"step": 100
},
{
"epoch": 0.5134474327628362,
"grad_norm": 0.09760937352012419,
"learning_rate": 0.00011283983551465511,
"loss": 1.3434,
"step": 105
},
{
"epoch": 0.5378973105134475,
"grad_norm": 0.09910476129769094,
"learning_rate": 0.00010429047581995546,
"loss": 1.337,
"step": 110
},
{
"epoch": 0.5623471882640587,
"grad_norm": 0.09961470132705542,
"learning_rate": 9.570952418004455e-05,
"loss": 1.3663,
"step": 115
},
{
"epoch": 0.58679706601467,
"grad_norm": 0.09739741126425534,
"learning_rate": 8.71601644853449e-05,
"loss": 1.3389,
"step": 120
},
{
"epoch": 0.6112469437652812,
"grad_norm": 0.10283288071724549,
"learning_rate": 7.870534800615845e-05,
"loss": 1.3369,
"step": 125
},
{
"epoch": 0.6356968215158925,
"grad_norm": 0.10097594818776578,
"learning_rate": 7.040732986527108e-05,
"loss": 1.3555,
"step": 130
},
{
"epoch": 0.6601466992665037,
"grad_norm": 0.09722568422567038,
"learning_rate": 6.232721063648148e-05,
"loss": 1.3537,
"step": 135
},
{
"epoch": 0.684596577017115,
"grad_norm": 0.09586006955336507,
"learning_rate": 5.452448644438946e-05,
"loss": 1.3479,
"step": 140
},
{
"epoch": 0.7090464547677262,
"grad_norm": 0.09299592243784432,
"learning_rate": 4.7056610878181486e-05,
"loss": 1.3585,
"step": 145
},
{
"epoch": 0.7334963325183375,
"grad_norm": 0.10069734955186067,
"learning_rate": 3.997857194516319e-05,
"loss": 1.36,
"step": 150
},
{
"epoch": 0.7579462102689487,
"grad_norm": 0.09194306954899185,
"learning_rate": 3.334248717904368e-05,
"loss": 1.3571,
"step": 155
},
{
"epoch": 0.78239608801956,
"grad_norm": 0.0926753184350128,
"learning_rate": 2.719721988430153e-05,
"loss": 1.3312,
"step": 160
},
{
"epoch": 0.8068459657701712,
"grad_norm": 0.09878832937752359,
"learning_rate": 2.1588019342328968e-05,
"loss": 1.3208,
"step": 165
},
{
"epoch": 0.8312958435207825,
"grad_norm": 0.0960429927663343,
"learning_rate": 1.6556187628615273e-05,
"loss": 1.3149,
"step": 170
},
{
"epoch": 0.8557457212713936,
"grad_norm": 0.0899273808312634,
"learning_rate": 1.2138775494285182e-05,
"loss": 1.3296,
"step": 175
},
{
"epoch": 0.8801955990220048,
"grad_norm": 0.09384612891193284,
"learning_rate": 8.368309551299536e-06,
"loss": 1.3378,
"step": 180
},
{
"epoch": 0.9046454767726161,
"grad_norm": 0.11142899089067898,
"learning_rate": 5.272552770128314e-06,
"loss": 1.3591,
"step": 185
},
{
"epoch": 0.9290953545232273,
"grad_norm": 0.09142510348253172,
"learning_rate": 2.8743000534168675e-06,
"loss": 1.3623,
"step": 190
},
{
"epoch": 0.9535452322738386,
"grad_norm": 0.09320844556768892,
"learning_rate": 1.1912103908922945e-06,
"loss": 1.3553,
"step": 195
},
{
"epoch": 0.9779951100244498,
"grad_norm": 0.09161534630758739,
"learning_rate": 2.3567683139936735e-07,
"loss": 1.3407,
"step": 200
},
{
"epoch": 0.9975550122249389,
"eval_loss": 1.3294435739517212,
"eval_runtime": 1.8191,
"eval_samples_per_second": 3.848,
"eval_steps_per_second": 0.55,
"step": 204
},
{
"epoch": 0.9975550122249389,
"step": 204,
"total_flos": 2562962409652224.0,
"train_loss": 1.4071934585477792,
"train_runtime": 4285.5681,
"train_samples_per_second": 3.053,
"train_steps_per_second": 0.048
}
],
"logging_steps": 5,
"max_steps": 204,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2562962409652224.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}