sunbirdtutor-gemma-4-e2b / trainer_state.json
jq's picture
Upload folder using huggingface_hub
6941e5d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 53,
"global_step": 528,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018957345971563982,
"grad_norm": 1.484375,
"learning_rate": 4.5e-06,
"loss": 0.37855618000030516,
"step": 10
},
{
"epoch": 0.037914691943127965,
"grad_norm": 0.78125,
"learning_rate": 9.5e-06,
"loss": 0.374945330619812,
"step": 20
},
{
"epoch": 0.05687203791469194,
"grad_norm": 0.81640625,
"learning_rate": 1.45e-05,
"loss": 0.362351393699646,
"step": 30
},
{
"epoch": 0.07582938388625593,
"grad_norm": 0.94140625,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.3529045104980469,
"step": 40
},
{
"epoch": 0.0947867298578199,
"grad_norm": 0.82421875,
"learning_rate": 2.45e-05,
"loss": 0.33787591457366944,
"step": 50
},
{
"epoch": 0.1004739336492891,
"eval_text_loss": 1.4636504650115967,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.8317,
"eval_text_samples_per_second": 12.418,
"eval_text_steps_per_second": 12.418,
"step": 53
},
{
"epoch": 0.1004739336492891,
"eval_audio_loss": 0.7069099545478821,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 21.032,
"eval_audio_samples_per_second": 6.324,
"eval_audio_steps_per_second": 6.324,
"step": 53
},
{
"epoch": 0.11374407582938388,
"grad_norm": 0.99609375,
"learning_rate": 2.95e-05,
"loss": 0.33856768608093263,
"step": 60
},
{
"epoch": 0.13270142180094788,
"grad_norm": 0.65234375,
"learning_rate": 3.45e-05,
"loss": 0.35529475212097167,
"step": 70
},
{
"epoch": 0.15165876777251186,
"grad_norm": 0.87109375,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.2936519384384155,
"step": 80
},
{
"epoch": 0.17061611374407584,
"grad_norm": 0.68359375,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.318320107460022,
"step": 90
},
{
"epoch": 0.1895734597156398,
"grad_norm": 0.7890625,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.31594276428222656,
"step": 100
},
{
"epoch": 0.2009478672985782,
"eval_text_loss": 1.329001545906067,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.4604,
"eval_text_samples_per_second": 13.452,
"eval_text_steps_per_second": 13.452,
"step": 106
},
{
"epoch": 0.2009478672985782,
"eval_audio_loss": 0.7277711629867554,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.1176,
"eval_audio_samples_per_second": 6.611,
"eval_audio_steps_per_second": 6.611,
"step": 106
},
{
"epoch": 0.20853080568720378,
"grad_norm": 0.8203125,
"learning_rate": 4.9945468268142656e-05,
"loss": 0.31812191009521484,
"step": 110
},
{
"epoch": 0.22748815165876776,
"grad_norm": 1.0,
"learning_rate": 4.975726891929585e-05,
"loss": 0.29132957458496095,
"step": 120
},
{
"epoch": 0.24644549763033174,
"grad_norm": 0.609375,
"learning_rate": 4.943574200733625e-05,
"loss": 0.3463184595108032,
"step": 130
},
{
"epoch": 0.26540284360189575,
"grad_norm": 0.59375,
"learning_rate": 4.89826190802705e-05,
"loss": 0.3099159479141235,
"step": 140
},
{
"epoch": 0.2843601895734597,
"grad_norm": 0.9765625,
"learning_rate": 4.8400340382046866e-05,
"loss": 0.29767818450927735,
"step": 150
},
{
"epoch": 0.3014218009478673,
"eval_text_loss": 1.2909588813781738,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.8675,
"eval_text_samples_per_second": 12.327,
"eval_text_steps_per_second": 12.327,
"step": 159
},
{
"epoch": 0.3014218009478673,
"eval_audio_loss": 0.7182490825653076,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 19.9605,
"eval_audio_samples_per_second": 6.663,
"eval_audio_steps_per_second": 6.663,
"step": 159
},
{
"epoch": 0.3033175355450237,
"grad_norm": 1.0625,
"learning_rate": 4.769204171088849e-05,
"loss": 0.3072782039642334,
"step": 160
},
{
"epoch": 0.3222748815165877,
"grad_norm": 0.69921875,
"learning_rate": 4.6861537531795094e-05,
"loss": 0.30300824642181395,
"step": 170
},
{
"epoch": 0.3412322274881517,
"grad_norm": 0.8046875,
"learning_rate": 4.591330043415909e-05,
"loss": 0.3216289758682251,
"step": 180
},
{
"epoch": 0.36018957345971564,
"grad_norm": 0.90234375,
"learning_rate": 4.485243704512474e-05,
"loss": 0.3087095499038696,
"step": 190
},
{
"epoch": 0.3791469194312796,
"grad_norm": 0.84765625,
"learning_rate": 4.368466052840636e-05,
"loss": 0.3263149976730347,
"step": 200
},
{
"epoch": 0.3981042654028436,
"grad_norm": 1.46875,
"learning_rate": 4.2416259816670235e-05,
"loss": 0.29808921813964845,
"step": 210
},
{
"epoch": 0.4018957345971564,
"eval_text_loss": 1.2724202871322632,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.8821,
"eval_text_samples_per_second": 12.29,
"eval_text_steps_per_second": 12.29,
"step": 212
},
{
"epoch": 0.4018957345971564,
"eval_audio_loss": 0.7229443788528442,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.5668,
"eval_audio_samples_per_second": 6.467,
"eval_audio_steps_per_second": 6.467,
"step": 212
},
{
"epoch": 0.41706161137440756,
"grad_norm": 0.828125,
"learning_rate": 4.1054065743175786e-05,
"loss": 0.313119101524353,
"step": 220
},
{
"epoch": 0.43601895734597157,
"grad_norm": 1.03125,
"learning_rate": 3.9605414255070396e-05,
"loss": 0.2832280874252319,
"step": 230
},
{
"epoch": 0.4549763033175355,
"grad_norm": 0.70703125,
"learning_rate": 3.8078106906448683e-05,
"loss": 0.29762136936187744,
"step": 240
},
{
"epoch": 0.47393364928909953,
"grad_norm": 0.83203125,
"learning_rate": 3.648036884393646e-05,
"loss": 0.3122119665145874,
"step": 250
},
{
"epoch": 0.4928909952606635,
"grad_norm": 0.94140625,
"learning_rate": 3.4820804511063496e-05,
"loss": 0.2904823780059814,
"step": 260
},
{
"epoch": 0.5023696682464455,
"eval_text_loss": 1.224345088005066,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.548,
"eval_text_samples_per_second": 13.193,
"eval_text_steps_per_second": 13.193,
"step": 265
},
{
"epoch": 0.5023696682464455,
"eval_audio_loss": 0.7233917713165283,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.862,
"eval_audio_samples_per_second": 6.375,
"eval_audio_steps_per_second": 6.375,
"step": 265
},
{
"epoch": 0.5118483412322274,
"grad_norm": 1.2890625,
"learning_rate": 3.3108351309974284e-05,
"loss": 0.3033822298049927,
"step": 270
},
{
"epoch": 0.5308056872037915,
"grad_norm": 0.6484375,
"learning_rate": 3.1352231470026584e-05,
"loss": 0.3055255651473999,
"step": 280
},
{
"epoch": 0.5497630331753555,
"grad_norm": 0.90625,
"learning_rate": 2.956190238248425e-05,
"loss": 0.28981497287750246,
"step": 290
},
{
"epoch": 0.5687203791469194,
"grad_norm": 1.1171875,
"learning_rate": 2.7747005668771293e-05,
"loss": 0.32351953983306886,
"step": 300
},
{
"epoch": 0.5876777251184834,
"grad_norm": 0.71875,
"learning_rate": 2.591731525657454e-05,
"loss": 0.2949329614639282,
"step": 310
},
{
"epoch": 0.6028436018957346,
"eval_text_loss": 1.2085834741592407,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 5.1662,
"eval_text_samples_per_second": 11.614,
"eval_text_steps_per_second": 11.614,
"step": 318
},
{
"epoch": 0.6028436018957346,
"eval_audio_loss": 0.7195463180541992,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.8146,
"eval_audio_samples_per_second": 6.39,
"eval_audio_steps_per_second": 6.39,
"step": 318
},
{
"epoch": 0.6066350710900474,
"grad_norm": 0.7578125,
"learning_rate": 2.4082684743425458e-05,
"loss": 0.28318300247192385,
"step": 320
},
{
"epoch": 0.6255924170616114,
"grad_norm": 0.859375,
"learning_rate": 2.2252994331228713e-05,
"loss": 0.286310601234436,
"step": 330
},
{
"epoch": 0.6445497630331753,
"grad_norm": 0.97265625,
"learning_rate": 2.0438097617515745e-05,
"loss": 0.28706789016723633,
"step": 340
},
{
"epoch": 0.6635071090047393,
"grad_norm": 0.71875,
"learning_rate": 1.8647768529973425e-05,
"loss": 0.31264851093292234,
"step": 350
},
{
"epoch": 0.6824644549763034,
"grad_norm": 0.89453125,
"learning_rate": 1.6891648690025718e-05,
"loss": 0.2824721813201904,
"step": 360
},
{
"epoch": 0.7014218009478673,
"grad_norm": 0.84375,
"learning_rate": 1.5179195488936504e-05,
"loss": 0.2849812269210815,
"step": 370
},
{
"epoch": 0.7033175355450237,
"eval_text_loss": 1.1984789371490479,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.7136,
"eval_text_samples_per_second": 12.729,
"eval_text_steps_per_second": 12.729,
"step": 371
},
{
"epoch": 0.7033175355450237,
"eval_audio_loss": 0.719244122505188,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 19.3073,
"eval_audio_samples_per_second": 6.889,
"eval_audio_steps_per_second": 6.889,
"step": 371
},
{
"epoch": 0.7203791469194313,
"grad_norm": 0.671875,
"learning_rate": 1.3519631156063539e-05,
"loss": 0.30515313148498535,
"step": 380
},
{
"epoch": 0.7393364928909952,
"grad_norm": 0.9296875,
"learning_rate": 1.1921893093551322e-05,
"loss": 0.298003101348877,
"step": 390
},
{
"epoch": 0.7582938388625592,
"grad_norm": 0.57421875,
"learning_rate": 1.0394585744929605e-05,
"loss": 0.29361729621887206,
"step": 400
},
{
"epoch": 0.7772511848341233,
"grad_norm": 0.76171875,
"learning_rate": 8.945934256824218e-06,
"loss": 0.28779690265655516,
"step": 410
},
{
"epoch": 0.7962085308056872,
"grad_norm": 0.7421875,
"learning_rate": 7.583740183329768e-06,
"loss": 0.27545139789581297,
"step": 420
},
{
"epoch": 0.8037914691943128,
"eval_text_loss": 1.195946455001831,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 5.0743,
"eval_text_samples_per_second": 11.824,
"eval_text_steps_per_second": 11.824,
"step": 424
},
{
"epoch": 0.8037914691943128,
"eval_audio_loss": 0.7187640070915222,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.0259,
"eval_audio_samples_per_second": 6.641,
"eval_audio_steps_per_second": 6.641,
"step": 424
},
{
"epoch": 0.8151658767772512,
"grad_norm": 0.9375,
"learning_rate": 6.315339471593646e-06,
"loss": 0.2728050947189331,
"step": 430
},
{
"epoch": 0.8341232227488151,
"grad_norm": 0.7421875,
"learning_rate": 5.147562954875268e-06,
"loss": 0.3111764907836914,
"step": 440
},
{
"epoch": 0.8530805687203792,
"grad_norm": 0.92578125,
"learning_rate": 4.086699565840915e-06,
"loss": 0.26120471954345703,
"step": 450
},
{
"epoch": 0.8720379146919431,
"grad_norm": 0.8125,
"learning_rate": 3.138462468204914e-06,
"loss": 0.2936476469039917,
"step": 460
},
{
"epoch": 0.8909952606635071,
"grad_norm": 0.66796875,
"learning_rate": 2.3079582891115144e-06,
"loss": 0.2918030023574829,
"step": 470
},
{
"epoch": 0.9042654028436019,
"eval_text_loss": 1.1954913139343262,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 5.6311,
"eval_text_samples_per_second": 10.655,
"eval_text_steps_per_second": 10.655,
"step": 477
},
{
"epoch": 0.9042654028436019,
"eval_audio_loss": 0.7186709642410278,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 21.3981,
"eval_audio_samples_per_second": 6.216,
"eval_audio_steps_per_second": 6.216,
"step": 477
},
{
"epoch": 0.909952606635071,
"grad_norm": 0.52734375,
"learning_rate": 1.5996596179531364e-06,
"loss": 0.29052374362945554,
"step": 480
},
{
"epoch": 0.9289099526066351,
"grad_norm": 0.625,
"learning_rate": 1.0173809197295075e-06,
"loss": 0.27139732837677,
"step": 490
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.67578125,
"learning_rate": 5.642579926637554e-07,
"loss": 0.29912357330322265,
"step": 500
},
{
"epoch": 0.966824644549763,
"grad_norm": 1.171875,
"learning_rate": 2.4273108070415607e-07,
"loss": 0.2772815465927124,
"step": 510
},
{
"epoch": 0.985781990521327,
"grad_norm": 0.60546875,
"learning_rate": 5.453173185734073e-08,
"loss": 0.29728262424468993,
"step": 520
},
{
"epoch": 1.0,
"eval_text_loss": 1.196468710899353,
"eval_text_model_preparation_time": 0.0155,
"eval_text_runtime": 4.6934,
"eval_text_samples_per_second": 12.784,
"eval_text_steps_per_second": 12.784,
"step": 528
},
{
"epoch": 1.0,
"eval_audio_loss": 0.7178842425346375,
"eval_audio_model_preparation_time": 0.0155,
"eval_audio_runtime": 20.4901,
"eval_audio_samples_per_second": 6.491,
"eval_audio_steps_per_second": 6.491,
"step": 528
}
],
"logging_steps": 10,
"max_steps": 528,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 11,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.645359034142144e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}