{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 6250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 5.986245155334473,
"learning_rate": 1.5840000000000002e-06,
"loss": 1.9745,
"step": 100
},
{
"epoch": 0.032,
"grad_norm": 5.599625110626221,
"learning_rate": 3.1840000000000003e-06,
"loss": 1.125,
"step": 200
},
{
"epoch": 0.048,
"grad_norm": 4.610442161560059,
"learning_rate": 4.784e-06,
"loss": 1.025,
"step": 300
},
{
"epoch": 0.064,
"grad_norm": 5.483785152435303,
"learning_rate": 6.384e-06,
"loss": 0.9741,
"step": 400
},
{
"epoch": 0.08,
"grad_norm": 4.7527313232421875,
"learning_rate": 7.984e-06,
"loss": 0.9544,
"step": 500
},
{
"epoch": 0.096,
"grad_norm": 4.325509548187256,
"learning_rate": 9.584000000000002e-06,
"loss": 0.9134,
"step": 600
},
{
"epoch": 0.112,
"grad_norm": 4.1613898277282715,
"learning_rate": 9.995730310237113e-06,
"loss": 0.8952,
"step": 700
},
{
"epoch": 0.128,
"grad_norm": 5.373358726501465,
"learning_rate": 9.976408726659296e-06,
"loss": 0.8752,
"step": 800
},
{
"epoch": 0.144,
"grad_norm": 5.119192600250244,
"learning_rate": 9.941568353618064e-06,
"loss": 0.8654,
"step": 900
},
{
"epoch": 0.16,
"grad_norm": 3.383653402328491,
"learning_rate": 9.891317839828527e-06,
"loss": 0.845,
"step": 1000
},
{
"epoch": 0.176,
"grad_norm": 3.9472246170043945,
"learning_rate": 9.825813890092639e-06,
"loss": 0.8178,
"step": 1100
},
{
"epoch": 0.192,
"grad_norm": 3.940248489379883,
"learning_rate": 9.745260776619698e-06,
"loss": 0.8142,
"step": 1200
},
{
"epoch": 0.208,
"grad_norm": 3.9536900520324707,
"learning_rate": 9.649909702009265e-06,
"loss": 0.8028,
"step": 1300
},
{
"epoch": 0.224,
"grad_norm": 4.985702037811279,
"learning_rate": 9.54005801588298e-06,
"loss": 0.795,
"step": 1400
},
{
"epoch": 0.24,
"grad_norm": 5.107704162597656,
"learning_rate": 9.416048287608195e-06,
"loss": 0.7805,
"step": 1500
},
{
"epoch": 0.256,
"grad_norm": 3.63683819770813,
"learning_rate": 9.27826723800513e-06,
"loss": 0.7734,
"step": 1600
},
{
"epoch": 0.272,
"grad_norm": 5.008887767791748,
"learning_rate": 9.127144533368956e-06,
"loss": 0.7681,
"step": 1700
},
{
"epoch": 0.288,
"grad_norm": 4.119167327880859,
"learning_rate": 8.963151445567642e-06,
"loss": 0.7479,
"step": 1800
},
{
"epoch": 0.304,
"grad_norm": 4.297443866729736,
"learning_rate": 8.786799382394e-06,
"loss": 0.7478,
"step": 1900
},
{
"epoch": 0.32,
"grad_norm": 4.435776710510254,
"learning_rate": 8.598638292755e-06,
"loss": 0.7389,
"step": 2000
},
{
"epoch": 0.336,
"grad_norm": 3.9187700748443604,
"learning_rate": 8.399254951671681e-06,
"loss": 0.7226,
"step": 2100
},
{
"epoch": 0.352,
"grad_norm": 3.695847988128662,
"learning_rate": 8.18927113043791e-06,
"loss": 0.7138,
"step": 2200
},
{
"epoch": 0.368,
"grad_norm": 4.540302753448486,
"learning_rate": 7.969341657644236e-06,
"loss": 0.7126,
"step": 2300
},
{
"epoch": 0.384,
"grad_norm": 4.550365447998047,
"learning_rate": 7.740152377113493e-06,
"loss": 0.7063,
"step": 2400
},
{
"epoch": 0.4,
"grad_norm": 3.8343756198883057,
"learning_rate": 7.5024180091162976e-06,
"loss": 0.6911,
"step": 2500
},
{
"epoch": 0.416,
"grad_norm": 4.098830223083496,
"learning_rate": 7.256879921536164e-06,
"loss": 0.6991,
"step": 2600
},
{
"epoch": 0.432,
"grad_norm": 3.9230875968933105,
"learning_rate": 7.004303817934775e-06,
"loss": 0.6848,
"step": 2700
},
{
"epoch": 0.448,
"grad_norm": 4.32880163192749,
"learning_rate": 6.745477349727154e-06,
"loss": 0.6643,
"step": 2800
},
{
"epoch": 0.464,
"grad_norm": 4.100039482116699,
"learning_rate": 6.481207659913062e-06,
"loss": 0.6791,
"step": 2900
},
{
"epoch": 0.48,
"grad_norm": 4.455363750457764,
"learning_rate": 6.212318866024449e-06,
"loss": 0.6568,
"step": 3000
},
{
"epoch": 0.496,
"grad_norm": 3.64780330657959,
"learning_rate": 5.939649490138305e-06,
"loss": 0.6609,
"step": 3100
},
{
"epoch": 0.512,
"grad_norm": 4.5561418533325195,
"learning_rate": 5.664049843969348e-06,
"loss": 0.6598,
"step": 3200
},
{
"epoch": 0.528,
"grad_norm": 4.1221747398376465,
"learning_rate": 5.386379377197056e-06,
"loss": 0.6499,
"step": 3300
},
{
"epoch": 0.544,
"grad_norm": 4.2754106521606445,
"learning_rate": 5.107503997296225e-06,
"loss": 0.6534,
"step": 3400
},
{
"epoch": 0.56,
"grad_norm": 3.418328285217285,
"learning_rate": 4.8282933692290665e-06,
"loss": 0.6511,
"step": 3500
},
{
"epoch": 0.576,
"grad_norm": 4.334653854370117,
"learning_rate": 4.549618203419684e-06,
"loss": 0.6388,
"step": 3600
},
{
"epoch": 0.592,
"grad_norm": 3.9164488315582275,
"learning_rate": 4.272347540468327e-06,
"loss": 0.6327,
"step": 3700
},
{
"epoch": 0.608,
"grad_norm": 4.421480655670166,
"learning_rate": 3.997346041072912e-06,
"loss": 0.6378,
"step": 3800
},
{
"epoch": 0.624,
"grad_norm": 4.170716762542725,
"learning_rate": 3.725471289609174e-06,
"loss": 0.6336,
"step": 3900
},
{
"epoch": 0.64,
"grad_norm": 5.108222961425781,
"learning_rate": 3.457571119778104e-06,
"loss": 0.613,
"step": 4000
},
{
"epoch": 0.656,
"grad_norm": 4.666893005371094,
"learning_rate": 3.1944809706606123e-06,
"loss": 0.6106,
"step": 4100
},
{
"epoch": 0.672,
"grad_norm": 4.099169731140137,
"learning_rate": 2.9370212814244436e-06,
"loss": 0.5947,
"step": 4200
},
{
"epoch": 0.688,
"grad_norm": 3.848003625869751,
"learning_rate": 2.6859949328079005e-06,
"loss": 0.5981,
"step": 4300
},
{
"epoch": 0.704,
"grad_norm": 4.583881378173828,
"learning_rate": 2.4421847433590466e-06,
"loss": 0.6008,
"step": 4400
},
{
"epoch": 0.72,
"grad_norm": 4.723909378051758,
"learning_rate": 2.2063510282382517e-06,
"loss": 0.5932,
"step": 4500
},
{
"epoch": 0.736,
"grad_norm": 4.5427045822143555,
"learning_rate": 1.979229228196942e-06,
"loss": 0.5972,
"step": 4600
},
{
"epoch": 0.752,
"grad_norm": 5.033416748046875,
"learning_rate": 1.761527616126475e-06,
"loss": 0.5964,
"step": 4700
},
{
"epoch": 0.768,
"grad_norm": 5.443480491638184,
"learning_rate": 1.5539250883292078e-06,
"loss": 0.589,
"step": 4800
},
{
"epoch": 0.784,
"grad_norm": 4.804011821746826,
"learning_rate": 1.3570690473996483e-06,
"loss": 0.5812,
"step": 4900
},
{
"epoch": 0.8,
"grad_norm": 5.263527870178223,
"learning_rate": 1.1715733833178178e-06,
"loss": 0.5747,
"step": 5000
},
{
"epoch": 0.816,
"grad_norm": 5.167060852050781,
"learning_rate": 9.98016559050765e-07,
"loss": 0.5652,
"step": 5100
},
{
"epoch": 0.832,
"grad_norm": 5.249851226806641,
"learning_rate": 8.369398066322049e-07,
"loss": 0.5808,
"step": 5200
},
{
"epoch": 0.848,
"grad_norm": 5.08359956741333,
"learning_rate": 6.888454393457817e-07,
"loss": 0.5656,
"step": 5300
},
{
"epoch": 0.864,
"grad_norm": 4.25005578994751,
"learning_rate": 5.541952852753341e-07,
"loss": 0.5745,
"step": 5400
},
{
"epoch": 0.88,
"grad_norm": 4.67172908782959,
"learning_rate": 4.334092471071194e-07,
"loss": 0.5695,
"step": 5500
},
{
"epoch": 0.896,
"grad_norm": 3.975259780883789,
"learning_rate": 3.268639926751943e-07,
"loss": 0.5632,
"step": 5600
},
{
"epoch": 0.912,
"grad_norm": 4.989463806152344,
"learning_rate": 2.3489178033345994e-07,
"loss": 0.5807,
"step": 5700
},
{
"epoch": 0.928,
"grad_norm": 4.873440742492676,
"learning_rate": 1.5777942281740789e-07,
"loss": 0.5645,
"step": 5800
},
{
"epoch": 0.944,
"grad_norm": 4.344705581665039,
"learning_rate": 9.576739282673886e-08,
"loss": 0.5681,
"step": 5900
},
{
"epoch": 0.96,
"grad_norm": 4.521224498748779,
"learning_rate": 4.9049073118072057e-08,
"loss": 0.5692,
"step": 6000
},
{
"epoch": 0.976,
"grad_norm": 4.890485763549805,
"learning_rate": 1.7770153446302618e-08,
"loss": 0.5742,
"step": 6100
},
{
"epoch": 0.992,
"grad_norm": 5.1736626625061035,
"learning_rate": 2.0281762352331034e-09,
"loss": 0.5735,
"step": 6200
}
],
"logging_steps": 100,
"max_steps": 6250,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2582174620450816.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}