{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990041824337781,
"eval_steps": 100,
"global_step": 627,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01593308105954989,
"grad_norm": 5.976239945638455,
"learning_rate": 2.631578947368421e-06,
"loss": 0.8689,
"step": 10
},
{
"epoch": 0.03186616211909978,
"grad_norm": 4.067977130361014,
"learning_rate": 4.999966626509336e-06,
"loss": 0.6193,
"step": 20
},
{
"epoch": 0.04779924317864967,
"grad_norm": 3.93622618786125,
"learning_rate": 4.995962885666031e-06,
"loss": 0.4451,
"step": 30
},
{
"epoch": 0.06373232423819956,
"grad_norm": 1.979025192947513,
"learning_rate": 4.985296693000866e-06,
"loss": 0.3984,
"step": 40
},
{
"epoch": 0.07966540529774946,
"grad_norm": 4.958516213423643,
"learning_rate": 4.967996519688298e-06,
"loss": 0.3429,
"step": 50
},
{
"epoch": 0.09559848635729934,
"grad_norm": 1.901531491013398,
"learning_rate": 4.944108544929091e-06,
"loss": 0.3135,
"step": 60
},
{
"epoch": 0.11153156741684923,
"grad_norm": 2.6637317005625105,
"learning_rate": 4.913696532684593e-06,
"loss": 0.3014,
"step": 70
},
{
"epoch": 0.12746464847639913,
"grad_norm": 1.721469735007685,
"learning_rate": 4.876841661472136e-06,
"loss": 0.3393,
"step": 80
},
{
"epoch": 0.143397729535949,
"grad_norm": 2.285020500263398,
"learning_rate": 4.833642307675948e-06,
"loss": 0.2983,
"step": 90
},
{
"epoch": 0.15933081059549892,
"grad_norm": 1.846260142695851,
"learning_rate": 4.784213782951926e-06,
"loss": 0.2895,
"step": 100
},
{
"epoch": 0.1752638916550488,
"grad_norm": 2.437561340834138,
"learning_rate": 4.728688026427245e-06,
"loss": 0.2802,
"step": 110
},
{
"epoch": 0.19119697271459868,
"grad_norm": 1.814073132276511,
"learning_rate": 4.667213252516408e-06,
"loss": 0.2764,
"step": 120
},
{
"epoch": 0.2071300537741486,
"grad_norm": 1.7520200232040006,
"learning_rate": 4.599953555293807e-06,
"loss": 0.2723,
"step": 130
},
{
"epoch": 0.22306313483369847,
"grad_norm": 1.7012731088046187,
"learning_rate": 4.527088470478851e-06,
"loss": 0.2766,
"step": 140
},
{
"epoch": 0.23899621589324835,
"grad_norm": 1.4552675894129845,
"learning_rate": 4.448812496202849e-06,
"loss": 0.2675,
"step": 150
},
{
"epoch": 0.25492929695279826,
"grad_norm": 1.3972467983262729,
"learning_rate": 4.365334573836851e-06,
"loss": 0.2693,
"step": 160
},
{
"epoch": 0.27086237801234814,
"grad_norm": 1.7017564789089161,
"learning_rate": 4.276877530266284e-06,
"loss": 0.2648,
"step": 170
},
{
"epoch": 0.286795459071898,
"grad_norm": 1.3763883863698911,
"learning_rate": 4.183677483101101e-06,
"loss": 0.2668,
"step": 180
},
{
"epoch": 0.3027285401314479,
"grad_norm": 2.187618573791854,
"learning_rate": 4.085983210409114e-06,
"loss": 0.2788,
"step": 190
},
{
"epoch": 0.31866162119099783,
"grad_norm": 1.457524811306034,
"learning_rate": 3.98405548665489e-06,
"loss": 0.2548,
"step": 200
},
{
"epoch": 0.3345947022505477,
"grad_norm": 1.4844524404614106,
"learning_rate": 3.878166386616752e-06,
"loss": 0.2334,
"step": 210
},
{
"epoch": 0.3505277833100976,
"grad_norm": 1.7097951513131715,
"learning_rate": 3.7685985591399677e-06,
"loss": 0.2526,
"step": 220
},
{
"epoch": 0.3664608643696475,
"grad_norm": 2.5926772898483885,
"learning_rate": 3.655644472664667e-06,
"loss": 0.2537,
"step": 230
},
{
"epoch": 0.38239394542919736,
"grad_norm": 2.3399494453043745,
"learning_rate": 3.539605634542399e-06,
"loss": 0.261,
"step": 240
},
{
"epoch": 0.39832702648874724,
"grad_norm": 2.3700041684156394,
"learning_rate": 3.4207917862252083e-06,
"loss": 0.2655,
"step": 250
},
{
"epoch": 0.4142601075482972,
"grad_norm": 2.0696626816914088,
"learning_rate": 3.2995200764754924e-06,
"loss": 0.2488,
"step": 260
},
{
"epoch": 0.43019318860784705,
"grad_norm": 1.3941586957894263,
"learning_rate": 3.1761142148035993e-06,
"loss": 0.2404,
"step": 270
},
{
"epoch": 0.44612626966739694,
"grad_norm": 1.740968716187846,
"learning_rate": 3.0509036073928686e-06,
"loss": 0.2497,
"step": 280
},
{
"epoch": 0.4620593507269468,
"grad_norm": 1.463183890861242,
"learning_rate": 2.9242224778185985e-06,
"loss": 0.2522,
"step": 290
},
{
"epoch": 0.4779924317864967,
"grad_norm": 2.646999989780854,
"learning_rate": 2.7964089749079907e-06,
"loss": 0.2439,
"step": 300
},
{
"epoch": 0.4939255128460466,
"grad_norm": 1.3983229614766708,
"learning_rate": 2.667804270122454e-06,
"loss": 0.2331,
"step": 310
},
{
"epoch": 0.5098585939055965,
"grad_norm": 1.4379739717093198,
"learning_rate": 2.538751646871617e-06,
"loss": 0.2331,
"step": 320
},
{
"epoch": 0.5257916749651463,
"grad_norm": 1.499769972907629,
"learning_rate": 2.4095955841899372e-06,
"loss": 0.2374,
"step": 330
},
{
"epoch": 0.5417247560246963,
"grad_norm": 1.3346410958529098,
"learning_rate": 2.280680837221835e-06,
"loss": 0.2252,
"step": 340
},
{
"epoch": 0.5576578370842462,
"grad_norm": 1.4481302153632802,
"learning_rate": 2.1523515169698144e-06,
"loss": 0.241,
"step": 350
},
{
"epoch": 0.573590918143796,
"grad_norm": 2.286737307402928,
"learning_rate": 2.0249501717619894e-06,
"loss": 0.2319,
"step": 360
},
{
"epoch": 0.589523999203346,
"grad_norm": 1.28289335918518,
"learning_rate": 1.8988168728908277e-06,
"loss": 0.2202,
"step": 370
},
{
"epoch": 0.6054570802628958,
"grad_norm": 1.4034454327013157,
"learning_rate": 1.7742883068638447e-06,
"loss": 0.2325,
"step": 380
},
{
"epoch": 0.6213901613224457,
"grad_norm": 1.581917998658176,
"learning_rate": 1.65169687668926e-06,
"loss": 0.2273,
"step": 390
},
{
"epoch": 0.6373232423819957,
"grad_norm": 1.499413918873696,
"learning_rate": 1.531369814595567e-06,
"loss": 0.2401,
"step": 400
},
{
"epoch": 0.6532563234415455,
"grad_norm": 1.26561372061873,
"learning_rate": 1.4136283085534158e-06,
"loss": 0.2134,
"step": 410
},
{
"epoch": 0.6691894045010954,
"grad_norm": 1.399381000277604,
"learning_rate": 1.2987866449313824e-06,
"loss": 0.2232,
"step": 420
},
{
"epoch": 0.6851224855606453,
"grad_norm": 1.500478318908901,
"learning_rate": 1.187151369574127e-06,
"loss": 0.2304,
"step": 430
},
{
"epoch": 0.7010555666201952,
"grad_norm": 1.4005423659968532,
"learning_rate": 1.0790204695422571e-06,
"loss": 0.215,
"step": 440
},
{
"epoch": 0.716988647679745,
"grad_norm": 1.5134639641942145,
"learning_rate": 9.746825776980864e-07,
"loss": 0.2259,
"step": 450
},
{
"epoch": 0.732921728739295,
"grad_norm": 1.16505843435412,
"learning_rate": 8.744162022604671e-07,
"loss": 0.2012,
"step": 460
},
{
"epoch": 0.7488548097988449,
"grad_norm": 1.8363118423070643,
"learning_rate": 7.784889833852433e-07,
"loss": 0.208,
"step": 470
},
{
"epoch": 0.7647878908583947,
"grad_norm": 1.3521882743327087,
"learning_rate": 6.871569787557375e-07,
"loss": 0.2169,
"step": 480
},
{
"epoch": 0.7807209719179447,
"grad_norm": 1.3456412659902603,
"learning_rate": 6.006639800902223e-07,
"loss": 0.2353,
"step": 490
},
{
"epoch": 0.7966540529774945,
"grad_norm": 1.2307940792861065,
"learning_rate": 5.192408623908246e-07,
"loss": 0.2202,
"step": 500
},
{
"epoch": 0.8125871340370444,
"grad_norm": 2.6755130100833613,
"learning_rate": 4.431049676709093e-07,
"loss": 0.2282,
"step": 510
},
{
"epoch": 0.8285202150965943,
"grad_norm": 1.4296114226991548,
"learning_rate": 3.72459524805954e-07,
"loss": 0.2265,
"step": 520
},
{
"epoch": 0.8444532961561442,
"grad_norm": 1.3346792046123495,
"learning_rate": 3.074931070564921e-07,
"loss": 0.2103,
"step": 530
},
{
"epoch": 0.8603863772156941,
"grad_norm": 1.3521565356216243,
"learning_rate": 2.4837912871116645e-07,
"loss": 0.2192,
"step": 540
},
{
"epoch": 0.8763194582752439,
"grad_norm": 1.3074956128752757,
"learning_rate": 1.9527538219348775e-07,
"loss": 0.2159,
"step": 550
},
{
"epoch": 0.8922525393347939,
"grad_norm": 1.6764404498838295,
"learning_rate": 1.4832361686790697e-07,
"loss": 0.2052,
"step": 560
},
{
"epoch": 0.9081856203943438,
"grad_norm": 1.319097300978499,
"learning_rate": 1.0764916066947795e-07,
"loss": 0.2036,
"step": 570
},
{
"epoch": 0.9241187014538936,
"grad_norm": 1.4874961576772896,
"learning_rate": 7.336058556710241e-08,
"loss": 0.2054,
"step": 580
},
{
"epoch": 0.9400517825134436,
"grad_norm": 1.2833278629486689,
"learning_rate": 4.5549417753326106e-08,
"loss": 0.2259,
"step": 590
},
{
"epoch": 0.9559848635729934,
"grad_norm": 1.76408107001633,
"learning_rate": 2.4289893334276116e-08,
"loss": 0.227,
"step": 600
},
{
"epoch": 0.9719179446325433,
"grad_norm": 2.1005708168542325,
"learning_rate": 9.638760171873373e-09,
"loss": 0.2119,
"step": 610
},
{
"epoch": 0.9878510256920932,
"grad_norm": 1.5104598295315261,
"learning_rate": 1.6351264072653194e-09,
"loss": 0.2176,
"step": 620
}
],
"logging_steps": 10,
"max_steps": 627,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 183428678942720.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}