{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990041824337781,
"eval_steps": 100,
"global_step": 627,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01593308105954989,
"grad_norm": 6.123967491063448,
"learning_rate": 2.631578947368421e-06,
"loss": 0.7844,
"step": 10
},
{
"epoch": 0.03186616211909978,
"grad_norm": 4.195346424811152,
"learning_rate": 4.999966626509336e-06,
"loss": 0.4873,
"step": 20
},
{
"epoch": 0.04779924317864967,
"grad_norm": 2.3937960648002496,
"learning_rate": 4.995962885666031e-06,
"loss": 0.3576,
"step": 30
},
{
"epoch": 0.06373232423819956,
"grad_norm": 2.0119365099499613,
"learning_rate": 4.985296693000866e-06,
"loss": 0.3362,
"step": 40
},
{
"epoch": 0.07966540529774946,
"grad_norm": 6.364486514806231,
"learning_rate": 4.967996519688298e-06,
"loss": 0.3002,
"step": 50
},
{
"epoch": 0.09559848635729934,
"grad_norm": 2.149725330685237,
"learning_rate": 4.944108544929091e-06,
"loss": 0.2702,
"step": 60
},
{
"epoch": 0.11153156741684923,
"grad_norm": 179.0810015887418,
"learning_rate": 4.913696532684593e-06,
"loss": 0.3265,
"step": 70
},
{
"epoch": 0.12746464847639913,
"grad_norm": 2.3719183425912354,
"learning_rate": 4.876841661472136e-06,
"loss": 0.4095,
"step": 80
},
{
"epoch": 0.143397729535949,
"grad_norm": 5.36119881007005,
"learning_rate": 4.833642307675948e-06,
"loss": 0.2789,
"step": 90
},
{
"epoch": 0.15933081059549892,
"grad_norm": 1.884067588257083,
"learning_rate": 4.784213782951926e-06,
"loss": 0.2808,
"step": 100
},
{
"epoch": 0.1752638916550488,
"grad_norm": 37.862372083970605,
"learning_rate": 4.728688026427245e-06,
"loss": 0.2711,
"step": 110
},
{
"epoch": 0.19119697271459868,
"grad_norm": 2.462558490018586,
"learning_rate": 4.667213252516408e-06,
"loss": 0.2496,
"step": 120
},
{
"epoch": 0.2071300537741486,
"grad_norm": 2.109315595561499,
"learning_rate": 4.599953555293807e-06,
"loss": 0.2455,
"step": 130
},
{
"epoch": 0.22306313483369847,
"grad_norm": 2.0083488558558833,
"learning_rate": 4.527088470478851e-06,
"loss": 0.2522,
"step": 140
},
{
"epoch": 0.23899621589324835,
"grad_norm": 1.655486020092584,
"learning_rate": 4.448812496202849e-06,
"loss": 0.2517,
"step": 150
},
{
"epoch": 0.25492929695279826,
"grad_norm": 1.6528337383021185,
"learning_rate": 4.365334573836851e-06,
"loss": 0.2566,
"step": 160
},
{
"epoch": 0.27086237801234814,
"grad_norm": 1.8785516918363623,
"learning_rate": 4.276877530266284e-06,
"loss": 0.2404,
"step": 170
},
{
"epoch": 0.286795459071898,
"grad_norm": 1.6846196856668103,
"learning_rate": 4.183677483101101e-06,
"loss": 0.2452,
"step": 180
},
{
"epoch": 0.3027285401314479,
"grad_norm": 2.020720522276084,
"learning_rate": 4.085983210409114e-06,
"loss": 0.2523,
"step": 190
},
{
"epoch": 0.31866162119099783,
"grad_norm": 1.6343932551444422,
"learning_rate": 3.98405548665489e-06,
"loss": 0.2284,
"step": 200
},
{
"epoch": 0.3345947022505477,
"grad_norm": 1.830257135625606,
"learning_rate": 3.878166386616752e-06,
"loss": 0.2164,
"step": 210
},
{
"epoch": 0.3505277833100976,
"grad_norm": 2.6262705273633036,
"learning_rate": 3.7685985591399677e-06,
"loss": 0.2358,
"step": 220
},
{
"epoch": 0.3664608643696475,
"grad_norm": 3.891410262587011,
"learning_rate": 3.655644472664667e-06,
"loss": 0.234,
"step": 230
},
{
"epoch": 0.38239394542919736,
"grad_norm": 3.0175466455395115,
"learning_rate": 3.539605634542399e-06,
"loss": 0.237,
"step": 240
},
{
"epoch": 0.39832702648874724,
"grad_norm": 1.7383677821741295,
"learning_rate": 3.4207917862252083e-06,
"loss": 0.2406,
"step": 250
},
{
"epoch": 0.4142601075482972,
"grad_norm": 1.9322292163817105,
"learning_rate": 3.2995200764754924e-06,
"loss": 0.224,
"step": 260
},
{
"epoch": 0.43019318860784705,
"grad_norm": 1.477693042337343,
"learning_rate": 3.1761142148035993e-06,
"loss": 0.2201,
"step": 270
},
{
"epoch": 0.44612626966739694,
"grad_norm": 1.8474067145641697,
"learning_rate": 3.0509036073928686e-06,
"loss": 0.225,
"step": 280
},
{
"epoch": 0.4620593507269468,
"grad_norm": 1.5510783177161291,
"learning_rate": 2.9242224778185985e-06,
"loss": 0.2314,
"step": 290
},
{
"epoch": 0.4779924317864967,
"grad_norm": 2.433823482819606,
"learning_rate": 2.7964089749079907e-06,
"loss": 0.2199,
"step": 300
},
{
"epoch": 0.4939255128460466,
"grad_norm": 1.5158466644893986,
"learning_rate": 2.667804270122454e-06,
"loss": 0.2101,
"step": 310
},
{
"epoch": 0.5098585939055965,
"grad_norm": 1.496925875732646,
"learning_rate": 2.538751646871617e-06,
"loss": 0.21,
"step": 320
},
{
"epoch": 0.5257916749651463,
"grad_norm": 1.6528604902515285,
"learning_rate": 2.4095955841899372e-06,
"loss": 0.2148,
"step": 330
},
{
"epoch": 0.5417247560246963,
"grad_norm": 1.4331715070790367,
"learning_rate": 2.280680837221835e-06,
"loss": 0.2015,
"step": 340
},
{
"epoch": 0.5576578370842462,
"grad_norm": 1.8026216721078392,
"learning_rate": 2.1523515169698144e-06,
"loss": 0.2189,
"step": 350
},
{
"epoch": 0.573590918143796,
"grad_norm": 2.769205324123664,
"learning_rate": 2.0249501717619894e-06,
"loss": 0.2098,
"step": 360
},
{
"epoch": 0.589523999203346,
"grad_norm": 1.4824634904460874,
"learning_rate": 1.8988168728908277e-06,
"loss": 0.1976,
"step": 370
},
{
"epoch": 0.6054570802628958,
"grad_norm": 1.5417502167114872,
"learning_rate": 1.7742883068638447e-06,
"loss": 0.2071,
"step": 380
},
{
"epoch": 0.6213901613224457,
"grad_norm": 1.6185776121068578,
"learning_rate": 1.65169687668926e-06,
"loss": 0.2043,
"step": 390
},
{
"epoch": 0.6373232423819957,
"grad_norm": 1.6099311926386768,
"learning_rate": 1.531369814595567e-06,
"loss": 0.2118,
"step": 400
},
{
"epoch": 0.6532563234415455,
"grad_norm": 1.4106643427656438,
"learning_rate": 1.4136283085534158e-06,
"loss": 0.1883,
"step": 410
},
{
"epoch": 0.6691894045010954,
"grad_norm": 1.4983565765754983,
"learning_rate": 1.2987866449313824e-06,
"loss": 0.1971,
"step": 420
},
{
"epoch": 0.6851224855606453,
"grad_norm": 1.6344675457287534,
"learning_rate": 1.187151369574127e-06,
"loss": 0.2033,
"step": 430
},
{
"epoch": 0.7010555666201952,
"grad_norm": 1.5810003041618759,
"learning_rate": 1.0790204695422571e-06,
"loss": 0.191,
"step": 440
},
{
"epoch": 0.716988647679745,
"grad_norm": 1.669954129289104,
"learning_rate": 9.746825776980864e-07,
"loss": 0.1984,
"step": 450
},
{
"epoch": 0.732921728739295,
"grad_norm": 1.3119689193301776,
"learning_rate": 8.744162022604671e-07,
"loss": 0.1782,
"step": 460
},
{
"epoch": 0.7488548097988449,
"grad_norm": 2.142011140780938,
"learning_rate": 7.784889833852433e-07,
"loss": 0.1844,
"step": 470
},
{
"epoch": 0.7647878908583947,
"grad_norm": 1.3982306847588983,
"learning_rate": 6.871569787557375e-07,
"loss": 0.1884,
"step": 480
},
{
"epoch": 0.7807209719179447,
"grad_norm": 1.4269559759325665,
"learning_rate": 6.006639800902223e-07,
"loss": 0.2041,
"step": 490
},
{
"epoch": 0.7966540529774945,
"grad_norm": 1.3402320824249279,
"learning_rate": 5.192408623908246e-07,
"loss": 0.1935,
"step": 500
},
{
"epoch": 0.8125871340370444,
"grad_norm": 4.081818040872233,
"learning_rate": 4.431049676709093e-07,
"loss": 0.2023,
"step": 510
},
{
"epoch": 0.8285202150965943,
"grad_norm": 1.5620677708803261,
"learning_rate": 3.72459524805954e-07,
"loss": 0.2009,
"step": 520
},
{
"epoch": 0.8444532961561442,
"grad_norm": 1.657553455475968,
"learning_rate": 3.074931070564921e-07,
"loss": 0.1861,
"step": 530
},
{
"epoch": 0.8603863772156941,
"grad_norm": 1.4767107380746103,
"learning_rate": 2.4837912871116645e-07,
"loss": 0.1933,
"step": 540
},
{
"epoch": 0.8763194582752439,
"grad_norm": 1.4093479736420487,
"learning_rate": 1.9527538219348775e-07,
"loss": 0.1896,
"step": 550
},
{
"epoch": 0.8922525393347939,
"grad_norm": 1.8203174948013054,
"learning_rate": 1.4832361686790697e-07,
"loss": 0.1768,
"step": 560
},
{
"epoch": 0.9081856203943438,
"grad_norm": 1.3643578889605608,
"learning_rate": 1.0764916066947795e-07,
"loss": 0.1763,
"step": 570
},
{
"epoch": 0.9241187014538936,
"grad_norm": 1.6532008723644016,
"learning_rate": 7.336058556710241e-08,
"loss": 0.1801,
"step": 580
},
{
"epoch": 0.9400517825134436,
"grad_norm": 1.4228921803437908,
"learning_rate": 4.5549417753326106e-08,
"loss": 0.1974,
"step": 590
},
{
"epoch": 0.9559848635729934,
"grad_norm": 1.7441673390589285,
"learning_rate": 2.4289893334276116e-08,
"loss": 0.196,
"step": 600
},
{
"epoch": 0.9719179446325433,
"grad_norm": 2.271358181919917,
"learning_rate": 9.638760171873373e-09,
"loss": 0.1817,
"step": 610
},
{
"epoch": 0.9878510256920932,
"grad_norm": 1.5715295700071357,
"learning_rate": 1.6351264072653194e-09,
"loss": 0.1918,
"step": 620
}
],
"logging_steps": 10,
"max_steps": 627,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 206046578081792.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}