{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.43873012004143563,
"eval_steps": 500,
"global_step": 4500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004874779111571507,
"grad_norm": 581.0810546875,
"learning_rate": 1.6233766233766232e-07,
"loss": 42.9948,
"step": 50
},
{
"epoch": 0.009749558223143015,
"grad_norm": 331.4380798339844,
"learning_rate": 3.2467532467532465e-07,
"loss": 36.4433,
"step": 100
},
{
"epoch": 0.014624337334714521,
"grad_norm": 399.9220886230469,
"learning_rate": 4.87012987012987e-07,
"loss": 33.9757,
"step": 150
},
{
"epoch": 0.01949911644628603,
"grad_norm": 227.4508514404297,
"learning_rate": 6.493506493506493e-07,
"loss": 24.5654,
"step": 200
},
{
"epoch": 0.024373895557857534,
"grad_norm": 169.74786376953125,
"learning_rate": 8.116883116883116e-07,
"loss": 18.221,
"step": 250
},
{
"epoch": 0.029248674669429042,
"grad_norm": 151.637451171875,
"learning_rate": 9.74025974025974e-07,
"loss": 15.1792,
"step": 300
},
{
"epoch": 0.03412345378100055,
"grad_norm": 140.70602416992188,
"learning_rate": 9.99956019474448e-07,
"loss": 13.3062,
"step": 350
},
{
"epoch": 0.03899823289257206,
"grad_norm": 163.72286987304688,
"learning_rate": 9.997889850109673e-07,
"loss": 12.1289,
"step": 400
},
{
"epoch": 0.04387301200414356,
"grad_norm": 205.9079132080078,
"learning_rate": 9.994973425669175e-07,
"loss": 11.2206,
"step": 450
},
{
"epoch": 0.04874779111571507,
"grad_norm": 190.47525024414062,
"learning_rate": 9.990811648549374e-07,
"loss": 10.3846,
"step": 500
},
{
"epoch": 0.05362257022728657,
"grad_norm": 63.44021224975586,
"learning_rate": 9.98540555636946e-07,
"loss": 10.0807,
"step": 550
},
{
"epoch": 0.058497349338858085,
"grad_norm": 134.91310119628906,
"learning_rate": 9.978756496982724e-07,
"loss": 9.2068,
"step": 600
},
{
"epoch": 0.0633721284504296,
"grad_norm": 125.61437225341797,
"learning_rate": 9.97086612814052e-07,
"loss": 8.8829,
"step": 650
},
{
"epoch": 0.0682469075620011,
"grad_norm": 148.3848876953125,
"learning_rate": 9.961736417078928e-07,
"loss": 8.8043,
"step": 700
},
{
"epoch": 0.0731216866735726,
"grad_norm": 360.50970458984375,
"learning_rate": 9.951369640028304e-07,
"loss": 9.8165,
"step": 750
},
{
"epoch": 0.07799646578514412,
"grad_norm": 89.09446716308594,
"learning_rate": 9.939768381645761e-07,
"loss": 8.9056,
"step": 800
},
{
"epoch": 0.08287124489671562,
"grad_norm": 163.72415161132812,
"learning_rate": 9.92693553437075e-07,
"loss": 9.5952,
"step": 850
},
{
"epoch": 0.08774602400828713,
"grad_norm": 147.0449981689453,
"learning_rate": 9.912874297703925e-07,
"loss": 9.0044,
"step": 900
},
{
"epoch": 0.09262080311985862,
"grad_norm": 171.43394470214844,
"learning_rate": 9.897588177409434e-07,
"loss": 9.1141,
"step": 950
},
{
"epoch": 0.09749558223143014,
"grad_norm": 206.17628479003906,
"learning_rate": 9.88108098464086e-07,
"loss": 8.1566,
"step": 1000
},
{
"epoch": 0.10237036134300165,
"grad_norm": 131.0735321044922,
"learning_rate": 9.863356834991016e-07,
"loss": 8.4912,
"step": 1050
},
{
"epoch": 0.10724514045457315,
"grad_norm": 140.00730895996094,
"learning_rate": 9.844420147465848e-07,
"loss": 8.1491,
"step": 1100
},
{
"epoch": 0.11211991956614466,
"grad_norm": 163.5810089111328,
"learning_rate": 9.824275643382676e-07,
"loss": 8.6904,
"step": 1150
},
{
"epoch": 0.11699469867771617,
"grad_norm": 129.7275848388672,
"learning_rate": 9.802928345193068e-07,
"loss": 8.1686,
"step": 1200
},
{
"epoch": 0.12186947778928767,
"grad_norm": 115.78919219970703,
"learning_rate": 9.780383575230648e-07,
"loss": 7.6378,
"step": 1250
},
{
"epoch": 0.1267442569008592,
"grad_norm": 74.11811828613281,
"learning_rate": 9.756646954384115e-07,
"loss": 7.8103,
"step": 1300
},
{
"epoch": 0.1316190360124307,
"grad_norm": 93.70452880859375,
"learning_rate": 9.731724400695836e-07,
"loss": 8.122,
"step": 1350
},
{
"epoch": 0.1364938151240022,
"grad_norm": 110.20355987548828,
"learning_rate": 9.70562212788636e-07,
"loss": 7.8767,
"step": 1400
},
{
"epoch": 0.1413685942355737,
"grad_norm": 96.42841339111328,
"learning_rate": 9.6783466438052e-07,
"loss": 8.0516,
"step": 1450
},
{
"epoch": 0.1462433733471452,
"grad_norm": 98.32221221923828,
"learning_rate": 9.649904748808292e-07,
"loss": 7.6941,
"step": 1500
},
{
"epoch": 0.1511181524587167,
"grad_norm": 107.42027282714844,
"learning_rate": 9.620303534062518e-07,
"loss": 8.0057,
"step": 1550
},
{
"epoch": 0.15599293157028823,
"grad_norm": 55.05694580078125,
"learning_rate": 9.589550379777732e-07,
"loss": 7.4756,
"step": 1600
},
{
"epoch": 0.16086771068185973,
"grad_norm": 117.27649688720703,
"learning_rate": 9.557652953366717e-07,
"loss": 6.8833,
"step": 1650
},
{
"epoch": 0.16574248979343123,
"grad_norm": 174.34263610839844,
"learning_rate": 9.52461920753353e-07,
"loss": 7.4795,
"step": 1700
},
{
"epoch": 0.17061726890500276,
"grad_norm": 119.58318328857422,
"learning_rate": 9.490457378290737e-07,
"loss": 7.7871,
"step": 1750
},
{
"epoch": 0.17549204801657425,
"grad_norm": 142.45582580566406,
"learning_rate": 9.455175982905988e-07,
"loss": 8.1505,
"step": 1800
},
{
"epoch": 0.18036682712814575,
"grad_norm": 122.0265884399414,
"learning_rate": 9.418783817778484e-07,
"loss": 7.6914,
"step": 1850
},
{
"epoch": 0.18524160623971725,
"grad_norm": 96.58927917480469,
"learning_rate": 9.381289956245861e-07,
"loss": 7.5846,
"step": 1900
},
{
"epoch": 0.19011638535128877,
"grad_norm": 238.81964111328125,
"learning_rate": 9.342703746321997e-07,
"loss": 7.7886,
"step": 1950
},
{
"epoch": 0.19499116446286027,
"grad_norm": 61.6027946472168,
"learning_rate": 9.303034808366366e-07,
"loss": 7.2491,
"step": 2000
},
{
"epoch": 0.19986594357443177,
"grad_norm": 96.19196319580078,
"learning_rate": 9.262293032685475e-07,
"loss": 6.8776,
"step": 2050
},
{
"epoch": 0.2047407226860033,
"grad_norm": 72.44068908691406,
"learning_rate": 9.220488577066996e-07,
"loss": 7.2714,
"step": 2100
},
{
"epoch": 0.2096155017975748,
"grad_norm": 160.9955291748047,
"learning_rate": 9.177631864247226e-07,
"loss": 7.4344,
"step": 2150
},
{
"epoch": 0.2144902809091463,
"grad_norm": 78.55003356933594,
"learning_rate": 9.133733579312468e-07,
"loss": 7.2211,
"step": 2200
},
{
"epoch": 0.21936506002071782,
"grad_norm": 141.0493621826172,
"learning_rate": 9.088804667035016e-07,
"loss": 7.3533,
"step": 2250
},
{
"epoch": 0.22423983913228931,
"grad_norm": 106.55406951904297,
"learning_rate": 9.042856329144392e-07,
"loss": 7.526,
"step": 2300
},
{
"epoch": 0.2291146182438608,
"grad_norm": 116.47844696044922,
"learning_rate": 8.995900021534517e-07,
"loss": 6.5839,
"step": 2350
},
{
"epoch": 0.23398939735543234,
"grad_norm": 87.43718719482422,
"learning_rate": 8.947947451407512e-07,
"loss": 7.2284,
"step": 2400
},
{
"epoch": 0.23886417646700384,
"grad_norm": 135.8431854248047,
"learning_rate": 8.89901057435485e-07,
"loss": 7.6484,
"step": 2450
},
{
"epoch": 0.24373895557857533,
"grad_norm": 87.54926300048828,
"learning_rate": 8.849101591376568e-07,
"loss": 7.2991,
"step": 2500
},
{
"epoch": 0.24861373469014686,
"grad_norm": 109.77268981933594,
"learning_rate": 8.798232945839304e-07,
"loss": 6.9895,
"step": 2550
},
{
"epoch": 0.2534885138017184,
"grad_norm": 118.82627868652344,
"learning_rate": 8.746417320373896e-07,
"loss": 7.4786,
"step": 2600
},
{
"epoch": 0.25836329291328985,
"grad_norm": 175.50918579101562,
"learning_rate": 8.693667633713338e-07,
"loss": 6.6877,
"step": 2650
},
{
"epoch": 0.2632380720248614,
"grad_norm": 190.9921875,
"learning_rate": 8.639997037471867e-07,
"loss": 6.8118,
"step": 2700
},
{
"epoch": 0.2681128511364329,
"grad_norm": 163.4025115966797,
"learning_rate": 8.585418912865986e-07,
"loss": 6.9759,
"step": 2750
},
{
"epoch": 0.2729876302480044,
"grad_norm": 149.16815185546875,
"learning_rate": 8.529946867378241e-07,
"loss": 7.2147,
"step": 2800
},
{
"epoch": 0.2778624093595759,
"grad_norm": 97.56202697753906,
"learning_rate": 8.473594731364587e-07,
"loss": 7.1163,
"step": 2850
},
{
"epoch": 0.2827371884711474,
"grad_norm": 104.49602508544922,
"learning_rate": 8.416376554606195e-07,
"loss": 7.5656,
"step": 2900
},
{
"epoch": 0.2876119675827189,
"grad_norm": 130.78414916992188,
"learning_rate": 8.358306602806534e-07,
"loss": 6.901,
"step": 2950
},
{
"epoch": 0.2924867466942904,
"grad_norm": 152.71543884277344,
"learning_rate": 8.299399354034633e-07,
"loss": 7.0534,
"step": 3000
},
{
"epoch": 0.29736152580586195,
"grad_norm": 128.23233032226562,
"learning_rate": 8.239669495115393e-07,
"loss": 7.2949,
"step": 3050
},
{
"epoch": 0.3022363049174334,
"grad_norm": 212.286865234375,
"learning_rate": 8.179131917967852e-07,
"loss": 7.1819,
"step": 3100
},
{
"epoch": 0.30711108402900494,
"grad_norm": 146.38832092285156,
"learning_rate": 8.117801715892306e-07,
"loss": 7.3945,
"step": 3150
},
{
"epoch": 0.31198586314057647,
"grad_norm": 98.95829010009766,
"learning_rate": 8.05569417980724e-07,
"loss": 7.0111,
"step": 3200
},
{
"epoch": 0.31686064225214794,
"grad_norm": 147.78128051757812,
"learning_rate": 7.992824794436971e-07,
"loss": 7.1754,
"step": 3250
},
{
"epoch": 0.32173542136371946,
"grad_norm": 137.8080596923828,
"learning_rate": 7.92920923445098e-07,
"loss": 7.2801,
"step": 3300
},
{
"epoch": 0.326610200475291,
"grad_norm": 156.37132263183594,
"learning_rate": 7.864863360555886e-07,
"loss": 7.1625,
"step": 3350
},
{
"epoch": 0.33148497958686246,
"grad_norm": 176.6288299560547,
"learning_rate": 7.799803215541036e-07,
"loss": 7.5386,
"step": 3400
},
{
"epoch": 0.336359758698434,
"grad_norm": 150.56468200683594,
"learning_rate": 7.734045020278694e-07,
"loss": 6.9751,
"step": 3450
},
{
"epoch": 0.3412345378100055,
"grad_norm": 138.31723022460938,
"learning_rate": 7.667605169679842e-07,
"loss": 6.8245,
"step": 3500
},
{
"epoch": 0.346109316921577,
"grad_norm": 100.9895248413086,
"learning_rate": 7.600500228606573e-07,
"loss": 6.947,
"step": 3550
},
{
"epoch": 0.3509840960331485,
"grad_norm": 142.1031036376953,
"learning_rate": 7.532746927742119e-07,
"loss": 6.9751,
"step": 3600
},
{
"epoch": 0.35585887514472,
"grad_norm": 182.1207275390625,
"learning_rate": 7.464362159419551e-07,
"loss": 7.1473,
"step": 3650
},
{
"epoch": 0.3607336542562915,
"grad_norm": 162.8542938232422,
"learning_rate": 7.395362973410145e-07,
"loss": 7.7815,
"step": 3700
},
{
"epoch": 0.36560843336786303,
"grad_norm": 168.93276977539062,
"learning_rate": 7.325766572672528e-07,
"loss": 7.7646,
"step": 3750
},
{
"epoch": 0.3704832124794345,
"grad_norm": 159.7053985595703,
"learning_rate": 7.255590309063604e-07,
"loss": 6.4885,
"step": 3800
},
{
"epoch": 0.375357991591006,
"grad_norm": 96.07608795166016,
"learning_rate": 7.184851679012374e-07,
"loss": 6.9556,
"step": 3850
},
{
"epoch": 0.38023277070257755,
"grad_norm": 158.5734100341797,
"learning_rate": 7.113568319157707e-07,
"loss": 6.9754,
"step": 3900
},
{
"epoch": 0.385107549814149,
"grad_norm": 151.5749969482422,
"learning_rate": 7.041758001951149e-07,
"loss": 6.6478,
"step": 3950
},
{
"epoch": 0.38998232892572054,
"grad_norm": 118.84528350830078,
"learning_rate": 6.969438631225877e-07,
"loss": 6.3464,
"step": 4000
},
{
"epoch": 0.39485710803729207,
"grad_norm": 130.48410034179688,
"learning_rate": 6.896628237732894e-07,
"loss": 7.0122,
"step": 4050
},
{
"epoch": 0.39973188714886354,
"grad_norm": 92.12020874023438,
"learning_rate": 6.823344974645576e-07,
"loss": 7.2089,
"step": 4100
},
{
"epoch": 0.40460666626043507,
"grad_norm": 113.18313598632812,
"learning_rate": 6.749607113033709e-07,
"loss": 7.2546,
"step": 4150
},
{
"epoch": 0.4094814453720066,
"grad_norm": 106.96129608154297,
"learning_rate": 6.675433037308119e-07,
"loss": 7.3078,
"step": 4200
},
{
"epoch": 0.41435622448357806,
"grad_norm": 150.419677734375,
"learning_rate": 6.600841240637052e-07,
"loss": 7.1537,
"step": 4250
},
{
"epoch": 0.4192310035951496,
"grad_norm": 88.62400817871094,
"learning_rate": 6.525850320335433e-07,
"loss": 7.0714,
"step": 4300
},
{
"epoch": 0.4241057827067211,
"grad_norm": 83.91474151611328,
"learning_rate": 6.450478973228162e-07,
"loss": 6.9181,
"step": 4350
},
{
"epoch": 0.4289805618182926,
"grad_norm": 135.38629150390625,
"learning_rate": 6.374745990988598e-07,
"loss": 7.1421,
"step": 4400
},
{
"epoch": 0.4338553409298641,
"grad_norm": 101.2317123413086,
"learning_rate": 6.298670255453404e-07,
"loss": 6.6926,
"step": 4450
},
{
"epoch": 0.43873012004143563,
"grad_norm": 112.95464324951172,
"learning_rate": 6.222270733914895e-07,
"loss": 6.7252,
"step": 4500
}
],
"logging_steps": 50,
"max_steps": 10256,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.8045681436358345e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}