{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.328180737217598,
"eval_steps": 500,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03170828378913991,
"grad_norm": 11.620649337768555,
"learning_rate": 0.00011399999999999999,
"loss": 7.7223,
"step": 20
},
{
"epoch": 0.06341656757827982,
"grad_norm": 1.8820377588272095,
"learning_rate": 0.000234,
"loss": 1.8545,
"step": 40
},
{
"epoch": 0.09512485136741974,
"grad_norm": 1.3315837383270264,
"learning_rate": 0.00029868292682926826,
"loss": 1.2718,
"step": 60
},
{
"epoch": 0.12683313515655964,
"grad_norm": 1.0603790283203125,
"learning_rate": 0.00029575609756097557,
"loss": 1.1449,
"step": 80
},
{
"epoch": 0.15854141894569956,
"grad_norm": 1.1477017402648926,
"learning_rate": 0.00029282926829268287,
"loss": 1.1018,
"step": 100
},
{
"epoch": 0.1902497027348395,
"grad_norm": 1.0604023933410645,
"learning_rate": 0.0002899024390243902,
"loss": 1.0883,
"step": 120
},
{
"epoch": 0.22195798652397938,
"grad_norm": 0.8798519372940063,
"learning_rate": 0.0002869756097560975,
"loss": 1.0645,
"step": 140
},
{
"epoch": 0.2536662703131193,
"grad_norm": 1.0906599760055542,
"learning_rate": 0.0002840487804878048,
"loss": 1.0418,
"step": 160
},
{
"epoch": 0.2853745541022592,
"grad_norm": 0.9430219531059265,
"learning_rate": 0.0002811219512195122,
"loss": 1.0316,
"step": 180
},
{
"epoch": 0.3170828378913991,
"grad_norm": 1.0706809759140015,
"learning_rate": 0.0002781951219512195,
"loss": 1.0286,
"step": 200
},
{
"epoch": 0.34879112168053905,
"grad_norm": 0.8156995177268982,
"learning_rate": 0.00027526829268292684,
"loss": 1.0293,
"step": 220
},
{
"epoch": 0.380499405469679,
"grad_norm": 0.8572260737419128,
"learning_rate": 0.00027234146341463414,
"loss": 1.0051,
"step": 240
},
{
"epoch": 0.41220768925881884,
"grad_norm": 0.9287059307098389,
"learning_rate": 0.00026941463414634144,
"loss": 1.0152,
"step": 260
},
{
"epoch": 0.44391597304795877,
"grad_norm": 0.8125821352005005,
"learning_rate": 0.00026648780487804874,
"loss": 1.0098,
"step": 280
},
{
"epoch": 0.4756242568370987,
"grad_norm": 0.8053847551345825,
"learning_rate": 0.0002635609756097561,
"loss": 1.0029,
"step": 300
},
{
"epoch": 0.5073325406262386,
"grad_norm": 0.8253501653671265,
"learning_rate": 0.0002606341463414634,
"loss": 0.9859,
"step": 320
},
{
"epoch": 0.5390408244153785,
"grad_norm": 0.8003745675086975,
"learning_rate": 0.0002577073170731707,
"loss": 0.9773,
"step": 340
},
{
"epoch": 0.5707491082045184,
"grad_norm": 0.7730509042739868,
"learning_rate": 0.000254780487804878,
"loss": 0.982,
"step": 360
},
{
"epoch": 0.6024573919936583,
"grad_norm": 0.7878270149230957,
"learning_rate": 0.00025185365853658536,
"loss": 0.987,
"step": 380
},
{
"epoch": 0.6341656757827983,
"grad_norm": 0.7862978577613831,
"learning_rate": 0.00024892682926829266,
"loss": 0.9802,
"step": 400
},
{
"epoch": 0.6658739595719382,
"grad_norm": 2.1581475734710693,
"learning_rate": 0.00024599999999999996,
"loss": 0.9663,
"step": 420
},
{
"epoch": 0.6975822433610781,
"grad_norm": 0.7952772974967957,
"learning_rate": 0.00024307317073170732,
"loss": 0.9788,
"step": 440
},
{
"epoch": 0.729290527150218,
"grad_norm": 0.697483241558075,
"learning_rate": 0.00024014634146341462,
"loss": 0.9568,
"step": 460
},
{
"epoch": 0.760998810939358,
"grad_norm": 0.6820840835571289,
"learning_rate": 0.00023721951219512195,
"loss": 0.9609,
"step": 480
},
{
"epoch": 0.7927070947284979,
"grad_norm": 0.7179127931594849,
"learning_rate": 0.00023429268292682925,
"loss": 0.9726,
"step": 500
},
{
"epoch": 0.8244153785176377,
"grad_norm": 0.6777030229568481,
"learning_rate": 0.00023136585365853658,
"loss": 0.9601,
"step": 520
},
{
"epoch": 0.8561236623067776,
"grad_norm": 0.7026005387306213,
"learning_rate": 0.00022843902439024388,
"loss": 0.9522,
"step": 540
},
{
"epoch": 0.8878319460959175,
"grad_norm": 0.9660900831222534,
"learning_rate": 0.0002255121951219512,
"loss": 0.9525,
"step": 560
},
{
"epoch": 0.9195402298850575,
"grad_norm": 0.650274932384491,
"learning_rate": 0.0002225853658536585,
"loss": 0.9511,
"step": 580
},
{
"epoch": 0.9512485136741974,
"grad_norm": 0.6481014490127563,
"learning_rate": 0.00021965853658536584,
"loss": 0.9527,
"step": 600
},
{
"epoch": 0.9829567974633373,
"grad_norm": 0.6192963719367981,
"learning_rate": 0.00021673170731707314,
"loss": 0.9525,
"step": 620
},
{
"epoch": 1.014268727705113,
"grad_norm": 0.6381222009658813,
"learning_rate": 0.00021380487804878047,
"loss": 0.9434,
"step": 640
},
{
"epoch": 1.0459770114942528,
"grad_norm": 0.6611652970314026,
"learning_rate": 0.00021087804878048777,
"loss": 0.9355,
"step": 660
},
{
"epoch": 1.0776852952833929,
"grad_norm": 0.6494982242584229,
"learning_rate": 0.0002079512195121951,
"loss": 0.9411,
"step": 680
},
{
"epoch": 1.1093935790725327,
"grad_norm": 2.5105600357055664,
"learning_rate": 0.00020502439024390243,
"loss": 0.9471,
"step": 700
},
{
"epoch": 1.1411018628616727,
"grad_norm": 0.6972084045410156,
"learning_rate": 0.00020209756097560976,
"loss": 0.9553,
"step": 720
},
{
"epoch": 1.1728101466508125,
"grad_norm": 0.6206223368644714,
"learning_rate": 0.00019917073170731706,
"loss": 0.9335,
"step": 740
},
{
"epoch": 1.2045184304399523,
"grad_norm": 0.7724215388298035,
"learning_rate": 0.00019624390243902439,
"loss": 0.9308,
"step": 760
},
{
"epoch": 1.2362267142290924,
"grad_norm": 0.5925254821777344,
"learning_rate": 0.0001933170731707317,
"loss": 0.9325,
"step": 780
},
{
"epoch": 1.2679349980182324,
"grad_norm": 0.522939920425415,
"learning_rate": 0.00019039024390243902,
"loss": 0.9316,
"step": 800
},
{
"epoch": 1.2996432818073722,
"grad_norm": 0.5890282392501831,
"learning_rate": 0.00018746341463414632,
"loss": 0.9289,
"step": 820
},
{
"epoch": 1.331351565596512,
"grad_norm": 0.7248061299324036,
"learning_rate": 0.00018453658536585365,
"loss": 0.9194,
"step": 840
},
{
"epoch": 1.363059849385652,
"grad_norm": 0.6553404927253723,
"learning_rate": 0.00018160975609756095,
"loss": 0.9312,
"step": 860
},
{
"epoch": 1.3947681331747919,
"grad_norm": 0.5486903786659241,
"learning_rate": 0.00017868292682926828,
"loss": 0.9217,
"step": 880
},
{
"epoch": 1.426476416963932,
"grad_norm": 0.6329432725906372,
"learning_rate": 0.00017575609756097558,
"loss": 0.9181,
"step": 900
},
{
"epoch": 1.4581847007530717,
"grad_norm": 0.5233189463615417,
"learning_rate": 0.0001728292682926829,
"loss": 0.942,
"step": 920
},
{
"epoch": 1.4898929845422115,
"grad_norm": 0.6567553877830505,
"learning_rate": 0.0001699024390243902,
"loss": 0.9111,
"step": 940
},
{
"epoch": 1.5216012683313516,
"grad_norm": 0.5668836236000061,
"learning_rate": 0.00016697560975609756,
"loss": 0.9253,
"step": 960
},
{
"epoch": 1.5533095521204916,
"grad_norm": 0.5501447916030884,
"learning_rate": 0.00016404878048780486,
"loss": 0.9208,
"step": 980
},
{
"epoch": 1.5850178359096314,
"grad_norm": 0.543779194355011,
"learning_rate": 0.0001611219512195122,
"loss": 0.9176,
"step": 1000
},
{
"epoch": 1.6167261196987712,
"grad_norm": 0.6107056140899658,
"learning_rate": 0.0001581951219512195,
"loss": 0.9147,
"step": 1020
},
{
"epoch": 1.6484344034879113,
"grad_norm": 0.4941338002681732,
"learning_rate": 0.00015526829268292682,
"loss": 0.9166,
"step": 1040
},
{
"epoch": 1.6801426872770513,
"grad_norm": 0.5821026563644409,
"learning_rate": 0.00015234146341463412,
"loss": 0.9139,
"step": 1060
},
{
"epoch": 1.711850971066191,
"grad_norm": 0.5568034052848816,
"learning_rate": 0.00014941463414634145,
"loss": 0.9177,
"step": 1080
},
{
"epoch": 1.743559254855331,
"grad_norm": 0.5890582203865051,
"learning_rate": 0.00014648780487804875,
"loss": 0.9106,
"step": 1100
},
{
"epoch": 1.7752675386444707,
"grad_norm": 0.6119087338447571,
"learning_rate": 0.00014356097560975608,
"loss": 0.9051,
"step": 1120
},
{
"epoch": 1.8069758224336108,
"grad_norm": 0.562029242515564,
"learning_rate": 0.00014063414634146338,
"loss": 0.9212,
"step": 1140
},
{
"epoch": 1.8386841062227508,
"grad_norm": 0.5576140284538269,
"learning_rate": 0.0001377073170731707,
"loss": 0.9078,
"step": 1160
},
{
"epoch": 1.8703923900118906,
"grad_norm": 0.518469512462616,
"learning_rate": 0.00013478048780487804,
"loss": 0.9127,
"step": 1180
},
{
"epoch": 1.9021006738010304,
"grad_norm": 0.4536910951137543,
"learning_rate": 0.00013185365853658534,
"loss": 0.9018,
"step": 1200
},
{
"epoch": 1.9338089575901705,
"grad_norm": 0.5300338268280029,
"learning_rate": 0.00012892682926829267,
"loss": 0.9142,
"step": 1220
},
{
"epoch": 1.9655172413793105,
"grad_norm": 0.5239934325218201,
"learning_rate": 0.00012599999999999997,
"loss": 0.9068,
"step": 1240
},
{
"epoch": 1.9972255251684503,
"grad_norm": 0.4621521830558777,
"learning_rate": 0.0001230731707317073,
"loss": 0.9011,
"step": 1260
},
{
"epoch": 2.028537455410226,
"grad_norm": 0.5628905296325684,
"learning_rate": 0.00012014634146341463,
"loss": 0.9064,
"step": 1280
},
{
"epoch": 2.060245739199366,
"grad_norm": 0.5678831934928894,
"learning_rate": 0.00011721951219512194,
"loss": 0.9192,
"step": 1300
},
{
"epoch": 2.0919540229885056,
"grad_norm": 0.5180283188819885,
"learning_rate": 0.00011429268292682926,
"loss": 0.8935,
"step": 1320
},
{
"epoch": 2.1236623067776454,
"grad_norm": 0.5497546195983887,
"learning_rate": 0.00011136585365853657,
"loss": 0.8939,
"step": 1340
},
{
"epoch": 2.1553705905667857,
"grad_norm": 0.5264196991920471,
"learning_rate": 0.00010843902439024389,
"loss": 0.8968,
"step": 1360
},
{
"epoch": 2.1870788743559255,
"grad_norm": 0.48166030645370483,
"learning_rate": 0.0001055121951219512,
"loss": 0.8891,
"step": 1380
},
{
"epoch": 2.2187871581450653,
"grad_norm": 0.5162549018859863,
"learning_rate": 0.00010258536585365853,
"loss": 0.9003,
"step": 1400
},
{
"epoch": 2.250495441934205,
"grad_norm": 0.5740045309066772,
"learning_rate": 9.965853658536585e-05,
"loss": 0.8955,
"step": 1420
},
{
"epoch": 2.2822037257233454,
"grad_norm": 0.507210910320282,
"learning_rate": 9.673170731707316e-05,
"loss": 0.8845,
"step": 1440
},
{
"epoch": 2.313912009512485,
"grad_norm": 0.5239551663398743,
"learning_rate": 9.380487804878048e-05,
"loss": 0.8971,
"step": 1460
},
{
"epoch": 2.345620293301625,
"grad_norm": 0.46981072425842285,
"learning_rate": 9.08780487804878e-05,
"loss": 0.8897,
"step": 1480
},
{
"epoch": 2.377328577090765,
"grad_norm": 0.5130921602249146,
"learning_rate": 8.795121951219511e-05,
"loss": 0.8939,
"step": 1500
},
{
"epoch": 2.4090368608799047,
"grad_norm": 0.5038473606109619,
"learning_rate": 8.502439024390242e-05,
"loss": 0.9096,
"step": 1520
},
{
"epoch": 2.440745144669045,
"grad_norm": 0.4756928086280823,
"learning_rate": 8.209756097560975e-05,
"loss": 0.8815,
"step": 1540
},
{
"epoch": 2.4724534284581847,
"grad_norm": 0.5105359554290771,
"learning_rate": 7.917073170731707e-05,
"loss": 0.8857,
"step": 1560
},
{
"epoch": 2.5041617122473245,
"grad_norm": 0.5070236921310425,
"learning_rate": 7.624390243902438e-05,
"loss": 0.8935,
"step": 1580
},
{
"epoch": 2.535869996036465,
"grad_norm": 0.5580913424491882,
"learning_rate": 7.33170731707317e-05,
"loss": 0.8894,
"step": 1600
},
{
"epoch": 2.5675782798256046,
"grad_norm": 0.5412284731864929,
"learning_rate": 7.039024390243901e-05,
"loss": 0.8838,
"step": 1620
},
{
"epoch": 2.5992865636147444,
"grad_norm": 0.5017954111099243,
"learning_rate": 6.746341463414634e-05,
"loss": 0.8764,
"step": 1640
},
{
"epoch": 2.6309948474038842,
"grad_norm": 0.46863794326782227,
"learning_rate": 6.453658536585366e-05,
"loss": 0.8731,
"step": 1660
},
{
"epoch": 2.662703131193024,
"grad_norm": 0.4468729496002197,
"learning_rate": 6.160975609756097e-05,
"loss": 0.8901,
"step": 1680
},
{
"epoch": 2.694411414982164,
"grad_norm": 0.5184731483459473,
"learning_rate": 5.868292682926829e-05,
"loss": 0.8921,
"step": 1700
},
{
"epoch": 2.726119698771304,
"grad_norm": 0.44308602809906006,
"learning_rate": 5.575609756097561e-05,
"loss": 0.8772,
"step": 1720
},
{
"epoch": 2.757827982560444,
"grad_norm": 0.47546738386154175,
"learning_rate": 5.2829268292682916e-05,
"loss": 0.8861,
"step": 1740
},
{
"epoch": 2.7895362663495837,
"grad_norm": 0.4518582224845886,
"learning_rate": 4.9902439024390244e-05,
"loss": 0.8794,
"step": 1760
},
{
"epoch": 2.821244550138724,
"grad_norm": 0.49235859513282776,
"learning_rate": 4.697560975609756e-05,
"loss": 0.8846,
"step": 1780
},
{
"epoch": 2.852952833927864,
"grad_norm": 0.45942848920822144,
"learning_rate": 4.4048780487804874e-05,
"loss": 0.8812,
"step": 1800
},
{
"epoch": 2.8846611177170036,
"grad_norm": 0.4411655068397522,
"learning_rate": 4.1121951219512196e-05,
"loss": 0.8748,
"step": 1820
},
{
"epoch": 2.9163694015061434,
"grad_norm": 0.46850547194480896,
"learning_rate": 3.819512195121951e-05,
"loss": 0.8783,
"step": 1840
},
{
"epoch": 2.9480776852952832,
"grad_norm": 0.42767903208732605,
"learning_rate": 3.5268292682926826e-05,
"loss": 0.8776,
"step": 1860
},
{
"epoch": 2.979785969084423,
"grad_norm": 0.47117599844932556,
"learning_rate": 3.234146341463414e-05,
"loss": 0.9012,
"step": 1880
},
{
"epoch": 3.0110978993261988,
"grad_norm": 0.46736887097358704,
"learning_rate": 2.9414634146341463e-05,
"loss": 0.8801,
"step": 1900
},
{
"epoch": 3.042806183115339,
"grad_norm": 0.4541435241699219,
"learning_rate": 2.6487804878048778e-05,
"loss": 0.8877,
"step": 1920
},
{
"epoch": 3.074514466904479,
"grad_norm": 0.49423545598983765,
"learning_rate": 2.3560975609756097e-05,
"loss": 0.8761,
"step": 1940
},
{
"epoch": 3.1062227506936186,
"grad_norm": 0.432778000831604,
"learning_rate": 2.0634146341463415e-05,
"loss": 0.8883,
"step": 1960
},
{
"epoch": 3.1379310344827585,
"grad_norm": 0.46009406447410583,
"learning_rate": 1.770731707317073e-05,
"loss": 0.8798,
"step": 1980
},
{
"epoch": 3.1696393182718987,
"grad_norm": 0.45351386070251465,
"learning_rate": 1.4780487804878048e-05,
"loss": 0.8706,
"step": 2000
},
{
"epoch": 3.2013476020610385,
"grad_norm": 0.46693041920661926,
"learning_rate": 1.1853658536585365e-05,
"loss": 0.8805,
"step": 2020
},
{
"epoch": 3.2330558858501783,
"grad_norm": 0.4250204563140869,
"learning_rate": 8.926829268292682e-06,
"loss": 0.8667,
"step": 2040
},
{
"epoch": 3.264764169639318,
"grad_norm": 0.43274539709091187,
"learning_rate": 5.999999999999999e-06,
"loss": 0.8879,
"step": 2060
},
{
"epoch": 3.296472453428458,
"grad_norm": 0.3914950489997864,
"learning_rate": 3.073170731707317e-06,
"loss": 0.8669,
"step": 2080
},
{
"epoch": 3.328180737217598,
"grad_norm": 0.41752079129219055,
"learning_rate": 1.4634146341463413e-07,
"loss": 0.8665,
"step": 2100
}
],
"logging_steps": 20,
"max_steps": 2100,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0992069136896492e+18,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}