{
"best_global_step": 644,
"best_metric": 0.17823560535907745,
"best_model_checkpoint": "D:\\Major Project\\SpamX\\ml\\xlmr\\xlmr_v1\\checkpoint-644",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 644,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03108003108003108,
"grad_norm": 29.43787956237793,
"learning_rate": 4.945652173913044e-06,
"loss": 1.2356471061706542,
"step": 10
},
{
"epoch": 0.06216006216006216,
"grad_norm": 36.178550720214844,
"learning_rate": 4.875776397515528e-06,
"loss": 1.1817050933837892,
"step": 20
},
{
"epoch": 0.09324009324009325,
"grad_norm": 29.694974899291992,
"learning_rate": 4.798136645962733e-06,
"loss": 1.0811898231506347,
"step": 30
},
{
"epoch": 0.12432012432012432,
"grad_norm": 8.485957145690918,
"learning_rate": 4.7204968944099384e-06,
"loss": 0.876597785949707,
"step": 40
},
{
"epoch": 0.1554001554001554,
"grad_norm": 21.316192626953125,
"learning_rate": 4.642857142857144e-06,
"loss": 0.9505236625671387,
"step": 50
},
{
"epoch": 0.1864801864801865,
"grad_norm": 34.35721969604492,
"learning_rate": 4.565217391304348e-06,
"loss": 1.1337352752685548,
"step": 60
},
{
"epoch": 0.21756021756021757,
"grad_norm": 48.13325881958008,
"learning_rate": 4.487577639751553e-06,
"loss": 1.063378143310547,
"step": 70
},
{
"epoch": 0.24864024864024864,
"grad_norm": 38.45956802368164,
"learning_rate": 4.4177018633540375e-06,
"loss": 1.1818448066711427,
"step": 80
},
{
"epoch": 0.27972027972027974,
"grad_norm": 18.295886993408203,
"learning_rate": 4.340062111801243e-06,
"loss": 0.91037015914917,
"step": 90
},
{
"epoch": 0.3108003108003108,
"grad_norm": 13.219426155090332,
"learning_rate": 4.262422360248447e-06,
"loss": 0.9483179092407227,
"step": 100
},
{
"epoch": 0.3418803418803419,
"grad_norm": 23.118179321289062,
"learning_rate": 4.184782608695653e-06,
"loss": 0.992742919921875,
"step": 110
},
{
"epoch": 0.372960372960373,
"grad_norm": 13.910191535949707,
"learning_rate": 4.107142857142857e-06,
"loss": 0.8459652900695801,
"step": 120
},
{
"epoch": 0.40404040404040403,
"grad_norm": 124.82660675048828,
"learning_rate": 4.0295031055900625e-06,
"loss": 0.9497438430786133,
"step": 130
},
{
"epoch": 0.43512043512043513,
"grad_norm": 46.57713317871094,
"learning_rate": 3.951863354037268e-06,
"loss": 1.1179959297180175,
"step": 140
},
{
"epoch": 0.4662004662004662,
"grad_norm": 17.421274185180664,
"learning_rate": 3.874223602484472e-06,
"loss": 0.7715085983276367,
"step": 150
},
{
"epoch": 0.4972804972804973,
"grad_norm": 16.492841720581055,
"learning_rate": 3.7965838509316772e-06,
"loss": 0.9310503959655761,
"step": 160
},
{
"epoch": 0.5283605283605284,
"grad_norm": 38.246829986572266,
"learning_rate": 3.718944099378882e-06,
"loss": 0.89602632522583,
"step": 170
},
{
"epoch": 0.5594405594405595,
"grad_norm": 28.985132217407227,
"learning_rate": 3.6413043478260875e-06,
"loss": 0.8126945495605469,
"step": 180
},
{
"epoch": 0.5905205905205905,
"grad_norm": 14.60274600982666,
"learning_rate": 3.5636645962732924e-06,
"loss": 0.7371460914611816,
"step": 190
},
{
"epoch": 0.6216006216006216,
"grad_norm": 30.11294937133789,
"learning_rate": 3.486024844720497e-06,
"loss": 1.0865836143493652,
"step": 200
},
{
"epoch": 0.6526806526806527,
"grad_norm": 17.661558151245117,
"learning_rate": 3.4083850931677022e-06,
"loss": 1.0465456008911134,
"step": 210
},
{
"epoch": 0.6837606837606838,
"grad_norm": 34.937503814697266,
"learning_rate": 3.3385093167701865e-06,
"loss": 0.8159684181213379,
"step": 220
},
{
"epoch": 0.7148407148407149,
"grad_norm": 58.97747802734375,
"learning_rate": 3.2608695652173914e-06,
"loss": 0.820067310333252,
"step": 230
},
{
"epoch": 0.745920745920746,
"grad_norm": 41.07950973510742,
"learning_rate": 3.1832298136645968e-06,
"loss": 1.0034560203552245,
"step": 240
},
{
"epoch": 0.777000777000777,
"grad_norm": 31.760068893432617,
"learning_rate": 3.1055900621118013e-06,
"loss": 0.7825074672698975,
"step": 250
},
{
"epoch": 0.8080808080808081,
"grad_norm": 29.330337524414062,
"learning_rate": 3.027950310559006e-06,
"loss": 0.8985923767089844,
"step": 260
},
{
"epoch": 0.8391608391608392,
"grad_norm": 28.913965225219727,
"learning_rate": 2.9503105590062115e-06,
"loss": 0.8792219161987305,
"step": 270
},
{
"epoch": 0.8702408702408703,
"grad_norm": 17.811166763305664,
"learning_rate": 2.8726708074534164e-06,
"loss": 0.8635202407836914,
"step": 280
},
{
"epoch": 0.9013209013209014,
"grad_norm": 19.338523864746094,
"learning_rate": 2.795031055900621e-06,
"loss": 0.890287208557129,
"step": 290
},
{
"epoch": 0.9324009324009324,
"grad_norm": 18.355663299560547,
"learning_rate": 2.7173913043478263e-06,
"loss": 0.9401198387145996,
"step": 300
},
{
"epoch": 0.9634809634809635,
"grad_norm": 20.48928451538086,
"learning_rate": 2.639751552795031e-06,
"loss": 0.831356430053711,
"step": 310
},
{
"epoch": 0.9945609945609946,
"grad_norm": 59.57387161254883,
"learning_rate": 2.5621118012422365e-06,
"loss": 0.7133886814117432,
"step": 320
},
{
"epoch": 1.0,
"eval_loss": 0.18244007229804993,
"eval_runtime": 4.4097,
"eval_samples_per_second": 259.426,
"eval_steps_per_second": 32.428,
"step": 322
},
{
"epoch": 1.0248640248640248,
"grad_norm": 39.35393524169922,
"learning_rate": 2.484472049689441e-06,
"loss": 0.7849094867706299,
"step": 330
},
{
"epoch": 1.055944055944056,
"grad_norm": 39.5618896484375,
"learning_rate": 2.4068322981366464e-06,
"loss": 0.5631073474884033,
"step": 340
},
{
"epoch": 1.087024087024087,
"grad_norm": 27.039840698242188,
"learning_rate": 2.3291925465838513e-06,
"loss": 0.8551163673400879,
"step": 350
},
{
"epoch": 1.118104118104118,
"grad_norm": 31.069948196411133,
"learning_rate": 2.251552795031056e-06,
"loss": 0.940975284576416,
"step": 360
},
{
"epoch": 1.1491841491841492,
"grad_norm": 34.83683776855469,
"learning_rate": 2.173913043478261e-06,
"loss": 0.8859931945800781,
"step": 370
},
{
"epoch": 1.1802641802641802,
"grad_norm": 43.3201789855957,
"learning_rate": 2.096273291925466e-06,
"loss": 0.6784295558929443,
"step": 380
},
{
"epoch": 1.2113442113442114,
"grad_norm": 26.327808380126953,
"learning_rate": 2.018633540372671e-06,
"loss": 0.7212324619293213,
"step": 390
},
{
"epoch": 1.2424242424242424,
"grad_norm": 22.891172409057617,
"learning_rate": 1.940993788819876e-06,
"loss": 0.8333398818969726,
"step": 400
},
{
"epoch": 1.2735042735042734,
"grad_norm": 56.2291374206543,
"learning_rate": 1.8633540372670808e-06,
"loss": 1.0169650077819825,
"step": 410
},
{
"epoch": 1.3045843045843046,
"grad_norm": 38.48230743408203,
"learning_rate": 1.7857142857142859e-06,
"loss": 0.7382871627807617,
"step": 420
},
{
"epoch": 1.3356643356643356,
"grad_norm": 38.05250549316406,
"learning_rate": 1.7080745341614908e-06,
"loss": 0.8073366165161133,
"step": 430
},
{
"epoch": 1.3667443667443666,
"grad_norm": 30.15036392211914,
"learning_rate": 1.6304347826086957e-06,
"loss": 0.8878802299499512,
"step": 440
},
{
"epoch": 1.3978243978243978,
"grad_norm": 35.91055679321289,
"learning_rate": 1.5527950310559006e-06,
"loss": 0.5926938533782959,
"step": 450
},
{
"epoch": 1.428904428904429,
"grad_norm": 18.346158981323242,
"learning_rate": 1.4751552795031058e-06,
"loss": 0.590770959854126,
"step": 460
},
{
"epoch": 1.45998445998446,
"grad_norm": 31.250991821289062,
"learning_rate": 1.3975155279503105e-06,
"loss": 0.7174652099609375,
"step": 470
},
{
"epoch": 1.491064491064491,
"grad_norm": 35.001522064208984,
"learning_rate": 1.3198757763975156e-06,
"loss": 0.8534024238586426,
"step": 480
},
{
"epoch": 1.5221445221445222,
"grad_norm": 32.24079513549805,
"learning_rate": 1.2422360248447205e-06,
"loss": 0.8570188522338867,
"step": 490
},
{
"epoch": 1.5532245532245532,
"grad_norm": 19.8613224029541,
"learning_rate": 1.1645962732919256e-06,
"loss": 0.6434041023254394,
"step": 500
},
{
"epoch": 1.5843045843045842,
"grad_norm": 46.252769470214844,
"learning_rate": 1.0869565217391306e-06,
"loss": 0.5186689376831055,
"step": 510
},
{
"epoch": 1.6153846153846154,
"grad_norm": 20.27726936340332,
"learning_rate": 1.0093167701863355e-06,
"loss": 1.0002134323120118,
"step": 520
},
{
"epoch": 1.6464646464646466,
"grad_norm": 22.71544075012207,
"learning_rate": 9.316770186335404e-07,
"loss": 0.6581085681915283,
"step": 530
},
{
"epoch": 1.6775446775446774,
"grad_norm": 34.64213180541992,
"learning_rate": 8.540372670807454e-07,
"loss": 1.042281150817871,
"step": 540
},
{
"epoch": 1.7086247086247086,
"grad_norm": 17.567914962768555,
"learning_rate": 7.763975155279503e-07,
"loss": 0.7002626419067383,
"step": 550
},
{
"epoch": 1.7397047397047398,
"grad_norm": 31.270509719848633,
"learning_rate": 6.987577639751552e-07,
"loss": 0.7479125022888183,
"step": 560
},
{
"epoch": 1.7707847707847708,
"grad_norm": 33.94447708129883,
"learning_rate": 6.211180124223603e-07,
"loss": 0.6186141014099121,
"step": 570
},
{
"epoch": 1.8018648018648018,
"grad_norm": 18.601036071777344,
"learning_rate": 5.434782608695653e-07,
"loss": 0.6978522777557373,
"step": 580
},
{
"epoch": 1.832944832944833,
"grad_norm": 29.79970359802246,
"learning_rate": 4.658385093167702e-07,
"loss": 0.838288402557373,
"step": 590
},
{
"epoch": 1.864024864024864,
"grad_norm": 15.155320167541504,
"learning_rate": 3.8819875776397516e-07,
"loss": 0.4178286552429199,
"step": 600
},
{
"epoch": 1.895104895104895,
"grad_norm": 32.92871856689453,
"learning_rate": 3.1055900621118013e-07,
"loss": 0.8897994041442872,
"step": 610
},
{
"epoch": 1.9261849261849262,
"grad_norm": 6.8692827224731445,
"learning_rate": 2.329192546583851e-07,
"loss": 0.6488828659057617,
"step": 620
},
{
"epoch": 1.9572649572649574,
"grad_norm": 54.483192443847656,
"learning_rate": 1.5527950310559006e-07,
"loss": 1.0308164596557616,
"step": 630
},
{
"epoch": 1.9883449883449882,
"grad_norm": 30.52878761291504,
"learning_rate": 7.763975155279503e-08,
"loss": 0.7207555294036865,
"step": 640
},
{
"epoch": 2.0,
"eval_loss": 0.17823560535907745,
"eval_runtime": 5.6175,
"eval_samples_per_second": 203.648,
"eval_steps_per_second": 25.456,
"step": 644
}
],
"logging_steps": 10,
"max_steps": 644,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 777428418602520.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}