{
  "best_global_step": 644,
  "best_metric": 0.17823560535907745,
  "best_model_checkpoint": "D:\\Major Project\\SpamX\\ml\\xlmr\\xlmr_v1\\checkpoint-644",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 644,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03108003108003108,
      "grad_norm": 29.43787956237793,
      "learning_rate": 4.945652173913044e-06,
      "loss": 1.2356471061706542,
      "step": 10
    },
    {
      "epoch": 0.06216006216006216,
      "grad_norm": 36.178550720214844,
      "learning_rate": 4.875776397515528e-06,
      "loss": 1.1817050933837892,
      "step": 20
    },
    {
      "epoch": 0.09324009324009325,
      "grad_norm": 29.694974899291992,
      "learning_rate": 4.798136645962733e-06,
      "loss": 1.0811898231506347,
      "step": 30
    },
    {
      "epoch": 0.12432012432012432,
      "grad_norm": 8.485957145690918,
      "learning_rate": 4.7204968944099384e-06,
      "loss": 0.876597785949707,
      "step": 40
    },
    {
      "epoch": 0.1554001554001554,
      "grad_norm": 21.316192626953125,
      "learning_rate": 4.642857142857144e-06,
      "loss": 0.9505236625671387,
      "step": 50
    },
    {
      "epoch": 0.1864801864801865,
      "grad_norm": 34.35721969604492,
      "learning_rate": 4.565217391304348e-06,
      "loss": 1.1337352752685548,
      "step": 60
    },
    {
      "epoch": 0.21756021756021757,
      "grad_norm": 48.13325881958008,
      "learning_rate": 4.487577639751553e-06,
      "loss": 1.063378143310547,
      "step": 70
    },
    {
      "epoch": 0.24864024864024864,
      "grad_norm": 38.45956802368164,
      "learning_rate": 4.4177018633540375e-06,
      "loss": 1.1818448066711427,
      "step": 80
    },
    {
      "epoch": 0.27972027972027974,
      "grad_norm": 18.295886993408203,
      "learning_rate": 4.340062111801243e-06,
      "loss": 0.91037015914917,
      "step": 90
    },
    {
      "epoch": 0.3108003108003108,
      "grad_norm": 13.219426155090332,
      "learning_rate": 4.262422360248447e-06,
      "loss": 0.9483179092407227,
      "step": 100
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 23.118179321289062,
      "learning_rate": 4.184782608695653e-06,
      "loss": 0.992742919921875,
      "step": 110
    },
    {
      "epoch": 0.372960372960373,
      "grad_norm": 13.910191535949707,
      "learning_rate": 4.107142857142857e-06,
      "loss": 0.8459652900695801,
      "step": 120
    },
    {
      "epoch": 0.40404040404040403,
      "grad_norm": 124.82660675048828,
      "learning_rate": 4.0295031055900625e-06,
      "loss": 0.9497438430786133,
      "step": 130
    },
    {
      "epoch": 0.43512043512043513,
      "grad_norm": 46.57713317871094,
      "learning_rate": 3.951863354037268e-06,
      "loss": 1.1179959297180175,
      "step": 140
    },
    {
      "epoch": 0.4662004662004662,
      "grad_norm": 17.421274185180664,
      "learning_rate": 3.874223602484472e-06,
      "loss": 0.7715085983276367,
      "step": 150
    },
    {
      "epoch": 0.4972804972804973,
      "grad_norm": 16.492841720581055,
      "learning_rate": 3.7965838509316772e-06,
      "loss": 0.9310503959655761,
      "step": 160
    },
    {
      "epoch": 0.5283605283605284,
      "grad_norm": 38.246829986572266,
      "learning_rate": 3.718944099378882e-06,
      "loss": 0.89602632522583,
      "step": 170
    },
    {
      "epoch": 0.5594405594405595,
      "grad_norm": 28.985132217407227,
      "learning_rate": 3.6413043478260875e-06,
      "loss": 0.8126945495605469,
      "step": 180
    },
    {
      "epoch": 0.5905205905205905,
      "grad_norm": 14.60274600982666,
      "learning_rate": 3.5636645962732924e-06,
      "loss": 0.7371460914611816,
      "step": 190
    },
    {
      "epoch": 0.6216006216006216,
      "grad_norm": 30.11294937133789,
      "learning_rate": 3.486024844720497e-06,
      "loss": 1.0865836143493652,
      "step": 200
    },
    {
      "epoch": 0.6526806526806527,
      "grad_norm": 17.661558151245117,
      "learning_rate": 3.4083850931677022e-06,
      "loss": 1.0465456008911134,
      "step": 210
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 34.937503814697266,
      "learning_rate": 3.3385093167701865e-06,
      "loss": 0.8159684181213379,
      "step": 220
    },
    {
      "epoch": 0.7148407148407149,
      "grad_norm": 58.97747802734375,
      "learning_rate": 3.2608695652173914e-06,
      "loss": 0.820067310333252,
      "step": 230
    },
    {
      "epoch": 0.745920745920746,
      "grad_norm": 41.07950973510742,
      "learning_rate": 3.1832298136645968e-06,
      "loss": 1.0034560203552245,
      "step": 240
    },
    {
      "epoch": 0.777000777000777,
      "grad_norm": 31.760068893432617,
      "learning_rate": 3.1055900621118013e-06,
      "loss": 0.7825074672698975,
      "step": 250
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 29.330337524414062,
      "learning_rate": 3.027950310559006e-06,
      "loss": 0.8985923767089844,
      "step": 260
    },
    {
      "epoch": 0.8391608391608392,
      "grad_norm": 28.913965225219727,
      "learning_rate": 2.9503105590062115e-06,
      "loss": 0.8792219161987305,
      "step": 270
    },
    {
      "epoch": 0.8702408702408703,
      "grad_norm": 17.811166763305664,
      "learning_rate": 2.8726708074534164e-06,
      "loss": 0.8635202407836914,
      "step": 280
    },
    {
      "epoch": 0.9013209013209014,
      "grad_norm": 19.338523864746094,
      "learning_rate": 2.795031055900621e-06,
      "loss": 0.890287208557129,
      "step": 290
    },
    {
      "epoch": 0.9324009324009324,
      "grad_norm": 18.355663299560547,
      "learning_rate": 2.7173913043478263e-06,
      "loss": 0.9401198387145996,
      "step": 300
    },
    {
      "epoch": 0.9634809634809635,
      "grad_norm": 20.48928451538086,
      "learning_rate": 2.639751552795031e-06,
      "loss": 0.831356430053711,
      "step": 310
    },
    {
      "epoch": 0.9945609945609946,
      "grad_norm": 59.57387161254883,
      "learning_rate": 2.5621118012422365e-06,
      "loss": 0.7133886814117432,
      "step": 320
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.18244007229804993,
      "eval_runtime": 4.4097,
      "eval_samples_per_second": 259.426,
      "eval_steps_per_second": 32.428,
      "step": 322
    },
    {
      "epoch": 1.0248640248640248,
      "grad_norm": 39.35393524169922,
      "learning_rate": 2.484472049689441e-06,
      "loss": 0.7849094867706299,
      "step": 330
    },
    {
      "epoch": 1.055944055944056,
      "grad_norm": 39.5618896484375,
      "learning_rate": 2.4068322981366464e-06,
      "loss": 0.5631073474884033,
      "step": 340
    },
    {
      "epoch": 1.087024087024087,
      "grad_norm": 27.039840698242188,
      "learning_rate": 2.3291925465838513e-06,
      "loss": 0.8551163673400879,
      "step": 350
    },
    {
      "epoch": 1.118104118104118,
      "grad_norm": 31.069948196411133,
      "learning_rate": 2.251552795031056e-06,
      "loss": 0.940975284576416,
      "step": 360
    },
    {
      "epoch": 1.1491841491841492,
      "grad_norm": 34.83683776855469,
      "learning_rate": 2.173913043478261e-06,
      "loss": 0.8859931945800781,
      "step": 370
    },
    {
      "epoch": 1.1802641802641802,
      "grad_norm": 43.3201789855957,
      "learning_rate": 2.096273291925466e-06,
      "loss": 0.6784295558929443,
      "step": 380
    },
    {
      "epoch": 1.2113442113442114,
      "grad_norm": 26.327808380126953,
      "learning_rate": 2.018633540372671e-06,
      "loss": 0.7212324619293213,
      "step": 390
    },
    {
      "epoch": 1.2424242424242424,
      "grad_norm": 22.891172409057617,
      "learning_rate": 1.940993788819876e-06,
      "loss": 0.8333398818969726,
      "step": 400
    },
    {
      "epoch": 1.2735042735042734,
      "grad_norm": 56.2291374206543,
      "learning_rate": 1.8633540372670808e-06,
      "loss": 1.0169650077819825,
      "step": 410
    },
    {
      "epoch": 1.3045843045843046,
      "grad_norm": 38.48230743408203,
      "learning_rate": 1.7857142857142859e-06,
      "loss": 0.7382871627807617,
      "step": 420
    },
    {
      "epoch": 1.3356643356643356,
      "grad_norm": 38.05250549316406,
      "learning_rate": 1.7080745341614908e-06,
      "loss": 0.8073366165161133,
      "step": 430
    },
    {
      "epoch": 1.3667443667443666,
      "grad_norm": 30.15036392211914,
      "learning_rate": 1.6304347826086957e-06,
      "loss": 0.8878802299499512,
      "step": 440
    },
    {
      "epoch": 1.3978243978243978,
      "grad_norm": 35.91055679321289,
      "learning_rate": 1.5527950310559006e-06,
      "loss": 0.5926938533782959,
      "step": 450
    },
    {
      "epoch": 1.428904428904429,
      "grad_norm": 18.346158981323242,
      "learning_rate": 1.4751552795031058e-06,
      "loss": 0.590770959854126,
      "step": 460
    },
    {
      "epoch": 1.45998445998446,
      "grad_norm": 31.250991821289062,
      "learning_rate": 1.3975155279503105e-06,
      "loss": 0.7174652099609375,
      "step": 470
    },
    {
      "epoch": 1.491064491064491,
      "grad_norm": 35.001522064208984,
      "learning_rate": 1.3198757763975156e-06,
      "loss": 0.8534024238586426,
      "step": 480
    },
    {
      "epoch": 1.5221445221445222,
      "grad_norm": 32.24079513549805,
      "learning_rate": 1.2422360248447205e-06,
      "loss": 0.8570188522338867,
      "step": 490
    },
    {
      "epoch": 1.5532245532245532,
      "grad_norm": 19.8613224029541,
      "learning_rate": 1.1645962732919256e-06,
      "loss": 0.6434041023254394,
      "step": 500
    },
    {
      "epoch": 1.5843045843045842,
      "grad_norm": 46.252769470214844,
      "learning_rate": 1.0869565217391306e-06,
      "loss": 0.5186689376831055,
      "step": 510
    },
    {
      "epoch": 1.6153846153846154,
      "grad_norm": 20.27726936340332,
      "learning_rate": 1.0093167701863355e-06,
      "loss": 1.0002134323120118,
      "step": 520
    },
    {
      "epoch": 1.6464646464646466,
      "grad_norm": 22.71544075012207,
      "learning_rate": 9.316770186335404e-07,
      "loss": 0.6581085681915283,
      "step": 530
    },
    {
      "epoch": 1.6775446775446774,
      "grad_norm": 34.64213180541992,
      "learning_rate": 8.540372670807454e-07,
      "loss": 1.042281150817871,
      "step": 540
    },
    {
      "epoch": 1.7086247086247086,
      "grad_norm": 17.567914962768555,
      "learning_rate": 7.763975155279503e-07,
      "loss": 0.7002626419067383,
      "step": 550
    },
    {
      "epoch": 1.7397047397047398,
      "grad_norm": 31.270509719848633,
      "learning_rate": 6.987577639751552e-07,
      "loss": 0.7479125022888183,
      "step": 560
    },
    {
      "epoch": 1.7707847707847708,
      "grad_norm": 33.94447708129883,
      "learning_rate": 6.211180124223603e-07,
      "loss": 0.6186141014099121,
      "step": 570
    },
    {
      "epoch": 1.8018648018648018,
      "grad_norm": 18.601036071777344,
      "learning_rate": 5.434782608695653e-07,
      "loss": 0.6978522777557373,
      "step": 580
    },
    {
      "epoch": 1.832944832944833,
      "grad_norm": 29.79970359802246,
      "learning_rate": 4.658385093167702e-07,
      "loss": 0.838288402557373,
      "step": 590
    },
    {
      "epoch": 1.864024864024864,
      "grad_norm": 15.155320167541504,
      "learning_rate": 3.8819875776397516e-07,
      "loss": 0.4178286552429199,
      "step": 600
    },
    {
      "epoch": 1.895104895104895,
      "grad_norm": 32.92871856689453,
      "learning_rate": 3.1055900621118013e-07,
      "loss": 0.8897994041442872,
      "step": 610
    },
    {
      "epoch": 1.9261849261849262,
      "grad_norm": 6.8692827224731445,
      "learning_rate": 2.329192546583851e-07,
      "loss": 0.6488828659057617,
      "step": 620
    },
    {
      "epoch": 1.9572649572649574,
      "grad_norm": 54.483192443847656,
      "learning_rate": 1.5527950310559006e-07,
      "loss": 1.0308164596557616,
      "step": 630
    },
    {
      "epoch": 1.9883449883449882,
      "grad_norm": 30.52878761291504,
      "learning_rate": 7.763975155279503e-08,
      "loss": 0.7207555294036865,
      "step": 640
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.17823560535907745,
      "eval_runtime": 5.6175,
      "eval_samples_per_second": 203.648,
      "eval_steps_per_second": 25.456,
      "step": 644
    }
  ],
  "logging_steps": 10,
  "max_steps": 644,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 777428418602520.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}