| { |
| "best_global_step": 2000, |
| "best_metric": 0.797186028145479, |
| "best_model_checkpoint": "./output_checkpoints/graphcodebert-rdrop/checkpoint-2000", |
| "epoch": 0.1408, |
| "eval_steps": 1000, |
| "global_step": 2200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00064, |
| "grad_norm": 1.8688431978225708, |
| "learning_rate": 3.840409643695328e-08, |
| "loss": 0.7168, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00128, |
| "grad_norm": 1.835353970527649, |
| "learning_rate": 8.10753147002347e-08, |
| "loss": 0.7179, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00192, |
| "grad_norm": 1.2541388273239136, |
| "learning_rate": 1.2374653296351612e-07, |
| "loss": 0.7177, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 1.1381633281707764, |
| "learning_rate": 1.6641775122679754e-07, |
| "loss": 0.7312, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 1.6313824653625488, |
| "learning_rate": 2.0908896949007894e-07, |
| "loss": 0.7167, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 1.092464566230774, |
| "learning_rate": 2.517601877533604e-07, |
| "loss": 0.7201, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00448, |
| "grad_norm": 1.164099097251892, |
| "learning_rate": 2.944314060166418e-07, |
| "loss": 0.7195, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 1.9283920526504517, |
| "learning_rate": 3.371026242799232e-07, |
| "loss": 0.7223, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00576, |
| "grad_norm": 1.8716129064559937, |
| "learning_rate": 3.7977384254320464e-07, |
| "loss": 0.7151, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 1.5499262809753418, |
| "learning_rate": 4.22445060806486e-07, |
| "loss": 0.7211, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00704, |
| "grad_norm": 1.375114917755127, |
| "learning_rate": 4.651162790697675e-07, |
| "loss": 0.7247, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 1.384915828704834, |
| "learning_rate": 5.077874973330489e-07, |
| "loss": 0.7156, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.00832, |
| "grad_norm": 1.6376659870147705, |
| "learning_rate": 5.504587155963304e-07, |
| "loss": 0.7196, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 1.709489345550537, |
| "learning_rate": 5.931299338596117e-07, |
| "loss": 0.7215, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 1.350512146949768, |
| "learning_rate": 6.358011521228932e-07, |
| "loss": 0.71, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 2.33050537109375, |
| "learning_rate": 6.784723703861745e-07, |
| "loss": 0.7191, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01088, |
| "grad_norm": 1.0042874813079834, |
| "learning_rate": 7.21143588649456e-07, |
| "loss": 0.72, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 1.1835744380950928, |
| "learning_rate": 7.638148069127374e-07, |
| "loss": 0.7122, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01216, |
| "grad_norm": 1.949506402015686, |
| "learning_rate": 8.064860251760189e-07, |
| "loss": 0.7091, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 1.139626383781433, |
| "learning_rate": 8.491572434393003e-07, |
| "loss": 0.7046, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01344, |
| "grad_norm": 1.7734779119491577, |
| "learning_rate": 8.918284617025817e-07, |
| "loss": 0.7135, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01408, |
| "grad_norm": 1.3442974090576172, |
| "learning_rate": 9.344996799658632e-07, |
| "loss": 0.7127, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01472, |
| "grad_norm": 1.6148335933685303, |
| "learning_rate": 9.771708982291445e-07, |
| "loss": 0.7168, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.01536, |
| "grad_norm": 0.9220213294029236, |
| "learning_rate": 1.0198421164924258e-06, |
| "loss": 0.7109, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 1.5631815195083618, |
| "learning_rate": 1.0625133347557074e-06, |
| "loss": 0.7149, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01664, |
| "grad_norm": 1.5052822828292847, |
| "learning_rate": 1.1051845530189888e-06, |
| "loss": 0.7112, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.01728, |
| "grad_norm": 1.6714903116226196, |
| "learning_rate": 1.1478557712822702e-06, |
| "loss": 0.7128, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.01792, |
| "grad_norm": 1.6667262315750122, |
| "learning_rate": 1.1905269895455517e-06, |
| "loss": 0.7091, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.01856, |
| "grad_norm": 1.6861907243728638, |
| "learning_rate": 1.233198207808833e-06, |
| "loss": 0.7078, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 1.6259212493896484, |
| "learning_rate": 1.2758694260721145e-06, |
| "loss": 0.7087, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01984, |
| "grad_norm": 1.207320213317871, |
| "learning_rate": 1.318540644335396e-06, |
| "loss": 0.7087, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.02048, |
| "grad_norm": 1.087124228477478, |
| "learning_rate": 1.3612118625986772e-06, |
| "loss": 0.7174, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.02112, |
| "grad_norm": 1.5668721199035645, |
| "learning_rate": 1.4038830808619588e-06, |
| "loss": 0.7066, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.02176, |
| "grad_norm": 1.5332859754562378, |
| "learning_rate": 1.4465542991252401e-06, |
| "loss": 0.7115, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 2.092994451522827, |
| "learning_rate": 1.4892255173885215e-06, |
| "loss": 0.7085, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02304, |
| "grad_norm": 1.266851544380188, |
| "learning_rate": 1.531896735651803e-06, |
| "loss": 0.7091, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02368, |
| "grad_norm": 1.290616512298584, |
| "learning_rate": 1.5745679539150842e-06, |
| "loss": 0.7103, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.02432, |
| "grad_norm": 1.2045000791549683, |
| "learning_rate": 1.6172391721783658e-06, |
| "loss": 0.705, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02496, |
| "grad_norm": 1.828832983970642, |
| "learning_rate": 1.6599103904416472e-06, |
| "loss": 0.7038, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 1.7793196439743042, |
| "learning_rate": 1.7025816087049288e-06, |
| "loss": 0.7085, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.02624, |
| "grad_norm": 0.8949472904205322, |
| "learning_rate": 1.7452528269682101e-06, |
| "loss": 0.7045, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.02688, |
| "grad_norm": 1.3071945905685425, |
| "learning_rate": 1.7879240452314913e-06, |
| "loss": 0.702, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.02752, |
| "grad_norm": 1.7497148513793945, |
| "learning_rate": 1.8305952634947729e-06, |
| "loss": 0.7073, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.02816, |
| "grad_norm": 1.0856297016143799, |
| "learning_rate": 1.8732664817580542e-06, |
| "loss": 0.7008, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 1.369019627571106, |
| "learning_rate": 1.915937700021336e-06, |
| "loss": 0.7059, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.02944, |
| "grad_norm": 2.0961010456085205, |
| "learning_rate": 1.958608918284617e-06, |
| "loss": 0.6952, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03008, |
| "grad_norm": 1.6076347827911377, |
| "learning_rate": 2.0012801365478988e-06, |
| "loss": 0.7074, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.03072, |
| "grad_norm": 1.3129311800003052, |
| "learning_rate": 2.04395135481118e-06, |
| "loss": 0.6994, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03136, |
| "grad_norm": 1.3621476888656616, |
| "learning_rate": 2.0866225730744615e-06, |
| "loss": 0.696, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.9786806106567383, |
| "learning_rate": 2.129293791337743e-06, |
| "loss": 0.7011, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03264, |
| "grad_norm": 1.4611176252365112, |
| "learning_rate": 2.1719650096010242e-06, |
| "loss": 0.699, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.03328, |
| "grad_norm": 1.0675945281982422, |
| "learning_rate": 2.214636227864306e-06, |
| "loss": 0.6906, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.03392, |
| "grad_norm": 1.6564017534255981, |
| "learning_rate": 2.257307446127587e-06, |
| "loss": 0.6923, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.03456, |
| "grad_norm": 1.228119134902954, |
| "learning_rate": 2.2999786643908685e-06, |
| "loss": 0.6928, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 1.9868593215942383, |
| "learning_rate": 2.34264988265415e-06, |
| "loss": 0.6912, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.03584, |
| "grad_norm": 1.2531176805496216, |
| "learning_rate": 2.3853211009174317e-06, |
| "loss": 0.6923, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.03648, |
| "grad_norm": 1.413602352142334, |
| "learning_rate": 2.427992319180713e-06, |
| "loss": 0.6993, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.03712, |
| "grad_norm": 0.9655390977859497, |
| "learning_rate": 2.470663537443994e-06, |
| "loss": 0.7017, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.03776, |
| "grad_norm": 1.962438941001892, |
| "learning_rate": 2.5133347557072756e-06, |
| "loss": 0.6933, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 1.1099931001663208, |
| "learning_rate": 2.556005973970557e-06, |
| "loss": 0.6925, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.03904, |
| "grad_norm": 1.1766624450683594, |
| "learning_rate": 2.5986771922338383e-06, |
| "loss": 0.6894, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.03968, |
| "grad_norm": 1.574353814125061, |
| "learning_rate": 2.64134841049712e-06, |
| "loss": 0.6923, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.04032, |
| "grad_norm": 1.194074273109436, |
| "learning_rate": 2.6840196287604015e-06, |
| "loss": 0.6855, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.04096, |
| "grad_norm": 1.1750593185424805, |
| "learning_rate": 2.7266908470236826e-06, |
| "loss": 0.7002, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 1.3165347576141357, |
| "learning_rate": 2.7693620652869642e-06, |
| "loss": 0.6893, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.04224, |
| "grad_norm": 1.1079384088516235, |
| "learning_rate": 2.812033283550246e-06, |
| "loss": 0.6852, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.04288, |
| "grad_norm": 1.231327772140503, |
| "learning_rate": 2.8547045018135274e-06, |
| "loss": 0.695, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.04352, |
| "grad_norm": 1.966036319732666, |
| "learning_rate": 2.897375720076808e-06, |
| "loss": 0.6833, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.04416, |
| "grad_norm": 1.0460131168365479, |
| "learning_rate": 2.9400469383400897e-06, |
| "loss": 0.6802, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 1.2445200681686401, |
| "learning_rate": 2.9827181566033713e-06, |
| "loss": 0.6883, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04544, |
| "grad_norm": 1.1759636402130127, |
| "learning_rate": 3.025389374866653e-06, |
| "loss": 0.682, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.04608, |
| "grad_norm": 1.2478561401367188, |
| "learning_rate": 3.068060593129934e-06, |
| "loss": 0.6802, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.04672, |
| "grad_norm": 0.8945108652114868, |
| "learning_rate": 3.1107318113932156e-06, |
| "loss": 0.6873, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.04736, |
| "grad_norm": 1.6810317039489746, |
| "learning_rate": 3.153403029656497e-06, |
| "loss": 0.6828, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.882283926010132, |
| "learning_rate": 3.1960742479197783e-06, |
| "loss": 0.6797, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.04864, |
| "grad_norm": 1.575766921043396, |
| "learning_rate": 3.23874546618306e-06, |
| "loss": 0.6826, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.04928, |
| "grad_norm": 1.8044737577438354, |
| "learning_rate": 3.2814166844463415e-06, |
| "loss": 0.6837, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.04992, |
| "grad_norm": 1.4755513668060303, |
| "learning_rate": 3.324087902709623e-06, |
| "loss": 0.6815, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.05056, |
| "grad_norm": 2.0735654830932617, |
| "learning_rate": 3.3667591209729038e-06, |
| "loss": 0.6773, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 0.9823655486106873, |
| "learning_rate": 3.4094303392361854e-06, |
| "loss": 0.6689, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.05184, |
| "grad_norm": 1.0731920003890991, |
| "learning_rate": 3.452101557499467e-06, |
| "loss": 0.6689, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.05248, |
| "grad_norm": 1.1081087589263916, |
| "learning_rate": 3.494772775762748e-06, |
| "loss": 0.6694, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.05312, |
| "grad_norm": 1.2130361795425415, |
| "learning_rate": 3.5374439940260297e-06, |
| "loss": 0.682, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.05376, |
| "grad_norm": 1.170345664024353, |
| "learning_rate": 3.5801152122893113e-06, |
| "loss": 0.6756, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 1.2610124349594116, |
| "learning_rate": 3.622786430552593e-06, |
| "loss": 0.67, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.05504, |
| "grad_norm": 1.0138545036315918, |
| "learning_rate": 3.665457648815874e-06, |
| "loss": 0.6807, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.05568, |
| "grad_norm": 1.0937036275863647, |
| "learning_rate": 3.7081288670791556e-06, |
| "loss": 0.6774, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.05632, |
| "grad_norm": 1.119221568107605, |
| "learning_rate": 3.750800085342437e-06, |
| "loss": 0.6728, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.05696, |
| "grad_norm": 1.9351646900177002, |
| "learning_rate": 3.793471303605718e-06, |
| "loss": 0.6664, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 1.6208665370941162, |
| "learning_rate": 3.836142521869e-06, |
| "loss": 0.6653, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.05824, |
| "grad_norm": 1.3460793495178223, |
| "learning_rate": 3.8788137401322815e-06, |
| "loss": 0.6651, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.05888, |
| "grad_norm": 1.8987629413604736, |
| "learning_rate": 3.921484958395563e-06, |
| "loss": 0.6694, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.05952, |
| "grad_norm": 1.3637055158615112, |
| "learning_rate": 3.964156176658844e-06, |
| "loss": 0.6691, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.06016, |
| "grad_norm": 1.3319580554962158, |
| "learning_rate": 4.006827394922125e-06, |
| "loss": 0.6749, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 1.3574756383895874, |
| "learning_rate": 4.049498613185407e-06, |
| "loss": 0.6688, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.06144, |
| "grad_norm": 1.136319637298584, |
| "learning_rate": 4.092169831448688e-06, |
| "loss": 0.6657, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.06208, |
| "grad_norm": 1.7666376829147339, |
| "learning_rate": 4.134841049711969e-06, |
| "loss": 0.6667, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.06272, |
| "grad_norm": 1.332014560699463, |
| "learning_rate": 4.177512267975251e-06, |
| "loss": 0.6715, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.06336, |
| "grad_norm": 1.3127824068069458, |
| "learning_rate": 4.220183486238532e-06, |
| "loss": 0.6737, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 1.293678641319275, |
| "learning_rate": 4.262854704501814e-06, |
| "loss": 0.6557, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.064, |
| "eval_accuracy": 0.76589, |
| "eval_loss": 0.6282070279121399, |
| "eval_macro_f1": 0.7657238614598489, |
| "eval_precision": 0.7708542319194018, |
| "eval_recall": 0.7687514011652705, |
| "eval_runtime": 806.5969, |
| "eval_samples_per_second": 123.978, |
| "eval_steps_per_second": 1.938, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.06464, |
| "grad_norm": 1.2433074712753296, |
| "learning_rate": 4.3055259227650956e-06, |
| "loss": 0.6583, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.06528, |
| "grad_norm": 1.5115686655044556, |
| "learning_rate": 4.348197141028377e-06, |
| "loss": 0.6659, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.06592, |
| "grad_norm": 1.3560088872909546, |
| "learning_rate": 4.390868359291658e-06, |
| "loss": 0.6675, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.06656, |
| "grad_norm": 0.8229928016662598, |
| "learning_rate": 4.4335395775549394e-06, |
| "loss": 0.6629, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 1.5138262510299683, |
| "learning_rate": 4.476210795818221e-06, |
| "loss": 0.6635, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.06784, |
| "grad_norm": 1.1732233762741089, |
| "learning_rate": 4.518882014081503e-06, |
| "loss": 0.6531, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.06848, |
| "grad_norm": 1.6118066310882568, |
| "learning_rate": 4.561553232344783e-06, |
| "loss": 0.6644, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.06912, |
| "grad_norm": 0.9575800895690918, |
| "learning_rate": 4.604224450608065e-06, |
| "loss": 0.6542, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.06976, |
| "grad_norm": 1.157271146774292, |
| "learning_rate": 4.6468956688713465e-06, |
| "loss": 0.6505, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 1.4502589702606201, |
| "learning_rate": 4.689566887134628e-06, |
| "loss": 0.6725, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.07104, |
| "grad_norm": 1.122251033782959, |
| "learning_rate": 4.73223810539791e-06, |
| "loss": 0.6625, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.07168, |
| "grad_norm": 0.9529566764831543, |
| "learning_rate": 4.774909323661191e-06, |
| "loss": 0.6684, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.07232, |
| "grad_norm": 1.6433424949645996, |
| "learning_rate": 4.817580541924473e-06, |
| "loss": 0.6599, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.07296, |
| "grad_norm": 1.6565576791763306, |
| "learning_rate": 4.8602517601877535e-06, |
| "loss": 0.6591, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 1.3955042362213135, |
| "learning_rate": 4.902922978451035e-06, |
| "loss": 0.6649, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.07424, |
| "grad_norm": 1.530819296836853, |
| "learning_rate": 4.945594196714317e-06, |
| "loss": 0.65, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.07488, |
| "grad_norm": 1.0729092359542847, |
| "learning_rate": 4.9882654149775974e-06, |
| "loss": 0.6603, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.07552, |
| "grad_norm": 1.6307648420333862, |
| "learning_rate": 5.030936633240879e-06, |
| "loss": 0.6702, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.07616, |
| "grad_norm": 1.324406623840332, |
| "learning_rate": 5.073607851504161e-06, |
| "loss": 0.6634, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 1.1919140815734863, |
| "learning_rate": 5.116279069767442e-06, |
| "loss": 0.6586, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07744, |
| "grad_norm": 1.0658751726150513, |
| "learning_rate": 5.158950288030724e-06, |
| "loss": 0.6573, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.07808, |
| "grad_norm": 1.7762951850891113, |
| "learning_rate": 5.201621506294005e-06, |
| "loss": 0.6596, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.07872, |
| "grad_norm": 1.1911667585372925, |
| "learning_rate": 5.244292724557286e-06, |
| "loss": 0.651, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.07936, |
| "grad_norm": 1.442029595375061, |
| "learning_rate": 5.286963942820568e-06, |
| "loss": 0.6598, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.9955100417137146, |
| "learning_rate": 5.329635161083849e-06, |
| "loss": 0.6587, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.08064, |
| "grad_norm": 1.1801563501358032, |
| "learning_rate": 5.372306379347131e-06, |
| "loss": 0.6669, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.08128, |
| "grad_norm": 1.8548506498336792, |
| "learning_rate": 5.414977597610412e-06, |
| "loss": 0.6534, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.08192, |
| "grad_norm": 1.8297194242477417, |
| "learning_rate": 5.457648815873694e-06, |
| "loss": 0.668, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.08256, |
| "grad_norm": 1.3471113443374634, |
| "learning_rate": 5.5003200341369755e-06, |
| "loss": 0.659, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 1.4015250205993652, |
| "learning_rate": 5.542991252400256e-06, |
| "loss": 0.6602, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.08384, |
| "grad_norm": 1.0796362161636353, |
| "learning_rate": 5.585662470663538e-06, |
| "loss": 0.6652, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.08448, |
| "grad_norm": 2.0007293224334717, |
| "learning_rate": 5.628333688926819e-06, |
| "loss": 0.6573, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.08512, |
| "grad_norm": 1.3140546083450317, |
| "learning_rate": 5.671004907190101e-06, |
| "loss": 0.6642, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.08576, |
| "grad_norm": 1.6801965236663818, |
| "learning_rate": 5.7136761254533826e-06, |
| "loss": 0.6527, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 1.9289913177490234, |
| "learning_rate": 5.756347343716664e-06, |
| "loss": 0.6567, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.08704, |
| "grad_norm": 1.420436143875122, |
| "learning_rate": 5.799018561979946e-06, |
| "loss": 0.6479, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.08768, |
| "grad_norm": 1.3068914413452148, |
| "learning_rate": 5.841689780243226e-06, |
| "loss": 0.6522, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.08832, |
| "grad_norm": 1.8793392181396484, |
| "learning_rate": 5.884360998506507e-06, |
| "loss": 0.6491, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.08896, |
| "grad_norm": 1.3998247385025024, |
| "learning_rate": 5.927032216769789e-06, |
| "loss": 0.6638, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 1.371824026107788, |
| "learning_rate": 5.96970343503307e-06, |
| "loss": 0.6517, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.09024, |
| "grad_norm": 2.0050251483917236, |
| "learning_rate": 6.012374653296352e-06, |
| "loss": 0.6556, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.09088, |
| "grad_norm": 1.499855399131775, |
| "learning_rate": 6.0550458715596335e-06, |
| "loss": 0.6597, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.09152, |
| "grad_norm": 1.2066516876220703, |
| "learning_rate": 6.097717089822915e-06, |
| "loss": 0.6546, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.09216, |
| "grad_norm": 1.3586395978927612, |
| "learning_rate": 6.140388308086196e-06, |
| "loss": 0.6434, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 1.1413036584854126, |
| "learning_rate": 6.183059526349477e-06, |
| "loss": 0.6554, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.09344, |
| "grad_norm": 1.1844472885131836, |
| "learning_rate": 6.225730744612759e-06, |
| "loss": 0.6357, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.09408, |
| "grad_norm": 1.7203835248947144, |
| "learning_rate": 6.2684019628760406e-06, |
| "loss": 0.6546, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.09472, |
| "grad_norm": 1.1869184970855713, |
| "learning_rate": 6.311073181139322e-06, |
| "loss": 0.6376, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.09536, |
| "grad_norm": 1.3234835863113403, |
| "learning_rate": 6.353744399402604e-06, |
| "loss": 0.6494, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.1924010515213013, |
| "learning_rate": 6.396415617665885e-06, |
| "loss": 0.652, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.09664, |
| "grad_norm": 1.4505853652954102, |
| "learning_rate": 6.439086835929166e-06, |
| "loss": 0.6606, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.09728, |
| "grad_norm": 1.6351200342178345, |
| "learning_rate": 6.481758054192448e-06, |
| "loss": 0.6567, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.09792, |
| "grad_norm": 1.4423909187316895, |
| "learning_rate": 6.524429272455729e-06, |
| "loss": 0.653, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.09856, |
| "grad_norm": 0.8749169111251831, |
| "learning_rate": 6.567100490719011e-06, |
| "loss": 0.6417, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 1.2707208395004272, |
| "learning_rate": 6.609771708982292e-06, |
| "loss": 0.6563, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.09984, |
| "grad_norm": 1.163254976272583, |
| "learning_rate": 6.652442927245574e-06, |
| "loss": 0.6425, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.10048, |
| "grad_norm": 1.4674128293991089, |
| "learning_rate": 6.6951141455088555e-06, |
| "loss": 0.6561, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.10112, |
| "grad_norm": 1.6261919736862183, |
| "learning_rate": 6.737785363772135e-06, |
| "loss": 0.6473, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.10176, |
| "grad_norm": 1.2146090269088745, |
| "learning_rate": 6.780456582035417e-06, |
| "loss": 0.6467, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 1.3596712350845337, |
| "learning_rate": 6.8231278002986985e-06, |
| "loss": 0.6431, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.10304, |
| "grad_norm": 1.7861741781234741, |
| "learning_rate": 6.86579901856198e-06, |
| "loss": 0.6586, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.10368, |
| "grad_norm": 1.456587553024292, |
| "learning_rate": 6.908470236825262e-06, |
| "loss": 0.6496, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.10432, |
| "grad_norm": 1.5652499198913574, |
| "learning_rate": 6.951141455088543e-06, |
| "loss": 0.6436, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.10496, |
| "grad_norm": 1.1411017179489136, |
| "learning_rate": 6.993812673351825e-06, |
| "loss": 0.6572, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 1.3867056369781494, |
| "learning_rate": 7.036483891615106e-06, |
| "loss": 0.6425, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.10624, |
| "grad_norm": 1.2714463472366333, |
| "learning_rate": 7.079155109878387e-06, |
| "loss": 0.6558, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.10688, |
| "grad_norm": 1.32753586769104, |
| "learning_rate": 7.121826328141669e-06, |
| "loss": 0.6537, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.10752, |
| "grad_norm": 1.5559014081954956, |
| "learning_rate": 7.16449754640495e-06, |
| "loss": 0.6613, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.10816, |
| "grad_norm": 1.1726677417755127, |
| "learning_rate": 7.207168764668232e-06, |
| "loss": 0.6516, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 1.3453316688537598, |
| "learning_rate": 7.2498399829315135e-06, |
| "loss": 0.6455, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.10944, |
| "grad_norm": 1.6420834064483643, |
| "learning_rate": 7.292511201194795e-06, |
| "loss": 0.6457, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.11008, |
| "grad_norm": 1.0518358945846558, |
| "learning_rate": 7.335182419458076e-06, |
| "loss": 0.6315, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.11072, |
| "grad_norm": 1.4042829275131226, |
| "learning_rate": 7.377853637721357e-06, |
| "loss": 0.6445, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.11136, |
| "grad_norm": 1.2282599210739136, |
| "learning_rate": 7.420524855984639e-06, |
| "loss": 0.648, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.2711114883422852, |
| "learning_rate": 7.4631960742479205e-06, |
| "loss": 0.6522, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.11264, |
| "grad_norm": 1.2256455421447754, |
| "learning_rate": 7.505867292511202e-06, |
| "loss": 0.6414, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.11328, |
| "grad_norm": 1.2845845222473145, |
| "learning_rate": 7.548538510774484e-06, |
| "loss": 0.6372, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.11392, |
| "grad_norm": 1.557308554649353, |
| "learning_rate": 7.591209729037765e-06, |
| "loss": 0.6472, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.11456, |
| "grad_norm": 1.5465153455734253, |
| "learning_rate": 7.633880947301045e-06, |
| "loss": 0.6387, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 1.0359770059585571, |
| "learning_rate": 7.676552165564327e-06, |
| "loss": 0.6335, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.11584, |
| "grad_norm": 1.9624851942062378, |
| "learning_rate": 7.719223383827608e-06, |
| "loss": 0.6448, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.11648, |
| "grad_norm": 1.4034361839294434, |
| "learning_rate": 7.76189460209089e-06, |
| "loss": 0.6711, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.11712, |
| "grad_norm": 1.503132700920105, |
| "learning_rate": 7.804565820354171e-06, |
| "loss": 0.6457, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.11776, |
| "grad_norm": 1.0341029167175293, |
| "learning_rate": 7.847237038617453e-06, |
| "loss": 0.6393, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 1.884297251701355, |
| "learning_rate": 7.889908256880735e-06, |
| "loss": 0.6415, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.11904, |
| "grad_norm": 1.3774992227554321, |
| "learning_rate": 7.932579475144016e-06, |
| "loss": 0.635, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.11968, |
| "grad_norm": 1.9972243309020996, |
| "learning_rate": 7.975250693407298e-06, |
| "loss": 0.64, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.12032, |
| "grad_norm": 1.1215760707855225, |
| "learning_rate": 8.01792191167058e-06, |
| "loss": 0.6358, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.12096, |
| "grad_norm": 1.1372530460357666, |
| "learning_rate": 8.060593129933861e-06, |
| "loss": 0.6264, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 1.0789086818695068, |
| "learning_rate": 8.103264348197143e-06, |
| "loss": 0.6396, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.12224, |
| "grad_norm": 1.3709907531738281, |
| "learning_rate": 8.145935566460424e-06, |
| "loss": 0.638, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.12288, |
| "grad_norm": 1.5928348302841187, |
| "learning_rate": 8.188606784723706e-06, |
| "loss": 0.6495, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.12352, |
| "grad_norm": 1.0440688133239746, |
| "learning_rate": 8.231278002986986e-06, |
| "loss": 0.6236, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.12416, |
| "grad_norm": 1.2079739570617676, |
| "learning_rate": 8.273949221250267e-06, |
| "loss": 0.6489, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 1.161354422569275, |
| "learning_rate": 8.316620439513549e-06, |
| "loss": 0.6412, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.12544, |
| "grad_norm": 1.7108139991760254, |
| "learning_rate": 8.35929165777683e-06, |
| "loss": 0.6383, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.12608, |
| "grad_norm": 1.4153392314910889, |
| "learning_rate": 8.401962876040112e-06, |
| "loss": 0.6482, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.12672, |
| "grad_norm": 1.6085275411605835, |
| "learning_rate": 8.444634094303393e-06, |
| "loss": 0.6488, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.12736, |
| "grad_norm": 1.2328271865844727, |
| "learning_rate": 8.487305312566675e-06, |
| "loss": 0.6466, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.9865418672561646, |
| "learning_rate": 8.529976530829955e-06, |
| "loss": 0.6468, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.128, |
| "eval_accuracy": 0.7972, |
| "eval_loss": 0.578827440738678, |
| "eval_macro_f1": 0.797186028145479, |
| "eval_precision": 0.7981394308396883, |
| "eval_recall": 0.7985143807471076, |
| "eval_runtime": 813.4399, |
| "eval_samples_per_second": 122.935, |
| "eval_steps_per_second": 1.921, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12864, |
| "grad_norm": 1.0044854879379272, |
| "learning_rate": 8.572647749093236e-06, |
| "loss": 0.6464, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.12928, |
| "grad_norm": 1.3742347955703735, |
| "learning_rate": 8.615318967356518e-06, |
| "loss": 0.6373, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.12992, |
| "grad_norm": 1.3543511629104614, |
| "learning_rate": 8.6579901856198e-06, |
| "loss": 0.6344, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.13056, |
| "grad_norm": 1.865698218345642, |
| "learning_rate": 8.700661403883081e-06, |
| "loss": 0.6426, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 1.3193360567092896, |
| "learning_rate": 8.743332622146363e-06, |
| "loss": 0.6351, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.13184, |
| "grad_norm": 1.5337750911712646, |
| "learning_rate": 8.786003840409644e-06, |
| "loss": 0.6374, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.13248, |
| "grad_norm": 1.7162808179855347, |
| "learning_rate": 8.828675058672926e-06, |
| "loss": 0.6445, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.13312, |
| "grad_norm": 1.9505610466003418, |
| "learning_rate": 8.871346276936208e-06, |
| "loss": 0.6223, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.13376, |
| "grad_norm": 2.593158721923828, |
| "learning_rate": 8.914017495199489e-06, |
| "loss": 0.6272, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.9756277203559875, |
| "learning_rate": 8.95668871346277e-06, |
| "loss": 0.6383, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.13504, |
| "grad_norm": 1.3016871213912964, |
| "learning_rate": 8.999359931726052e-06, |
| "loss": 0.6208, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.13568, |
| "grad_norm": 1.475502610206604, |
| "learning_rate": 9.042031149989334e-06, |
| "loss": 0.6354, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.13632, |
| "grad_norm": 1.6524313688278198, |
| "learning_rate": 9.084702368252615e-06, |
| "loss": 0.6515, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.13696, |
| "grad_norm": 1.5008305311203003, |
| "learning_rate": 9.127373586515895e-06, |
| "loss": 0.6468, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 1.2291077375411987, |
| "learning_rate": 9.170044804779177e-06, |
| "loss": 0.6271, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.13824, |
| "grad_norm": 1.5787503719329834, |
| "learning_rate": 9.212716023042458e-06, |
| "loss": 0.6495, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.13888, |
| "grad_norm": 1.6643162965774536, |
| "learning_rate": 9.25538724130574e-06, |
| "loss": 0.6417, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.13952, |
| "grad_norm": 1.5859993696212769, |
| "learning_rate": 9.298058459569022e-06, |
| "loss": 0.6362, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.14016, |
| "grad_norm": 1.2435864210128784, |
| "learning_rate": 9.340729677832303e-06, |
| "loss": 0.6337, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 1.1807382106781006, |
| "learning_rate": 9.383400896095585e-06, |
| "loss": 0.6344, |
| "step": 2200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 46875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.851581563220352e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|