Instructions to use dzungpham/graphcodebert-code-classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dzungpham/graphcodebert-code-classification with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("dzungpham/graphcodebert-code-classification", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 2000, | |
| "best_metric": 0.797186028145479, | |
| "best_model_checkpoint": "./output_checkpoints/graphcodebert-rdrop/checkpoint-2000", | |
| "epoch": 0.128, | |
| "eval_steps": 1000, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00064, | |
| "grad_norm": 1.8688431978225708, | |
| "learning_rate": 3.840409643695328e-08, | |
| "loss": 0.7168, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00128, | |
| "grad_norm": 1.835353970527649, | |
| "learning_rate": 8.10753147002347e-08, | |
| "loss": 0.7179, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.00192, | |
| "grad_norm": 1.2541388273239136, | |
| "learning_rate": 1.2374653296351612e-07, | |
| "loss": 0.7177, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.00256, | |
| "grad_norm": 1.1381633281707764, | |
| "learning_rate": 1.6641775122679754e-07, | |
| "loss": 0.7312, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0032, | |
| "grad_norm": 1.6313824653625488, | |
| "learning_rate": 2.0908896949007894e-07, | |
| "loss": 0.7167, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.00384, | |
| "grad_norm": 1.092464566230774, | |
| "learning_rate": 2.517601877533604e-07, | |
| "loss": 0.7201, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.00448, | |
| "grad_norm": 1.164099097251892, | |
| "learning_rate": 2.944314060166418e-07, | |
| "loss": 0.7195, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.00512, | |
| "grad_norm": 1.9283920526504517, | |
| "learning_rate": 3.371026242799232e-07, | |
| "loss": 0.7223, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.00576, | |
| "grad_norm": 1.8716129064559937, | |
| "learning_rate": 3.7977384254320464e-07, | |
| "loss": 0.7151, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0064, | |
| "grad_norm": 1.5499262809753418, | |
| "learning_rate": 4.22445060806486e-07, | |
| "loss": 0.7211, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00704, | |
| "grad_norm": 1.375114917755127, | |
| "learning_rate": 4.651162790697675e-07, | |
| "loss": 0.7247, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.00768, | |
| "grad_norm": 1.384915828704834, | |
| "learning_rate": 5.077874973330489e-07, | |
| "loss": 0.7156, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.00832, | |
| "grad_norm": 1.6376659870147705, | |
| "learning_rate": 5.504587155963304e-07, | |
| "loss": 0.7196, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.00896, | |
| "grad_norm": 1.709489345550537, | |
| "learning_rate": 5.931299338596117e-07, | |
| "loss": 0.7215, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0096, | |
| "grad_norm": 1.350512146949768, | |
| "learning_rate": 6.358011521228932e-07, | |
| "loss": 0.71, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01024, | |
| "grad_norm": 2.33050537109375, | |
| "learning_rate": 6.784723703861745e-07, | |
| "loss": 0.7191, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01088, | |
| "grad_norm": 1.0042874813079834, | |
| "learning_rate": 7.21143588649456e-07, | |
| "loss": 0.72, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.01152, | |
| "grad_norm": 1.1835744380950928, | |
| "learning_rate": 7.638148069127374e-07, | |
| "loss": 0.7122, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01216, | |
| "grad_norm": 1.949506402015686, | |
| "learning_rate": 8.064860251760189e-07, | |
| "loss": 0.7091, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0128, | |
| "grad_norm": 1.139626383781433, | |
| "learning_rate": 8.491572434393003e-07, | |
| "loss": 0.7046, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01344, | |
| "grad_norm": 1.7734779119491577, | |
| "learning_rate": 8.918284617025817e-07, | |
| "loss": 0.7135, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01408, | |
| "grad_norm": 1.3442974090576172, | |
| "learning_rate": 9.344996799658632e-07, | |
| "loss": 0.7127, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01472, | |
| "grad_norm": 1.6148335933685303, | |
| "learning_rate": 9.771708982291445e-07, | |
| "loss": 0.7168, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.01536, | |
| "grad_norm": 0.9220213294029236, | |
| "learning_rate": 1.0198421164924258e-06, | |
| "loss": 0.7109, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 1.5631815195083618, | |
| "learning_rate": 1.0625133347557074e-06, | |
| "loss": 0.7149, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01664, | |
| "grad_norm": 1.5052822828292847, | |
| "learning_rate": 1.1051845530189888e-06, | |
| "loss": 0.7112, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.01728, | |
| "grad_norm": 1.6714903116226196, | |
| "learning_rate": 1.1478557712822702e-06, | |
| "loss": 0.7128, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.01792, | |
| "grad_norm": 1.6667262315750122, | |
| "learning_rate": 1.1905269895455517e-06, | |
| "loss": 0.7091, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.01856, | |
| "grad_norm": 1.6861907243728638, | |
| "learning_rate": 1.233198207808833e-06, | |
| "loss": 0.7078, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.0192, | |
| "grad_norm": 1.6259212493896484, | |
| "learning_rate": 1.2758694260721145e-06, | |
| "loss": 0.7087, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01984, | |
| "grad_norm": 1.207320213317871, | |
| "learning_rate": 1.318540644335396e-06, | |
| "loss": 0.7087, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.02048, | |
| "grad_norm": 1.087124228477478, | |
| "learning_rate": 1.3612118625986772e-06, | |
| "loss": 0.7174, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.02112, | |
| "grad_norm": 1.5668721199035645, | |
| "learning_rate": 1.4038830808619588e-06, | |
| "loss": 0.7066, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02176, | |
| "grad_norm": 1.5332859754562378, | |
| "learning_rate": 1.4465542991252401e-06, | |
| "loss": 0.7115, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0224, | |
| "grad_norm": 2.092994451522827, | |
| "learning_rate": 1.4892255173885215e-06, | |
| "loss": 0.7085, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02304, | |
| "grad_norm": 1.266851544380188, | |
| "learning_rate": 1.531896735651803e-06, | |
| "loss": 0.7091, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.02368, | |
| "grad_norm": 1.290616512298584, | |
| "learning_rate": 1.5745679539150842e-06, | |
| "loss": 0.7103, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.02432, | |
| "grad_norm": 1.2045000791549683, | |
| "learning_rate": 1.6172391721783658e-06, | |
| "loss": 0.705, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.02496, | |
| "grad_norm": 1.828832983970642, | |
| "learning_rate": 1.6599103904416472e-06, | |
| "loss": 0.7038, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0256, | |
| "grad_norm": 1.7793196439743042, | |
| "learning_rate": 1.7025816087049288e-06, | |
| "loss": 0.7085, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02624, | |
| "grad_norm": 0.8949472904205322, | |
| "learning_rate": 1.7452528269682101e-06, | |
| "loss": 0.7045, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.02688, | |
| "grad_norm": 1.3071945905685425, | |
| "learning_rate": 1.7879240452314913e-06, | |
| "loss": 0.702, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.02752, | |
| "grad_norm": 1.7497148513793945, | |
| "learning_rate": 1.8305952634947729e-06, | |
| "loss": 0.7073, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.02816, | |
| "grad_norm": 1.0856297016143799, | |
| "learning_rate": 1.8732664817580542e-06, | |
| "loss": 0.7008, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0288, | |
| "grad_norm": 1.369019627571106, | |
| "learning_rate": 1.915937700021336e-06, | |
| "loss": 0.7059, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.02944, | |
| "grad_norm": 2.0961010456085205, | |
| "learning_rate": 1.958608918284617e-06, | |
| "loss": 0.6952, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03008, | |
| "grad_norm": 1.6076347827911377, | |
| "learning_rate": 2.0012801365478988e-06, | |
| "loss": 0.7074, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.03072, | |
| "grad_norm": 1.3129311800003052, | |
| "learning_rate": 2.04395135481118e-06, | |
| "loss": 0.6994, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.03136, | |
| "grad_norm": 1.3621476888656616, | |
| "learning_rate": 2.0866225730744615e-06, | |
| "loss": 0.696, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.9786806106567383, | |
| "learning_rate": 2.129293791337743e-06, | |
| "loss": 0.7011, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03264, | |
| "grad_norm": 1.4611176252365112, | |
| "learning_rate": 2.1719650096010242e-06, | |
| "loss": 0.699, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.03328, | |
| "grad_norm": 1.0675945281982422, | |
| "learning_rate": 2.214636227864306e-06, | |
| "loss": 0.6906, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.03392, | |
| "grad_norm": 1.6564017534255981, | |
| "learning_rate": 2.257307446127587e-06, | |
| "loss": 0.6923, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.03456, | |
| "grad_norm": 1.228119134902954, | |
| "learning_rate": 2.2999786643908685e-06, | |
| "loss": 0.6928, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0352, | |
| "grad_norm": 1.9868593215942383, | |
| "learning_rate": 2.34264988265415e-06, | |
| "loss": 0.6912, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.03584, | |
| "grad_norm": 1.2531176805496216, | |
| "learning_rate": 2.3853211009174317e-06, | |
| "loss": 0.6923, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.03648, | |
| "grad_norm": 1.413602352142334, | |
| "learning_rate": 2.427992319180713e-06, | |
| "loss": 0.6993, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.03712, | |
| "grad_norm": 0.9655390977859497, | |
| "learning_rate": 2.470663537443994e-06, | |
| "loss": 0.7017, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03776, | |
| "grad_norm": 1.962438941001892, | |
| "learning_rate": 2.5133347557072756e-06, | |
| "loss": 0.6933, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0384, | |
| "grad_norm": 1.1099931001663208, | |
| "learning_rate": 2.556005973970557e-06, | |
| "loss": 0.6925, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03904, | |
| "grad_norm": 1.1766624450683594, | |
| "learning_rate": 2.5986771922338383e-06, | |
| "loss": 0.6894, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.03968, | |
| "grad_norm": 1.574353814125061, | |
| "learning_rate": 2.64134841049712e-06, | |
| "loss": 0.6923, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.04032, | |
| "grad_norm": 1.194074273109436, | |
| "learning_rate": 2.6840196287604015e-06, | |
| "loss": 0.6855, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.04096, | |
| "grad_norm": 1.1750593185424805, | |
| "learning_rate": 2.7266908470236826e-06, | |
| "loss": 0.7002, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.0416, | |
| "grad_norm": 1.3165347576141357, | |
| "learning_rate": 2.7693620652869642e-06, | |
| "loss": 0.6893, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.04224, | |
| "grad_norm": 1.1079384088516235, | |
| "learning_rate": 2.812033283550246e-06, | |
| "loss": 0.6852, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.04288, | |
| "grad_norm": 1.231327772140503, | |
| "learning_rate": 2.8547045018135274e-06, | |
| "loss": 0.695, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.04352, | |
| "grad_norm": 1.966036319732666, | |
| "learning_rate": 2.897375720076808e-06, | |
| "loss": 0.6833, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.04416, | |
| "grad_norm": 1.0460131168365479, | |
| "learning_rate": 2.9400469383400897e-06, | |
| "loss": 0.6802, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.0448, | |
| "grad_norm": 1.2445200681686401, | |
| "learning_rate": 2.9827181566033713e-06, | |
| "loss": 0.6883, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.04544, | |
| "grad_norm": 1.1759636402130127, | |
| "learning_rate": 3.025389374866653e-06, | |
| "loss": 0.682, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.04608, | |
| "grad_norm": 1.2478561401367188, | |
| "learning_rate": 3.068060593129934e-06, | |
| "loss": 0.6802, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04672, | |
| "grad_norm": 0.8945108652114868, | |
| "learning_rate": 3.1107318113932156e-06, | |
| "loss": 0.6873, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.04736, | |
| "grad_norm": 1.6810317039489746, | |
| "learning_rate": 3.153403029656497e-06, | |
| "loss": 0.6828, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 2.882283926010132, | |
| "learning_rate": 3.1960742479197783e-06, | |
| "loss": 0.6797, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04864, | |
| "grad_norm": 1.575766921043396, | |
| "learning_rate": 3.23874546618306e-06, | |
| "loss": 0.6826, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.04928, | |
| "grad_norm": 1.8044737577438354, | |
| "learning_rate": 3.2814166844463415e-06, | |
| "loss": 0.6837, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.04992, | |
| "grad_norm": 1.4755513668060303, | |
| "learning_rate": 3.324087902709623e-06, | |
| "loss": 0.6815, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.05056, | |
| "grad_norm": 2.0735654830932617, | |
| "learning_rate": 3.3667591209729038e-06, | |
| "loss": 0.6773, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.0512, | |
| "grad_norm": 0.9823655486106873, | |
| "learning_rate": 3.4094303392361854e-06, | |
| "loss": 0.6689, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.05184, | |
| "grad_norm": 1.0731920003890991, | |
| "learning_rate": 3.452101557499467e-06, | |
| "loss": 0.6689, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.05248, | |
| "grad_norm": 1.1081087589263916, | |
| "learning_rate": 3.494772775762748e-06, | |
| "loss": 0.6694, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.05312, | |
| "grad_norm": 1.2130361795425415, | |
| "learning_rate": 3.5374439940260297e-06, | |
| "loss": 0.682, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.05376, | |
| "grad_norm": 1.170345664024353, | |
| "learning_rate": 3.5801152122893113e-06, | |
| "loss": 0.6756, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.0544, | |
| "grad_norm": 1.2610124349594116, | |
| "learning_rate": 3.622786430552593e-06, | |
| "loss": 0.67, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.05504, | |
| "grad_norm": 1.0138545036315918, | |
| "learning_rate": 3.665457648815874e-06, | |
| "loss": 0.6807, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.05568, | |
| "grad_norm": 1.0937036275863647, | |
| "learning_rate": 3.7081288670791556e-06, | |
| "loss": 0.6774, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.05632, | |
| "grad_norm": 1.119221568107605, | |
| "learning_rate": 3.750800085342437e-06, | |
| "loss": 0.6728, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.05696, | |
| "grad_norm": 1.9351646900177002, | |
| "learning_rate": 3.793471303605718e-06, | |
| "loss": 0.6664, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.0576, | |
| "grad_norm": 1.6208665370941162, | |
| "learning_rate": 3.836142521869e-06, | |
| "loss": 0.6653, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.05824, | |
| "grad_norm": 1.3460793495178223, | |
| "learning_rate": 3.8788137401322815e-06, | |
| "loss": 0.6651, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.05888, | |
| "grad_norm": 1.8987629413604736, | |
| "learning_rate": 3.921484958395563e-06, | |
| "loss": 0.6694, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.05952, | |
| "grad_norm": 1.3637055158615112, | |
| "learning_rate": 3.964156176658844e-06, | |
| "loss": 0.6691, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.06016, | |
| "grad_norm": 1.3319580554962158, | |
| "learning_rate": 4.006827394922125e-06, | |
| "loss": 0.6749, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.0608, | |
| "grad_norm": 1.3574756383895874, | |
| "learning_rate": 4.049498613185407e-06, | |
| "loss": 0.6688, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.06144, | |
| "grad_norm": 1.136319637298584, | |
| "learning_rate": 4.092169831448688e-06, | |
| "loss": 0.6657, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.06208, | |
| "grad_norm": 1.7666376829147339, | |
| "learning_rate": 4.134841049711969e-06, | |
| "loss": 0.6667, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.06272, | |
| "grad_norm": 1.332014560699463, | |
| "learning_rate": 4.177512267975251e-06, | |
| "loss": 0.6715, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.06336, | |
| "grad_norm": 1.3127824068069458, | |
| "learning_rate": 4.220183486238532e-06, | |
| "loss": 0.6737, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 1.293678641319275, | |
| "learning_rate": 4.262854704501814e-06, | |
| "loss": 0.6557, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "eval_accuracy": 0.76589, | |
| "eval_loss": 0.6282070279121399, | |
| "eval_macro_f1": 0.7657238614598489, | |
| "eval_precision": 0.7708542319194018, | |
| "eval_recall": 0.7687514011652705, | |
| "eval_runtime": 806.5969, | |
| "eval_samples_per_second": 123.978, | |
| "eval_steps_per_second": 1.938, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06464, | |
| "grad_norm": 1.2433074712753296, | |
| "learning_rate": 4.3055259227650956e-06, | |
| "loss": 0.6583, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.06528, | |
| "grad_norm": 1.5115686655044556, | |
| "learning_rate": 4.348197141028377e-06, | |
| "loss": 0.6659, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.06592, | |
| "grad_norm": 1.3560088872909546, | |
| "learning_rate": 4.390868359291658e-06, | |
| "loss": 0.6675, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.06656, | |
| "grad_norm": 0.8229928016662598, | |
| "learning_rate": 4.4335395775549394e-06, | |
| "loss": 0.6629, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.0672, | |
| "grad_norm": 1.5138262510299683, | |
| "learning_rate": 4.476210795818221e-06, | |
| "loss": 0.6635, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.06784, | |
| "grad_norm": 1.1732233762741089, | |
| "learning_rate": 4.518882014081503e-06, | |
| "loss": 0.6531, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.06848, | |
| "grad_norm": 1.6118066310882568, | |
| "learning_rate": 4.561553232344783e-06, | |
| "loss": 0.6644, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.06912, | |
| "grad_norm": 0.9575800895690918, | |
| "learning_rate": 4.604224450608065e-06, | |
| "loss": 0.6542, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.06976, | |
| "grad_norm": 1.157271146774292, | |
| "learning_rate": 4.6468956688713465e-06, | |
| "loss": 0.6505, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.0704, | |
| "grad_norm": 1.4502589702606201, | |
| "learning_rate": 4.689566887134628e-06, | |
| "loss": 0.6725, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.07104, | |
| "grad_norm": 1.122251033782959, | |
| "learning_rate": 4.73223810539791e-06, | |
| "loss": 0.6625, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.07168, | |
| "grad_norm": 0.9529566764831543, | |
| "learning_rate": 4.774909323661191e-06, | |
| "loss": 0.6684, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.07232, | |
| "grad_norm": 1.6433424949645996, | |
| "learning_rate": 4.817580541924473e-06, | |
| "loss": 0.6599, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.07296, | |
| "grad_norm": 1.6565576791763306, | |
| "learning_rate": 4.8602517601877535e-06, | |
| "loss": 0.6591, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.0736, | |
| "grad_norm": 1.3955042362213135, | |
| "learning_rate": 4.902922978451035e-06, | |
| "loss": 0.6649, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.07424, | |
| "grad_norm": 1.530819296836853, | |
| "learning_rate": 4.945594196714317e-06, | |
| "loss": 0.65, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.07488, | |
| "grad_norm": 1.0729092359542847, | |
| "learning_rate": 4.9882654149775974e-06, | |
| "loss": 0.6603, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.07552, | |
| "grad_norm": 1.6307648420333862, | |
| "learning_rate": 5.030936633240879e-06, | |
| "loss": 0.6702, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.07616, | |
| "grad_norm": 1.324406623840332, | |
| "learning_rate": 5.073607851504161e-06, | |
| "loss": 0.6634, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.0768, | |
| "grad_norm": 1.1919140815734863, | |
| "learning_rate": 5.116279069767442e-06, | |
| "loss": 0.6586, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.07744, | |
| "grad_norm": 1.0658751726150513, | |
| "learning_rate": 5.158950288030724e-06, | |
| "loss": 0.6573, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.07808, | |
| "grad_norm": 1.7762951850891113, | |
| "learning_rate": 5.201621506294005e-06, | |
| "loss": 0.6596, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.07872, | |
| "grad_norm": 1.1911667585372925, | |
| "learning_rate": 5.244292724557286e-06, | |
| "loss": 0.651, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.07936, | |
| "grad_norm": 1.442029595375061, | |
| "learning_rate": 5.286963942820568e-06, | |
| "loss": 0.6598, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.9955100417137146, | |
| "learning_rate": 5.329635161083849e-06, | |
| "loss": 0.6587, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.08064, | |
| "grad_norm": 1.1801563501358032, | |
| "learning_rate": 5.372306379347131e-06, | |
| "loss": 0.6669, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.08128, | |
| "grad_norm": 1.8548506498336792, | |
| "learning_rate": 5.414977597610412e-06, | |
| "loss": 0.6534, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.08192, | |
| "grad_norm": 1.8297194242477417, | |
| "learning_rate": 5.457648815873694e-06, | |
| "loss": 0.668, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.08256, | |
| "grad_norm": 1.3471113443374634, | |
| "learning_rate": 5.5003200341369755e-06, | |
| "loss": 0.659, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.0832, | |
| "grad_norm": 1.4015250205993652, | |
| "learning_rate": 5.542991252400256e-06, | |
| "loss": 0.6602, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.08384, | |
| "grad_norm": 1.0796362161636353, | |
| "learning_rate": 5.585662470663538e-06, | |
| "loss": 0.6652, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.08448, | |
| "grad_norm": 2.0007293224334717, | |
| "learning_rate": 5.628333688926819e-06, | |
| "loss": 0.6573, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.08512, | |
| "grad_norm": 1.3140546083450317, | |
| "learning_rate": 5.671004907190101e-06, | |
| "loss": 0.6642, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.08576, | |
| "grad_norm": 1.6801965236663818, | |
| "learning_rate": 5.7136761254533826e-06, | |
| "loss": 0.6527, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.0864, | |
| "grad_norm": 1.9289913177490234, | |
| "learning_rate": 5.756347343716664e-06, | |
| "loss": 0.6567, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.08704, | |
| "grad_norm": 1.420436143875122, | |
| "learning_rate": 5.799018561979946e-06, | |
| "loss": 0.6479, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.08768, | |
| "grad_norm": 1.3068914413452148, | |
| "learning_rate": 5.841689780243226e-06, | |
| "loss": 0.6522, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.08832, | |
| "grad_norm": 1.8793392181396484, | |
| "learning_rate": 5.884360998506507e-06, | |
| "loss": 0.6491, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.08896, | |
| "grad_norm": 1.3998247385025024, | |
| "learning_rate": 5.927032216769789e-06, | |
| "loss": 0.6638, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.0896, | |
| "grad_norm": 1.371824026107788, | |
| "learning_rate": 5.96970343503307e-06, | |
| "loss": 0.6517, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.09024, | |
| "grad_norm": 2.0050251483917236, | |
| "learning_rate": 6.012374653296352e-06, | |
| "loss": 0.6556, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.09088, | |
| "grad_norm": 1.499855399131775, | |
| "learning_rate": 6.0550458715596335e-06, | |
| "loss": 0.6597, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.09152, | |
| "grad_norm": 1.2066516876220703, | |
| "learning_rate": 6.097717089822915e-06, | |
| "loss": 0.6546, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.09216, | |
| "grad_norm": 1.3586395978927612, | |
| "learning_rate": 6.140388308086196e-06, | |
| "loss": 0.6434, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.0928, | |
| "grad_norm": 1.1413036584854126, | |
| "learning_rate": 6.183059526349477e-06, | |
| "loss": 0.6554, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.09344, | |
| "grad_norm": 1.1844472885131836, | |
| "learning_rate": 6.225730744612759e-06, | |
| "loss": 0.6357, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.09408, | |
| "grad_norm": 1.7203835248947144, | |
| "learning_rate": 6.2684019628760406e-06, | |
| "loss": 0.6546, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.09472, | |
| "grad_norm": 1.1869184970855713, | |
| "learning_rate": 6.311073181139322e-06, | |
| "loss": 0.6376, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.09536, | |
| "grad_norm": 1.3234835863113403, | |
| "learning_rate": 6.353744399402604e-06, | |
| "loss": 0.6494, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 1.1924010515213013, | |
| "learning_rate": 6.396415617665885e-06, | |
| "loss": 0.652, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.09664, | |
| "grad_norm": 1.4505853652954102, | |
| "learning_rate": 6.439086835929166e-06, | |
| "loss": 0.6606, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.09728, | |
| "grad_norm": 1.6351200342178345, | |
| "learning_rate": 6.481758054192448e-06, | |
| "loss": 0.6567, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.09792, | |
| "grad_norm": 1.4423909187316895, | |
| "learning_rate": 6.524429272455729e-06, | |
| "loss": 0.653, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.09856, | |
| "grad_norm": 0.8749169111251831, | |
| "learning_rate": 6.567100490719011e-06, | |
| "loss": 0.6417, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.0992, | |
| "grad_norm": 1.2707208395004272, | |
| "learning_rate": 6.609771708982292e-06, | |
| "loss": 0.6563, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.09984, | |
| "grad_norm": 1.163254976272583, | |
| "learning_rate": 6.652442927245574e-06, | |
| "loss": 0.6425, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.10048, | |
| "grad_norm": 1.4674128293991089, | |
| "learning_rate": 6.6951141455088555e-06, | |
| "loss": 0.6561, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.10112, | |
| "grad_norm": 1.6261919736862183, | |
| "learning_rate": 6.737785363772135e-06, | |
| "loss": 0.6473, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.10176, | |
| "grad_norm": 1.2146090269088745, | |
| "learning_rate": 6.780456582035417e-06, | |
| "loss": 0.6467, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.1024, | |
| "grad_norm": 1.3596712350845337, | |
| "learning_rate": 6.8231278002986985e-06, | |
| "loss": 0.6431, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.10304, | |
| "grad_norm": 1.7861741781234741, | |
| "learning_rate": 6.86579901856198e-06, | |
| "loss": 0.6586, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.10368, | |
| "grad_norm": 1.456587553024292, | |
| "learning_rate": 6.908470236825262e-06, | |
| "loss": 0.6496, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.10432, | |
| "grad_norm": 1.5652499198913574, | |
| "learning_rate": 6.951141455088543e-06, | |
| "loss": 0.6436, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.10496, | |
| "grad_norm": 1.1411017179489136, | |
| "learning_rate": 6.993812673351825e-06, | |
| "loss": 0.6572, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.1056, | |
| "grad_norm": 1.3867056369781494, | |
| "learning_rate": 7.036483891615106e-06, | |
| "loss": 0.6425, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.10624, | |
| "grad_norm": 1.2714463472366333, | |
| "learning_rate": 7.079155109878387e-06, | |
| "loss": 0.6558, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.10688, | |
| "grad_norm": 1.32753586769104, | |
| "learning_rate": 7.121826328141669e-06, | |
| "loss": 0.6537, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.10752, | |
| "grad_norm": 1.5559014081954956, | |
| "learning_rate": 7.16449754640495e-06, | |
| "loss": 0.6613, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.10816, | |
| "grad_norm": 1.1726677417755127, | |
| "learning_rate": 7.207168764668232e-06, | |
| "loss": 0.6516, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.1088, | |
| "grad_norm": 1.3453316688537598, | |
| "learning_rate": 7.2498399829315135e-06, | |
| "loss": 0.6455, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.10944, | |
| "grad_norm": 1.6420834064483643, | |
| "learning_rate": 7.292511201194795e-06, | |
| "loss": 0.6457, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.11008, | |
| "grad_norm": 1.0518358945846558, | |
| "learning_rate": 7.335182419458076e-06, | |
| "loss": 0.6315, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.11072, | |
| "grad_norm": 1.4042829275131226, | |
| "learning_rate": 7.377853637721357e-06, | |
| "loss": 0.6445, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.11136, | |
| "grad_norm": 1.2282599210739136, | |
| "learning_rate": 7.420524855984639e-06, | |
| "loss": 0.648, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 1.2711114883422852, | |
| "learning_rate": 7.4631960742479205e-06, | |
| "loss": 0.6522, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.11264, | |
| "grad_norm": 1.2256455421447754, | |
| "learning_rate": 7.505867292511202e-06, | |
| "loss": 0.6414, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.11328, | |
| "grad_norm": 1.2845845222473145, | |
| "learning_rate": 7.548538510774484e-06, | |
| "loss": 0.6372, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.11392, | |
| "grad_norm": 1.557308554649353, | |
| "learning_rate": 7.591209729037765e-06, | |
| "loss": 0.6472, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.11456, | |
| "grad_norm": 1.5465153455734253, | |
| "learning_rate": 7.633880947301045e-06, | |
| "loss": 0.6387, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.1152, | |
| "grad_norm": 1.0359770059585571, | |
| "learning_rate": 7.676552165564327e-06, | |
| "loss": 0.6335, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.11584, | |
| "grad_norm": 1.9624851942062378, | |
| "learning_rate": 7.719223383827608e-06, | |
| "loss": 0.6448, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.11648, | |
| "grad_norm": 1.4034361839294434, | |
| "learning_rate": 7.76189460209089e-06, | |
| "loss": 0.6711, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.11712, | |
| "grad_norm": 1.503132700920105, | |
| "learning_rate": 7.804565820354171e-06, | |
| "loss": 0.6457, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.11776, | |
| "grad_norm": 1.0341029167175293, | |
| "learning_rate": 7.847237038617453e-06, | |
| "loss": 0.6393, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.1184, | |
| "grad_norm": 1.884297251701355, | |
| "learning_rate": 7.889908256880735e-06, | |
| "loss": 0.6415, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.11904, | |
| "grad_norm": 1.3774992227554321, | |
| "learning_rate": 7.932579475144016e-06, | |
| "loss": 0.635, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.11968, | |
| "grad_norm": 1.9972243309020996, | |
| "learning_rate": 7.975250693407298e-06, | |
| "loss": 0.64, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.12032, | |
| "grad_norm": 1.1215760707855225, | |
| "learning_rate": 8.01792191167058e-06, | |
| "loss": 0.6358, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.12096, | |
| "grad_norm": 1.1372530460357666, | |
| "learning_rate": 8.060593129933861e-06, | |
| "loss": 0.6264, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.1216, | |
| "grad_norm": 1.0789086818695068, | |
| "learning_rate": 8.103264348197143e-06, | |
| "loss": 0.6396, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.12224, | |
| "grad_norm": 1.3709907531738281, | |
| "learning_rate": 8.145935566460424e-06, | |
| "loss": 0.638, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.12288, | |
| "grad_norm": 1.5928348302841187, | |
| "learning_rate": 8.188606784723706e-06, | |
| "loss": 0.6495, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.12352, | |
| "grad_norm": 1.0440688133239746, | |
| "learning_rate": 8.231278002986986e-06, | |
| "loss": 0.6236, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.12416, | |
| "grad_norm": 1.2079739570617676, | |
| "learning_rate": 8.273949221250267e-06, | |
| "loss": 0.6489, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.1248, | |
| "grad_norm": 1.161354422569275, | |
| "learning_rate": 8.316620439513549e-06, | |
| "loss": 0.6412, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.12544, | |
| "grad_norm": 1.7108139991760254, | |
| "learning_rate": 8.35929165777683e-06, | |
| "loss": 0.6383, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.12608, | |
| "grad_norm": 1.4153392314910889, | |
| "learning_rate": 8.401962876040112e-06, | |
| "loss": 0.6482, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.12672, | |
| "grad_norm": 1.6085275411605835, | |
| "learning_rate": 8.444634094303393e-06, | |
| "loss": 0.6488, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.12736, | |
| "grad_norm": 1.2328271865844727, | |
| "learning_rate": 8.487305312566675e-06, | |
| "loss": 0.6466, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.9865418672561646, | |
| "learning_rate": 8.529976530829955e-06, | |
| "loss": 0.6468, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "eval_accuracy": 0.7972, | |
| "eval_loss": 0.578827440738678, | |
| "eval_macro_f1": 0.797186028145479, | |
| "eval_precision": 0.7981394308396883, | |
| "eval_recall": 0.7985143807471076, | |
| "eval_runtime": 813.4399, | |
| "eval_samples_per_second": 122.935, | |
| "eval_steps_per_second": 1.921, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 46875, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.68331875442944e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |