Instructions to use dzungpham/graphcodebert-code-classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dzungpham/graphcodebert-code-classification with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("dzungpham/graphcodebert-code-classification", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 1550, | |
| "best_metric": 0.6134094606232656, | |
| "best_model_checkpoint": "output_checkpoints/graphcodebert-best/checkpoint-1550", | |
| "epoch": 1.6144814090019568, | |
| "eval_steps": 50, | |
| "global_step": 1650, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009784735812133072, | |
| "grad_norm": 165951.1875, | |
| "learning_rate": 4.411764705882353e-08, | |
| "loss": 0.8416, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.019569471624266144, | |
| "grad_norm": 206132.796875, | |
| "learning_rate": 9.313725490196079e-08, | |
| "loss": 0.8378, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.029354207436399216, | |
| "grad_norm": 171197.53125, | |
| "learning_rate": 1.4215686274509803e-07, | |
| "loss": 0.8388, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03913894324853229, | |
| "grad_norm": 206661.828125, | |
| "learning_rate": 1.9117647058823527e-07, | |
| "loss": 0.847, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04892367906066536, | |
| "grad_norm": 230268.96875, | |
| "learning_rate": 2.4019607843137255e-07, | |
| "loss": 0.8411, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05870841487279843, | |
| "grad_norm": 133248.28125, | |
| "learning_rate": 2.8921568627450984e-07, | |
| "loss": 0.8412, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0684931506849315, | |
| "grad_norm": 147768.828125, | |
| "learning_rate": 3.3823529411764707e-07, | |
| "loss": 0.8431, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07827788649706457, | |
| "grad_norm": 167327.53125, | |
| "learning_rate": 3.872549019607843e-07, | |
| "loss": 0.8431, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08806262230919765, | |
| "grad_norm": 151943.75, | |
| "learning_rate": 4.3627450980392154e-07, | |
| "loss": 0.8311, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09784735812133072, | |
| "grad_norm": 149971.59375, | |
| "learning_rate": 4.852941176470588e-07, | |
| "loss": 0.8412, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10763209393346379, | |
| "grad_norm": 162329.40625, | |
| "learning_rate": 5.34313725490196e-07, | |
| "loss": 0.8356, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11741682974559686, | |
| "grad_norm": 158769.765625, | |
| "learning_rate": 5.833333333333334e-07, | |
| "loss": 0.8351, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12720156555772993, | |
| "grad_norm": 157725.59375, | |
| "learning_rate": 6.323529411764706e-07, | |
| "loss": 0.8331, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.136986301369863, | |
| "grad_norm": 205733.0, | |
| "learning_rate": 6.813725490196079e-07, | |
| "loss": 0.8366, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14677103718199608, | |
| "grad_norm": 172581.328125, | |
| "learning_rate": 7.30392156862745e-07, | |
| "loss": 0.831, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.15655577299412915, | |
| "grad_norm": 137428.0, | |
| "learning_rate": 7.794117647058823e-07, | |
| "loss": 0.8326, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.16634050880626222, | |
| "grad_norm": 151189.203125, | |
| "learning_rate": 8.284313725490196e-07, | |
| "loss": 0.8281, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1761252446183953, | |
| "grad_norm": 145620.625, | |
| "learning_rate": 8.774509803921568e-07, | |
| "loss": 0.8252, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18590998043052837, | |
| "grad_norm": 169938.84375, | |
| "learning_rate": 9.264705882352941e-07, | |
| "loss": 0.8308, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.19569471624266144, | |
| "grad_norm": 138760.015625, | |
| "learning_rate": 9.754901960784313e-07, | |
| "loss": 0.824, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2054794520547945, | |
| "grad_norm": 107872.125, | |
| "learning_rate": 9.99631294374483e-07, | |
| "loss": 0.8254, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.21526418786692758, | |
| "grad_norm": 113610.8359375, | |
| "learning_rate": 9.966849112204936e-07, | |
| "loss": 0.8267, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.22504892367906065, | |
| "grad_norm": 172441.515625, | |
| "learning_rate": 9.908095200725339e-07, | |
| "loss": 0.8198, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.23483365949119372, | |
| "grad_norm": 128079.9140625, | |
| "learning_rate": 9.82039768787326e-07, | |
| "loss": 0.8143, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2446183953033268, | |
| "grad_norm": 98149.8984375, | |
| "learning_rate": 9.70427373595903e-07, | |
| "loss": 0.8191, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.25440313111545987, | |
| "grad_norm": 147377.4375, | |
| "learning_rate": 9.560408141270531e-07, | |
| "loss": 0.8178, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.26418786692759294, | |
| "grad_norm": 156439.40625, | |
| "learning_rate": 9.389649295750781e-07, | |
| "loss": 0.8085, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 131892.015625, | |
| "learning_rate": 9.193004183933174e-07, | |
| "loss": 0.8172, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2837573385518591, | |
| "grad_norm": 132474.5, | |
| "learning_rate": 8.971632444637971e-07, | |
| "loss": 0.8136, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.29354207436399216, | |
| "grad_norm": 112619.109375, | |
| "learning_rate": 8.726839532448928e-07, | |
| "loss": 0.8065, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.30332681017612523, | |
| "grad_norm": 94225.5703125, | |
| "learning_rate": 8.460069019297606e-07, | |
| "loss": 0.813, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3131115459882583, | |
| "grad_norm": 117812.8203125, | |
| "learning_rate": 8.172894081553822e-07, | |
| "loss": 0.8022, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.32289628180039137, | |
| "grad_norm": 143287.125, | |
| "learning_rate": 7.86700822282382e-07, | |
| "loss": 0.8061, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.33268101761252444, | |
| "grad_norm": 107456.25, | |
| "learning_rate": 7.544215287164917e-07, | |
| "loss": 0.8006, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3424657534246575, | |
| "grad_norm": 101521.171875, | |
| "learning_rate": 7.206418821609812e-07, | |
| "loss": 0.8055, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3522504892367906, | |
| "grad_norm": 118311.5234375, | |
| "learning_rate": 6.855610850731017e-07, | |
| "loss": 0.8081, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.36203522504892366, | |
| "grad_norm": 89621.828125, | |
| "learning_rate": 6.493860129443046e-07, | |
| "loss": 0.8058, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.37181996086105673, | |
| "grad_norm": 143168.65625, | |
| "learning_rate": 6.123299943316975e-07, | |
| "loss": 0.8028, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3816046966731898, | |
| "grad_norm": 118598.2890625, | |
| "learning_rate": 5.746115528350295e-07, | |
| "loss": 0.8032, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3913894324853229, | |
| "grad_norm": 111807.8203125, | |
| "learning_rate": 5.364531184379139e-07, | |
| "loss": 0.8014, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.40117416829745595, | |
| "grad_norm": 124106.1328125, | |
| "learning_rate": 4.980797158126566e-07, | |
| "loss": 0.8045, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.410958904109589, | |
| "grad_norm": 102746.4765625, | |
| "learning_rate": 4.597176373239061e-07, | |
| "loss": 0.7996, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4207436399217221, | |
| "grad_norm": 108317.5859375, | |
| "learning_rate": 4.215931085565761e-07, | |
| "loss": 0.8037, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.43052837573385516, | |
| "grad_norm": 113342.515625, | |
| "learning_rate": 3.8393095423757764e-07, | |
| "loss": 0.8047, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.44031311154598823, | |
| "grad_norm": 111908.90625, | |
| "learning_rate": 3.469532724185699e-07, | |
| "loss": 0.8021, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4500978473581213, | |
| "grad_norm": 85004.15625, | |
| "learning_rate": 3.1087812473822845e-07, | |
| "loss": 0.8012, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4598825831702544, | |
| "grad_norm": 98335.1796875, | |
| "learning_rate": 2.7591825048770643e-07, | |
| "loss": 0.7977, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.46966731898238745, | |
| "grad_norm": 110674.03125, | |
| "learning_rate": 2.422798120625886e-07, | |
| "loss": 0.7979, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4794520547945205, | |
| "grad_norm": 114861.5390625, | |
| "learning_rate": 2.1016117919955512e-07, | |
| "loss": 0.8014, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4892367906066536, | |
| "grad_norm": 130109.5703125, | |
| "learning_rate": 1.7975175916725031e-07, | |
| "loss": 0.7994, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.49902152641878667, | |
| "grad_norm": 93286.4296875, | |
| "learning_rate": 1.5123087980985606e-07, | |
| "loss": 0.8039, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5088062622309197, | |
| "grad_norm": 123646.703125, | |
| "learning_rate": 1.2476673203018728e-07, | |
| "loss": 0.7917, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5185909980430529, | |
| "grad_norm": 98060.2265625, | |
| "learning_rate": 1.0051537794861287e-07, | |
| "loss": 0.7945, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5283757338551859, | |
| "grad_norm": 91181.1796875, | |
| "learning_rate": 7.861983058679871e-08, | |
| "loss": 0.7952, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.538160469667319, | |
| "grad_norm": 97605.3984375, | |
| "learning_rate": 5.920921050348626e-08, | |
| "loss": 0.7963, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.538160469667319, | |
| "eval_accuracy": 0.767, | |
| "eval_loss": 0.662277102470398, | |
| "eval_macro_f1": 0.5341406895117674, | |
| "eval_precision": 0.6105222734254993, | |
| "eval_recall": 0.5415245482510056, | |
| "eval_runtime": 16.9722, | |
| "eval_samples_per_second": 58.92, | |
| "eval_steps_per_second": 0.059, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 101627.8046875, | |
| "learning_rate": 4.23979843557199e-08, | |
| "loss": 0.7962, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5577299412915852, | |
| "grad_norm": 73889.578125, | |
| "learning_rate": 2.8285289875818407e-08, | |
| "loss": 0.794, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5675146771037182, | |
| "grad_norm": 87191.0546875, | |
| "learning_rate": 1.695435124477512e-08, | |
| "loss": 0.7984, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5772994129158513, | |
| "grad_norm": 105470.234375, | |
| "learning_rate": 8.471988309699807e-09, | |
| "loss": 0.7924, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5870841487279843, | |
| "grad_norm": 81710.7265625, | |
| "learning_rate": 2.8882225395023275e-09, | |
| "loss": 0.8005, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5870841487279843, | |
| "eval_accuracy": 0.767, | |
| "eval_loss": 0.6623347401618958, | |
| "eval_macro_f1": 0.5341406895117674, | |
| "eval_precision": 0.6105222734254993, | |
| "eval_recall": 0.5415245482510056, | |
| "eval_runtime": 17.0174, | |
| "eval_samples_per_second": 58.763, | |
| "eval_steps_per_second": 0.059, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5968688845401174, | |
| "grad_norm": 97212.125, | |
| "learning_rate": 2.35982042542604e-10, | |
| "loss": 0.7981, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6066536203522505, | |
| "grad_norm": 100700.640625, | |
| "learning_rate": 9.994690926142082e-07, | |
| "loss": 0.8006, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6164383561643836, | |
| "grad_norm": 88023.046875, | |
| "learning_rate": 9.962287406393883e-07, | |
| "loss": 0.7976, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6262230919765166, | |
| "grad_norm": 118936.609375, | |
| "learning_rate": 9.900620707609317e-07, | |
| "loss": 0.7961, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6360078277886497, | |
| "grad_norm": 91811.3984375, | |
| "learning_rate": 9.810054485397043e-07, | |
| "loss": 0.7904, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6360078277886497, | |
| "eval_accuracy": 0.757, | |
| "eval_loss": 0.6629467606544495, | |
| "eval_macro_f1": 0.5299579283330915, | |
| "eval_precision": 0.5844492268452096, | |
| "eval_recall": 0.5366881936388663, | |
| "eval_runtime": 17.0504, | |
| "eval_samples_per_second": 58.65, | |
| "eval_steps_per_second": 0.059, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6457925636007827, | |
| "grad_norm": 118632.46875, | |
| "learning_rate": 9.69112281917613e-07, | |
| "loss": 0.7927, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6555772994129159, | |
| "grad_norm": 70413.0390625, | |
| "learning_rate": 9.544527062648345e-07, | |
| "loss": 0.785, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6653620352250489, | |
| "grad_norm": 84746.4453125, | |
| "learning_rate": 9.371131707834789e-07, | |
| "loss": 0.7861, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.675146771037182, | |
| "grad_norm": 92019.0390625, | |
| "learning_rate": 9.171959287067115e-07, | |
| "loss": 0.7892, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.684931506849315, | |
| "grad_norm": 70649.2578125, | |
| "learning_rate": 8.948184342996868e-07, | |
| "loss": 0.789, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.684931506849315, | |
| "eval_accuracy": 0.751, | |
| "eval_loss": 0.6648359894752502, | |
| "eval_macro_f1": 0.5514410631387237, | |
| "eval_precision": 0.589223697650664, | |
| "eval_recall": 0.5504123598294002, | |
| "eval_runtime": 17.0124, | |
| "eval_samples_per_second": 58.781, | |
| "eval_steps_per_second": 0.059, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6947162426614482, | |
| "grad_norm": 80338.796875, | |
| "learning_rate": 8.701126502182503e-07, | |
| "loss": 0.7907, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7045009784735812, | |
| "grad_norm": 69257.328125, | |
| "learning_rate": 8.432242693099946e-07, | |
| "loss": 0.7881, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 95345.15625, | |
| "learning_rate": 8.14311855446795e-07, | |
| "loss": 0.7849, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7240704500978473, | |
| "grad_norm": 94407.8984375, | |
| "learning_rate": 7.835459084554374e-07, | |
| "loss": 0.7868, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7338551859099804, | |
| "grad_norm": 74418.0625, | |
| "learning_rate": 7.511078586605484e-07, | |
| "loss": 0.783, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7338551859099804, | |
| "eval_accuracy": 0.742, | |
| "eval_loss": 0.6660248637199402, | |
| "eval_macro_f1": 0.5532467532467532, | |
| "eval_precision": 0.579727791757989, | |
| "eval_recall": 0.5510154613293626, | |
| "eval_runtime": 16.9303, | |
| "eval_samples_per_second": 59.066, | |
| "eval_steps_per_second": 0.059, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7436399217221135, | |
| "grad_norm": 95142.0546875, | |
| "learning_rate": 7.171889969691225e-07, | |
| "loss": 0.7861, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7534246575342466, | |
| "grad_norm": 118907.203125, | |
| "learning_rate": 6.819893468060643e-07, | |
| "loss": 0.7878, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7632093933463796, | |
| "grad_norm": 66268.78125, | |
| "learning_rate": 6.457164845530662e-07, | |
| "loss": 0.7859, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7729941291585127, | |
| "grad_norm": 81464.265625, | |
| "learning_rate": 6.085843154468354e-07, | |
| "loss": 0.7819, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7827788649706457, | |
| "grad_norm": 87098.984375, | |
| "learning_rate": 5.708118121553318e-07, | |
| "loss": 0.7801, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7827788649706457, | |
| "eval_accuracy": 0.734, | |
| "eval_loss": 0.667039155960083, | |
| "eval_macro_f1": 0.5514001038862917, | |
| "eval_precision": 0.5710358542434345, | |
| "eval_recall": 0.5490647598270917, | |
| "eval_runtime": 17.0236, | |
| "eval_samples_per_second": 58.742, | |
| "eval_steps_per_second": 0.059, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7925636007827789, | |
| "grad_norm": 70466.7109375, | |
| "learning_rate": 5.326217234707852e-07, | |
| "loss": 0.7782, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8023483365949119, | |
| "grad_norm": 78038.6015625, | |
| "learning_rate": 4.942392607344717e-07, | |
| "loss": 0.786, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.812133072407045, | |
| "grad_norm": 91227.28125, | |
| "learning_rate": 4.558907697395553e-07, | |
| "loss": 0.7781, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 85470.5625, | |
| "learning_rate": 4.1780239594393807e-07, | |
| "loss": 0.7806, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8317025440313112, | |
| "grad_norm": 73229.96875, | |
| "learning_rate": 3.8019875086450787e-07, | |
| "loss": 0.7823, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8317025440313112, | |
| "eval_accuracy": 0.725, | |
| "eval_loss": 0.668379545211792, | |
| "eval_macro_f1": 0.5541786430494829, | |
| "eval_precision": 0.5667714002224962, | |
| "eval_recall": 0.5512665131499213, | |
| "eval_runtime": 16.9299, | |
| "eval_samples_per_second": 59.067, | |
| "eval_steps_per_second": 0.059, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8414872798434442, | |
| "grad_norm": 84347.0546875, | |
| "learning_rate": 3.4330158751721504e-07, | |
| "loss": 0.7805, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8512720156555773, | |
| "grad_norm": 60984.21875, | |
| "learning_rate": 3.0732849271405547e-07, | |
| "loss": 0.779, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8610567514677103, | |
| "grad_norm": 72854.8046875, | |
| "learning_rate": 2.7249160392863903e-07, | |
| "loss": 0.7811, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8708414872798435, | |
| "grad_norm": 69136.65625, | |
| "learning_rate": 2.3899635829713077e-07, | |
| "loss": 0.7811, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8806262230919765, | |
| "grad_norm": 74345.7421875, | |
| "learning_rate": 2.0704028113185306e-07, | |
| "loss": 0.7845, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8806262230919765, | |
| "eval_accuracy": 0.726, | |
| "eval_loss": 0.6684988737106323, | |
| "eval_macro_f1": 0.5584519389438943, | |
| "eval_precision": 0.5706882639048261, | |
| "eval_recall": 0.555107317439156, | |
| "eval_runtime": 16.9904, | |
| "eval_samples_per_second": 58.857, | |
| "eval_steps_per_second": 0.059, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8904109589041096, | |
| "grad_norm": 75862.8046875, | |
| "learning_rate": 1.768118210918219e-07, | |
| "loss": 0.7821, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9001956947162426, | |
| "grad_norm": 60382.25, | |
| "learning_rate": 1.484892388793511e-07, | |
| "loss": 0.7811, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9099804305283757, | |
| "grad_norm": 67346.859375, | |
| "learning_rate": 1.2223955601620633e-07, | |
| "loss": 0.781, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9197651663405088, | |
| "grad_norm": 81895.7265625, | |
| "learning_rate": 9.821756989850016e-08, | |
| "loss": 0.7816, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9295499021526419, | |
| "grad_norm": 87715.9453125, | |
| "learning_rate": 7.656494093865984e-08, | |
| "loss": 0.7815, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9295499021526419, | |
| "eval_accuracy": 0.723, | |
| "eval_loss": 0.6687072515487671, | |
| "eval_macro_f1": 0.5562540149399822, | |
| "eval_precision": 0.5672044579297467, | |
| "eval_recall": 0.553176815508654, | |
| "eval_runtime": 16.997, | |
| "eval_samples_per_second": 58.834, | |
| "eval_steps_per_second": 0.059, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9393346379647749, | |
| "grad_norm": 62333.75390625, | |
| "learning_rate": 5.740935717769707e-08, | |
| "loss": 0.775, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.949119373776908, | |
| "grad_norm": 67404.265625, | |
| "learning_rate": 4.0863781294158185e-08, | |
| "loss": 0.78, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.958904109589041, | |
| "grad_norm": 75798.1328125, | |
| "learning_rate": 2.702578445022852e-08, | |
| "loss": 0.7808, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9686888454011742, | |
| "grad_norm": 77483.9609375, | |
| "learning_rate": 1.597697090337763e-08, | |
| "loss": 0.7804, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9784735812133072, | |
| "grad_norm": 82261.78125, | |
| "learning_rate": 7.782496776675695e-09, | |
| "loss": 0.7795, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9784735812133072, | |
| "eval_accuracy": 0.723, | |
| "eval_loss": 0.6687466502189636, | |
| "eval_macro_f1": 0.5579874225879597, | |
| "eval_precision": 0.5685547947184422, | |
| "eval_recall": 0.5547754673315212, | |
| "eval_runtime": 16.934, | |
| "eval_samples_per_second": 59.053, | |
| "eval_steps_per_second": 0.059, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9882583170254403, | |
| "grad_norm": 68659.8671875, | |
| "learning_rate": 2.4906858256491524e-09, | |
| "loss": 0.78, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9980430528375733, | |
| "grad_norm": 92298.78125, | |
| "learning_rate": 1.3274446754157898e-10, | |
| "loss": 0.7824, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.0078277886497065, | |
| "grad_norm": 47534.20703125, | |
| "learning_rate": 2.607950146993215e-08, | |
| "loss": 0.7439, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.0176125244618395, | |
| "grad_norm": 43369.4765625, | |
| "learning_rate": 2.09146573606756e-08, | |
| "loss": 0.741, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.0273972602739727, | |
| "grad_norm": 48618.94140625, | |
| "learning_rate": 1.6308405330324294e-08, | |
| "loss": 0.742, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0273972602739727, | |
| "eval_accuracy": 0.723, | |
| "eval_loss": 0.668785035610199, | |
| "eval_macro_f1": 0.5597017416418834, | |
| "eval_precision": 0.5698832417582418, | |
| "eval_recall": 0.5563741191543882, | |
| "eval_runtime": 17.6735, | |
| "eval_samples_per_second": 56.582, | |
| "eval_steps_per_second": 0.057, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0371819960861057, | |
| "grad_norm": 47853.890625, | |
| "learning_rate": 1.2266116062696951e-08, | |
| "loss": 0.7434, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.0469667318982387, | |
| "grad_norm": 58555.92578125, | |
| "learning_rate": 8.792502686258752e-09, | |
| "loss": 0.7448, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.0567514677103718, | |
| "grad_norm": 54508.24609375, | |
| "learning_rate": 5.891615278823537e-09, | |
| "loss": 0.7406, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.0665362035225048, | |
| "grad_norm": 51657.953125, | |
| "learning_rate": 3.5668361453450313e-09, | |
| "loss": 0.7427, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.076320939334638, | |
| "grad_norm": 54486.4921875, | |
| "learning_rate": 1.8208758743000207e-09, | |
| "loss": 0.7444, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.076320939334638, | |
| "eval_accuracy": 0.723, | |
| "eval_loss": 0.6687813997268677, | |
| "eval_macro_f1": 0.5597017416418834, | |
| "eval_precision": 0.5698832417582418, | |
| "eval_recall": 0.5563741191543882, | |
| "eval_runtime": 17.4461, | |
| "eval_samples_per_second": 57.319, | |
| "eval_steps_per_second": 0.057, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.086105675146771, | |
| "grad_norm": 42684.90625, | |
| "learning_rate": 6.557701772635371e-10, | |
| "loss": 0.7402, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.095890410958904, | |
| "grad_norm": 43652.2265625, | |
| "learning_rate": 7.287751536050324e-11, | |
| "loss": 0.742, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.1056751467710373, | |
| "grad_norm": 55561.12890625, | |
| "learning_rate": 9.999271224846395e-07, | |
| "loss": 0.7405, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1154598825831703, | |
| "grad_norm": 57599.58984375, | |
| "learning_rate": 9.993442298227364e-07, | |
| "loss": 0.7397, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.1252446183953033, | |
| "grad_norm": 43188.921875, | |
| "learning_rate": 9.981791241257e-07, | |
| "loss": 0.7405, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.1252446183953033, | |
| "eval_accuracy": 0.723, | |
| "eval_loss": 0.6686130166053772, | |
| "eval_macro_f1": 0.5630742537166292, | |
| "eval_precision": 0.572477811481856, | |
| "eval_recall": 0.5595714228001223, | |
| "eval_runtime": 17.3251, | |
| "eval_samples_per_second": 57.72, | |
| "eval_steps_per_second": 0.058, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.1350293542074363, | |
| "grad_norm": 42596.6796875, | |
| "learning_rate": 9.964331638546548e-07, | |
| "loss": 0.7397, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.1448140900195694, | |
| "grad_norm": 60584.2890625, | |
| "learning_rate": 9.941083847211764e-07, | |
| "loss": 0.7417, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.1545988258317026, | |
| "grad_norm": 61568.48046875, | |
| "learning_rate": 9.912074973137411e-07, | |
| "loss": 0.7405, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.1643835616438356, | |
| "grad_norm": 50988.54296875, | |
| "learning_rate": 9.87733883937303e-07, | |
| "loss": 0.7381, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.1741682974559686, | |
| "grad_norm": 48960.01171875, | |
| "learning_rate": 9.836915946696757e-07, | |
| "loss": 0.7377, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1741682974559686, | |
| "eval_accuracy": 0.716, | |
| "eval_loss": 0.6701745986938477, | |
| "eval_macro_f1": 0.5807350718065004, | |
| "eval_precision": 0.582848916337505, | |
| "eval_recall": 0.5790466956386239, | |
| "eval_runtime": 17.3208, | |
| "eval_samples_per_second": 57.734, | |
| "eval_steps_per_second": 0.058, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1839530332681019, | |
| "grad_norm": 50420.14453125, | |
| "learning_rate": 9.790853426393242e-07, | |
| "loss": 0.7393, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.1937377690802349, | |
| "grad_norm": 39574.37890625, | |
| "learning_rate": 9.73920498530068e-07, | |
| "loss": 0.7388, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.203522504892368, | |
| "grad_norm": 51037.01953125, | |
| "learning_rate": 9.68203084319102e-07, | |
| "loss": 0.739, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.213307240704501, | |
| "grad_norm": 40847.125, | |
| "learning_rate": 9.619397662556433e-07, | |
| "loss": 0.7377, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.223091976516634, | |
| "grad_norm": 54462.5703125, | |
| "learning_rate": 9.55137847088381e-07, | |
| "loss": 0.7361, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.223091976516634, | |
| "eval_accuracy": 0.701, | |
| "eval_loss": 0.671897292137146, | |
| "eval_macro_f1": 0.5884212859150606, | |
| "eval_precision": 0.5853845179540225, | |
| "eval_recall": 0.5933739633291203, | |
| "eval_runtime": 17.4826, | |
| "eval_samples_per_second": 57.2, | |
| "eval_steps_per_second": 0.057, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2328767123287672, | |
| "grad_norm": 49697.23046875, | |
| "learning_rate": 9.478052575507982e-07, | |
| "loss": 0.737, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.2426614481409002, | |
| "grad_norm": 40874.4921875, | |
| "learning_rate": 9.39950547114292e-07, | |
| "loss": 0.7343, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.2524461839530332, | |
| "grad_norm": 63225.41796875, | |
| "learning_rate": 9.315828740198713e-07, | |
| "loss": 0.7346, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.2622309197651664, | |
| "grad_norm": 42103.6328125, | |
| "learning_rate": 9.227119946000589e-07, | |
| "loss": 0.733, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.2720156555772995, | |
| "grad_norm": 53617.17578125, | |
| "learning_rate": 9.133482519034428e-07, | |
| "loss": 0.7334, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2720156555772995, | |
| "eval_accuracy": 0.699, | |
| "eval_loss": 0.6714372038841248, | |
| "eval_macro_f1": 0.586855697342288, | |
| "eval_precision": 0.5837743615591398, | |
| "eval_recall": 0.592086962042119, | |
| "eval_runtime": 17.5108, | |
| "eval_samples_per_second": 57.108, | |
| "eval_steps_per_second": 0.057, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2818003913894325, | |
| "grad_norm": 49828.37890625, | |
| "learning_rate": 9.035025636351454e-07, | |
| "loss": 0.7325, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.2915851272015655, | |
| "grad_norm": 52261.57421875, | |
| "learning_rate": 8.931864094272663e-07, | |
| "loss": 0.7311, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.3013698630136985, | |
| "grad_norm": 44072.09765625, | |
| "learning_rate": 8.824118174541462e-07, | |
| "loss": 0.7327, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3111545988258317, | |
| "grad_norm": 46144.7578125, | |
| "learning_rate": 8.711913504080533e-07, | |
| "loss": 0.73, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.3209393346379648, | |
| "grad_norm": 40000.671875, | |
| "learning_rate": 8.595380908516452e-07, | |
| "loss": 0.7327, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3209393346379648, | |
| "eval_accuracy": 0.697, | |
| "eval_loss": 0.6719282865524292, | |
| "eval_macro_f1": 0.5986622073578596, | |
| "eval_precision": 0.5941197969142022, | |
| "eval_recall": 0.6099837826295225, | |
| "eval_runtime": 17.375, | |
| "eval_samples_per_second": 57.554, | |
| "eval_steps_per_second": 0.058, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3307240704500978, | |
| "grad_norm": 35656.375, | |
| "learning_rate": 8.474656259642874e-07, | |
| "loss": 0.7292, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.340508806262231, | |
| "grad_norm": 40683.66796875, | |
| "learning_rate": 8.349880317000082e-07, | |
| "loss": 0.7295, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.350293542074364, | |
| "grad_norm": 41085.30078125, | |
| "learning_rate": 8.221198563755681e-07, | |
| "loss": 0.7309, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.360078277886497, | |
| "grad_norm": 45236.1171875, | |
| "learning_rate": 8.088761037077715e-07, | |
| "loss": 0.7315, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.36986301369863, | |
| "grad_norm": 37422.8671875, | |
| "learning_rate": 7.952722153198053e-07, | |
| "loss": 0.7275, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.36986301369863, | |
| "eval_accuracy": 0.695, | |
| "eval_loss": 0.6719865202903748, | |
| "eval_macro_f1": 0.6001735640004142, | |
| "eval_precision": 0.5955075279261778, | |
| "eval_recall": 0.6134927368111225, | |
| "eval_runtime": 17.4181, | |
| "eval_samples_per_second": 57.412, | |
| "eval_steps_per_second": 0.057, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.379647749510763, | |
| "grad_norm": 0.6123355031013489, | |
| "learning_rate": 8.815093721181899e-07, | |
| "loss": 0.7295, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.3894324853228963, | |
| "grad_norm": 0.7243862748146057, | |
| "learning_rate": 8.688188004496398e-07, | |
| "loss": 0.7316, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.3992172211350293, | |
| "grad_norm": 0.6597609519958496, | |
| "learning_rate": 8.555842865150007e-07, | |
| "loss": 0.7315, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.4090019569471623, | |
| "grad_norm": 0.5748956203460693, | |
| "learning_rate": 8.418253488732276e-07, | |
| "loss": 0.7288, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.4187866927592956, | |
| "grad_norm": 0.6559261679649353, | |
| "learning_rate": 8.275622795151589e-07, | |
| "loss": 0.7285, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.4187866927592956, | |
| "eval_accuracy": 0.692, | |
| "eval_loss": 0.6723695993423462, | |
| "eval_macro_f1": 0.6046720575022462, | |
| "eval_precision": 0.5999736781495739, | |
| "eval_recall": 0.62275279764069, | |
| "eval_runtime": 7.5196, | |
| "eval_samples_per_second": 132.986, | |
| "eval_steps_per_second": 0.266, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.6027814745903015, | |
| "learning_rate": 8.128161139364327e-07, | |
| "loss": 0.7278, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.4383561643835616, | |
| "grad_norm": 0.6246464252471924, | |
| "learning_rate": 7.976086001138633e-07, | |
| "loss": 0.7292, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.4481409001956946, | |
| "grad_norm": 0.6013718247413635, | |
| "learning_rate": 7.819621664310375e-07, | |
| "loss": 0.7257, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.4579256360078277, | |
| "grad_norm": 0.5971313714981079, | |
| "learning_rate": 7.658998886004268e-07, | |
| "loss": 0.7266, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.467710371819961, | |
| "grad_norm": 0.691210925579071, | |
| "learning_rate": 7.494454556308112e-07, | |
| "loss": 0.7292, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.467710371819961, | |
| "eval_accuracy": 0.696, | |
| "eval_loss": 0.6721649765968323, | |
| "eval_macro_f1": 0.6107434799532893, | |
| "eval_precision": 0.6055948182597334, | |
| "eval_recall": 0.6301227556832938, | |
| "eval_runtime": 7.4797, | |
| "eval_samples_per_second": 133.696, | |
| "eval_steps_per_second": 0.267, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.477495107632094, | |
| "grad_norm": 0.6443959474563599, | |
| "learning_rate": 7.326231348901924e-07, | |
| "loss": 0.7281, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.487279843444227, | |
| "grad_norm": 0.6890615820884705, | |
| "learning_rate": 7.154577363157363e-07, | |
| "loss": 0.7252, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.4970645792563602, | |
| "grad_norm": 0.5993087291717529, | |
| "learning_rate": 6.979745758235166e-07, | |
| "loss": 0.7263, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.5068493150684932, | |
| "grad_norm": 0.6016016602516174, | |
| "learning_rate": 6.801994379720355e-07, | |
| "loss": 0.7255, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.5166340508806262, | |
| "grad_norm": 0.6525799632072449, | |
| "learning_rate": 6.621585379345729e-07, | |
| "loss": 0.7216, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.5166340508806262, | |
| "eval_accuracy": 0.697, | |
| "eval_loss": 0.672131359577179, | |
| "eval_macro_f1": 0.6134094606232656, | |
| "eval_precision": 0.6081358079904591, | |
| "eval_recall": 0.6339635599725286, | |
| "eval_runtime": 7.506, | |
| "eval_samples_per_second": 133.227, | |
| "eval_steps_per_second": 0.266, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.5264187866927594, | |
| "grad_norm": 0.6760383248329163, | |
| "learning_rate": 6.438784828364623e-07, | |
| "loss": 0.7266, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.5362035225048922, | |
| "grad_norm": 0.5501950979232788, | |
| "learning_rate": 6.253862325143006e-07, | |
| "loss": 0.7235, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.5459882583170255, | |
| "grad_norm": 0.6858824491500854, | |
| "learning_rate": 6.067090597549778e-07, | |
| "loss": 0.7282, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.5557729941291585, | |
| "grad_norm": 0.6812397241592407, | |
| "learning_rate": 5.878745100731532e-07, | |
| "loss": 0.7225, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.5655577299412915, | |
| "grad_norm": 0.8024822473526001, | |
| "learning_rate": 5.689103610865153e-07, | |
| "loss": 0.7293, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5655577299412915, | |
| "eval_accuracy": 0.689, | |
| "eval_loss": 0.6729016900062561, | |
| "eval_macro_f1": 0.6104704539960596, | |
| "eval_precision": 0.606289924506388, | |
| "eval_recall": 0.6352101621159918, | |
| "eval_runtime": 7.4663, | |
| "eval_samples_per_second": 133.936, | |
| "eval_steps_per_second": 0.268, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5753424657534247, | |
| "grad_norm": 0.6455201506614685, | |
| "learning_rate": 5.498445815487207e-07, | |
| "loss": 0.7284, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.5851272015655578, | |
| "grad_norm": 0.6250964403152466, | |
| "learning_rate": 5.307052901004523e-07, | |
| "loss": 0.7229, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.5949119373776908, | |
| "grad_norm": 0.8491455316543579, | |
| "learning_rate": 5.115207137994109e-07, | |
| "loss": 0.7269, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.604696673189824, | |
| "grad_norm": 0.6143584847450256, | |
| "learning_rate": 4.923191464904187e-07, | |
| "loss": 0.7244, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.6144814090019568, | |
| "grad_norm": 0.6323517560958862, | |
| "learning_rate": 4.73128907077017e-07, | |
| "loss": 0.7235, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.6144814090019568, | |
| "eval_accuracy": 0.686, | |
| "eval_loss": 0.6727749705314636, | |
| "eval_macro_f1": 0.6071542957802867, | |
| "eval_precision": 0.6033547896122016, | |
| "eval_recall": 0.6316810083626228, | |
| "eval_runtime": 7.8151, | |
| "eval_samples_per_second": 127.957, | |
| "eval_steps_per_second": 0.256, | |
| "step": 1650 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2044, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 2 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.2221991380228096e+17, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |