Instructions to use dzungpham/graphcodebert-code-classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use dzungpham/graphcodebert-code-classification with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("dzungpham/graphcodebert-code-classification", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
graphcodebert-code-classification / graphcodebert-swa-from-epoch-1 /checkpoint-1300 /trainer_state.json
| { | |
| "best_global_step": 1200, | |
| "best_metric": 0.6440359919423964, | |
| "best_model_checkpoint": "graphcodebert-swa-from-epoch-1/checkpoint-1200", | |
| "epoch": 2.658486707566462, | |
| "eval_steps": 100, | |
| "global_step": 1300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010224948875255624, | |
| "grad_norm": 2.4707133769989014, | |
| "learning_rate": 2.0512820512820512e-08, | |
| "loss": 0.8431, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02044989775051125, | |
| "grad_norm": 3.114851951599121, | |
| "learning_rate": 4.615384615384615e-08, | |
| "loss": 0.844, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03067484662576687, | |
| "grad_norm": 2.2256007194519043, | |
| "learning_rate": 7.179487179487178e-08, | |
| "loss": 0.847, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0408997955010225, | |
| "grad_norm": 2.5343081951141357, | |
| "learning_rate": 9.743589743589743e-08, | |
| "loss": 0.8492, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05112474437627812, | |
| "grad_norm": 3.1964163780212402, | |
| "learning_rate": 1.2307692307692308e-07, | |
| "loss": 0.8475, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06134969325153374, | |
| "grad_norm": 2.0466485023498535, | |
| "learning_rate": 1.4871794871794872e-07, | |
| "loss": 0.8445, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07157464212678936, | |
| "grad_norm": 2.164569139480591, | |
| "learning_rate": 1.7435897435897435e-07, | |
| "loss": 0.8452, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.081799591002045, | |
| "grad_norm": 2.56343150138855, | |
| "learning_rate": 2e-07, | |
| "loss": 0.8473, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09202453987730061, | |
| "grad_norm": 2.5742437839508057, | |
| "learning_rate": 2.2564102564102563e-07, | |
| "loss": 0.848, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10224948875255624, | |
| "grad_norm": 2.587480306625366, | |
| "learning_rate": 2.5128205128205126e-07, | |
| "loss": 0.8409, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11247443762781185, | |
| "grad_norm": 2.5737764835357666, | |
| "learning_rate": 2.7692307692307693e-07, | |
| "loss": 0.8471, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12269938650306748, | |
| "grad_norm": 3.044358730316162, | |
| "learning_rate": 3.0256410256410254e-07, | |
| "loss": 0.8448, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1329243353783231, | |
| "grad_norm": 2.326373815536499, | |
| "learning_rate": 3.282051282051282e-07, | |
| "loss": 0.8517, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14314928425357873, | |
| "grad_norm": 2.267547607421875, | |
| "learning_rate": 3.5384615384615386e-07, | |
| "loss": 0.8387, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15337423312883436, | |
| "grad_norm": 2.609232187271118, | |
| "learning_rate": 3.7948717948717947e-07, | |
| "loss": 0.841, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16359918200409, | |
| "grad_norm": 2.9532523155212402, | |
| "learning_rate": 4.0512820512820514e-07, | |
| "loss": 0.8509, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1738241308793456, | |
| "grad_norm": 3.002154588699341, | |
| "learning_rate": 4.307692307692308e-07, | |
| "loss": 0.8482, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.18404907975460122, | |
| "grad_norm": 2.701613187789917, | |
| "learning_rate": 4.5641025641025636e-07, | |
| "loss": 0.8422, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19427402862985685, | |
| "grad_norm": 2.7430365085601807, | |
| "learning_rate": 4.82051282051282e-07, | |
| "loss": 0.846, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.20449897750511248, | |
| "grad_norm": 2.8101418018341064, | |
| "learning_rate": 5.076923076923076e-07, | |
| "loss": 0.8444, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20449897750511248, | |
| "eval_accuracy": 0.52033, | |
| "eval_loss": 0.6922348141670227, | |
| "eval_macro_f1": 0.4427650399783254, | |
| "eval_precision": 0.6036606007378691, | |
| "eval_recall": 0.5386742448919869, | |
| "eval_runtime": 80.6812, | |
| "eval_samples_per_second": 1239.446, | |
| "eval_steps_per_second": 0.607, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2147239263803681, | |
| "grad_norm": 2.5835089683532715, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": 0.8437, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2249488752556237, | |
| "grad_norm": 2.7237253189086914, | |
| "learning_rate": 5.58974358974359e-07, | |
| "loss": 0.8431, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23517382413087934, | |
| "grad_norm": 2.4648072719573975, | |
| "learning_rate": 5.846153846153847e-07, | |
| "loss": 0.8399, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.24539877300613497, | |
| "grad_norm": 2.7011852264404297, | |
| "learning_rate": 6.102564102564103e-07, | |
| "loss": 0.8409, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2556237218813906, | |
| "grad_norm": 2.3170969486236572, | |
| "learning_rate": 6.358974358974358e-07, | |
| "loss": 0.8361, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2658486707566462, | |
| "grad_norm": 2.517194986343384, | |
| "learning_rate": 6.615384615384615e-07, | |
| "loss": 0.839, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27607361963190186, | |
| "grad_norm": 2.5092124938964844, | |
| "learning_rate": 6.871794871794871e-07, | |
| "loss": 0.8438, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.28629856850715746, | |
| "grad_norm": 2.3993237018585205, | |
| "learning_rate": 7.128205128205128e-07, | |
| "loss": 0.8349, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2965235173824131, | |
| "grad_norm": 2.1388165950775146, | |
| "learning_rate": 7.384615384615384e-07, | |
| "loss": 0.8363, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3067484662576687, | |
| "grad_norm": 1.8425891399383545, | |
| "learning_rate": 7.64102564102564e-07, | |
| "loss": 0.8325, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3169734151329243, | |
| "grad_norm": 1.8665552139282227, | |
| "learning_rate": 7.897435897435897e-07, | |
| "loss": 0.835, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.32719836400818, | |
| "grad_norm": 1.8765455484390259, | |
| "learning_rate": 8.153846153846154e-07, | |
| "loss": 0.8328, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3374233128834356, | |
| "grad_norm": 2.640779495239258, | |
| "learning_rate": 8.41025641025641e-07, | |
| "loss": 0.8388, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3476482617586912, | |
| "grad_norm": 2.174116373062134, | |
| "learning_rate": 8.666666666666667e-07, | |
| "loss": 0.8336, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.35787321063394684, | |
| "grad_norm": 1.8411178588867188, | |
| "learning_rate": 8.923076923076923e-07, | |
| "loss": 0.8384, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.36809815950920244, | |
| "grad_norm": 2.3652143478393555, | |
| "learning_rate": 9.179487179487179e-07, | |
| "loss": 0.8318, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3783231083844581, | |
| "grad_norm": 1.9870903491973877, | |
| "learning_rate": 9.435897435897435e-07, | |
| "loss": 0.8306, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.3885480572597137, | |
| "grad_norm": 2.458887815475464, | |
| "learning_rate": 9.692307692307691e-07, | |
| "loss": 0.8342, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3987730061349693, | |
| "grad_norm": 1.9105890989303589, | |
| "learning_rate": 9.948717948717949e-07, | |
| "loss": 0.8301, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.40899795501022496, | |
| "grad_norm": 2.04896879196167, | |
| "learning_rate": 9.999490793845076e-07, | |
| "loss": 0.8291, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.40899795501022496, | |
| "eval_accuracy": 0.52697, | |
| "eval_loss": 0.6913915872573853, | |
| "eval_macro_f1": 0.4511625248903547, | |
| "eval_precision": 0.6198512746424523, | |
| "eval_recall": 0.5452618609595298, | |
| "eval_runtime": 80.6395, | |
| "eval_samples_per_second": 1240.088, | |
| "eval_steps_per_second": 0.608, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.41922290388548056, | |
| "grad_norm": 2.394630193710327, | |
| "learning_rate": 9.997422321595486e-07, | |
| "loss": 0.8311, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4294478527607362, | |
| "grad_norm": 1.7013665437698364, | |
| "learning_rate": 9.993763415653074e-07, | |
| "loss": 0.8264, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4396728016359918, | |
| "grad_norm": 2.1158103942871094, | |
| "learning_rate": 9.988515240467613e-07, | |
| "loss": 0.8262, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4498977505112474, | |
| "grad_norm": 1.5985370874404907, | |
| "learning_rate": 9.981679466275095e-07, | |
| "loss": 0.8296, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4601226993865031, | |
| "grad_norm": 2.0426042079925537, | |
| "learning_rate": 9.973258268566182e-07, | |
| "loss": 0.8233, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4703476482617587, | |
| "grad_norm": 1.7411834001541138, | |
| "learning_rate": 9.963254327393853e-07, | |
| "loss": 0.8269, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48057259713701433, | |
| "grad_norm": 2.1182405948638916, | |
| "learning_rate": 9.95167082652047e-07, | |
| "loss": 0.8247, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.49079754601226994, | |
| "grad_norm": 2.0239953994750977, | |
| "learning_rate": 9.938511452404547e-07, | |
| "loss": 0.8308, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5010224948875256, | |
| "grad_norm": 2.366060495376587, | |
| "learning_rate": 9.923780393027534e-07, | |
| "loss": 0.8205, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5112474437627812, | |
| "grad_norm": 1.848169207572937, | |
| "learning_rate": 9.907482336560982e-07, | |
| "loss": 0.825, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5214723926380368, | |
| "grad_norm": 1.8216668367385864, | |
| "learning_rate": 9.889622469874535e-07, | |
| "loss": 0.8271, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5316973415132924, | |
| "grad_norm": 1.507730484008789, | |
| "learning_rate": 9.8702064768852e-07, | |
| "loss": 0.8147, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5419222903885481, | |
| "grad_norm": 1.7608263492584229, | |
| "learning_rate": 9.849240536748438e-07, | |
| "loss": 0.8221, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5521472392638037, | |
| "grad_norm": 2.203326940536499, | |
| "learning_rate": 9.826731321891641e-07, | |
| "loss": 0.8292, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5623721881390593, | |
| "grad_norm": 1.9529740810394287, | |
| "learning_rate": 9.802685995890632e-07, | |
| "loss": 0.8228, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5725971370143149, | |
| "grad_norm": 1.6214399337768555, | |
| "learning_rate": 9.777112211189841e-07, | |
| "loss": 0.8149, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5828220858895705, | |
| "grad_norm": 2.07482647895813, | |
| "learning_rate": 9.750018106666924e-07, | |
| "loss": 0.8143, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5930470347648262, | |
| "grad_norm": 1.7083203792572021, | |
| "learning_rate": 9.721412305042538e-07, | |
| "loss": 0.8188, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6032719836400818, | |
| "grad_norm": 2.0022943019866943, | |
| "learning_rate": 9.69130391013617e-07, | |
| "loss": 0.8195, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6134969325153374, | |
| "grad_norm": 1.5799461603164673, | |
| "learning_rate": 9.659702503968834e-07, | |
| "loss": 0.8146, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6134969325153374, | |
| "eval_accuracy": 0.55052, | |
| "eval_loss": 0.6896406412124634, | |
| "eval_macro_f1": 0.49869783315905847, | |
| "eval_precision": 0.6264643684302231, | |
| "eval_recall": 0.5665461014402418, | |
| "eval_runtime": 80.6145, | |
| "eval_samples_per_second": 1240.472, | |
| "eval_steps_per_second": 0.608, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.623721881390593, | |
| "grad_norm": 1.9373347759246826, | |
| "learning_rate": 9.626618143713586e-07, | |
| "loss": 0.8166, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6339468302658486, | |
| "grad_norm": 1.6276922225952148, | |
| "learning_rate": 9.592061358494813e-07, | |
| "loss": 0.8176, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6441717791411042, | |
| "grad_norm": 1.9373250007629395, | |
| "learning_rate": 9.556043146037337e-07, | |
| "loss": 0.8168, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.65439672801636, | |
| "grad_norm": 1.320465087890625, | |
| "learning_rate": 9.518574969166391e-07, | |
| "loss": 0.8101, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6646216768916156, | |
| "grad_norm": 1.8596330881118774, | |
| "learning_rate": 9.47966875215954e-07, | |
| "loss": 0.8167, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6748466257668712, | |
| "grad_norm": 1.304662823677063, | |
| "learning_rate": 9.439336876951793e-07, | |
| "loss": 0.815, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6850715746421268, | |
| "grad_norm": 1.8063029050827026, | |
| "learning_rate": 9.397592179195033e-07, | |
| "loss": 0.8121, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6952965235173824, | |
| "grad_norm": 1.7432739734649658, | |
| "learning_rate": 9.354447944173059e-07, | |
| "loss": 0.8104, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7055214723926381, | |
| "grad_norm": 1.4523797035217285, | |
| "learning_rate": 9.309917902573533e-07, | |
| "loss": 0.8098, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7157464212678937, | |
| "grad_norm": 1.681409478187561, | |
| "learning_rate": 9.264016226118188e-07, | |
| "loss": 0.8107, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7259713701431493, | |
| "grad_norm": 1.5168694257736206, | |
| "learning_rate": 9.216757523052652e-07, | |
| "loss": 0.8085, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7361963190184049, | |
| "grad_norm": 1.2200194597244263, | |
| "learning_rate": 9.168156833497371e-07, | |
| "loss": 0.8109, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7464212678936605, | |
| "grad_norm": 1.2745580673217773, | |
| "learning_rate": 9.118229624661078e-07, | |
| "loss": 0.8096, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7566462167689162, | |
| "grad_norm": 1.8339142799377441, | |
| "learning_rate": 9.066991785918333e-07, | |
| "loss": 0.808, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7668711656441718, | |
| "grad_norm": 1.2315114736557007, | |
| "learning_rate": 9.01445962375273e-07, | |
| "loss": 0.805, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7770961145194274, | |
| "grad_norm": 1.3081412315368652, | |
| "learning_rate": 8.960649856567333e-07, | |
| "loss": 0.8066, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.787321063394683, | |
| "grad_norm": 1.5145998001098633, | |
| "learning_rate": 8.90557960936404e-07, | |
| "loss": 0.8028, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7975460122699386, | |
| "grad_norm": 1.5990959405899048, | |
| "learning_rate": 8.84926640829353e-07, | |
| "loss": 0.8035, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8077709611451943, | |
| "grad_norm": 1.2120558023452759, | |
| "learning_rate": 8.79172817507756e-07, | |
| "loss": 0.802, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8179959100204499, | |
| "grad_norm": 1.5799622535705566, | |
| "learning_rate": 8.73298322130535e-07, | |
| "loss": 0.8037, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8179959100204499, | |
| "eval_accuracy": 0.58537, | |
| "eval_loss": 0.6877263784408569, | |
| "eval_macro_f1": 0.5630337315451738, | |
| "eval_precision": 0.628845494567806, | |
| "eval_recall": 0.5970616303474306, | |
| "eval_runtime": 81.293, | |
| "eval_samples_per_second": 1230.118, | |
| "eval_steps_per_second": 0.603, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8282208588957055, | |
| "grad_norm": 1.3475037813186646, | |
| "learning_rate": 8.673050242605921e-07, | |
| "loss": 0.8067, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8384458077709611, | |
| "grad_norm": 1.2836309671401978, | |
| "learning_rate": 8.611948312698179e-07, | |
| "loss": 0.7996, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8486707566462167, | |
| "grad_norm": 1.460316777229309, | |
| "learning_rate": 8.5496968773207e-07, | |
| "loss": 0.802, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8588957055214724, | |
| "grad_norm": 1.33119797706604, | |
| "learning_rate": 8.486315748043109e-07, | |
| "loss": 0.798, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.869120654396728, | |
| "grad_norm": 1.9951454401016235, | |
| "learning_rate": 8.42182509596102e-07, | |
| "loss": 0.8013, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8793456032719836, | |
| "grad_norm": 1.2590746879577637, | |
| "learning_rate": 8.356245445276584e-07, | |
| "loss": 0.7963, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8895705521472392, | |
| "grad_norm": 1.1192667484283447, | |
| "learning_rate": 8.28959766676663e-07, | |
| "loss": 0.8004, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8997955010224948, | |
| "grad_norm": 1.1180275678634644, | |
| "learning_rate": 8.221902971140535e-07, | |
| "loss": 0.8041, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9100204498977505, | |
| "grad_norm": 1.1210858821868896, | |
| "learning_rate": 8.153182902289897e-07, | |
| "loss": 0.7991, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9202453987730062, | |
| "grad_norm": 1.1266220808029175, | |
| "learning_rate": 8.083459330432164e-07, | |
| "loss": 0.8002, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9304703476482618, | |
| "grad_norm": 1.0373694896697998, | |
| "learning_rate": 8.012754445150434e-07, | |
| "loss": 0.7974, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9406952965235174, | |
| "grad_norm": 1.2223235368728638, | |
| "learning_rate": 7.941090748331589e-07, | |
| "loss": 0.8001, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.950920245398773, | |
| "grad_norm": 1.4549195766448975, | |
| "learning_rate": 7.868491047005065e-07, | |
| "loss": 0.7993, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9611451942740287, | |
| "grad_norm": 1.3064852952957153, | |
| "learning_rate": 7.794978446084483e-07, | |
| "loss": 0.8006, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9713701431492843, | |
| "grad_norm": 1.2408719062805176, | |
| "learning_rate": 7.720576341014498e-07, | |
| "loss": 0.7983, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9815950920245399, | |
| "grad_norm": 1.2148370742797852, | |
| "learning_rate": 7.645308410325187e-07, | |
| "loss": 0.7959, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9918200408997955, | |
| "grad_norm": 1.0927603244781494, | |
| "learning_rate": 7.569198608096317e-07, | |
| "loss": 0.7978, | |
| "step": 485 | |
| }, | |
| { | |
| "SWA": "started", | |
| "epoch": 1.0, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0020449897750512, | |
| "grad_norm": 1.245108723640442, | |
| "learning_rate": 7.492271156333967e-07, | |
| "loss": 0.7965, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0122699386503067, | |
| "grad_norm": 1.3393553495407104, | |
| "learning_rate": 7.414550537261828e-07, | |
| "loss": 0.795, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0224948875255624, | |
| "grad_norm": 1.2823072671890259, | |
| "learning_rate": 7.336061485529738e-07, | |
| "loss": 0.8014, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0224948875255624, | |
| "eval_accuracy": 0.60723, | |
| "eval_loss": 0.6864892244338989, | |
| "eval_macro_f1": 0.5966241921587988, | |
| "eval_precision": 0.6341761761282843, | |
| "eval_recall": 0.6160142746967282, | |
| "eval_runtime": 81.931, | |
| "eval_samples_per_second": 1220.539, | |
| "eval_steps_per_second": 0.598, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.032719836400818, | |
| "grad_norm": 1.1278107166290283, | |
| "learning_rate": 7.256828980341846e-07, | |
| "loss": 0.7977, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.0429447852760736, | |
| "grad_norm": 1.110093355178833, | |
| "learning_rate": 7.176878237506965e-07, | |
| "loss": 0.7954, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0531697341513293, | |
| "grad_norm": 1.2248748540878296, | |
| "learning_rate": 7.096234701413617e-07, | |
| "loss": 0.7957, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.0633946830265848, | |
| "grad_norm": 1.2420642375946045, | |
| "learning_rate": 7.014924036932345e-07, | |
| "loss": 0.7935, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0736196319018405, | |
| "grad_norm": 1.0777639150619507, | |
| "learning_rate": 6.932972121247831e-07, | |
| "loss": 0.796, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.0838445807770962, | |
| "grad_norm": 1.3830324411392212, | |
| "learning_rate": 6.850405035623481e-07, | |
| "loss": 0.7929, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0940695296523517, | |
| "grad_norm": 0.9407713413238525, | |
| "learning_rate": 6.767249057101023e-07, | |
| "loss": 0.7964, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1042944785276074, | |
| "grad_norm": 1.1688194274902344, | |
| "learning_rate": 6.683530650137832e-07, | |
| "loss": 0.7944, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.114519427402863, | |
| "grad_norm": 0.9509923458099365, | |
| "learning_rate": 6.599276458184588e-07, | |
| "loss": 0.7912, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1247443762781186, | |
| "grad_norm": 1.0683159828186035, | |
| "learning_rate": 6.514513295205969e-07, | |
| "loss": 0.7931, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1349693251533743, | |
| "grad_norm": 0.9022642374038696, | |
| "learning_rate": 6.429268137147104e-07, | |
| "loss": 0.7945, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.1451942740286298, | |
| "grad_norm": 1.1609984636306763, | |
| "learning_rate": 6.343568113348441e-07, | |
| "loss": 0.7913, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1554192229038855, | |
| "grad_norm": 1.2184994220733643, | |
| "learning_rate": 6.257440497911815e-07, | |
| "loss": 0.7919, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.165644171779141, | |
| "grad_norm": 1.0256582498550415, | |
| "learning_rate": 6.170912701020454e-07, | |
| "loss": 0.7912, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.1758691206543967, | |
| "grad_norm": 0.8725862503051758, | |
| "learning_rate": 6.084012260215645e-07, | |
| "loss": 0.7907, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.1860940695296525, | |
| "grad_norm": 1.5192348957061768, | |
| "learning_rate": 5.996766831632912e-07, | |
| "loss": 0.7913, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.196319018404908, | |
| "grad_norm": 1.109052062034607, | |
| "learning_rate": 5.909204181200414e-07, | |
| "loss": 0.795, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.2065439672801637, | |
| "grad_norm": 1.0413333177566528, | |
| "learning_rate": 5.821352175802419e-07, | |
| "loss": 0.7924, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2167689161554192, | |
| "grad_norm": 0.8926281929016113, | |
| "learning_rate": 5.733238774410647e-07, | |
| "loss": 0.7921, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.2269938650306749, | |
| "grad_norm": 0.9231971502304077, | |
| "learning_rate": 5.644892019186307e-07, | |
| "loss": 0.7894, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2269938650306749, | |
| "eval_accuracy": 0.62182, | |
| "eval_loss": 0.6853985786437988, | |
| "eval_macro_f1": 0.6195549574374046, | |
| "eval_precision": 0.6317310781859349, | |
| "eval_recall": 0.6267089641577176, | |
| "eval_runtime": 81.4512, | |
| "eval_samples_per_second": 1227.728, | |
| "eval_steps_per_second": 0.602, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2372188139059306, | |
| "grad_norm": 0.9845394492149353, | |
| "learning_rate": 5.556340026555653e-07, | |
| "loss": 0.7918, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.247443762781186, | |
| "grad_norm": 1.3759487867355347, | |
| "learning_rate": 5.467610978261906e-07, | |
| "loss": 0.7904, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2576687116564418, | |
| "grad_norm": 1.1568200588226318, | |
| "learning_rate": 5.378733112396398e-07, | |
| "loss": 0.7923, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.2678936605316973, | |
| "grad_norm": 1.4351176023483276, | |
| "learning_rate": 5.289734714411775e-07, | |
| "loss": 0.7905, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.278118609406953, | |
| "grad_norm": 1.178076982498169, | |
| "learning_rate": 5.200644108120121e-07, | |
| "loss": 0.7947, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.2883435582822087, | |
| "grad_norm": 1.2398017644882202, | |
| "learning_rate": 5.111489646678896e-07, | |
| "loss": 0.796, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2985685071574642, | |
| "grad_norm": 1.1236284971237183, | |
| "learning_rate": 5.022299703567508e-07, | |
| "loss": 0.7895, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.30879345603272, | |
| "grad_norm": 1.0112528800964355, | |
| "learning_rate": 4.933102663557439e-07, | |
| "loss": 0.79, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3190184049079754, | |
| "grad_norm": 1.3201746940612793, | |
| "learning_rate": 4.843926913678757e-07, | |
| "loss": 0.7897, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.329243353783231, | |
| "grad_norm": 0.969918429851532, | |
| "learning_rate": 4.7548008341859384e-07, | |
| "loss": 0.7912, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3394683026584868, | |
| "grad_norm": 0.8914945125579834, | |
| "learning_rate": 4.665752789525812e-07, | |
| "loss": 0.7964, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.3496932515337423, | |
| "grad_norm": 0.906989574432373, | |
| "learning_rate": 4.576811119310563e-07, | |
| "loss": 0.7924, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.359918200408998, | |
| "grad_norm": 1.2423877716064453, | |
| "learning_rate": 4.488004129298618e-07, | |
| "loss": 0.7904, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.3701431492842535, | |
| "grad_norm": 1.2455909252166748, | |
| "learning_rate": 4.3993600823863256e-07, | |
| "loss": 0.7875, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.3803680981595092, | |
| "grad_norm": 1.4931528568267822, | |
| "learning_rate": 4.3109071896132574e-07, | |
| "loss": 0.7947, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.390593047034765, | |
| "grad_norm": 1.0538350343704224, | |
| "learning_rate": 4.222673601184029e-07, | |
| "loss": 0.7886, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4008179959100204, | |
| "grad_norm": 0.9246828556060791, | |
| "learning_rate": 4.134687397509467e-07, | |
| "loss": 0.7884, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.4110429447852761, | |
| "grad_norm": 1.0383715629577637, | |
| "learning_rate": 4.0469765802700033e-07, | |
| "loss": 0.7943, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4212678936605316, | |
| "grad_norm": 1.0180901288986206, | |
| "learning_rate": 3.9595690635041145e-07, | |
| "loss": 0.7895, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.4314928425357873, | |
| "grad_norm": 0.9119181632995605, | |
| "learning_rate": 3.8724926647246536e-07, | |
| "loss": 0.7864, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4314928425357873, | |
| "eval_accuracy": 0.62357, | |
| "eval_loss": 0.6852650046348572, | |
| "eval_macro_f1": 0.6215147432652665, | |
| "eval_precision": 0.6330088346022082, | |
| "eval_recall": 0.628302383508456, | |
| "eval_runtime": 80.5998, | |
| "eval_samples_per_second": 1240.698, | |
| "eval_steps_per_second": 0.608, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.441717791411043, | |
| "grad_norm": 0.8882152438163757, | |
| "learning_rate": 3.785775096065909e-07, | |
| "loss": 0.7858, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.4519427402862985, | |
| "grad_norm": 1.5290203094482422, | |
| "learning_rate": 3.699443955464192e-07, | |
| "loss": 0.7837, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.4621676891615543, | |
| "grad_norm": 0.881521463394165, | |
| "learning_rate": 3.613526717874774e-07, | |
| "loss": 0.7858, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.4723926380368098, | |
| "grad_norm": 0.9955899119377136, | |
| "learning_rate": 3.5280507265279555e-07, | |
| "loss": 0.7907, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.4826175869120655, | |
| "grad_norm": 1.3247544765472412, | |
| "learning_rate": 3.443043184227067e-07, | |
| "loss": 0.79, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.4928425357873212, | |
| "grad_norm": 1.200223445892334, | |
| "learning_rate": 3.358531144691148e-07, | |
| "loss": 0.7874, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5030674846625767, | |
| "grad_norm": 0.9952226281166077, | |
| "learning_rate": 3.2745415039450867e-07, | |
| "loss": 0.7874, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.5132924335378322, | |
| "grad_norm": 1.2515606880187988, | |
| "learning_rate": 3.19110099175993e-07, | |
| "loss": 0.789, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5235173824130879, | |
| "grad_norm": 0.8901408314704895, | |
| "learning_rate": 3.10823616314612e-07, | |
| "loss": 0.7853, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.5337423312883436, | |
| "grad_norm": 1.0439373254776, | |
| "learning_rate": 3.0259733899023345e-07, | |
| "loss": 0.7899, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5439672801635993, | |
| "grad_norm": 1.0658971071243286, | |
| "learning_rate": 2.944338852222643e-07, | |
| "loss": 0.7868, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.5541922290388548, | |
| "grad_norm": 0.927455484867096, | |
| "learning_rate": 2.8633585303646413e-07, | |
| "loss": 0.7904, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.5644171779141103, | |
| "grad_norm": 0.9637423753738403, | |
| "learning_rate": 2.783058196381214e-07, | |
| "loss": 0.7856, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.574642126789366, | |
| "grad_norm": 1.396472692489624, | |
| "learning_rate": 2.7034634059185437e-07, | |
| "loss": 0.7903, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.5848670756646217, | |
| "grad_norm": 0.7922792434692383, | |
| "learning_rate": 2.6245994900830257e-07, | |
| "loss": 0.7843, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.5950920245398774, | |
| "grad_norm": 0.8896881341934204, | |
| "learning_rate": 2.546491547379619e-07, | |
| "loss": 0.787, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.605316973415133, | |
| "grad_norm": 0.8732028007507324, | |
| "learning_rate": 2.469164435724212e-07, | |
| "loss": 0.7856, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.6155419222903884, | |
| "grad_norm": 1.0021744966506958, | |
| "learning_rate": 2.3926427645325875e-07, | |
| "loss": 0.7867, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6257668711656441, | |
| "grad_norm": 1.1783545017242432, | |
| "learning_rate": 2.3169508868884453e-07, | |
| "loss": 0.7897, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.6359918200408998, | |
| "grad_norm": 0.9119800329208374, | |
| "learning_rate": 2.2421128917930243e-07, | |
| "loss": 0.7845, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6359918200408998, | |
| "eval_accuracy": 0.62896, | |
| "eval_loss": 0.6847647428512573, | |
| "eval_macro_f1": 0.6281943240633717, | |
| "eval_precision": 0.6346364525627035, | |
| "eval_recall": 0.6323959922867678, | |
| "eval_runtime": 80.6105, | |
| "eval_samples_per_second": 1240.533, | |
| "eval_steps_per_second": 0.608, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6462167689161555, | |
| "grad_norm": 0.8903971314430237, | |
| "learning_rate": 2.1681525964987474e-07, | |
| "loss": 0.7824, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.656441717791411, | |
| "grad_norm": 1.115395188331604, | |
| "learning_rate": 2.0950935389293656e-07, | |
| "loss": 0.7824, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.9636144638061523, | |
| "learning_rate": 2.022958970189001e-07, | |
| "loss": 0.7917, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.6768916155419222, | |
| "grad_norm": 0.8787257075309753, | |
| "learning_rate": 1.9517718471624532e-07, | |
| "loss": 0.7869, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.687116564417178, | |
| "grad_norm": 1.0157173871994019, | |
| "learning_rate": 1.88155482520916e-07, | |
| "loss": 0.7844, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.6973415132924337, | |
| "grad_norm": 0.9504719972610474, | |
| "learning_rate": 1.812330250953107e-07, | |
| "loss": 0.7872, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7075664621676891, | |
| "grad_norm": 0.893625795841217, | |
| "learning_rate": 1.7441201551710016e-07, | |
| "loss": 0.7879, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.7177914110429446, | |
| "grad_norm": 0.8460310101509094, | |
| "learning_rate": 1.6769462457809536e-07, | |
| "loss": 0.7853, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7280163599182004, | |
| "grad_norm": 0.9349818229675293, | |
| "learning_rate": 1.610829900933917e-07, | |
| "loss": 0.7862, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.738241308793456, | |
| "grad_norm": 0.859866738319397, | |
| "learning_rate": 1.545792162210074e-07, | |
| "loss": 0.7836, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7484662576687118, | |
| "grad_norm": 1.0148438215255737, | |
| "learning_rate": 1.481853727922341e-07, | |
| "loss": 0.7859, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.7586912065439673, | |
| "grad_norm": 0.8861204385757446, | |
| "learning_rate": 1.4190349465291035e-07, | |
| "loss": 0.7909, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.7689161554192228, | |
| "grad_norm": 0.7679073214530945, | |
| "learning_rate": 1.3573558101583105e-07, | |
| "loss": 0.785, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.7791411042944785, | |
| "grad_norm": 0.7364144325256348, | |
| "learning_rate": 1.2968359482449636e-07, | |
| "loss": 0.7824, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.7893660531697342, | |
| "grad_norm": 0.945924699306488, | |
| "learning_rate": 1.2374946212840288e-07, | |
| "loss": 0.7864, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.79959100204499, | |
| "grad_norm": 1.1060514450073242, | |
| "learning_rate": 1.1793507147007714e-07, | |
| "loss": 0.7866, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8098159509202454, | |
| "grad_norm": 0.9230445623397827, | |
| "learning_rate": 1.1224227328404534e-07, | |
| "loss": 0.7895, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.8200408997955009, | |
| "grad_norm": 0.9153196811676025, | |
| "learning_rate": 1.0667287930793151e-07, | |
| "loss": 0.7835, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8302658486707566, | |
| "grad_norm": 0.9513780474662781, | |
| "learning_rate": 1.0122866200586944e-07, | |
| "loss": 0.7846, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.8404907975460123, | |
| "grad_norm": 0.8672247529029846, | |
| "learning_rate": 9.591135400441552e-08, | |
| "loss": 0.7839, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8404907975460123, | |
| "eval_accuracy": 0.63125, | |
| "eval_loss": 0.6845182776451111, | |
| "eval_macro_f1": 0.6309538076224105, | |
| "eval_precision": 0.6350446377333951, | |
| "eval_recall": 0.6339031903992685, | |
| "eval_runtime": 80.5646, | |
| "eval_samples_per_second": 1241.24, | |
| "eval_steps_per_second": 0.608, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.850715746421268, | |
| "grad_norm": 1.2127219438552856, | |
| "learning_rate": 9.072264754113912e-08, | |
| "loss": 0.7876, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.8609406952965235, | |
| "grad_norm": 0.875455379486084, | |
| "learning_rate": 8.566419392606544e-08, | |
| "loss": 0.787, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.871165644171779, | |
| "grad_norm": 0.92503821849823, | |
| "learning_rate": 8.073760301614596e-08, | |
| "loss": 0.7834, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.8813905930470347, | |
| "grad_norm": 1.1361068487167358, | |
| "learning_rate": 7.594444270291922e-08, | |
| "loss": 0.7821, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.8916155419222904, | |
| "grad_norm": 1.1415101289749146, | |
| "learning_rate": 7.128623841352916e-08, | |
| "loss": 0.7877, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.9018404907975461, | |
| "grad_norm": 0.9358757138252258, | |
| "learning_rate": 6.676447262525547e-08, | |
| "loss": 0.7867, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9120654396728016, | |
| "grad_norm": 0.912706732749939, | |
| "learning_rate": 6.238058439371479e-08, | |
| "loss": 0.7884, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.9222903885480571, | |
| "grad_norm": 0.9449842572212219, | |
| "learning_rate": 5.813596889488009e-08, | |
| "loss": 0.7893, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9325153374233128, | |
| "grad_norm": 0.8449825048446655, | |
| "learning_rate": 5.403197698106432e-08, | |
| "loss": 0.7828, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.9427402862985685, | |
| "grad_norm": 0.9307764768600464, | |
| "learning_rate": 5.0069914751010913e-08, | |
| "loss": 0.785, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.9529652351738243, | |
| "grad_norm": 1.3704556226730347, | |
| "learning_rate": 4.625104313422673e-08, | |
| "loss": 0.7874, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.9631901840490797, | |
| "grad_norm": 1.0163496732711792, | |
| "learning_rate": 4.257657748969046e-08, | |
| "loss": 0.7834, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.9734151329243352, | |
| "grad_norm": 0.8112438321113586, | |
| "learning_rate": 3.904768721906304e-08, | |
| "loss": 0.7852, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.983640081799591, | |
| "grad_norm": 0.885705828666687, | |
| "learning_rate": 3.566549539452529e-08, | |
| "loss": 0.7792, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.9938650306748467, | |
| "grad_norm": 0.8692009449005127, | |
| "learning_rate": 3.243107840135878e-08, | |
| "loss": 0.7822, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.0040899795501024, | |
| "grad_norm": 0.8909807205200195, | |
| "learning_rate": 2.9345465595385866e-08, | |
| "loss": 0.7826, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.014314928425358, | |
| "grad_norm": 0.9065344333648682, | |
| "learning_rate": 2.6409638975375737e-08, | |
| "loss": 0.7849, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.0245398773006134, | |
| "grad_norm": 0.8145809173583984, | |
| "learning_rate": 2.3624532870522962e-08, | |
| "loss": 0.7885, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.034764826175869, | |
| "grad_norm": 0.9461153149604797, | |
| "learning_rate": 2.0991033643096457e-08, | |
| "loss": 0.7853, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.044989775051125, | |
| "grad_norm": 0.8470706343650818, | |
| "learning_rate": 1.8509979406353794e-08, | |
| "loss": 0.7881, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.044989775051125, | |
| "eval_accuracy": 0.63202, | |
| "eval_loss": 0.6844514012336731, | |
| "eval_macro_f1": 0.6318036560759084, | |
| "eval_precision": 0.6354113747156731, | |
| "eval_recall": 0.6344858797364747, | |
| "eval_runtime": 81.1838, | |
| "eval_samples_per_second": 1231.772, | |
| "eval_steps_per_second": 0.604, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0552147239263805, | |
| "grad_norm": 0.8817445635795593, | |
| "learning_rate": 1.6182159757810897e-08, | |
| "loss": 0.7879, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.065439672801636, | |
| "grad_norm": 0.856109082698822, | |
| "learning_rate": 1.400831552795234e-08, | |
| "loss": 0.7868, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.0756646216768915, | |
| "grad_norm": 0.956066370010376, | |
| "learning_rate": 1.1989138544461375e-08, | |
| "loss": 0.7845, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.085889570552147, | |
| "grad_norm": 0.930978000164032, | |
| "learning_rate": 1.0125271412044666e-08, | |
| "loss": 0.7876, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.096114519427403, | |
| "grad_norm": 0.9799636602401733, | |
| "learning_rate": 8.417307307923615e-09, | |
| "loss": 0.7861, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.1063394683026586, | |
| "grad_norm": 0.9991019368171692, | |
| "learning_rate": 6.8657897930547435e-09, | |
| "loss": 0.7852, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.116564417177914, | |
| "grad_norm": 1.076750636100769, | |
| "learning_rate": 5.471212639141132e-09, | |
| "loss": 0.7789, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.1267893660531696, | |
| "grad_norm": 0.9805507063865662, | |
| "learning_rate": 4.23401967148912e-09, | |
| "loss": 0.7829, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.1370143149284253, | |
| "grad_norm": 0.7899750471115112, | |
| "learning_rate": 3.154604627760571e-09, | |
| "loss": 0.7839, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.147239263803681, | |
| "grad_norm": 1.1698967218399048, | |
| "learning_rate": 2.2333110326655526e-09, | |
| "loss": 0.7869, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.1574642126789367, | |
| "grad_norm": 0.9302964806556702, | |
| "learning_rate": 1.4704320886352873e-09, | |
| "loss": 0.7832, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.1676891615541924, | |
| "grad_norm": 1.057986855506897, | |
| "learning_rate": 8.662105825103517e-10, | |
| "loss": 0.7864, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.1779141104294477, | |
| "grad_norm": 1.0347933769226074, | |
| "learning_rate": 4.208388082733161e-10, | |
| "loss": 0.7822, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.1881390593047034, | |
| "grad_norm": 0.9827083945274353, | |
| "learning_rate": 1.3445850585130924e-10, | |
| "loss": 0.784, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.198364008179959, | |
| "grad_norm": 0.8463678956031799, | |
| "learning_rate": 7.160816007045767e-12, | |
| "loss": 0.7811, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.208588957055215, | |
| "grad_norm": 0.9141009449958801, | |
| "learning_rate": 9.999610137486667e-07, | |
| "loss": 0.7828, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.21881390593047, | |
| "grad_norm": 0.8992940783500671, | |
| "learning_rate": 9.997700753166407e-07, | |
| "loss": 0.7843, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.229038854805726, | |
| "grad_norm": 0.9198014140129089, | |
| "learning_rate": 9.99420084654225e-07, | |
| "loss": 0.7867, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.2392638036809815, | |
| "grad_norm": 0.841385006904602, | |
| "learning_rate": 9.98911153146231e-07, | |
| "loss": 0.7899, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.2494887525562373, | |
| "grad_norm": 0.9428244233131409, | |
| "learning_rate": 9.982434427605222e-07, | |
| "loss": 0.783, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.2494887525562373, | |
| "eval_accuracy": 0.63535, | |
| "eval_loss": 0.6841139197349548, | |
| "eval_macro_f1": 0.6353491904387377, | |
| "eval_precision": 0.6368108503242846, | |
| "eval_recall": 0.6367719631437929, | |
| "eval_runtime": 81.1976, | |
| "eval_samples_per_second": 1231.563, | |
| "eval_steps_per_second": 0.603, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.259713701431493, | |
| "grad_norm": 0.8474355936050415, | |
| "learning_rate": 9.974171659964687e-07, | |
| "loss": 0.7805, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.2699386503067487, | |
| "grad_norm": 0.8366284370422363, | |
| "learning_rate": 9.964325858173184e-07, | |
| "loss": 0.7821, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.280163599182004, | |
| "grad_norm": 1.102426290512085, | |
| "learning_rate": 9.952900155665089e-07, | |
| "loss": 0.7854, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.2903885480572597, | |
| "grad_norm": 0.8815932273864746, | |
| "learning_rate": 9.939898188679462e-07, | |
| "loss": 0.7835, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.3006134969325154, | |
| "grad_norm": 0.8016415238380432, | |
| "learning_rate": 9.925324095102806e-07, | |
| "loss": 0.7842, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.310838445807771, | |
| "grad_norm": 0.8805480599403381, | |
| "learning_rate": 9.909182513152177e-07, | |
| "loss": 0.7791, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.3210633946830264, | |
| "grad_norm": 0.9736661314964294, | |
| "learning_rate": 9.891478579899078e-07, | |
| "loss": 0.7825, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.331288343558282, | |
| "grad_norm": 0.8331109285354614, | |
| "learning_rate": 9.872217929634573e-07, | |
| "loss": 0.7852, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.341513292433538, | |
| "grad_norm": 0.8597177267074585, | |
| "learning_rate": 9.851406692076183e-07, | |
| "loss": 0.7817, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.3517382413087935, | |
| "grad_norm": 0.7928445339202881, | |
| "learning_rate": 9.829051490417071e-07, | |
| "loss": 0.7765, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.361963190184049, | |
| "grad_norm": 0.8488237857818604, | |
| "learning_rate": 9.80515943921824e-07, | |
| "loss": 0.7836, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.372188139059305, | |
| "grad_norm": 0.7608004212379456, | |
| "learning_rate": 9.77973814214429e-07, | |
| "loss": 0.7834, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.38241308793456, | |
| "grad_norm": 0.8542405962944031, | |
| "learning_rate": 9.752795689543563e-07, | |
| "loss": 0.7777, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.392638036809816, | |
| "grad_norm": 0.8797897100448608, | |
| "learning_rate": 9.72434065587337e-07, | |
| "loss": 0.7823, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4028629856850716, | |
| "grad_norm": 0.9687849283218384, | |
| "learning_rate": 9.69438209697118e-07, | |
| "loss": 0.7754, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.4130879345603273, | |
| "grad_norm": 0.9111893773078918, | |
| "learning_rate": 9.662929547172574e-07, | |
| "loss": 0.7806, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4233128834355826, | |
| "grad_norm": 1.0323760509490967, | |
| "learning_rate": 9.629993016276944e-07, | |
| "loss": 0.7801, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.4335378323108383, | |
| "grad_norm": 0.79954594373703, | |
| "learning_rate": 9.595582986361872e-07, | |
| "loss": 0.7781, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.443762781186094, | |
| "grad_norm": 0.7106928825378418, | |
| "learning_rate": 9.559710408447184e-07, | |
| "loss": 0.7788, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.4539877300613497, | |
| "grad_norm": 0.77292400598526, | |
| "learning_rate": 9.522386699009795e-07, | |
| "loss": 0.7827, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.4539877300613497, | |
| "eval_accuracy": 0.645, | |
| "eval_loss": 0.6828427314758301, | |
| "eval_macro_f1": 0.6440359919423964, | |
| "eval_precision": 0.6441481409802297, | |
| "eval_recall": 0.6439695264773649, | |
| "eval_runtime": 81.1775, | |
| "eval_samples_per_second": 1231.869, | |
| "eval_steps_per_second": 0.604, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.4642126789366054, | |
| "grad_norm": 0.8576335310935974, | |
| "learning_rate": 9.483623736350402e-07, | |
| "loss": 0.7765, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.474437627811861, | |
| "grad_norm": 0.7940819263458252, | |
| "learning_rate": 9.443433856813196e-07, | |
| "loss": 0.7744, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.4846625766871164, | |
| "grad_norm": 0.9138656854629517, | |
| "learning_rate": 9.401829850859823e-07, | |
| "loss": 0.779, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.494887525562372, | |
| "grad_norm": 0.7292961478233337, | |
| "learning_rate": 9.358824958998804e-07, | |
| "loss": 0.7741, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.505112474437628, | |
| "grad_norm": 0.8346101641654968, | |
| "learning_rate": 9.314432867571731e-07, | |
| "loss": 0.7769, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.5153374233128836, | |
| "grad_norm": 0.7433446645736694, | |
| "learning_rate": 9.268667704397576e-07, | |
| "loss": 0.7811, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.525562372188139, | |
| "grad_norm": 0.7142143845558167, | |
| "learning_rate": 9.22154403427651e-07, | |
| "loss": 0.7739, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.5357873210633946, | |
| "grad_norm": 0.8269698023796082, | |
| "learning_rate": 9.173076854354633e-07, | |
| "loss": 0.7751, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.5460122699386503, | |
| "grad_norm": 0.7005385160446167, | |
| "learning_rate": 9.123281589351127e-07, | |
| "loss": 0.7747, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.556237218813906, | |
| "grad_norm": 0.7422548532485962, | |
| "learning_rate": 9.072174086649326e-07, | |
| "loss": 0.7764, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.5664621676891617, | |
| "grad_norm": 0.7844764590263367, | |
| "learning_rate": 9.01977061125327e-07, | |
| "loss": 0.7751, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.5766871165644174, | |
| "grad_norm": 0.8993695378303528, | |
| "learning_rate": 8.966087840611356e-07, | |
| "loss": 0.7771, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.5869120654396727, | |
| "grad_norm": 0.7648841738700867, | |
| "learning_rate": 8.911142859308729e-07, | |
| "loss": 0.7771, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.5971370143149284, | |
| "grad_norm": 0.789523720741272, | |
| "learning_rate": 8.854953153630096e-07, | |
| "loss": 0.7732, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.607361963190184, | |
| "grad_norm": 0.7698408365249634, | |
| "learning_rate": 8.7975366059947e-07, | |
| "loss": 0.769, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.61758691206544, | |
| "grad_norm": 1.019235610961914, | |
| "learning_rate": 8.738911489265233e-07, | |
| "loss": 0.7768, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.627811860940695, | |
| "grad_norm": 0.8915722966194153, | |
| "learning_rate": 8.679096460932475e-07, | |
| "loss": 0.774, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.638036809815951, | |
| "grad_norm": 1.0551347732543945, | |
| "learning_rate": 8.618110557177536e-07, | |
| "loss": 0.7711, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.6482617586912065, | |
| "grad_norm": 0.7630209922790527, | |
| "learning_rate": 8.555973186813575e-07, | |
| "loss": 0.7724, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.658486707566462, | |
| "grad_norm": 0.6783341765403748, | |
| "learning_rate": 8.49270412510893e-07, | |
| "loss": 0.773, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.658486707566462, | |
| "eval_accuracy": 0.64275, | |
| "eval_loss": 0.6818840503692627, | |
| "eval_macro_f1": 0.6366805441223703, | |
| "eval_precision": 0.6443671237738225, | |
| "eval_recall": 0.6381477730287184, | |
| "eval_runtime": 81.1862, | |
| "eval_samples_per_second": 1231.736, | |
| "eval_steps_per_second": 0.604, | |
| "step": 1300 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1956, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.498661374217421e+17, | |
| "train_batch_size": 1024, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |