| { | |
| "best_metric": 1.1161140203475952, | |
| "best_model_checkpoint": "square_run_age_gender/checkpoint-261", | |
| "epoch": 35.0, | |
| "eval_steps": 500, | |
| "global_step": 1015, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 13.471597671508789, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 1.979, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 6.632089138031006, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": 1.8207, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 10.463000297546387, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 1.9296, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 7.69188117980957, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 1.9443, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 10.529653549194336, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 1.9129, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 9.45837116241455, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 2.0835, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 6.899144649505615, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 1.905, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 6.275972843170166, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 2.0193, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 7.337122440338135, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 1.7395, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 8.741500854492188, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 1.8184, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 5.90638542175293, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": 1.8613, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 5.549325942993164, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 1.8278, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 9.570854187011719, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": 1.8713, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 5.850081920623779, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": 1.8891, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.25757575757575757, | |
| "eval_f1_macro": 0.17420690764344018, | |
| "eval_f1_micro": 0.25757575757575757, | |
| "eval_f1_weighted": 0.2100702351405982, | |
| "eval_loss": 1.867130160331726, | |
| "eval_precision_macro": 0.1681240063593005, | |
| "eval_precision_micro": 0.25757575757575757, | |
| "eval_precision_weighted": 0.20448824492942141, | |
| "eval_recall_macro": 0.21421012849584278, | |
| "eval_recall_micro": 0.25757575757575757, | |
| "eval_recall_weighted": 0.25757575757575757, | |
| "eval_runtime": 2.1698, | |
| "eval_samples_per_second": 60.835, | |
| "eval_steps_per_second": 7.835, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 1.0344827586206897, | |
| "grad_norm": 8.4002103805542, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 1.8553, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.103448275862069, | |
| "grad_norm": 6.214775085449219, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 1.8232, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 1.1724137931034484, | |
| "grad_norm": 5.48581600189209, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 1.9136, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 1.2413793103448276, | |
| "grad_norm": 3.806295871734619, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 1.9254, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.3103448275862069, | |
| "grad_norm": 5.554256439208984, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": 1.8243, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 1.3793103448275863, | |
| "grad_norm": 7.334174156188965, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 1.9214, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.4482758620689655, | |
| "grad_norm": 4.776826858520508, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": 1.8654, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 1.5172413793103448, | |
| "grad_norm": 10.249964714050293, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 1.9152, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 1.5862068965517242, | |
| "grad_norm": 5.083812713623047, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 1.9904, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 1.6551724137931034, | |
| "grad_norm": 4.7975029945373535, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 1.839, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 4.822813987731934, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 1.9303, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.793103448275862, | |
| "grad_norm": 6.845904350280762, | |
| "learning_rate": 5.0980392156862745e-05, | |
| "loss": 1.8255, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.8620689655172413, | |
| "grad_norm": 7.084125518798828, | |
| "learning_rate": 5.294117647058824e-05, | |
| "loss": 1.871, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.9310344827586206, | |
| "grad_norm": 8.454116821289062, | |
| "learning_rate": 5.490196078431373e-05, | |
| "loss": 1.817, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.273526430130005, | |
| "learning_rate": 5.6862745098039215e-05, | |
| "loss": 1.8327, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.3181818181818182, | |
| "eval_f1_macro": 0.15698350488823806, | |
| "eval_f1_micro": 0.3181818181818182, | |
| "eval_f1_weighted": 0.19373566841158035, | |
| "eval_loss": 1.8123832941055298, | |
| "eval_precision_macro": 0.13350340136054423, | |
| "eval_precision_micro": 0.3181818181818182, | |
| "eval_precision_weighted": 0.16110209235209236, | |
| "eval_recall_macro": 0.25083144368858656, | |
| "eval_recall_micro": 0.3181818181818182, | |
| "eval_recall_weighted": 0.3181818181818182, | |
| "eval_runtime": 2.1978, | |
| "eval_samples_per_second": 60.059, | |
| "eval_steps_per_second": 7.735, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 2.0689655172413794, | |
| "grad_norm": 7.48805570602417, | |
| "learning_rate": 5.882352941176471e-05, | |
| "loss": 1.875, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.1379310344827585, | |
| "grad_norm": 3.892385721206665, | |
| "learning_rate": 6.078431372549019e-05, | |
| "loss": 1.7235, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 2.206896551724138, | |
| "grad_norm": 8.482718467712402, | |
| "learning_rate": 6.274509803921569e-05, | |
| "loss": 1.8003, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 2.2758620689655173, | |
| "grad_norm": 13.920659065246582, | |
| "learning_rate": 6.470588235294118e-05, | |
| "loss": 1.984, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 2.344827586206897, | |
| "grad_norm": 7.427146911621094, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 1.7298, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 2.413793103448276, | |
| "grad_norm": 9.012772560119629, | |
| "learning_rate": 6.862745098039216e-05, | |
| "loss": 1.8282, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.4827586206896552, | |
| "grad_norm": 8.024530410766602, | |
| "learning_rate": 7.058823529411765e-05, | |
| "loss": 1.8988, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 2.5517241379310347, | |
| "grad_norm": 6.818090438842773, | |
| "learning_rate": 7.254901960784314e-05, | |
| "loss": 1.6532, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 2.6206896551724137, | |
| "grad_norm": 10.93878173828125, | |
| "learning_rate": 7.450980392156864e-05, | |
| "loss": 2.0298, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 2.689655172413793, | |
| "grad_norm": 9.51241397857666, | |
| "learning_rate": 7.647058823529411e-05, | |
| "loss": 2.0224, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 2.7586206896551726, | |
| "grad_norm": 9.287165641784668, | |
| "learning_rate": 7.843137254901961e-05, | |
| "loss": 1.8984, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.8275862068965516, | |
| "grad_norm": 8.898433685302734, | |
| "learning_rate": 8.039215686274511e-05, | |
| "loss": 1.746, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 2.896551724137931, | |
| "grad_norm": 6.119718074798584, | |
| "learning_rate": 8.23529411764706e-05, | |
| "loss": 1.9194, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 2.9655172413793105, | |
| "grad_norm": 6.862753391265869, | |
| "learning_rate": 8.431372549019608e-05, | |
| "loss": 1.9127, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.3181818181818182, | |
| "eval_f1_macro": 0.20846518382633952, | |
| "eval_f1_micro": 0.3181818181818182, | |
| "eval_f1_weighted": 0.25755123465797625, | |
| "eval_loss": 1.7830312252044678, | |
| "eval_precision_macro": 0.21283015309910777, | |
| "eval_precision_micro": 0.3181818181818182, | |
| "eval_precision_weighted": 0.2617880187903288, | |
| "eval_recall_macro": 0.26250188964474674, | |
| "eval_recall_micro": 0.3181818181818182, | |
| "eval_recall_weighted": 0.3181818181818182, | |
| "eval_runtime": 2.2371, | |
| "eval_samples_per_second": 59.004, | |
| "eval_steps_per_second": 7.599, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 3.0344827586206895, | |
| "grad_norm": 10.097410202026367, | |
| "learning_rate": 8.627450980392158e-05, | |
| "loss": 1.896, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 3.103448275862069, | |
| "grad_norm": 6.835166931152344, | |
| "learning_rate": 8.823529411764706e-05, | |
| "loss": 1.5849, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.1724137931034484, | |
| "grad_norm": 7.843909740447998, | |
| "learning_rate": 9.019607843137255e-05, | |
| "loss": 1.7373, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 3.2413793103448274, | |
| "grad_norm": 7.719568252563477, | |
| "learning_rate": 9.215686274509804e-05, | |
| "loss": 1.5069, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 3.310344827586207, | |
| "grad_norm": 8.885810852050781, | |
| "learning_rate": 9.411764705882353e-05, | |
| "loss": 1.523, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 3.3793103448275863, | |
| "grad_norm": 6.505782127380371, | |
| "learning_rate": 9.607843137254903e-05, | |
| "loss": 1.5585, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 7.679609298706055, | |
| "learning_rate": 9.80392156862745e-05, | |
| "loss": 1.5167, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.5172413793103448, | |
| "grad_norm": 12.342342376708984, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9155, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 3.586206896551724, | |
| "grad_norm": 5.502551078796387, | |
| "learning_rate": 9.978094194961665e-05, | |
| "loss": 1.3553, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 3.655172413793103, | |
| "grad_norm": 8.53254222869873, | |
| "learning_rate": 9.95618838992333e-05, | |
| "loss": 1.6346, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 3.7241379310344827, | |
| "grad_norm": 11.95875072479248, | |
| "learning_rate": 9.934282584884996e-05, | |
| "loss": 1.5988, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 3.793103448275862, | |
| "grad_norm": 7.445318222045898, | |
| "learning_rate": 9.912376779846659e-05, | |
| "loss": 1.613, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.862068965517241, | |
| "grad_norm": 12.528891563415527, | |
| "learning_rate": 9.890470974808325e-05, | |
| "loss": 1.7571, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 3.9310344827586206, | |
| "grad_norm": 9.875731468200684, | |
| "learning_rate": 9.86856516976999e-05, | |
| "loss": 1.53, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 6.541798114776611, | |
| "learning_rate": 9.846659364731654e-05, | |
| "loss": 1.4498, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.38636363636363635, | |
| "eval_f1_macro": 0.29356560009870114, | |
| "eval_f1_micro": 0.38636363636363635, | |
| "eval_f1_weighted": 0.3437889854048391, | |
| "eval_loss": 1.579649806022644, | |
| "eval_precision_macro": 0.43422035480859006, | |
| "eval_precision_micro": 0.38636363636363635, | |
| "eval_precision_weighted": 0.4527406417112299, | |
| "eval_recall_macro": 0.3179440665154951, | |
| "eval_recall_micro": 0.38636363636363635, | |
| "eval_recall_weighted": 0.38636363636363635, | |
| "eval_runtime": 2.2188, | |
| "eval_samples_per_second": 59.491, | |
| "eval_steps_per_second": 7.662, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 4.068965517241379, | |
| "grad_norm": 11.127625465393066, | |
| "learning_rate": 9.824753559693319e-05, | |
| "loss": 1.5055, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 4.137931034482759, | |
| "grad_norm": 7.331289768218994, | |
| "learning_rate": 9.802847754654983e-05, | |
| "loss": 1.3358, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.206896551724138, | |
| "grad_norm": 8.787720680236816, | |
| "learning_rate": 9.78094194961665e-05, | |
| "loss": 1.6528, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 4.275862068965517, | |
| "grad_norm": 8.822704315185547, | |
| "learning_rate": 9.759036144578314e-05, | |
| "loss": 1.2185, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 4.344827586206897, | |
| "grad_norm": 7.718049049377441, | |
| "learning_rate": 9.737130339539979e-05, | |
| "loss": 1.376, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 4.413793103448276, | |
| "grad_norm": 6.089763641357422, | |
| "learning_rate": 9.715224534501643e-05, | |
| "loss": 1.2114, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 4.482758620689655, | |
| "grad_norm": 11.10245418548584, | |
| "learning_rate": 9.693318729463309e-05, | |
| "loss": 1.5952, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 4.551724137931035, | |
| "grad_norm": 9.193964958190918, | |
| "learning_rate": 9.671412924424972e-05, | |
| "loss": 1.691, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 4.620689655172414, | |
| "grad_norm": 7.160553455352783, | |
| "learning_rate": 9.649507119386638e-05, | |
| "loss": 1.7942, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 4.689655172413794, | |
| "grad_norm": 10.51407527923584, | |
| "learning_rate": 9.627601314348302e-05, | |
| "loss": 1.541, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 4.758620689655173, | |
| "grad_norm": 7.315539836883545, | |
| "learning_rate": 9.605695509309968e-05, | |
| "loss": 1.4463, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 4.827586206896552, | |
| "grad_norm": 5.069845676422119, | |
| "learning_rate": 9.583789704271632e-05, | |
| "loss": 1.2094, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 4.896551724137931, | |
| "grad_norm": 7.61802864074707, | |
| "learning_rate": 9.561883899233297e-05, | |
| "loss": 1.3717, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 4.9655172413793105, | |
| "grad_norm": 5.559959888458252, | |
| "learning_rate": 9.539978094194963e-05, | |
| "loss": 1.2166, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.4772727272727273, | |
| "eval_f1_macro": 0.3867993464710291, | |
| "eval_f1_micro": 0.4772727272727273, | |
| "eval_f1_weighted": 0.4441923804921774, | |
| "eval_loss": 1.3484843969345093, | |
| "eval_precision_macro": 0.5067572493188256, | |
| "eval_precision_micro": 0.4772727272727273, | |
| "eval_precision_weighted": 0.5372770333115161, | |
| "eval_recall_macro": 0.4076719576719577, | |
| "eval_recall_micro": 0.4772727272727273, | |
| "eval_recall_weighted": 0.4772727272727273, | |
| "eval_runtime": 2.192, | |
| "eval_samples_per_second": 60.219, | |
| "eval_steps_per_second": 7.756, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 5.0344827586206895, | |
| "grad_norm": 6.8597941398620605, | |
| "learning_rate": 9.518072289156626e-05, | |
| "loss": 1.5412, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 5.103448275862069, | |
| "grad_norm": 5.627894401550293, | |
| "learning_rate": 9.496166484118292e-05, | |
| "loss": 1.0841, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 5.172413793103448, | |
| "grad_norm": 10.26051139831543, | |
| "learning_rate": 9.474260679079957e-05, | |
| "loss": 1.1118, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 5.241379310344827, | |
| "grad_norm": 7.57983922958374, | |
| "learning_rate": 9.452354874041621e-05, | |
| "loss": 1.1301, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 5.310344827586207, | |
| "grad_norm": 7.729267120361328, | |
| "learning_rate": 9.430449069003286e-05, | |
| "loss": 1.3092, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 5.379310344827586, | |
| "grad_norm": 5.462944507598877, | |
| "learning_rate": 9.40854326396495e-05, | |
| "loss": 1.0817, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 5.448275862068965, | |
| "grad_norm": 11.766180992126465, | |
| "learning_rate": 9.386637458926615e-05, | |
| "loss": 1.747, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 5.517241379310345, | |
| "grad_norm": 8.114238739013672, | |
| "learning_rate": 9.364731653888281e-05, | |
| "loss": 1.1059, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 5.586206896551724, | |
| "grad_norm": 7.437196254730225, | |
| "learning_rate": 9.342825848849946e-05, | |
| "loss": 1.2654, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 5.655172413793103, | |
| "grad_norm": 9.772777557373047, | |
| "learning_rate": 9.32092004381161e-05, | |
| "loss": 1.1972, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 5.724137931034483, | |
| "grad_norm": 5.461746692657471, | |
| "learning_rate": 9.299014238773275e-05, | |
| "loss": 1.1775, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 5.793103448275862, | |
| "grad_norm": 6.507452964782715, | |
| "learning_rate": 9.27710843373494e-05, | |
| "loss": 1.0831, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 5.862068965517241, | |
| "grad_norm": 10.11184024810791, | |
| "learning_rate": 9.255202628696606e-05, | |
| "loss": 1.002, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 5.931034482758621, | |
| "grad_norm": 9.433456420898438, | |
| "learning_rate": 9.233296823658269e-05, | |
| "loss": 1.5848, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 9.081482887268066, | |
| "learning_rate": 9.211391018619935e-05, | |
| "loss": 1.5704, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5606060606060606, | |
| "eval_f1_macro": 0.48525454733832823, | |
| "eval_f1_micro": 0.5606060606060606, | |
| "eval_f1_weighted": 0.5509911571429003, | |
| "eval_loss": 1.2560298442840576, | |
| "eval_precision_macro": 0.4905962384953981, | |
| "eval_precision_micro": 0.5606060606060606, | |
| "eval_precision_weighted": 0.5678953399541635, | |
| "eval_recall_macro": 0.5025774754346183, | |
| "eval_recall_micro": 0.5606060606060606, | |
| "eval_recall_weighted": 0.5606060606060606, | |
| "eval_runtime": 2.2113, | |
| "eval_samples_per_second": 59.694, | |
| "eval_steps_per_second": 7.688, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 6.068965517241379, | |
| "grad_norm": 7.783880710601807, | |
| "learning_rate": 9.1894852135816e-05, | |
| "loss": 1.1088, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 6.137931034482759, | |
| "grad_norm": 8.41838550567627, | |
| "learning_rate": 9.167579408543264e-05, | |
| "loss": 1.2991, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 6.206896551724138, | |
| "grad_norm": 7.9511799812316895, | |
| "learning_rate": 9.14567360350493e-05, | |
| "loss": 1.14, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.275862068965517, | |
| "grad_norm": 8.29940128326416, | |
| "learning_rate": 9.123767798466593e-05, | |
| "loss": 1.5207, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 6.344827586206897, | |
| "grad_norm": 7.130605220794678, | |
| "learning_rate": 9.10186199342826e-05, | |
| "loss": 1.2116, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 6.413793103448276, | |
| "grad_norm": 6.235842227935791, | |
| "learning_rate": 9.079956188389924e-05, | |
| "loss": 0.9688, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 6.482758620689655, | |
| "grad_norm": 4.8011980056762695, | |
| "learning_rate": 9.058050383351589e-05, | |
| "loss": 0.8892, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 6.551724137931035, | |
| "grad_norm": 7.332155227661133, | |
| "learning_rate": 9.036144578313253e-05, | |
| "loss": 1.0801, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 6.620689655172414, | |
| "grad_norm": 6.098062515258789, | |
| "learning_rate": 9.014238773274918e-05, | |
| "loss": 0.9725, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 6.689655172413794, | |
| "grad_norm": 7.8977813720703125, | |
| "learning_rate": 8.992332968236583e-05, | |
| "loss": 1.1209, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 6.758620689655173, | |
| "grad_norm": 6.769626617431641, | |
| "learning_rate": 8.970427163198248e-05, | |
| "loss": 1.305, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 6.827586206896552, | |
| "grad_norm": 6.7080793380737305, | |
| "learning_rate": 8.948521358159913e-05, | |
| "loss": 1.097, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 6.896551724137931, | |
| "grad_norm": 6.5601806640625, | |
| "learning_rate": 8.926615553121578e-05, | |
| "loss": 0.9768, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 6.9655172413793105, | |
| "grad_norm": 6.294341564178467, | |
| "learning_rate": 8.904709748083242e-05, | |
| "loss": 1.2465, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.49242424242424243, | |
| "eval_f1_macro": 0.38536093384329645, | |
| "eval_f1_micro": 0.49242424242424243, | |
| "eval_f1_weighted": 0.4392739197478552, | |
| "eval_loss": 1.4968072175979614, | |
| "eval_precision_macro": 0.5611372180451127, | |
| "eval_precision_micro": 0.49242424242424243, | |
| "eval_precision_weighted": 0.5975304027113237, | |
| "eval_recall_macro": 0.4107180650037793, | |
| "eval_recall_micro": 0.49242424242424243, | |
| "eval_recall_weighted": 0.49242424242424243, | |
| "eval_runtime": 2.2083, | |
| "eval_samples_per_second": 59.774, | |
| "eval_steps_per_second": 7.698, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 7.0344827586206895, | |
| "grad_norm": 6.664538383483887, | |
| "learning_rate": 8.882803943044907e-05, | |
| "loss": 1.0662, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 7.103448275862069, | |
| "grad_norm": 8.425392150878906, | |
| "learning_rate": 8.860898138006573e-05, | |
| "loss": 1.034, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 7.172413793103448, | |
| "grad_norm": 8.941866874694824, | |
| "learning_rate": 8.838992332968236e-05, | |
| "loss": 1.3566, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 7.241379310344827, | |
| "grad_norm": 7.900031089782715, | |
| "learning_rate": 8.817086527929902e-05, | |
| "loss": 1.3013, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 7.310344827586207, | |
| "grad_norm": 7.721550464630127, | |
| "learning_rate": 8.795180722891567e-05, | |
| "loss": 1.1345, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 7.379310344827586, | |
| "grad_norm": 6.119128704071045, | |
| "learning_rate": 8.773274917853231e-05, | |
| "loss": 0.7996, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 7.448275862068965, | |
| "grad_norm": 6.922367095947266, | |
| "learning_rate": 8.751369112814896e-05, | |
| "loss": 1.1724, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 7.517241379310345, | |
| "grad_norm": 7.787768840789795, | |
| "learning_rate": 8.72946330777656e-05, | |
| "loss": 1.0874, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 7.586206896551724, | |
| "grad_norm": 5.789196014404297, | |
| "learning_rate": 8.707557502738227e-05, | |
| "loss": 0.7744, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 7.655172413793103, | |
| "grad_norm": 8.260876655578613, | |
| "learning_rate": 8.685651697699891e-05, | |
| "loss": 0.9799, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 7.724137931034483, | |
| "grad_norm": 5.3789520263671875, | |
| "learning_rate": 8.663745892661556e-05, | |
| "loss": 0.8668, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 7.793103448275862, | |
| "grad_norm": 10.147786140441895, | |
| "learning_rate": 8.64184008762322e-05, | |
| "loss": 1.1608, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 7.862068965517241, | |
| "grad_norm": 5.489473342895508, | |
| "learning_rate": 8.619934282584885e-05, | |
| "loss": 0.8116, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 7.931034482758621, | |
| "grad_norm": 7.813507080078125, | |
| "learning_rate": 8.59802847754655e-05, | |
| "loss": 1.1165, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 9.428513526916504, | |
| "learning_rate": 8.576122672508216e-05, | |
| "loss": 1.2531, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5, | |
| "eval_f1_macro": 0.4380420530832049, | |
| "eval_f1_micro": 0.5, | |
| "eval_f1_weighted": 0.48411055093350336, | |
| "eval_loss": 1.4662878513336182, | |
| "eval_precision_macro": 0.46228529523343914, | |
| "eval_precision_micro": 0.5, | |
| "eval_precision_weighted": 0.5301592857204586, | |
| "eval_recall_macro": 0.46928949357520783, | |
| "eval_recall_micro": 0.5, | |
| "eval_recall_weighted": 0.5, | |
| "eval_runtime": 2.2486, | |
| "eval_samples_per_second": 58.704, | |
| "eval_steps_per_second": 7.56, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 8.068965517241379, | |
| "grad_norm": 8.723676681518555, | |
| "learning_rate": 8.55421686746988e-05, | |
| "loss": 0.7241, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 8.137931034482758, | |
| "grad_norm": 5.1509904861450195, | |
| "learning_rate": 8.532311062431545e-05, | |
| "loss": 1.1178, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 8.206896551724139, | |
| "grad_norm": 9.173816680908203, | |
| "learning_rate": 8.51040525739321e-05, | |
| "loss": 1.1048, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 8.275862068965518, | |
| "grad_norm": 5.16646146774292, | |
| "learning_rate": 8.488499452354874e-05, | |
| "loss": 0.8407, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 8.344827586206897, | |
| "grad_norm": 7.476856708526611, | |
| "learning_rate": 8.46659364731654e-05, | |
| "loss": 0.7104, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 8.413793103448276, | |
| "grad_norm": 4.857934474945068, | |
| "learning_rate": 8.444687842278203e-05, | |
| "loss": 0.8153, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 8.482758620689655, | |
| "grad_norm": 4.849685192108154, | |
| "learning_rate": 8.42278203723987e-05, | |
| "loss": 0.7941, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 8.551724137931034, | |
| "grad_norm": 6.878391265869141, | |
| "learning_rate": 8.400876232201533e-05, | |
| "loss": 0.8408, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 8.620689655172415, | |
| "grad_norm": 9.568788528442383, | |
| "learning_rate": 8.378970427163199e-05, | |
| "loss": 1.1011, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 8.689655172413794, | |
| "grad_norm": 6.0624284744262695, | |
| "learning_rate": 8.357064622124863e-05, | |
| "loss": 0.6055, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 8.758620689655173, | |
| "grad_norm": 8.931193351745605, | |
| "learning_rate": 8.335158817086528e-05, | |
| "loss": 1.1554, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 8.827586206896552, | |
| "grad_norm": 9.992157936096191, | |
| "learning_rate": 8.313253012048194e-05, | |
| "loss": 0.9706, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 8.89655172413793, | |
| "grad_norm": 7.536012649536133, | |
| "learning_rate": 8.291347207009858e-05, | |
| "loss": 0.9864, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 8.96551724137931, | |
| "grad_norm": 5.046841144561768, | |
| "learning_rate": 8.269441401971523e-05, | |
| "loss": 0.5318, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5909090909090909, | |
| "eval_f1_macro": 0.4938130613497124, | |
| "eval_f1_micro": 0.5909090909090909, | |
| "eval_f1_weighted": 0.564608679657, | |
| "eval_loss": 1.1161140203475952, | |
| "eval_precision_macro": 0.48919183057838517, | |
| "eval_precision_micro": 0.5909090909090909, | |
| "eval_precision_weighted": 0.5594657793187205, | |
| "eval_recall_macro": 0.5175661375661376, | |
| "eval_recall_micro": 0.5909090909090909, | |
| "eval_recall_weighted": 0.5909090909090909, | |
| "eval_runtime": 2.2385, | |
| "eval_samples_per_second": 58.969, | |
| "eval_steps_per_second": 7.594, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 9.03448275862069, | |
| "grad_norm": 8.074467658996582, | |
| "learning_rate": 8.247535596933188e-05, | |
| "loss": 0.9298, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 9.10344827586207, | |
| "grad_norm": 5.22785758972168, | |
| "learning_rate": 8.225629791894852e-05, | |
| "loss": 0.7831, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 9.172413793103448, | |
| "grad_norm": 9.326375007629395, | |
| "learning_rate": 8.203723986856517e-05, | |
| "loss": 0.8289, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 9.241379310344827, | |
| "grad_norm": 5.424740791320801, | |
| "learning_rate": 8.181818181818183e-05, | |
| "loss": 0.5332, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 9.310344827586206, | |
| "grad_norm": 8.164321899414062, | |
| "learning_rate": 8.159912376779846e-05, | |
| "loss": 0.7783, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 9.379310344827585, | |
| "grad_norm": 7.742315769195557, | |
| "learning_rate": 8.138006571741512e-05, | |
| "loss": 0.7159, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 9.448275862068966, | |
| "grad_norm": 6.30488920211792, | |
| "learning_rate": 8.116100766703177e-05, | |
| "loss": 0.9866, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 9.517241379310345, | |
| "grad_norm": 7.696253776550293, | |
| "learning_rate": 8.094194961664841e-05, | |
| "loss": 0.521, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 9.586206896551724, | |
| "grad_norm": 4.019304275512695, | |
| "learning_rate": 8.072289156626507e-05, | |
| "loss": 0.4281, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 9.655172413793103, | |
| "grad_norm": 4.379205703735352, | |
| "learning_rate": 8.05038335158817e-05, | |
| "loss": 0.3542, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 9.724137931034482, | |
| "grad_norm": 7.670277118682861, | |
| "learning_rate": 8.028477546549837e-05, | |
| "loss": 0.9465, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 9.793103448275861, | |
| "grad_norm": 8.019712448120117, | |
| "learning_rate": 8.0065717415115e-05, | |
| "loss": 1.087, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 9.862068965517242, | |
| "grad_norm": 8.645779609680176, | |
| "learning_rate": 7.984665936473166e-05, | |
| "loss": 0.88, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 9.931034482758621, | |
| "grad_norm": 5.542499542236328, | |
| "learning_rate": 7.96276013143483e-05, | |
| "loss": 0.7297, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 5.367166042327881, | |
| "learning_rate": 7.940854326396495e-05, | |
| "loss": 0.6824, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.5909090909090909, | |
| "eval_f1_macro": 0.48022039225046736, | |
| "eval_f1_micro": 0.5909090909090909, | |
| "eval_f1_weighted": 0.5515227462595883, | |
| "eval_loss": 1.1811466217041016, | |
| "eval_precision_macro": 0.4813612313612314, | |
| "eval_precision_micro": 0.5909090909090909, | |
| "eval_precision_weighted": 0.549845041322314, | |
| "eval_recall_macro": 0.5147770219198791, | |
| "eval_recall_micro": 0.5909090909090909, | |
| "eval_recall_weighted": 0.5909090909090909, | |
| "eval_runtime": 2.1897, | |
| "eval_samples_per_second": 60.283, | |
| "eval_steps_per_second": 7.764, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 10.068965517241379, | |
| "grad_norm": 8.408239364624023, | |
| "learning_rate": 7.918948521358161e-05, | |
| "loss": 0.5506, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 10.137931034482758, | |
| "grad_norm": 4.473087787628174, | |
| "learning_rate": 7.897042716319824e-05, | |
| "loss": 0.4722, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 10.206896551724139, | |
| "grad_norm": 5.755477428436279, | |
| "learning_rate": 7.87513691128149e-05, | |
| "loss": 0.6369, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 10.275862068965518, | |
| "grad_norm": 9.0516939163208, | |
| "learning_rate": 7.853231106243155e-05, | |
| "loss": 0.6167, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 10.344827586206897, | |
| "grad_norm": 5.995102405548096, | |
| "learning_rate": 7.83132530120482e-05, | |
| "loss": 0.6051, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 10.413793103448276, | |
| "grad_norm": 7.3448805809021, | |
| "learning_rate": 7.809419496166484e-05, | |
| "loss": 0.5321, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 10.482758620689655, | |
| "grad_norm": 8.903775215148926, | |
| "learning_rate": 7.78751369112815e-05, | |
| "loss": 0.6208, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 10.551724137931034, | |
| "grad_norm": 9.240314483642578, | |
| "learning_rate": 7.765607886089813e-05, | |
| "loss": 0.9838, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 10.620689655172415, | |
| "grad_norm": 10.112192153930664, | |
| "learning_rate": 7.74370208105148e-05, | |
| "loss": 0.905, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 10.689655172413794, | |
| "grad_norm": 9.252533912658691, | |
| "learning_rate": 7.721796276013144e-05, | |
| "loss": 1.002, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 10.758620689655173, | |
| "grad_norm": 7.741162300109863, | |
| "learning_rate": 7.699890470974809e-05, | |
| "loss": 1.0869, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 10.827586206896552, | |
| "grad_norm": 9.742755889892578, | |
| "learning_rate": 7.677984665936475e-05, | |
| "loss": 0.5421, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 10.89655172413793, | |
| "grad_norm": 8.84914493560791, | |
| "learning_rate": 7.656078860898138e-05, | |
| "loss": 0.523, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 10.96551724137931, | |
| "grad_norm": 7.173616409301758, | |
| "learning_rate": 7.634173055859804e-05, | |
| "loss": 0.6324, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.5757575757575758, | |
| "eval_f1_macro": 0.4926929392446634, | |
| "eval_f1_micro": 0.5757575757575758, | |
| "eval_f1_weighted": 0.5506095437129921, | |
| "eval_loss": 1.2358123064041138, | |
| "eval_precision_macro": 0.5015354104024055, | |
| "eval_precision_micro": 0.5757575757575758, | |
| "eval_precision_weighted": 0.5689625015643824, | |
| "eval_recall_macro": 0.5226228269085412, | |
| "eval_recall_micro": 0.5757575757575758, | |
| "eval_recall_weighted": 0.5757575757575758, | |
| "eval_runtime": 2.2118, | |
| "eval_samples_per_second": 59.681, | |
| "eval_steps_per_second": 7.686, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 11.03448275862069, | |
| "grad_norm": 8.598217964172363, | |
| "learning_rate": 7.612267250821467e-05, | |
| "loss": 0.703, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 11.10344827586207, | |
| "grad_norm": 4.415513038635254, | |
| "learning_rate": 7.590361445783133e-05, | |
| "loss": 0.5108, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 11.172413793103448, | |
| "grad_norm": 6.3496479988098145, | |
| "learning_rate": 7.568455640744798e-05, | |
| "loss": 0.673, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 11.241379310344827, | |
| "grad_norm": 5.767419338226318, | |
| "learning_rate": 7.546549835706462e-05, | |
| "loss": 0.3173, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 11.310344827586206, | |
| "grad_norm": 5.924855709075928, | |
| "learning_rate": 7.524644030668127e-05, | |
| "loss": 0.4236, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 11.379310344827585, | |
| "grad_norm": 6.807033538818359, | |
| "learning_rate": 7.502738225629792e-05, | |
| "loss": 0.7664, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 11.448275862068966, | |
| "grad_norm": 11.941972732543945, | |
| "learning_rate": 7.480832420591458e-05, | |
| "loss": 0.8078, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 11.517241379310345, | |
| "grad_norm": 5.140421390533447, | |
| "learning_rate": 7.458926615553122e-05, | |
| "loss": 0.4366, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 11.586206896551724, | |
| "grad_norm": 4.931862831115723, | |
| "learning_rate": 7.437020810514787e-05, | |
| "loss": 0.552, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 11.655172413793103, | |
| "grad_norm": 6.9343647956848145, | |
| "learning_rate": 7.415115005476451e-05, | |
| "loss": 0.5526, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 11.724137931034482, | |
| "grad_norm": 4.292028903961182, | |
| "learning_rate": 7.393209200438116e-05, | |
| "loss": 0.5248, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 11.793103448275861, | |
| "grad_norm": 6.613484859466553, | |
| "learning_rate": 7.371303395399781e-05, | |
| "loss": 0.7224, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 11.862068965517242, | |
| "grad_norm": 5.9594502449035645, | |
| "learning_rate": 7.349397590361447e-05, | |
| "loss": 0.4696, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 11.931034482758621, | |
| "grad_norm": 5.859204292297363, | |
| "learning_rate": 7.327491785323111e-05, | |
| "loss": 0.3449, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 5.6179046630859375, | |
| "learning_rate": 7.305585980284776e-05, | |
| "loss": 0.4145, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6742424242424242, | |
| "eval_f1_macro": 0.5845780796908616, | |
| "eval_f1_micro": 0.6742424242424242, | |
| "eval_f1_weighted": 0.6643483452693979, | |
| "eval_loss": 1.160757064819336, | |
| "eval_precision_macro": 0.5822360668405294, | |
| "eval_precision_micro": 0.6742424242424242, | |
| "eval_precision_weighted": 0.6680857766304014, | |
| "eval_recall_macro": 0.6004686318972033, | |
| "eval_recall_micro": 0.6742424242424242, | |
| "eval_recall_weighted": 0.6742424242424242, | |
| "eval_runtime": 2.2477, | |
| "eval_samples_per_second": 58.726, | |
| "eval_steps_per_second": 7.563, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 12.068965517241379, | |
| "grad_norm": 8.443059921264648, | |
| "learning_rate": 7.28368017524644e-05, | |
| "loss": 0.5175, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 12.137931034482758, | |
| "grad_norm": 9.789414405822754, | |
| "learning_rate": 7.261774370208105e-05, | |
| "loss": 0.8298, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 12.206896551724139, | |
| "grad_norm": 4.579267978668213, | |
| "learning_rate": 7.239868565169771e-05, | |
| "loss": 0.372, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 12.275862068965518, | |
| "grad_norm": 7.095308303833008, | |
| "learning_rate": 7.217962760131434e-05, | |
| "loss": 0.4208, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 12.344827586206897, | |
| "grad_norm": 2.6553964614868164, | |
| "learning_rate": 7.1960569550931e-05, | |
| "loss": 0.2939, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 12.413793103448276, | |
| "grad_norm": 6.320093631744385, | |
| "learning_rate": 7.174151150054765e-05, | |
| "loss": 0.556, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 12.482758620689655, | |
| "grad_norm": 8.005858421325684, | |
| "learning_rate": 7.15224534501643e-05, | |
| "loss": 0.4923, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 12.551724137931034, | |
| "grad_norm": 4.577536106109619, | |
| "learning_rate": 7.130339539978094e-05, | |
| "loss": 0.2823, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 12.620689655172415, | |
| "grad_norm": 7.0326008796691895, | |
| "learning_rate": 7.108433734939759e-05, | |
| "loss": 0.3633, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 12.689655172413794, | |
| "grad_norm": 8.876154899597168, | |
| "learning_rate": 7.086527929901425e-05, | |
| "loss": 0.5113, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 12.758620689655173, | |
| "grad_norm": 9.319496154785156, | |
| "learning_rate": 7.06462212486309e-05, | |
| "loss": 0.9363, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 12.827586206896552, | |
| "grad_norm": 4.129659175872803, | |
| "learning_rate": 7.042716319824754e-05, | |
| "loss": 0.43, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 12.89655172413793, | |
| "grad_norm": 8.008423805236816, | |
| "learning_rate": 7.020810514786419e-05, | |
| "loss": 0.4344, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 12.96551724137931, | |
| "grad_norm": 6.894300937652588, | |
| "learning_rate": 6.998904709748083e-05, | |
| "loss": 0.4805, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.5757575757575758, | |
| "eval_f1_macro": 0.527595781401402, | |
| "eval_f1_micro": 0.5757575757575758, | |
| "eval_f1_weighted": 0.5689098612906363, | |
| "eval_loss": 1.319955587387085, | |
| "eval_precision_macro": 0.5767229968910641, | |
| "eval_precision_micro": 0.5757575757575758, | |
| "eval_precision_weighted": 0.6137522608110844, | |
| "eval_recall_macro": 0.5268707482993198, | |
| "eval_recall_micro": 0.5757575757575758, | |
| "eval_recall_weighted": 0.5757575757575758, | |
| "eval_runtime": 2.206, | |
| "eval_samples_per_second": 59.836, | |
| "eval_steps_per_second": 7.706, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 13.03448275862069, | |
| "grad_norm": 7.305329322814941, | |
| "learning_rate": 6.976998904709748e-05, | |
| "loss": 0.4888, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 13.10344827586207, | |
| "grad_norm": 7.0207624435424805, | |
| "learning_rate": 6.955093099671414e-05, | |
| "loss": 0.4489, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 13.172413793103448, | |
| "grad_norm": 3.134613513946533, | |
| "learning_rate": 6.933187294633077e-05, | |
| "loss": 0.2369, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 13.241379310344827, | |
| "grad_norm": 4.9292097091674805, | |
| "learning_rate": 6.911281489594743e-05, | |
| "loss": 0.4839, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 13.310344827586206, | |
| "grad_norm": 2.2589919567108154, | |
| "learning_rate": 6.889375684556408e-05, | |
| "loss": 0.222, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 13.379310344827585, | |
| "grad_norm": 4.867913246154785, | |
| "learning_rate": 6.867469879518072e-05, | |
| "loss": 0.2502, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 13.448275862068966, | |
| "grad_norm": 3.433598756790161, | |
| "learning_rate": 6.845564074479738e-05, | |
| "loss": 0.2846, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 13.517241379310345, | |
| "grad_norm": 4.033895492553711, | |
| "learning_rate": 6.823658269441402e-05, | |
| "loss": 0.2156, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 13.586206896551724, | |
| "grad_norm": 6.298670768737793, | |
| "learning_rate": 6.801752464403068e-05, | |
| "loss": 0.2056, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 13.655172413793103, | |
| "grad_norm": 5.606608867645264, | |
| "learning_rate": 6.779846659364732e-05, | |
| "loss": 0.5755, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 13.724137931034482, | |
| "grad_norm": 4.751099109649658, | |
| "learning_rate": 6.757940854326397e-05, | |
| "loss": 0.3081, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 13.793103448275861, | |
| "grad_norm": 6.851717472076416, | |
| "learning_rate": 6.736035049288061e-05, | |
| "loss": 0.512, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 13.862068965517242, | |
| "grad_norm": 6.983868598937988, | |
| "learning_rate": 6.714129244249726e-05, | |
| "loss": 0.5849, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 13.931034482758621, | |
| "grad_norm": 9.133752822875977, | |
| "learning_rate": 6.692223439211392e-05, | |
| "loss": 0.7721, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 9.328068733215332, | |
| "learning_rate": 6.670317634173057e-05, | |
| "loss": 0.6232, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.5757575757575758, | |
| "eval_f1_macro": 0.4789696951253122, | |
| "eval_f1_micro": 0.5757575757575758, | |
| "eval_f1_weighted": 0.5516716249691459, | |
| "eval_loss": 1.319008231163025, | |
| "eval_precision_macro": 0.502492644655116, | |
| "eval_precision_micro": 0.5757575757575758, | |
| "eval_precision_weighted": 0.5734135715543037, | |
| "eval_recall_macro": 0.5006122448979592, | |
| "eval_recall_micro": 0.5757575757575758, | |
| "eval_recall_weighted": 0.5757575757575758, | |
| "eval_runtime": 2.1818, | |
| "eval_samples_per_second": 60.499, | |
| "eval_steps_per_second": 7.792, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 14.068965517241379, | |
| "grad_norm": 3.8055107593536377, | |
| "learning_rate": 6.648411829134721e-05, | |
| "loss": 0.2276, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 14.137931034482758, | |
| "grad_norm": 3.0821352005004883, | |
| "learning_rate": 6.626506024096386e-05, | |
| "loss": 0.2451, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 14.206896551724139, | |
| "grad_norm": 7.605597972869873, | |
| "learning_rate": 6.60460021905805e-05, | |
| "loss": 0.3332, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 14.275862068965518, | |
| "grad_norm": 7.357143402099609, | |
| "learning_rate": 6.582694414019715e-05, | |
| "loss": 0.5178, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 14.344827586206897, | |
| "grad_norm": 2.5872600078582764, | |
| "learning_rate": 6.560788608981381e-05, | |
| "loss": 0.1075, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 14.413793103448276, | |
| "grad_norm": 5.9071879386901855, | |
| "learning_rate": 6.538882803943044e-05, | |
| "loss": 0.2012, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 14.482758620689655, | |
| "grad_norm": 6.262528419494629, | |
| "learning_rate": 6.51697699890471e-05, | |
| "loss": 0.219, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 14.551724137931034, | |
| "grad_norm": 4.72699499130249, | |
| "learning_rate": 6.495071193866375e-05, | |
| "loss": 0.4705, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 14.620689655172415, | |
| "grad_norm": 2.8275880813598633, | |
| "learning_rate": 6.47316538882804e-05, | |
| "loss": 0.259, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 14.689655172413794, | |
| "grad_norm": 4.1800312995910645, | |
| "learning_rate": 6.451259583789706e-05, | |
| "loss": 0.2478, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 14.758620689655173, | |
| "grad_norm": 6.540757179260254, | |
| "learning_rate": 6.429353778751369e-05, | |
| "loss": 0.3005, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 14.827586206896552, | |
| "grad_norm": 3.0680577754974365, | |
| "learning_rate": 6.407447973713035e-05, | |
| "loss": 0.3351, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 14.89655172413793, | |
| "grad_norm": 12.751289367675781, | |
| "learning_rate": 6.385542168674698e-05, | |
| "loss": 0.4988, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 14.96551724137931, | |
| "grad_norm": 7.108068466186523, | |
| "learning_rate": 6.363636363636364e-05, | |
| "loss": 0.3475, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.696969696969697, | |
| "eval_f1_macro": 0.630347810119719, | |
| "eval_f1_micro": 0.696969696969697, | |
| "eval_f1_weighted": 0.6894476798984056, | |
| "eval_loss": 1.185251235961914, | |
| "eval_precision_macro": 0.6716845878136201, | |
| "eval_precision_micro": 0.696969696969697, | |
| "eval_precision_weighted": 0.7087732160312806, | |
| "eval_recall_macro": 0.6311791383219955, | |
| "eval_recall_micro": 0.696969696969697, | |
| "eval_recall_weighted": 0.696969696969697, | |
| "eval_runtime": 2.1666, | |
| "eval_samples_per_second": 60.925, | |
| "eval_steps_per_second": 7.846, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 15.03448275862069, | |
| "grad_norm": 6.197813510894775, | |
| "learning_rate": 6.341730558598029e-05, | |
| "loss": 0.162, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 15.10344827586207, | |
| "grad_norm": 3.9454376697540283, | |
| "learning_rate": 6.319824753559693e-05, | |
| "loss": 0.3396, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 15.172413793103448, | |
| "grad_norm": 8.980201721191406, | |
| "learning_rate": 6.297918948521358e-05, | |
| "loss": 0.2316, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 15.241379310344827, | |
| "grad_norm": 4.091892719268799, | |
| "learning_rate": 6.276013143483024e-05, | |
| "loss": 0.277, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 15.310344827586206, | |
| "grad_norm": 7.498462200164795, | |
| "learning_rate": 6.254107338444689e-05, | |
| "loss": 0.3305, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 15.379310344827585, | |
| "grad_norm": 6.023470401763916, | |
| "learning_rate": 6.232201533406353e-05, | |
| "loss": 0.1605, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 15.448275862068966, | |
| "grad_norm": 4.88850212097168, | |
| "learning_rate": 6.210295728368018e-05, | |
| "loss": 0.1803, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 15.517241379310345, | |
| "grad_norm": 2.798743724822998, | |
| "learning_rate": 6.188389923329682e-05, | |
| "loss": 0.1585, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 15.586206896551724, | |
| "grad_norm": 6.272281646728516, | |
| "learning_rate": 6.166484118291348e-05, | |
| "loss": 0.1494, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 15.655172413793103, | |
| "grad_norm": 7.970227241516113, | |
| "learning_rate": 6.144578313253012e-05, | |
| "loss": 0.3169, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 15.724137931034482, | |
| "grad_norm": 6.0759406089782715, | |
| "learning_rate": 6.122672508214678e-05, | |
| "loss": 0.3508, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 15.793103448275861, | |
| "grad_norm": 4.981871128082275, | |
| "learning_rate": 6.1007667031763415e-05, | |
| "loss": 0.1469, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 15.862068965517242, | |
| "grad_norm": 8.259228706359863, | |
| "learning_rate": 6.078860898138007e-05, | |
| "loss": 0.3808, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 15.931034482758621, | |
| "grad_norm": 5.493587017059326, | |
| "learning_rate": 6.056955093099672e-05, | |
| "loss": 0.218, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 5.321525573730469, | |
| "learning_rate": 6.035049288061336e-05, | |
| "loss": 0.1956, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5151515151515151, | |
| "eval_f1_macro": 0.4323315041705846, | |
| "eval_f1_micro": 0.5151515151515151, | |
| "eval_f1_weighted": 0.4974051721657085, | |
| "eval_loss": 1.569486141204834, | |
| "eval_precision_macro": 0.47551801581876774, | |
| "eval_precision_micro": 0.5151515151515151, | |
| "eval_precision_weighted": 0.5333931937281219, | |
| "eval_recall_macro": 0.4357898715041572, | |
| "eval_recall_micro": 0.5151515151515151, | |
| "eval_recall_weighted": 0.5151515151515151, | |
| "eval_runtime": 2.1474, | |
| "eval_samples_per_second": 61.468, | |
| "eval_steps_per_second": 7.916, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 16.06896551724138, | |
| "grad_norm": 7.035810947418213, | |
| "learning_rate": 6.0131434830230014e-05, | |
| "loss": 0.4266, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 16.137931034482758, | |
| "grad_norm": 3.2283682823181152, | |
| "learning_rate": 5.991237677984666e-05, | |
| "loss": 0.2042, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 16.20689655172414, | |
| "grad_norm": 3.6779544353485107, | |
| "learning_rate": 5.969331872946331e-05, | |
| "loss": 0.1058, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 16.275862068965516, | |
| "grad_norm": 1.8620399236679077, | |
| "learning_rate": 5.9474260679079966e-05, | |
| "loss": 0.1272, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 16.344827586206897, | |
| "grad_norm": 2.111825942993164, | |
| "learning_rate": 5.9255202628696605e-05, | |
| "loss": 0.1298, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 16.413793103448278, | |
| "grad_norm": 6.74976110458374, | |
| "learning_rate": 5.903614457831326e-05, | |
| "loss": 0.3262, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 16.482758620689655, | |
| "grad_norm": 5.992347240447998, | |
| "learning_rate": 5.88170865279299e-05, | |
| "loss": 0.3271, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 16.551724137931036, | |
| "grad_norm": 2.5913877487182617, | |
| "learning_rate": 5.859802847754655e-05, | |
| "loss": 0.1947, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 16.620689655172413, | |
| "grad_norm": 7.1807403564453125, | |
| "learning_rate": 5.8378970427163204e-05, | |
| "loss": 0.1701, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 16.689655172413794, | |
| "grad_norm": 5.666691303253174, | |
| "learning_rate": 5.815991237677984e-05, | |
| "loss": 0.2304, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 16.75862068965517, | |
| "grad_norm": 6.325366973876953, | |
| "learning_rate": 5.7940854326396496e-05, | |
| "loss": 0.3751, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 16.82758620689655, | |
| "grad_norm": 4.312324523925781, | |
| "learning_rate": 5.772179627601315e-05, | |
| "loss": 0.16, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 16.896551724137932, | |
| "grad_norm": 4.738943576812744, | |
| "learning_rate": 5.7502738225629795e-05, | |
| "loss": 0.2339, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 16.96551724137931, | |
| "grad_norm": 4.70164155960083, | |
| "learning_rate": 5.728368017524645e-05, | |
| "loss": 0.1519, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.6439393939393939, | |
| "eval_f1_macro": 0.5818818031106167, | |
| "eval_f1_micro": 0.6439393939393939, | |
| "eval_f1_weighted": 0.6317482833372663, | |
| "eval_loss": 1.440421223640442, | |
| "eval_precision_macro": 0.6438369250139081, | |
| "eval_precision_micro": 0.6439393939393939, | |
| "eval_precision_weighted": 0.657660361816567, | |
| "eval_recall_macro": 0.5705744520030234, | |
| "eval_recall_micro": 0.6439393939393939, | |
| "eval_recall_weighted": 0.6439393939393939, | |
| "eval_runtime": 2.213, | |
| "eval_samples_per_second": 59.648, | |
| "eval_steps_per_second": 7.682, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 17.03448275862069, | |
| "grad_norm": 5.552275657653809, | |
| "learning_rate": 5.706462212486309e-05, | |
| "loss": 0.1605, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 17.103448275862068, | |
| "grad_norm": 1.6765620708465576, | |
| "learning_rate": 5.684556407447974e-05, | |
| "loss": 0.057, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 17.17241379310345, | |
| "grad_norm": 2.917738437652588, | |
| "learning_rate": 5.6626506024096394e-05, | |
| "loss": 0.0525, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 17.24137931034483, | |
| "grad_norm": 4.772071838378906, | |
| "learning_rate": 5.640744797371303e-05, | |
| "loss": 0.0594, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 17.310344827586206, | |
| "grad_norm": 5.168885231018066, | |
| "learning_rate": 5.6188389923329686e-05, | |
| "loss": 0.1819, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 17.379310344827587, | |
| "grad_norm": 6.547173976898193, | |
| "learning_rate": 5.596933187294633e-05, | |
| "loss": 0.1747, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 17.448275862068964, | |
| "grad_norm": 2.393808364868164, | |
| "learning_rate": 5.575027382256298e-05, | |
| "loss": 0.1172, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 17.517241379310345, | |
| "grad_norm": 6.183032512664795, | |
| "learning_rate": 5.553121577217963e-05, | |
| "loss": 0.1184, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 17.586206896551722, | |
| "grad_norm": 12.154343605041504, | |
| "learning_rate": 5.531215772179628e-05, | |
| "loss": 0.2882, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 17.655172413793103, | |
| "grad_norm": 7.199910640716553, | |
| "learning_rate": 5.509309967141293e-05, | |
| "loss": 0.3635, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 17.724137931034484, | |
| "grad_norm": 1.0473498106002808, | |
| "learning_rate": 5.487404162102957e-05, | |
| "loss": 0.0773, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 17.79310344827586, | |
| "grad_norm": 1.043884038925171, | |
| "learning_rate": 5.465498357064622e-05, | |
| "loss": 0.0349, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 17.862068965517242, | |
| "grad_norm": 0.591170072555542, | |
| "learning_rate": 5.4435925520262876e-05, | |
| "loss": 0.1192, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 17.93103448275862, | |
| "grad_norm": 4.3712477684021, | |
| "learning_rate": 5.4216867469879516e-05, | |
| "loss": 0.1702, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 5.553340911865234, | |
| "learning_rate": 5.399780941949617e-05, | |
| "loss": 0.1031, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6136363636363636, | |
| "eval_f1_macro": 0.5370450788240546, | |
| "eval_f1_micro": 0.6136363636363636, | |
| "eval_f1_weighted": 0.6040564132330857, | |
| "eval_loss": 1.4877225160598755, | |
| "eval_precision_macro": 0.5351284054291573, | |
| "eval_precision_micro": 0.6136363636363636, | |
| "eval_precision_weighted": 0.5975074566581743, | |
| "eval_recall_macro": 0.5421919879062737, | |
| "eval_recall_micro": 0.6136363636363636, | |
| "eval_recall_weighted": 0.6136363636363636, | |
| "eval_runtime": 2.2122, | |
| "eval_samples_per_second": 59.668, | |
| "eval_steps_per_second": 7.685, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 18.06896551724138, | |
| "grad_norm": 2.231707811355591, | |
| "learning_rate": 5.3778751369112815e-05, | |
| "loss": 0.0562, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 18.137931034482758, | |
| "grad_norm": 1.9797624349594116, | |
| "learning_rate": 5.355969331872947e-05, | |
| "loss": 0.1513, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 18.20689655172414, | |
| "grad_norm": 4.362570285797119, | |
| "learning_rate": 5.334063526834612e-05, | |
| "loss": 0.1637, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 18.275862068965516, | |
| "grad_norm": 5.458191871643066, | |
| "learning_rate": 5.312157721796276e-05, | |
| "loss": 0.1761, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 18.344827586206897, | |
| "grad_norm": 12.664368629455566, | |
| "learning_rate": 5.290251916757941e-05, | |
| "loss": 0.2171, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 18.413793103448278, | |
| "grad_norm": 4.849126815795898, | |
| "learning_rate": 5.2683461117196066e-05, | |
| "loss": 0.1213, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 18.482758620689655, | |
| "grad_norm": 1.9513343572616577, | |
| "learning_rate": 5.2464403066812705e-05, | |
| "loss": 0.0647, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 18.551724137931036, | |
| "grad_norm": 5.6937642097473145, | |
| "learning_rate": 5.224534501642936e-05, | |
| "loss": 0.1924, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 18.620689655172413, | |
| "grad_norm": 6.157546043395996, | |
| "learning_rate": 5.2026286966046e-05, | |
| "loss": 0.1621, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 18.689655172413794, | |
| "grad_norm": 3.375688076019287, | |
| "learning_rate": 5.180722891566265e-05, | |
| "loss": 0.0725, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 18.75862068965517, | |
| "grad_norm": 1.283026099205017, | |
| "learning_rate": 5.1588170865279304e-05, | |
| "loss": 0.1705, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 18.82758620689655, | |
| "grad_norm": 6.894308090209961, | |
| "learning_rate": 5.136911281489595e-05, | |
| "loss": 0.2579, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 18.896551724137932, | |
| "grad_norm": 7.978748321533203, | |
| "learning_rate": 5.11500547645126e-05, | |
| "loss": 0.1522, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 18.96551724137931, | |
| "grad_norm": 3.8156979084014893, | |
| "learning_rate": 5.093099671412924e-05, | |
| "loss": 0.0615, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6060606060606061, | |
| "eval_f1_macro": 0.6012825511436246, | |
| "eval_f1_micro": 0.6060606060606061, | |
| "eval_f1_weighted": 0.6106316401527286, | |
| "eval_loss": 1.4801414012908936, | |
| "eval_precision_macro": 0.6475544200111578, | |
| "eval_precision_micro": 0.6060606060606061, | |
| "eval_precision_weighted": 0.6581095440160177, | |
| "eval_recall_macro": 0.5951398337112623, | |
| "eval_recall_micro": 0.6060606060606061, | |
| "eval_recall_weighted": 0.6060606060606061, | |
| "eval_runtime": 2.2056, | |
| "eval_samples_per_second": 59.847, | |
| "eval_steps_per_second": 7.708, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 19.03448275862069, | |
| "grad_norm": 1.505656361579895, | |
| "learning_rate": 5.0711938663745895e-05, | |
| "loss": 0.0365, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 19.103448275862068, | |
| "grad_norm": 1.1916121244430542, | |
| "learning_rate": 5.049288061336255e-05, | |
| "loss": 0.099, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 19.17241379310345, | |
| "grad_norm": 3.222411632537842, | |
| "learning_rate": 5.027382256297919e-05, | |
| "loss": 0.0374, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 19.24137931034483, | |
| "grad_norm": 4.7354536056518555, | |
| "learning_rate": 5.005476451259584e-05, | |
| "loss": 0.0901, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 19.310344827586206, | |
| "grad_norm": 0.37637993693351746, | |
| "learning_rate": 4.983570646221249e-05, | |
| "loss": 0.0087, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 19.379310344827587, | |
| "grad_norm": 1.4076848030090332, | |
| "learning_rate": 4.961664841182913e-05, | |
| "loss": 0.2459, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 19.448275862068964, | |
| "grad_norm": 7.020608901977539, | |
| "learning_rate": 4.9397590361445786e-05, | |
| "loss": 0.1859, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 19.517241379310345, | |
| "grad_norm": 1.6740795373916626, | |
| "learning_rate": 4.917853231106244e-05, | |
| "loss": 0.0443, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 19.586206896551722, | |
| "grad_norm": 1.2484346628189087, | |
| "learning_rate": 4.8959474260679085e-05, | |
| "loss": 0.026, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 19.655172413793103, | |
| "grad_norm": 0.44467589259147644, | |
| "learning_rate": 4.874041621029573e-05, | |
| "loss": 0.0704, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 19.724137931034484, | |
| "grad_norm": 1.164262056350708, | |
| "learning_rate": 4.852135815991238e-05, | |
| "loss": 0.0843, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 19.79310344827586, | |
| "grad_norm": 2.4461233615875244, | |
| "learning_rate": 4.8302300109529024e-05, | |
| "loss": 0.1391, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 19.862068965517242, | |
| "grad_norm": 0.4338299334049225, | |
| "learning_rate": 4.808324205914568e-05, | |
| "loss": 0.0174, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 19.93103448275862, | |
| "grad_norm": 14.353382110595703, | |
| "learning_rate": 4.786418400876232e-05, | |
| "loss": 0.0887, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.43623942136764526, | |
| "learning_rate": 4.764512595837897e-05, | |
| "loss": 0.0249, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5909090909090909, | |
| "eval_f1_macro": 0.5197787455591448, | |
| "eval_f1_micro": 0.5909090909090909, | |
| "eval_f1_weighted": 0.5825078945882032, | |
| "eval_loss": 1.6081513166427612, | |
| "eval_precision_macro": 0.5148994878087059, | |
| "eval_precision_micro": 0.5909090909090909, | |
| "eval_precision_weighted": 0.5769802287329502, | |
| "eval_recall_macro": 0.5272184429327286, | |
| "eval_recall_micro": 0.5909090909090909, | |
| "eval_recall_weighted": 0.5909090909090909, | |
| "eval_runtime": 2.1745, | |
| "eval_samples_per_second": 60.703, | |
| "eval_steps_per_second": 7.818, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 20.06896551724138, | |
| "grad_norm": 0.3680793046951294, | |
| "learning_rate": 4.742606790799562e-05, | |
| "loss": 0.0216, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 20.137931034482758, | |
| "grad_norm": 2.0422375202178955, | |
| "learning_rate": 4.7207009857612275e-05, | |
| "loss": 0.0177, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 20.20689655172414, | |
| "grad_norm": 6.626030445098877, | |
| "learning_rate": 4.698795180722892e-05, | |
| "loss": 0.2056, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 20.275862068965516, | |
| "grad_norm": 0.7436681389808655, | |
| "learning_rate": 4.676889375684557e-05, | |
| "loss": 0.0327, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 20.344827586206897, | |
| "grad_norm": 0.9783719182014465, | |
| "learning_rate": 4.6549835706462214e-05, | |
| "loss": 0.044, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 20.413793103448278, | |
| "grad_norm": 0.8457818627357483, | |
| "learning_rate": 4.633077765607886e-05, | |
| "loss": 0.1102, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 20.482758620689655, | |
| "grad_norm": 3.060871124267578, | |
| "learning_rate": 4.611171960569551e-05, | |
| "loss": 0.0726, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 20.551724137931036, | |
| "grad_norm": 7.481118679046631, | |
| "learning_rate": 4.589266155531216e-05, | |
| "loss": 0.1447, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 20.620689655172413, | |
| "grad_norm": 8.65415096282959, | |
| "learning_rate": 4.5673603504928806e-05, | |
| "loss": 0.2099, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 20.689655172413794, | |
| "grad_norm": 3.2042698860168457, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 0.0497, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 20.75862068965517, | |
| "grad_norm": 0.5125285983085632, | |
| "learning_rate": 4.5235487404162105e-05, | |
| "loss": 0.069, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 20.82758620689655, | |
| "grad_norm": 0.8691998720169067, | |
| "learning_rate": 4.501642935377876e-05, | |
| "loss": 0.0354, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 20.896551724137932, | |
| "grad_norm": 10.247215270996094, | |
| "learning_rate": 4.4797371303395404e-05, | |
| "loss": 0.1541, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 20.96551724137931, | |
| "grad_norm": 11.587034225463867, | |
| "learning_rate": 4.457831325301205e-05, | |
| "loss": 0.374, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.6287878787878788, | |
| "eval_f1_macro": 0.6084137522037308, | |
| "eval_f1_micro": 0.6287878787878788, | |
| "eval_f1_weighted": 0.6185060346144132, | |
| "eval_loss": 1.7593897581100464, | |
| "eval_precision_macro": 0.6711527035056447, | |
| "eval_precision_micro": 0.6287878787878788, | |
| "eval_precision_weighted": 0.6679164641063037, | |
| "eval_recall_macro": 0.6049433106575963, | |
| "eval_recall_micro": 0.6287878787878788, | |
| "eval_recall_weighted": 0.6287878787878788, | |
| "eval_runtime": 2.1955, | |
| "eval_samples_per_second": 60.123, | |
| "eval_steps_per_second": 7.743, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 21.03448275862069, | |
| "grad_norm": 1.38335120677948, | |
| "learning_rate": 4.4359255202628696e-05, | |
| "loss": 0.079, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 21.103448275862068, | |
| "grad_norm": 5.99662446975708, | |
| "learning_rate": 4.414019715224535e-05, | |
| "loss": 0.0648, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 21.17241379310345, | |
| "grad_norm": 1.0241988897323608, | |
| "learning_rate": 4.3921139101861996e-05, | |
| "loss": 0.1807, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 21.24137931034483, | |
| "grad_norm": 0.5548591017723083, | |
| "learning_rate": 4.370208105147864e-05, | |
| "loss": 0.0375, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 21.310344827586206, | |
| "grad_norm": 0.7137009501457214, | |
| "learning_rate": 4.348302300109529e-05, | |
| "loss": 0.0694, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 21.379310344827587, | |
| "grad_norm": 7.560571193695068, | |
| "learning_rate": 4.326396495071194e-05, | |
| "loss": 0.2715, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 21.448275862068964, | |
| "grad_norm": 7.067291736602783, | |
| "learning_rate": 4.3044906900328594e-05, | |
| "loss": 0.1016, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 21.517241379310345, | |
| "grad_norm": 4.622091770172119, | |
| "learning_rate": 4.282584884994524e-05, | |
| "loss": 0.0621, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 21.586206896551722, | |
| "grad_norm": 5.788636207580566, | |
| "learning_rate": 4.2606790799561886e-05, | |
| "loss": 0.1775, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 21.655172413793103, | |
| "grad_norm": 3.3069419860839844, | |
| "learning_rate": 4.238773274917853e-05, | |
| "loss": 0.0586, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 21.724137931034484, | |
| "grad_norm": 0.65139240026474, | |
| "learning_rate": 4.2168674698795186e-05, | |
| "loss": 0.0672, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 21.79310344827586, | |
| "grad_norm": 2.4793200492858887, | |
| "learning_rate": 4.194961664841183e-05, | |
| "loss": 0.1428, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 21.862068965517242, | |
| "grad_norm": 2.988377809524536, | |
| "learning_rate": 4.173055859802848e-05, | |
| "loss": 0.1759, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 21.93103448275862, | |
| "grad_norm": 5.487617015838623, | |
| "learning_rate": 4.1511500547645124e-05, | |
| "loss": 0.0849, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 2.8238141536712646, | |
| "learning_rate": 4.129244249726178e-05, | |
| "loss": 0.025, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6515151515151515, | |
| "eval_f1_macro": 0.6445815393183814, | |
| "eval_f1_micro": 0.6515151515151515, | |
| "eval_f1_weighted": 0.6520235479565623, | |
| "eval_loss": 1.4723178148269653, | |
| "eval_precision_macro": 0.6542847694633409, | |
| "eval_precision_micro": 0.6515151515151515, | |
| "eval_precision_weighted": 0.6660361050986052, | |
| "eval_recall_macro": 0.6478760393046107, | |
| "eval_recall_micro": 0.6515151515151515, | |
| "eval_recall_weighted": 0.6515151515151515, | |
| "eval_runtime": 2.2232, | |
| "eval_samples_per_second": 59.375, | |
| "eval_steps_per_second": 7.647, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 22.06896551724138, | |
| "grad_norm": 6.143444061279297, | |
| "learning_rate": 4.107338444687843e-05, | |
| "loss": 0.0913, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 22.137931034482758, | |
| "grad_norm": 2.971240997314453, | |
| "learning_rate": 4.0854326396495076e-05, | |
| "loss": 0.0312, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 22.20689655172414, | |
| "grad_norm": 0.27099546790122986, | |
| "learning_rate": 4.063526834611172e-05, | |
| "loss": 0.045, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 22.275862068965516, | |
| "grad_norm": 0.35845986008644104, | |
| "learning_rate": 4.041621029572837e-05, | |
| "loss": 0.013, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 22.344827586206897, | |
| "grad_norm": 2.0845632553100586, | |
| "learning_rate": 4.019715224534502e-05, | |
| "loss": 0.0523, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 22.413793103448278, | |
| "grad_norm": 3.686854362487793, | |
| "learning_rate": 3.997809419496167e-05, | |
| "loss": 0.0292, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 22.482758620689655, | |
| "grad_norm": 0.846224844455719, | |
| "learning_rate": 3.9759036144578314e-05, | |
| "loss": 0.1901, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 22.551724137931036, | |
| "grad_norm": 0.3240630626678467, | |
| "learning_rate": 3.953997809419496e-05, | |
| "loss": 0.0187, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 22.620689655172413, | |
| "grad_norm": 7.635501861572266, | |
| "learning_rate": 3.9320920043811607e-05, | |
| "loss": 0.1124, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 22.689655172413794, | |
| "grad_norm": 2.2201285362243652, | |
| "learning_rate": 3.910186199342826e-05, | |
| "loss": 0.03, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 22.75862068965517, | |
| "grad_norm": 8.953709602355957, | |
| "learning_rate": 3.888280394304491e-05, | |
| "loss": 0.0588, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 22.82758620689655, | |
| "grad_norm": 7.417150497436523, | |
| "learning_rate": 3.866374589266156e-05, | |
| "loss": 0.0751, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 22.896551724137932, | |
| "grad_norm": 1.5134751796722412, | |
| "learning_rate": 3.8444687842278205e-05, | |
| "loss": 0.0166, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 22.96551724137931, | |
| "grad_norm": 0.537891685962677, | |
| "learning_rate": 3.822562979189485e-05, | |
| "loss": 0.0096, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.6136363636363636, | |
| "eval_f1_macro": 0.5899262553677856, | |
| "eval_f1_micro": 0.6136363636363636, | |
| "eval_f1_weighted": 0.6088674363985942, | |
| "eval_loss": 1.5689215660095215, | |
| "eval_precision_macro": 0.616981329954019, | |
| "eval_precision_micro": 0.6136363636363636, | |
| "eval_precision_weighted": 0.6315426797963563, | |
| "eval_recall_macro": 0.5878231292517008, | |
| "eval_recall_micro": 0.6136363636363636, | |
| "eval_recall_weighted": 0.6136363636363636, | |
| "eval_runtime": 2.2085, | |
| "eval_samples_per_second": 59.769, | |
| "eval_steps_per_second": 7.697, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 23.03448275862069, | |
| "grad_norm": 0.5641638040542603, | |
| "learning_rate": 3.8006571741511504e-05, | |
| "loss": 0.0089, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 23.103448275862068, | |
| "grad_norm": 3.1353189945220947, | |
| "learning_rate": 3.778751369112815e-05, | |
| "loss": 0.0292, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 23.17241379310345, | |
| "grad_norm": 0.7373493313789368, | |
| "learning_rate": 3.7568455640744796e-05, | |
| "loss": 0.0612, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 23.24137931034483, | |
| "grad_norm": 2.668566942214966, | |
| "learning_rate": 3.734939759036144e-05, | |
| "loss": 0.0416, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 23.310344827586206, | |
| "grad_norm": 4.210921287536621, | |
| "learning_rate": 3.7130339539978096e-05, | |
| "loss": 0.0405, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 23.379310344827587, | |
| "grad_norm": 0.31117522716522217, | |
| "learning_rate": 3.691128148959475e-05, | |
| "loss": 0.0162, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 23.448275862068964, | |
| "grad_norm": 8.15129280090332, | |
| "learning_rate": 3.6692223439211395e-05, | |
| "loss": 0.0723, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 23.517241379310345, | |
| "grad_norm": 2.1367807388305664, | |
| "learning_rate": 3.647316538882804e-05, | |
| "loss": 0.0501, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 23.586206896551722, | |
| "grad_norm": 1.1246554851531982, | |
| "learning_rate": 3.625410733844469e-05, | |
| "loss": 0.0115, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 23.655172413793103, | |
| "grad_norm": 1.3772636651992798, | |
| "learning_rate": 3.603504928806134e-05, | |
| "loss": 0.033, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 23.724137931034484, | |
| "grad_norm": 0.15142899751663208, | |
| "learning_rate": 3.5815991237677986e-05, | |
| "loss": 0.0613, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 23.79310344827586, | |
| "grad_norm": 13.712115287780762, | |
| "learning_rate": 3.559693318729463e-05, | |
| "loss": 0.2608, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 23.862068965517242, | |
| "grad_norm": 6.292361259460449, | |
| "learning_rate": 3.537787513691128e-05, | |
| "loss": 0.1134, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 23.93103448275862, | |
| "grad_norm": 0.6719773411750793, | |
| "learning_rate": 3.515881708652793e-05, | |
| "loss": 0.0106, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 1.0674413442611694, | |
| "learning_rate": 3.4939759036144585e-05, | |
| "loss": 0.0661, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_f1_macro": 0.6056024708734068, | |
| "eval_f1_micro": 0.6666666666666666, | |
| "eval_f1_weighted": 0.657565805841668, | |
| "eval_loss": 1.6276419162750244, | |
| "eval_precision_macro": 0.6690476190476191, | |
| "eval_precision_micro": 0.6666666666666666, | |
| "eval_precision_weighted": 0.6866965105601469, | |
| "eval_recall_macro": 0.5948677248677248, | |
| "eval_recall_micro": 0.6666666666666666, | |
| "eval_recall_weighted": 0.6666666666666666, | |
| "eval_runtime": 2.2477, | |
| "eval_samples_per_second": 58.726, | |
| "eval_steps_per_second": 7.563, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 24.06896551724138, | |
| "grad_norm": 0.15591685473918915, | |
| "learning_rate": 3.472070098576123e-05, | |
| "loss": 0.0109, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 24.137931034482758, | |
| "grad_norm": 1.411007046699524, | |
| "learning_rate": 3.450164293537788e-05, | |
| "loss": 0.0806, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 24.20689655172414, | |
| "grad_norm": 6.958545684814453, | |
| "learning_rate": 3.4282584884994523e-05, | |
| "loss": 0.2285, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 24.275862068965516, | |
| "grad_norm": 0.38558292388916016, | |
| "learning_rate": 3.4063526834611176e-05, | |
| "loss": 0.0268, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 24.344827586206897, | |
| "grad_norm": 6.778842926025391, | |
| "learning_rate": 3.384446878422782e-05, | |
| "loss": 0.1081, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 24.413793103448278, | |
| "grad_norm": 0.2550676465034485, | |
| "learning_rate": 3.362541073384447e-05, | |
| "loss": 0.0058, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 24.482758620689655, | |
| "grad_norm": 0.24779938161373138, | |
| "learning_rate": 3.3406352683461115e-05, | |
| "loss": 0.0252, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 24.551724137931036, | |
| "grad_norm": 0.1385107785463333, | |
| "learning_rate": 3.318729463307776e-05, | |
| "loss": 0.0057, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 24.620689655172413, | |
| "grad_norm": 3.3009445667266846, | |
| "learning_rate": 3.2968236582694414e-05, | |
| "loss": 0.0338, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 24.689655172413794, | |
| "grad_norm": 0.6270205974578857, | |
| "learning_rate": 3.274917853231107e-05, | |
| "loss": 0.0112, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 24.75862068965517, | |
| "grad_norm": 0.24541209638118744, | |
| "learning_rate": 3.253012048192771e-05, | |
| "loss": 0.0098, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 24.82758620689655, | |
| "grad_norm": 0.5051412582397461, | |
| "learning_rate": 3.231106243154436e-05, | |
| "loss": 0.0616, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 24.896551724137932, | |
| "grad_norm": 0.21808616816997528, | |
| "learning_rate": 3.209200438116101e-05, | |
| "loss": 0.0238, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 24.96551724137931, | |
| "grad_norm": 0.19809569418430328, | |
| "learning_rate": 3.187294633077766e-05, | |
| "loss": 0.0463, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.6136363636363636, | |
| "eval_f1_macro": 0.5591332103178793, | |
| "eval_f1_micro": 0.6136363636363636, | |
| "eval_f1_weighted": 0.6084641064500635, | |
| "eval_loss": 1.6760780811309814, | |
| "eval_precision_macro": 0.6192834056699603, | |
| "eval_precision_micro": 0.6136363636363636, | |
| "eval_precision_weighted": 0.6400907915613798, | |
| "eval_recall_macro": 0.5521088435374149, | |
| "eval_recall_micro": 0.6136363636363636, | |
| "eval_recall_weighted": 0.6136363636363636, | |
| "eval_runtime": 2.1953, | |
| "eval_samples_per_second": 60.128, | |
| "eval_steps_per_second": 7.744, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 25.03448275862069, | |
| "grad_norm": 0.9460155367851257, | |
| "learning_rate": 3.1653888280394305e-05, | |
| "loss": 0.0328, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 25.103448275862068, | |
| "grad_norm": 0.34770432114601135, | |
| "learning_rate": 3.143483023001095e-05, | |
| "loss": 0.0169, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 25.17241379310345, | |
| "grad_norm": 0.6745150089263916, | |
| "learning_rate": 3.12157721796276e-05, | |
| "loss": 0.0292, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 25.24137931034483, | |
| "grad_norm": 0.14288195967674255, | |
| "learning_rate": 3.099671412924425e-05, | |
| "loss": 0.0672, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 25.310344827586206, | |
| "grad_norm": 0.1784912347793579, | |
| "learning_rate": 3.07776560788609e-05, | |
| "loss": 0.0046, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 25.379310344827587, | |
| "grad_norm": 0.7752932906150818, | |
| "learning_rate": 3.055859802847755e-05, | |
| "loss": 0.0089, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 25.448275862068964, | |
| "grad_norm": 8.310676574707031, | |
| "learning_rate": 3.0339539978094196e-05, | |
| "loss": 0.0854, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 25.517241379310345, | |
| "grad_norm": 1.2783715724945068, | |
| "learning_rate": 3.012048192771085e-05, | |
| "loss": 0.0091, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 25.586206896551722, | |
| "grad_norm": 1.2155754566192627, | |
| "learning_rate": 2.9901423877327495e-05, | |
| "loss": 0.0182, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 25.655172413793103, | |
| "grad_norm": 0.10511256754398346, | |
| "learning_rate": 2.968236582694414e-05, | |
| "loss": 0.0043, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 25.724137931034484, | |
| "grad_norm": 0.1467219889163971, | |
| "learning_rate": 2.9463307776560787e-05, | |
| "loss": 0.0041, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 25.79310344827586, | |
| "grad_norm": 0.06411899626255035, | |
| "learning_rate": 2.9244249726177437e-05, | |
| "loss": 0.0043, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 25.862068965517242, | |
| "grad_norm": 9.776043891906738, | |
| "learning_rate": 2.902519167579409e-05, | |
| "loss": 0.0456, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 25.93103448275862, | |
| "grad_norm": 0.1743546575307846, | |
| "learning_rate": 2.8806133625410736e-05, | |
| "loss": 0.0029, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.9783799648284912, | |
| "learning_rate": 2.8587075575027382e-05, | |
| "loss": 0.0118, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.6287878787878788, | |
| "eval_f1_macro": 0.5353365735453567, | |
| "eval_f1_micro": 0.6287878787878788, | |
| "eval_f1_weighted": 0.6074711236094882, | |
| "eval_loss": 1.6210349798202515, | |
| "eval_precision_macro": 0.5715752748253354, | |
| "eval_precision_micro": 0.6287878787878788, | |
| "eval_precision_weighted": 0.6263481846840905, | |
| "eval_recall_macro": 0.5410279667422525, | |
| "eval_recall_micro": 0.6287878787878788, | |
| "eval_recall_weighted": 0.6287878787878788, | |
| "eval_runtime": 2.2013, | |
| "eval_samples_per_second": 59.964, | |
| "eval_steps_per_second": 7.723, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 26.06896551724138, | |
| "grad_norm": 0.2862379252910614, | |
| "learning_rate": 2.8368017524644032e-05, | |
| "loss": 0.0041, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 26.137931034482758, | |
| "grad_norm": 1.7093660831451416, | |
| "learning_rate": 2.8148959474260678e-05, | |
| "loss": 0.0129, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 26.20689655172414, | |
| "grad_norm": 2.0235061645507812, | |
| "learning_rate": 2.792990142387733e-05, | |
| "loss": 0.0107, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 26.275862068965516, | |
| "grad_norm": 0.19022098183631897, | |
| "learning_rate": 2.7710843373493977e-05, | |
| "loss": 0.0044, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 26.344827586206897, | |
| "grad_norm": 0.09240903705358505, | |
| "learning_rate": 2.7491785323110624e-05, | |
| "loss": 0.0023, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 26.413793103448278, | |
| "grad_norm": 0.08767610788345337, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 0.0026, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 26.482758620689655, | |
| "grad_norm": 0.3399060368537903, | |
| "learning_rate": 2.7053669222343926e-05, | |
| "loss": 0.0129, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 26.551724137931036, | |
| "grad_norm": 0.16420547664165497, | |
| "learning_rate": 2.6834611171960572e-05, | |
| "loss": 0.0315, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 26.620689655172413, | |
| "grad_norm": 0.07277621328830719, | |
| "learning_rate": 2.661555312157722e-05, | |
| "loss": 0.1677, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 26.689655172413794, | |
| "grad_norm": 0.0779278352856636, | |
| "learning_rate": 2.6396495071193865e-05, | |
| "loss": 0.0034, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 26.75862068965517, | |
| "grad_norm": 0.030221056193113327, | |
| "learning_rate": 2.6177437020810514e-05, | |
| "loss": 0.009, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 26.82758620689655, | |
| "grad_norm": 0.7204201221466064, | |
| "learning_rate": 2.5958378970427167e-05, | |
| "loss": 0.0242, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 26.896551724137932, | |
| "grad_norm": 2.2107677459716797, | |
| "learning_rate": 2.5739320920043813e-05, | |
| "loss": 0.0818, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 26.96551724137931, | |
| "grad_norm": 0.04545823484659195, | |
| "learning_rate": 2.552026286966046e-05, | |
| "loss": 0.0018, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.6742424242424242, | |
| "eval_f1_macro": 0.5860085994600535, | |
| "eval_f1_micro": 0.6742424242424242, | |
| "eval_f1_weighted": 0.6574843706054311, | |
| "eval_loss": 1.607276201248169, | |
| "eval_precision_macro": 0.5955862562810968, | |
| "eval_precision_micro": 0.6742424242424242, | |
| "eval_precision_weighted": 0.6586732219548587, | |
| "eval_recall_macro": 0.5929327286470143, | |
| "eval_recall_micro": 0.6742424242424242, | |
| "eval_recall_weighted": 0.6742424242424242, | |
| "eval_runtime": 2.1629, | |
| "eval_samples_per_second": 61.028, | |
| "eval_steps_per_second": 7.86, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 27.03448275862069, | |
| "grad_norm": 1.2520081996917725, | |
| "learning_rate": 2.530120481927711e-05, | |
| "loss": 0.0145, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 27.103448275862068, | |
| "grad_norm": 0.4600828289985657, | |
| "learning_rate": 2.5082146768893762e-05, | |
| "loss": 0.0133, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 27.17241379310345, | |
| "grad_norm": 4.692933082580566, | |
| "learning_rate": 2.486308871851041e-05, | |
| "loss": 0.0382, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 27.24137931034483, | |
| "grad_norm": 0.3261309862136841, | |
| "learning_rate": 2.4644030668127055e-05, | |
| "loss": 0.0035, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 27.310344827586206, | |
| "grad_norm": 0.028574170544743538, | |
| "learning_rate": 2.44249726177437e-05, | |
| "loss": 0.0084, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 27.379310344827587, | |
| "grad_norm": 0.4913921356201172, | |
| "learning_rate": 2.420591456736035e-05, | |
| "loss": 0.0183, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 27.448275862068964, | |
| "grad_norm": 1.5067977905273438, | |
| "learning_rate": 2.3986856516977e-05, | |
| "loss": 0.0387, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 27.517241379310345, | |
| "grad_norm": 0.8277406096458435, | |
| "learning_rate": 2.376779846659365e-05, | |
| "loss": 0.0233, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 27.586206896551722, | |
| "grad_norm": 5.168019771575928, | |
| "learning_rate": 2.3548740416210296e-05, | |
| "loss": 0.0222, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 27.655172413793103, | |
| "grad_norm": 0.5925205945968628, | |
| "learning_rate": 2.3329682365826945e-05, | |
| "loss": 0.0066, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 27.724137931034484, | |
| "grad_norm": 0.7455288767814636, | |
| "learning_rate": 2.3110624315443595e-05, | |
| "loss": 0.0044, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 27.79310344827586, | |
| "grad_norm": 0.029589757323265076, | |
| "learning_rate": 2.289156626506024e-05, | |
| "loss": 0.0055, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 27.862068965517242, | |
| "grad_norm": 10.534521102905273, | |
| "learning_rate": 2.267250821467689e-05, | |
| "loss": 0.0515, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 27.93103448275862, | |
| "grad_norm": 0.19806796312332153, | |
| "learning_rate": 2.2453450164293537e-05, | |
| "loss": 0.0157, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 5.914359092712402, | |
| "learning_rate": 2.2234392113910187e-05, | |
| "loss": 0.0336, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6439393939393939, | |
| "eval_f1_macro": 0.6085667254819397, | |
| "eval_f1_micro": 0.6439393939393939, | |
| "eval_f1_weighted": 0.6411146413787409, | |
| "eval_loss": 1.5964038372039795, | |
| "eval_precision_macro": 0.6379142750217496, | |
| "eval_precision_micro": 0.6439393939393939, | |
| "eval_precision_weighted": 0.6565570652257958, | |
| "eval_recall_macro": 0.5978533635676493, | |
| "eval_recall_micro": 0.6439393939393939, | |
| "eval_recall_weighted": 0.6439393939393939, | |
| "eval_runtime": 2.2198, | |
| "eval_samples_per_second": 59.466, | |
| "eval_steps_per_second": 7.659, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 28.06896551724138, | |
| "grad_norm": 1.2526508569717407, | |
| "learning_rate": 2.2015334063526836e-05, | |
| "loss": 0.0304, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 28.137931034482758, | |
| "grad_norm": 1.441461443901062, | |
| "learning_rate": 2.1796276013143486e-05, | |
| "loss": 0.0071, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 28.20689655172414, | |
| "grad_norm": 0.2004363089799881, | |
| "learning_rate": 2.1577217962760132e-05, | |
| "loss": 0.0261, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 28.275862068965516, | |
| "grad_norm": 0.16825991868972778, | |
| "learning_rate": 2.1358159912376778e-05, | |
| "loss": 0.0297, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 28.344827586206897, | |
| "grad_norm": 0.8127052783966064, | |
| "learning_rate": 2.1139101861993428e-05, | |
| "loss": 0.005, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 28.413793103448278, | |
| "grad_norm": 0.2261103391647339, | |
| "learning_rate": 2.0920043811610077e-05, | |
| "loss": 0.0056, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 28.482758620689655, | |
| "grad_norm": 0.04766825586557388, | |
| "learning_rate": 2.0700985761226727e-05, | |
| "loss": 0.0013, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 28.551724137931036, | |
| "grad_norm": 0.036670394241809845, | |
| "learning_rate": 2.0481927710843373e-05, | |
| "loss": 0.0028, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 28.620689655172413, | |
| "grad_norm": 0.41730597615242004, | |
| "learning_rate": 2.0262869660460023e-05, | |
| "loss": 0.0028, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 28.689655172413794, | |
| "grad_norm": 0.04215677082538605, | |
| "learning_rate": 2.0043811610076672e-05, | |
| "loss": 0.0018, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 28.75862068965517, | |
| "grad_norm": 0.08167728036642075, | |
| "learning_rate": 1.9824753559693322e-05, | |
| "loss": 0.0016, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 28.82758620689655, | |
| "grad_norm": 0.031280118972063065, | |
| "learning_rate": 1.9605695509309968e-05, | |
| "loss": 0.002, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 28.896551724137932, | |
| "grad_norm": 1.7285773754119873, | |
| "learning_rate": 1.9386637458926614e-05, | |
| "loss": 0.0117, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 28.96551724137931, | |
| "grad_norm": 0.06211957335472107, | |
| "learning_rate": 1.9167579408543264e-05, | |
| "loss": 0.0014, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7121212121212122, | |
| "eval_f1_macro": 0.6872675353596543, | |
| "eval_f1_micro": 0.7121212121212122, | |
| "eval_f1_weighted": 0.7082990442199542, | |
| "eval_loss": 1.5290158987045288, | |
| "eval_precision_macro": 0.7262781667691, | |
| "eval_precision_micro": 0.7121212121212122, | |
| "eval_precision_weighted": 0.7308177925367624, | |
| "eval_recall_macro": 0.6733560090702948, | |
| "eval_recall_micro": 0.7121212121212122, | |
| "eval_recall_weighted": 0.7121212121212122, | |
| "eval_runtime": 2.1608, | |
| "eval_samples_per_second": 61.088, | |
| "eval_steps_per_second": 7.867, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 29.03448275862069, | |
| "grad_norm": 0.01278562843799591, | |
| "learning_rate": 1.8948521358159914e-05, | |
| "loss": 0.003, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 29.103448275862068, | |
| "grad_norm": 0.06935442239046097, | |
| "learning_rate": 1.8729463307776563e-05, | |
| "loss": 0.0014, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 29.17241379310345, | |
| "grad_norm": 0.6586639285087585, | |
| "learning_rate": 1.851040525739321e-05, | |
| "loss": 0.0023, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 29.24137931034483, | |
| "grad_norm": 3.5995774269104004, | |
| "learning_rate": 1.829134720700986e-05, | |
| "loss": 0.018, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 29.310344827586206, | |
| "grad_norm": 0.08816396445035934, | |
| "learning_rate": 1.8072289156626505e-05, | |
| "loss": 0.0031, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 29.379310344827587, | |
| "grad_norm": 1.209425926208496, | |
| "learning_rate": 1.7853231106243155e-05, | |
| "loss": 0.0282, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 29.448275862068964, | |
| "grad_norm": 3.262197732925415, | |
| "learning_rate": 1.7634173055859804e-05, | |
| "loss": 0.0246, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 29.517241379310345, | |
| "grad_norm": 0.7129732966423035, | |
| "learning_rate": 1.741511500547645e-05, | |
| "loss": 0.0085, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 29.586206896551722, | |
| "grad_norm": 0.15869493782520294, | |
| "learning_rate": 1.71960569550931e-05, | |
| "loss": 0.0015, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 29.655172413793103, | |
| "grad_norm": 0.07819876074790955, | |
| "learning_rate": 1.697699890470975e-05, | |
| "loss": 0.0476, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 29.724137931034484, | |
| "grad_norm": 0.12180998921394348, | |
| "learning_rate": 1.67579408543264e-05, | |
| "loss": 0.0032, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 29.79310344827586, | |
| "grad_norm": 0.034297507256269455, | |
| "learning_rate": 1.6538882803943046e-05, | |
| "loss": 0.0008, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 29.862068965517242, | |
| "grad_norm": 0.14563943445682526, | |
| "learning_rate": 1.6319824753559695e-05, | |
| "loss": 0.0087, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 29.93103448275862, | |
| "grad_norm": 0.23122666776180267, | |
| "learning_rate": 1.610076670317634e-05, | |
| "loss": 0.0416, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.06973911821842194, | |
| "learning_rate": 1.588170865279299e-05, | |
| "loss": 0.021, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.696969696969697, | |
| "eval_f1_macro": 0.6982068677202545, | |
| "eval_f1_micro": 0.696969696969697, | |
| "eval_f1_weighted": 0.6973740805478152, | |
| "eval_loss": 1.5439778566360474, | |
| "eval_precision_macro": 0.7076152020847177, | |
| "eval_precision_micro": 0.696969696969697, | |
| "eval_precision_weighted": 0.7169868679432659, | |
| "eval_recall_macro": 0.7086167800453513, | |
| "eval_recall_micro": 0.696969696969697, | |
| "eval_recall_weighted": 0.696969696969697, | |
| "eval_runtime": 2.1633, | |
| "eval_samples_per_second": 61.018, | |
| "eval_steps_per_second": 7.858, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 30.06896551724138, | |
| "grad_norm": 1.7453641891479492, | |
| "learning_rate": 1.566265060240964e-05, | |
| "loss": 0.0083, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 30.137931034482758, | |
| "grad_norm": 0.3179946541786194, | |
| "learning_rate": 1.5443592552026287e-05, | |
| "loss": 0.0024, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 30.20689655172414, | |
| "grad_norm": 1.7078912258148193, | |
| "learning_rate": 1.5224534501642936e-05, | |
| "loss": 0.0082, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 30.275862068965516, | |
| "grad_norm": 0.32421720027923584, | |
| "learning_rate": 1.5005476451259584e-05, | |
| "loss": 0.0032, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 30.344827586206897, | |
| "grad_norm": 0.04044501855969429, | |
| "learning_rate": 1.4786418400876234e-05, | |
| "loss": 0.0444, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 30.413793103448278, | |
| "grad_norm": 0.06957350671291351, | |
| "learning_rate": 1.4567360350492882e-05, | |
| "loss": 0.0226, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 30.482758620689655, | |
| "grad_norm": 0.5221211314201355, | |
| "learning_rate": 1.4348302300109528e-05, | |
| "loss": 0.0041, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 30.551724137931036, | |
| "grad_norm": 0.017598647624254227, | |
| "learning_rate": 1.412924424972618e-05, | |
| "loss": 0.0013, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 30.620689655172413, | |
| "grad_norm": 0.47180604934692383, | |
| "learning_rate": 1.3910186199342825e-05, | |
| "loss": 0.003, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 30.689655172413794, | |
| "grad_norm": 0.03941981866955757, | |
| "learning_rate": 1.3691128148959475e-05, | |
| "loss": 0.0112, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 30.75862068965517, | |
| "grad_norm": 0.023555099964141846, | |
| "learning_rate": 1.3472070098576123e-05, | |
| "loss": 0.03, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 30.82758620689655, | |
| "grad_norm": 0.48139652609825134, | |
| "learning_rate": 1.3253012048192772e-05, | |
| "loss": 0.0052, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 30.896551724137932, | |
| "grad_norm": 0.1581326723098755, | |
| "learning_rate": 1.303395399780942e-05, | |
| "loss": 0.0156, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 30.96551724137931, | |
| "grad_norm": 1.283683180809021, | |
| "learning_rate": 1.2814895947426067e-05, | |
| "loss": 0.0065, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.696969696969697, | |
| "eval_f1_macro": 0.6868812120235886, | |
| "eval_f1_micro": 0.696969696969697, | |
| "eval_f1_weighted": 0.6914990595967736, | |
| "eval_loss": 1.6575924158096313, | |
| "eval_precision_macro": 0.7429775738046415, | |
| "eval_precision_micro": 0.696969696969697, | |
| "eval_precision_weighted": 0.7269597892803633, | |
| "eval_recall_macro": 0.6698866213151928, | |
| "eval_recall_micro": 0.696969696969697, | |
| "eval_recall_weighted": 0.696969696969697, | |
| "eval_runtime": 2.1493, | |
| "eval_samples_per_second": 61.416, | |
| "eval_steps_per_second": 7.91, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 31.03448275862069, | |
| "grad_norm": 0.018120741471648216, | |
| "learning_rate": 1.2595837897042718e-05, | |
| "loss": 0.0011, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 31.103448275862068, | |
| "grad_norm": 0.1496172994375229, | |
| "learning_rate": 1.2376779846659366e-05, | |
| "loss": 0.0031, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 31.17241379310345, | |
| "grad_norm": 1.2407957315444946, | |
| "learning_rate": 1.2157721796276014e-05, | |
| "loss": 0.0278, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 31.24137931034483, | |
| "grad_norm": 0.14974364638328552, | |
| "learning_rate": 1.1938663745892662e-05, | |
| "loss": 0.0236, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 31.310344827586206, | |
| "grad_norm": 4.202882766723633, | |
| "learning_rate": 1.171960569550931e-05, | |
| "loss": 0.0241, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 31.379310344827587, | |
| "grad_norm": 0.05061774700880051, | |
| "learning_rate": 1.1500547645125959e-05, | |
| "loss": 0.001, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 31.448275862068964, | |
| "grad_norm": 0.019295161589980125, | |
| "learning_rate": 1.1281489594742607e-05, | |
| "loss": 0.0012, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 31.517241379310345, | |
| "grad_norm": 0.1430915743112564, | |
| "learning_rate": 1.1062431544359257e-05, | |
| "loss": 0.0041, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 31.586206896551722, | |
| "grad_norm": 0.03306346759200096, | |
| "learning_rate": 1.0843373493975904e-05, | |
| "loss": 0.0013, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 31.655172413793103, | |
| "grad_norm": 0.42486900091171265, | |
| "learning_rate": 1.0624315443592552e-05, | |
| "loss": 0.006, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 31.724137931034484, | |
| "grad_norm": 0.058433897793293, | |
| "learning_rate": 1.0405257393209202e-05, | |
| "loss": 0.0018, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 31.79310344827586, | |
| "grad_norm": 0.027252651751041412, | |
| "learning_rate": 1.0186199342825848e-05, | |
| "loss": 0.004, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 31.862068965517242, | |
| "grad_norm": 0.13943073153495789, | |
| "learning_rate": 9.967141292442498e-06, | |
| "loss": 0.0016, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 31.93103448275862, | |
| "grad_norm": 0.048901163041591644, | |
| "learning_rate": 9.748083242059146e-06, | |
| "loss": 0.0019, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.019549880176782608, | |
| "learning_rate": 9.529025191675795e-06, | |
| "loss": 0.0013, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7196969696969697, | |
| "eval_f1_macro": 0.7124096518979722, | |
| "eval_f1_micro": 0.7196969696969697, | |
| "eval_f1_weighted": 0.7172944138815308, | |
| "eval_loss": 1.560258388519287, | |
| "eval_precision_macro": 0.750803957946815, | |
| "eval_precision_micro": 0.7196969696969697, | |
| "eval_precision_weighted": 0.7410763478945297, | |
| "eval_recall_macro": 0.6987226001511715, | |
| "eval_recall_micro": 0.7196969696969697, | |
| "eval_recall_weighted": 0.7196969696969697, | |
| "eval_runtime": 2.1728, | |
| "eval_samples_per_second": 60.752, | |
| "eval_steps_per_second": 7.824, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 32.06896551724138, | |
| "grad_norm": 0.05375111103057861, | |
| "learning_rate": 9.309967141292443e-06, | |
| "loss": 0.0024, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 32.13793103448276, | |
| "grad_norm": 0.3470950424671173, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.0035, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 32.206896551724135, | |
| "grad_norm": 0.02533532679080963, | |
| "learning_rate": 8.87185104052574e-06, | |
| "loss": 0.001, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 32.275862068965516, | |
| "grad_norm": 1.6475239992141724, | |
| "learning_rate": 8.652792990142389e-06, | |
| "loss": 0.0343, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 32.3448275862069, | |
| "grad_norm": 0.13403227925300598, | |
| "learning_rate": 8.433734939759036e-06, | |
| "loss": 0.0019, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 32.41379310344828, | |
| "grad_norm": 0.13201530277729034, | |
| "learning_rate": 8.214676889375684e-06, | |
| "loss": 0.0019, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 32.48275862068966, | |
| "grad_norm": 2.441126823425293, | |
| "learning_rate": 7.995618838992334e-06, | |
| "loss": 0.0152, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 32.55172413793103, | |
| "grad_norm": 0.3257850408554077, | |
| "learning_rate": 7.776560788608982e-06, | |
| "loss": 0.0019, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 32.62068965517241, | |
| "grad_norm": 0.10473517328500748, | |
| "learning_rate": 7.5575027382256306e-06, | |
| "loss": 0.0027, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 32.689655172413794, | |
| "grad_norm": 0.04300970956683159, | |
| "learning_rate": 7.3384446878422785e-06, | |
| "loss": 0.0008, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 32.758620689655174, | |
| "grad_norm": 0.12258446961641312, | |
| "learning_rate": 7.119386637458927e-06, | |
| "loss": 0.0028, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 32.827586206896555, | |
| "grad_norm": 0.14553672075271606, | |
| "learning_rate": 6.900328587075576e-06, | |
| "loss": 0.0013, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 32.89655172413793, | |
| "grad_norm": 1.198081612586975, | |
| "learning_rate": 6.681270536692223e-06, | |
| "loss": 0.0276, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 32.96551724137931, | |
| "grad_norm": 0.4895022511482239, | |
| "learning_rate": 6.462212486308872e-06, | |
| "loss": 0.0129, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.6893939393939394, | |
| "eval_f1_macro": 0.6841929992352647, | |
| "eval_f1_micro": 0.6893939393939394, | |
| "eval_f1_weighted": 0.6870473377924375, | |
| "eval_loss": 1.6027860641479492, | |
| "eval_precision_macro": 0.7152604691775198, | |
| "eval_precision_micro": 0.6893939393939394, | |
| "eval_precision_weighted": 0.7059007626456307, | |
| "eval_recall_macro": 0.673061224489796, | |
| "eval_recall_micro": 0.6893939393939394, | |
| "eval_recall_weighted": 0.6893939393939394, | |
| "eval_runtime": 2.1875, | |
| "eval_samples_per_second": 60.343, | |
| "eval_steps_per_second": 7.771, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 33.03448275862069, | |
| "grad_norm": 0.18283292651176453, | |
| "learning_rate": 6.2431544359255205e-06, | |
| "loss": 0.0023, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 33.10344827586207, | |
| "grad_norm": 0.09138727933168411, | |
| "learning_rate": 6.024096385542169e-06, | |
| "loss": 0.0018, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 33.172413793103445, | |
| "grad_norm": 0.27659812569618225, | |
| "learning_rate": 5.805038335158817e-06, | |
| "loss": 0.004, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 33.241379310344826, | |
| "grad_norm": 1.0702749490737915, | |
| "learning_rate": 5.585980284775466e-06, | |
| "loss": 0.0228, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 33.310344827586206, | |
| "grad_norm": 0.10491228103637695, | |
| "learning_rate": 5.366922234392114e-06, | |
| "loss": 0.0023, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 33.37931034482759, | |
| "grad_norm": 0.33777573704719543, | |
| "learning_rate": 5.1478641840087625e-06, | |
| "loss": 0.0034, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 33.44827586206897, | |
| "grad_norm": 0.06301871687173843, | |
| "learning_rate": 4.928806133625411e-06, | |
| "loss": 0.0023, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 33.51724137931034, | |
| "grad_norm": 0.09340860694646835, | |
| "learning_rate": 4.70974808324206e-06, | |
| "loss": 0.0205, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 33.58620689655172, | |
| "grad_norm": 0.020821426063776016, | |
| "learning_rate": 4.490690032858708e-06, | |
| "loss": 0.0009, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 33.6551724137931, | |
| "grad_norm": 0.04694080352783203, | |
| "learning_rate": 4.271631982475356e-06, | |
| "loss": 0.001, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 33.724137931034484, | |
| "grad_norm": 0.056120615452528, | |
| "learning_rate": 4.0525739320920046e-06, | |
| "loss": 0.0018, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 33.793103448275865, | |
| "grad_norm": 1.5992101430892944, | |
| "learning_rate": 3.8335158817086525e-06, | |
| "loss": 0.0073, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 33.86206896551724, | |
| "grad_norm": 0.044718772172927856, | |
| "learning_rate": 3.614457831325301e-06, | |
| "loss": 0.012, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 33.93103448275862, | |
| "grad_norm": 0.019480116665363312, | |
| "learning_rate": 3.39539978094195e-06, | |
| "loss": 0.0012, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "grad_norm": 0.01623496413230896, | |
| "learning_rate": 3.1763417305585983e-06, | |
| "loss": 0.0006, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_f1_macro": 0.6786724695679507, | |
| "eval_f1_micro": 0.6818181818181818, | |
| "eval_f1_weighted": 0.6800052243915973, | |
| "eval_loss": 1.6074531078338623, | |
| "eval_precision_macro": 0.7093953665382237, | |
| "eval_precision_micro": 0.6818181818181818, | |
| "eval_precision_weighted": 0.6991099809281627, | |
| "eval_recall_macro": 0.6677702191987906, | |
| "eval_recall_micro": 0.6818181818181818, | |
| "eval_recall_weighted": 0.6818181818181818, | |
| "eval_runtime": 2.1756, | |
| "eval_samples_per_second": 60.674, | |
| "eval_steps_per_second": 7.814, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 34.06896551724138, | |
| "grad_norm": 0.02141761966049671, | |
| "learning_rate": 2.9572836801752466e-06, | |
| "loss": 0.0015, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 34.13793103448276, | |
| "grad_norm": 0.03435864299535751, | |
| "learning_rate": 2.738225629791895e-06, | |
| "loss": 0.0009, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 34.206896551724135, | |
| "grad_norm": 0.03004680573940277, | |
| "learning_rate": 2.5191675794085432e-06, | |
| "loss": 0.0011, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 34.275862068965516, | |
| "grad_norm": 0.027880065143108368, | |
| "learning_rate": 2.3001095290251916e-06, | |
| "loss": 0.0008, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 34.3448275862069, | |
| "grad_norm": 1.3690009117126465, | |
| "learning_rate": 2.0810514786418403e-06, | |
| "loss": 0.0308, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 34.41379310344828, | |
| "grad_norm": 0.04672781005501747, | |
| "learning_rate": 1.8619934282584884e-06, | |
| "loss": 0.0012, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 34.48275862068966, | |
| "grad_norm": 0.06204487010836601, | |
| "learning_rate": 1.642935377875137e-06, | |
| "loss": 0.0017, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 34.55172413793103, | |
| "grad_norm": 0.03130810335278511, | |
| "learning_rate": 1.4238773274917855e-06, | |
| "loss": 0.0015, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 34.62068965517241, | |
| "grad_norm": 0.06150972098112106, | |
| "learning_rate": 1.2048192771084338e-06, | |
| "loss": 0.0026, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 34.689655172413794, | |
| "grad_norm": 0.1301676630973816, | |
| "learning_rate": 9.857612267250823e-07, | |
| "loss": 0.0023, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 34.758620689655174, | |
| "grad_norm": 0.11791616678237915, | |
| "learning_rate": 7.667031763417306e-07, | |
| "loss": 0.0019, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 34.827586206896555, | |
| "grad_norm": 2.598999500274658, | |
| "learning_rate": 5.47645125958379e-07, | |
| "loss": 0.0082, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 34.89655172413793, | |
| "grad_norm": 1.0025254487991333, | |
| "learning_rate": 3.285870755750274e-07, | |
| "loss": 0.0207, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 34.96551724137931, | |
| "grad_norm": 0.543045699596405, | |
| "learning_rate": 1.095290251916758e-07, | |
| "loss": 0.0022, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.6893939393939394, | |
| "eval_f1_macro": 0.6848257838882342, | |
| "eval_f1_micro": 0.6893939393939394, | |
| "eval_f1_weighted": 0.6869347739708745, | |
| "eval_loss": 1.6008917093276978, | |
| "eval_precision_macro": 0.7170539138281073, | |
| "eval_precision_micro": 0.6893939393939394, | |
| "eval_precision_weighted": 0.7061795242880288, | |
| "eval_recall_macro": 0.673061224489796, | |
| "eval_recall_micro": 0.6893939393939394, | |
| "eval_recall_weighted": 0.6893939393939394, | |
| "eval_runtime": 2.1921, | |
| "eval_samples_per_second": 60.216, | |
| "eval_steps_per_second": 7.755, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "step": 1015, | |
| "total_flos": 1.2531016253190758e+18, | |
| "train_loss": 0.5009635013656627, | |
| "train_runtime": 1007.7292, | |
| "train_samples_per_second": 16.046, | |
| "train_steps_per_second": 1.007 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1015, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 35, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2531016253190758e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |