Upload rerank_model from local

Browse files

Files changed (8) hide show

config.json +1 -1
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +1 -1
tokenizer_config.json +1 -1
trainer_state.json +1252 -0
training_args.bin +3 -0

config.json CHANGED Viewed

@@ -48,4 +48,4 @@
   "unpad_inputs": false,
   "use_memory_efficient_attention": false,
   "vocab_size": 250048
-}

   "unpad_inputs": false,
   "use_memory_efficient_attention": false,
   "vocab_size": 250048
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a32ba179658a4500aef14adfde824f9fd1209b08f12c4879c82c1c1e424a677
+size 2447795147

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:153e458bd75fb3848db9b6f77fd3dbc202e8b9991637e5679559811445c889a7
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d860214655dfba7a7d4a60019458f6430c807bb94c834bbbef71b2ce0819480e
+size 1465

special_tokens_map.json CHANGED Viewed

@@ -48,4 +48,4 @@
     "rstrip": false,
     "single_word": false
   }
-}

     "rstrip": false,
     "single_word": false
   }
+}

tokenizer_config.json CHANGED Viewed

@@ -58,4 +58,4 @@
   "truncation_side": "right",
   "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
-}

   "truncation_side": "right",
   "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1252 @@

+{
+  "best_metric": 0.0018289362778887153,
+  "best_model_checkpoint": "./results/checkpoint-1669",
+  "epoch": 3.998502994011976,
+  "eval_steps": 500,
+  "global_step": 1669,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.023952095808383235,
+      "grad_norm": 21869930.0,
+      "learning_rate": 2.3980815347721825e-07,
+      "loss": 0.9053,
+      "step": 10
+    },
+    {
+      "epoch": 0.04790419161676647,
+      "grad_norm": 19059822.0,
+      "learning_rate": 4.796163069544365e-07,
+      "loss": 0.7081,
+      "step": 20
+    },
+    {
+      "epoch": 0.0718562874251497,
+      "grad_norm": 11655885.0,
+      "learning_rate": 7.194244604316547e-07,
+      "loss": 0.5323,
+      "step": 30
+    },
+    {
+      "epoch": 0.09580838323353294,
+      "grad_norm": 3832326.5,
+      "learning_rate": 9.59232613908873e-07,
+      "loss": 0.3305,
+      "step": 40
+    },
+    {
+      "epoch": 0.11976047904191617,
+      "grad_norm": 2399766.25,
+      "learning_rate": 1.1990407673860912e-06,
+      "loss": 0.178,
+      "step": 50
+    },
+    {
+      "epoch": 0.1437125748502994,
+      "grad_norm": 707622.9375,
+      "learning_rate": 1.4388489208633094e-06,
+      "loss": 0.1208,
+      "step": 60
+    },
+    {
+      "epoch": 0.16766467065868262,
+      "grad_norm": 694203.75,
+      "learning_rate": 1.6786570743405278e-06,
+      "loss": 0.1082,
+      "step": 70
+    },
+    {
+      "epoch": 0.19161676646706588,
+      "grad_norm": 906404.875,
+      "learning_rate": 1.918465227817746e-06,
+      "loss": 0.085,
+      "step": 80
+    },
+    {
+      "epoch": 0.2155688622754491,
+      "grad_norm": 430721.75,
+      "learning_rate": 2.158273381294964e-06,
+      "loss": 0.0717,
+      "step": 90
+    },
+    {
+      "epoch": 0.23952095808383234,
+      "grad_norm": 347536.15625,
+      "learning_rate": 2.3980815347721824e-06,
+      "loss": 0.053,
+      "step": 100
+    },
+    {
+      "epoch": 0.2634730538922156,
+      "grad_norm": 364138.15625,
+      "learning_rate": 2.637889688249401e-06,
+      "loss": 0.045,
+      "step": 110
+    },
+    {
+      "epoch": 0.2874251497005988,
+      "grad_norm": 305517.90625,
+      "learning_rate": 2.877697841726619e-06,
+      "loss": 0.0366,
+      "step": 120
+    },
+    {
+      "epoch": 0.31137724550898205,
+      "grad_norm": 286330.90625,
+      "learning_rate": 3.1175059952038373e-06,
+      "loss": 0.0318,
+      "step": 130
+    },
+    {
+      "epoch": 0.33532934131736525,
+      "grad_norm": 221961.6875,
+      "learning_rate": 3.3573141486810557e-06,
+      "loss": 0.0262,
+      "step": 140
+    },
+    {
+      "epoch": 0.3592814371257485,
+      "grad_norm": 307385.9375,
+      "learning_rate": 3.5971223021582737e-06,
+      "loss": 0.0229,
+      "step": 150
+    },
+    {
+      "epoch": 0.38323353293413176,
+      "grad_norm": 362847.0,
+      "learning_rate": 3.836930455635492e-06,
+      "loss": 0.0227,
+      "step": 160
+    },
+    {
+      "epoch": 0.40718562874251496,
+      "grad_norm": 225246.109375,
+      "learning_rate": 4.0767386091127105e-06,
+      "loss": 0.0211,
+      "step": 170
+    },
+    {
+      "epoch": 0.4311377245508982,
+      "grad_norm": 336017.8125,
+      "learning_rate": 4.316546762589928e-06,
+      "loss": 0.0188,
+      "step": 180
+    },
+    {
+      "epoch": 0.4550898203592814,
+      "grad_norm": 232213.890625,
+      "learning_rate": 4.5563549160671465e-06,
+      "loss": 0.0134,
+      "step": 190
+    },
+    {
+      "epoch": 0.47904191616766467,
+      "grad_norm": 189961.46875,
+      "learning_rate": 4.796163069544365e-06,
+      "loss": 0.016,
+      "step": 200
+    },
+    {
+      "epoch": 0.5029940119760479,
+      "grad_norm": 108848.3671875,
+      "learning_rate": 5.035971223021583e-06,
+      "loss": 0.0141,
+      "step": 210
+    },
+    {
+      "epoch": 0.5269461077844312,
+      "grad_norm": 158108.1875,
+      "learning_rate": 5.275779376498802e-06,
+      "loss": 0.0131,
+      "step": 220
+    },
+    {
+      "epoch": 0.5508982035928144,
+      "grad_norm": 257074.015625,
+      "learning_rate": 5.51558752997602e-06,
+      "loss": 0.0098,
+      "step": 230
+    },
+    {
+      "epoch": 0.5748502994011976,
+      "grad_norm": 181764.84375,
+      "learning_rate": 5.755395683453238e-06,
+      "loss": 0.011,
+      "step": 240
+    },
+    {
+      "epoch": 0.5988023952095808,
+      "grad_norm": 231444.921875,
+      "learning_rate": 5.995203836930457e-06,
+      "loss": 0.0135,
+      "step": 250
+    },
+    {
+      "epoch": 0.6227544910179641,
+      "grad_norm": 267698.96875,
+      "learning_rate": 6.2350119904076745e-06,
+      "loss": 0.0099,
+      "step": 260
+    },
+    {
+      "epoch": 0.6467065868263473,
+      "grad_norm": 168324.34375,
+      "learning_rate": 6.474820143884892e-06,
+      "loss": 0.0107,
+      "step": 270
+    },
+    {
+      "epoch": 0.6706586826347305,
+      "grad_norm": 174906.8125,
+      "learning_rate": 6.714628297362111e-06,
+      "loss": 0.013,
+      "step": 280
+    },
+    {
+      "epoch": 0.6946107784431138,
+      "grad_norm": 229340.8125,
+      "learning_rate": 6.954436450839329e-06,
+      "loss": 0.0125,
+      "step": 290
+    },
+    {
+      "epoch": 0.718562874251497,
+      "grad_norm": 141271.78125,
+      "learning_rate": 7.194244604316547e-06,
+      "loss": 0.0093,
+      "step": 300
+    },
+    {
+      "epoch": 0.7425149700598802,
+      "grad_norm": 65792.4296875,
+      "learning_rate": 7.434052757793766e-06,
+      "loss": 0.0107,
+      "step": 310
+    },
+    {
+      "epoch": 0.7664670658682635,
+      "grad_norm": 155497.109375,
+      "learning_rate": 7.673860911270984e-06,
+      "loss": 0.0078,
+      "step": 320
+    },
+    {
+      "epoch": 0.7904191616766467,
+      "grad_norm": 360102.5625,
+      "learning_rate": 7.913669064748202e-06,
+      "loss": 0.0077,
+      "step": 330
+    },
+    {
+      "epoch": 0.8143712574850299,
+      "grad_norm": 261093.21875,
+      "learning_rate": 8.153477218225421e-06,
+      "loss": 0.0061,
+      "step": 340
+    },
+    {
+      "epoch": 0.8383233532934131,
+      "grad_norm": 178080.28125,
+      "learning_rate": 8.393285371702639e-06,
+      "loss": 0.0082,
+      "step": 350
+    },
+    {
+      "epoch": 0.8622754491017964,
+      "grad_norm": 269176.5625,
+      "learning_rate": 8.633093525179856e-06,
+      "loss": 0.007,
+      "step": 360
+    },
+    {
+      "epoch": 0.8862275449101796,
+      "grad_norm": 409776.6875,
+      "learning_rate": 8.872901678657075e-06,
+      "loss": 0.009,
+      "step": 370
+    },
+    {
+      "epoch": 0.9101796407185628,
+      "grad_norm": 653130.125,
+      "learning_rate": 9.112709832134293e-06,
+      "loss": 0.0065,
+      "step": 380
+    },
+    {
+      "epoch": 0.9341317365269461,
+      "grad_norm": 85098.8671875,
+      "learning_rate": 9.35251798561151e-06,
+      "loss": 0.0036,
+      "step": 390
+    },
+    {
+      "epoch": 0.9580838323353293,
+      "grad_norm": 427729.28125,
+      "learning_rate": 9.59232613908873e-06,
+      "loss": 0.0059,
+      "step": 400
+    },
+    {
+      "epoch": 0.9820359281437125,
+      "grad_norm": 151977.40625,
+      "learning_rate": 9.832134292565947e-06,
+      "loss": 0.0074,
+      "step": 410
+    },
+    {
+      "epoch": 0.9988023952095808,
+      "eval_accuracy": 0.8697995620683847,
+      "eval_f1": 0.8873013558827817,
+      "eval_loss": 0.009821193292737007,
+      "eval_precision": 0.7974318658280922,
+      "eval_recall": 1.0,
+      "eval_runtime": 156.9292,
+      "eval_samples_per_second": 37.832,
+      "eval_steps_per_second": 2.37,
+      "step": 417
+    },
+    {
+      "epoch": 1.0062874251497007,
+      "grad_norm": 229342.40625,
+      "learning_rate": 1.0071942446043167e-05,
+      "loss": 0.0093,
+      "step": 420
+    },
+    {
+      "epoch": 1.030239520958084,
+      "grad_norm": 105467.90625,
+      "learning_rate": 1.0311750599520384e-05,
+      "loss": 0.0042,
+      "step": 430
+    },
+    {
+      "epoch": 1.054191616766467,
+      "grad_norm": 71231.6171875,
+      "learning_rate": 1.0551558752997603e-05,
+      "loss": 0.0043,
+      "step": 440
+    },
+    {
+      "epoch": 1.0781437125748503,
+      "grad_norm": 147894.453125,
+      "learning_rate": 1.0791366906474821e-05,
+      "loss": 0.0037,
+      "step": 450
+    },
+    {
+      "epoch": 1.1020958083832335,
+      "grad_norm": 173828.546875,
+      "learning_rate": 1.103117505995204e-05,
+      "loss": 0.0044,
+      "step": 460
+    },
+    {
+      "epoch": 1.1260479041916167,
+      "grad_norm": 158015.078125,
+      "learning_rate": 1.1270983213429258e-05,
+      "loss": 0.0039,
+      "step": 470
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 258871.4375,
+      "learning_rate": 1.1510791366906475e-05,
+      "loss": 0.0034,
+      "step": 480
+    },
+    {
+      "epoch": 1.1739520958083833,
+      "grad_norm": 182111.390625,
+      "learning_rate": 1.1750599520383695e-05,
+      "loss": 0.0066,
+      "step": 490
+    },
+    {
+      "epoch": 1.1979041916167665,
+      "grad_norm": 68393.5703125,
+      "learning_rate": 1.1990407673860914e-05,
+      "loss": 0.0031,
+      "step": 500
+    },
+    {
+      "epoch": 1.2218562874251497,
+      "grad_norm": 304475.46875,
+      "learning_rate": 1.223021582733813e-05,
+      "loss": 0.0026,
+      "step": 510
+    },
+    {
+      "epoch": 1.245808383233533,
+      "grad_norm": 149909.921875,
+      "learning_rate": 1.2470023980815349e-05,
+      "loss": 0.0051,
+      "step": 520
+    },
+    {
+      "epoch": 1.2697604790419161,
+      "grad_norm": 275816.8125,
+      "learning_rate": 1.2709832134292568e-05,
+      "loss": 0.0067,
+      "step": 530
+    },
+    {
+      "epoch": 1.2937125748502993,
+      "grad_norm": 343921.03125,
+      "learning_rate": 1.2949640287769784e-05,
+      "loss": 0.0052,
+      "step": 540
+    },
+    {
+      "epoch": 1.3176646706586825,
+      "grad_norm": 132028.453125,
+      "learning_rate": 1.3189448441247003e-05,
+      "loss": 0.0048,
+      "step": 550
+    },
+    {
+      "epoch": 1.341616766467066,
+      "grad_norm": 253700.5625,
+      "learning_rate": 1.3429256594724223e-05,
+      "loss": 0.0044,
+      "step": 560
+    },
+    {
+      "epoch": 1.3655688622754492,
+      "grad_norm": 66469.2265625,
+      "learning_rate": 1.3669064748201439e-05,
+      "loss": 0.0034,
+      "step": 570
+    },
+    {
+      "epoch": 1.3895209580838324,
+      "grad_norm": 266988.59375,
+      "learning_rate": 1.3908872901678658e-05,
+      "loss": 0.0066,
+      "step": 580
+    },
+    {
+      "epoch": 1.4134730538922156,
+      "grad_norm": 305832.90625,
+      "learning_rate": 1.4148681055155877e-05,
+      "loss": 0.0033,
+      "step": 590
+    },
+    {
+      "epoch": 1.4374251497005988,
+      "grad_norm": 356095.40625,
+      "learning_rate": 1.4388489208633095e-05,
+      "loss": 0.004,
+      "step": 600
+    },
+    {
+      "epoch": 1.461377245508982,
+      "grad_norm": 240207.0625,
+      "learning_rate": 1.4628297362110312e-05,
+      "loss": 0.006,
+      "step": 610
+    },
+    {
+      "epoch": 1.4853293413173652,
+      "grad_norm": 147457.96875,
+      "learning_rate": 1.4868105515587531e-05,
+      "loss": 0.0042,
+      "step": 620
+    },
+    {
+      "epoch": 1.5092814371257486,
+      "grad_norm": 148538.671875,
+      "learning_rate": 1.5107913669064749e-05,
+      "loss": 0.006,
+      "step": 630
+    },
+    {
+      "epoch": 1.5332335329341318,
+      "grad_norm": 85050.8359375,
+      "learning_rate": 1.534772182254197e-05,
+      "loss": 0.0024,
+      "step": 640
+    },
+    {
+      "epoch": 1.557185628742515,
+      "grad_norm": 211666.234375,
+      "learning_rate": 1.5587529976019188e-05,
+      "loss": 0.0034,
+      "step": 650
+    },
+    {
+      "epoch": 1.5811377245508982,
+      "grad_norm": 96871.3203125,
+      "learning_rate": 1.5827338129496403e-05,
+      "loss": 0.0058,
+      "step": 660
+    },
+    {
+      "epoch": 1.6050898203592814,
+      "grad_norm": 126729.3984375,
+      "learning_rate": 1.6067146282973623e-05,
+      "loss": 0.0022,
+      "step": 670
+    },
+    {
+      "epoch": 1.6290419161676648,
+      "grad_norm": 242174.453125,
+      "learning_rate": 1.6306954436450842e-05,
+      "loss": 0.0031,
+      "step": 680
+    },
+    {
+      "epoch": 1.6529940119760478,
+      "grad_norm": 86450.3984375,
+      "learning_rate": 1.6546762589928058e-05,
+      "loss": 0.0065,
+      "step": 690
+    },
+    {
+      "epoch": 1.6769461077844312,
+      "grad_norm": 218104.953125,
+      "learning_rate": 1.6786570743405277e-05,
+      "loss": 0.0018,
+      "step": 700
+    },
+    {
+      "epoch": 1.7008982035928144,
+      "grad_norm": 59439.62890625,
+      "learning_rate": 1.7026378896882496e-05,
+      "loss": 0.0028,
+      "step": 710
+    },
+    {
+      "epoch": 1.7248502994011976,
+      "grad_norm": 593990.1875,
+      "learning_rate": 1.7266187050359712e-05,
+      "loss": 0.0058,
+      "step": 720
+    },
+    {
+      "epoch": 1.7488023952095808,
+      "grad_norm": 75406.0546875,
+      "learning_rate": 1.750599520383693e-05,
+      "loss": 0.0024,
+      "step": 730
+    },
+    {
+      "epoch": 1.772754491017964,
+      "grad_norm": 339184.53125,
+      "learning_rate": 1.774580335731415e-05,
+      "loss": 0.0025,
+      "step": 740
+    },
+    {
+      "epoch": 1.7967065868263474,
+      "grad_norm": 50401.92578125,
+      "learning_rate": 1.7985611510791367e-05,
+      "loss": 0.0034,
+      "step": 750
+    },
+    {
+      "epoch": 1.8206586826347304,
+      "grad_norm": 149760.171875,
+      "learning_rate": 1.8225419664268586e-05,
+      "loss": 0.0007,
+      "step": 760
+    },
+    {
+      "epoch": 1.8446107784431138,
+      "grad_norm": 186960.296875,
+      "learning_rate": 1.8465227817745805e-05,
+      "loss": 0.0026,
+      "step": 770
+    },
+    {
+      "epoch": 1.868562874251497,
+      "grad_norm": 84101.9140625,
+      "learning_rate": 1.870503597122302e-05,
+      "loss": 0.0041,
+      "step": 780
+    },
+    {
+      "epoch": 1.8925149700598802,
+      "grad_norm": 158750.65625,
+      "learning_rate": 1.894484412470024e-05,
+      "loss": 0.0051,
+      "step": 790
+    },
+    {
+      "epoch": 1.9164670658682634,
+      "grad_norm": 148909.34375,
+      "learning_rate": 1.918465227817746e-05,
+      "loss": 0.002,
+      "step": 800
+    },
+    {
+      "epoch": 1.9404191616766466,
+      "grad_norm": 266416.875,
+      "learning_rate": 1.9424460431654675e-05,
+      "loss": 0.003,
+      "step": 810
+    },
+    {
+      "epoch": 1.96437125748503,
+      "grad_norm": 262396.09375,
+      "learning_rate": 1.9664268585131895e-05,
+      "loss": 0.0017,
+      "step": 820
+    },
+    {
+      "epoch": 1.988323353293413,
+      "grad_norm": 103789.453125,
+      "learning_rate": 1.9904076738609114e-05,
+      "loss": 0.0022,
+      "step": 830
+    },
+    {
+      "epoch": 1.9979041916167666,
+      "eval_accuracy": 0.9964628600303184,
+      "eval_f1": 0.9965613230718847,
+      "eval_loss": 0.00301022338680923,
+      "eval_precision": 0.9931462140992167,
+      "eval_recall": 1.0,
+      "eval_runtime": 156.7957,
+      "eval_samples_per_second": 37.865,
+      "eval_steps_per_second": 2.373,
+      "step": 834
+    },
+    {
+      "epoch": 2.0125748502994014,
+      "grad_norm": 38590.04296875,
+      "learning_rate": 1.996402877697842e-05,
+      "loss": 0.0023,
+      "step": 840
+    },
+    {
+      "epoch": 2.0365269461077844,
+      "grad_norm": 76568.2109375,
+      "learning_rate": 1.9904076738609114e-05,
+      "loss": 0.0032,
+      "step": 850
+    },
+    {
+      "epoch": 2.060479041916168,
+      "grad_norm": 106512.125,
+      "learning_rate": 1.984412470023981e-05,
+      "loss": 0.0024,
+      "step": 860
+    },
+    {
+      "epoch": 2.084431137724551,
+      "grad_norm": 156610.90625,
+      "learning_rate": 1.9784172661870504e-05,
+      "loss": 0.0017,
+      "step": 870
+    },
+    {
+      "epoch": 2.108383233532934,
+      "grad_norm": 81875.859375,
+      "learning_rate": 1.97242206235012e-05,
+      "loss": 0.0028,
+      "step": 880
+    },
+    {
+      "epoch": 2.132335329341317,
+      "grad_norm": 138606.71875,
+      "learning_rate": 1.9664268585131895e-05,
+      "loss": 0.0014,
+      "step": 890
+    },
+    {
+      "epoch": 2.1562874251497006,
+      "grad_norm": 381988.15625,
+      "learning_rate": 1.960431654676259e-05,
+      "loss": 0.0033,
+      "step": 900
+    },
+    {
+      "epoch": 2.180239520958084,
+      "grad_norm": 83168.578125,
+      "learning_rate": 1.954436450839329e-05,
+      "loss": 0.001,
+      "step": 910
+    },
+    {
+      "epoch": 2.204191616766467,
+      "grad_norm": 20675.666015625,
+      "learning_rate": 1.9484412470023982e-05,
+      "loss": 0.0006,
+      "step": 920
+    },
+    {
+      "epoch": 2.2281437125748504,
+      "grad_norm": 278388.1875,
+      "learning_rate": 1.9424460431654675e-05,
+      "loss": 0.0008,
+      "step": 930
+    },
+    {
+      "epoch": 2.2520958083832334,
+      "grad_norm": 89630.8828125,
+      "learning_rate": 1.9364508393285372e-05,
+      "loss": 0.0008,
+      "step": 940
+    },
+    {
+      "epoch": 2.276047904191617,
+      "grad_norm": 172995.15625,
+      "learning_rate": 1.930455635491607e-05,
+      "loss": 0.0033,
+      "step": 950
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 17161.27734375,
+      "learning_rate": 1.9244604316546766e-05,
+      "loss": 0.0013,
+      "step": 960
+    },
+    {
+      "epoch": 2.3239520958083832,
+      "grad_norm": 57101.74609375,
+      "learning_rate": 1.918465227817746e-05,
+      "loss": 0.0025,
+      "step": 970
+    },
+    {
+      "epoch": 2.3479041916167667,
+      "grad_norm": 368897.71875,
+      "learning_rate": 1.9124700239808156e-05,
+      "loss": 0.0042,
+      "step": 980
+    },
+    {
+      "epoch": 2.3718562874251496,
+      "grad_norm": 33615.3828125,
+      "learning_rate": 1.906474820143885e-05,
+      "loss": 0.0004,
+      "step": 990
+    },
+    {
+      "epoch": 2.395808383233533,
+      "grad_norm": 46844.67578125,
+      "learning_rate": 1.9004796163069547e-05,
+      "loss": 0.0011,
+      "step": 1000
+    },
+    {
+      "epoch": 2.419760479041916,
+      "grad_norm": 70535.828125,
+      "learning_rate": 1.894484412470024e-05,
+      "loss": 0.0005,
+      "step": 1010
+    },
+    {
+      "epoch": 2.4437125748502995,
+      "grad_norm": 41407.75,
+      "learning_rate": 1.8884892086330937e-05,
+      "loss": 0.0036,
+      "step": 1020
+    },
+    {
+      "epoch": 2.4676646706586824,
+      "grad_norm": 41804.3671875,
+      "learning_rate": 1.8824940047961634e-05,
+      "loss": 0.0005,
+      "step": 1030
+    },
+    {
+      "epoch": 2.491616766467066,
+      "grad_norm": 17560.765625,
+      "learning_rate": 1.8764988009592328e-05,
+      "loss": 0.0013,
+      "step": 1040
+    },
+    {
+      "epoch": 2.5155688622754493,
+      "grad_norm": 11264.94921875,
+      "learning_rate": 1.870503597122302e-05,
+      "loss": 0.0024,
+      "step": 1050
+    },
+    {
+      "epoch": 2.5395209580838323,
+      "grad_norm": 102580.203125,
+      "learning_rate": 1.8645083932853718e-05,
+      "loss": 0.0004,
+      "step": 1060
+    },
+    {
+      "epoch": 2.5634730538922157,
+      "grad_norm": 19371.921875,
+      "learning_rate": 1.8585131894484415e-05,
+      "loss": 0.0021,
+      "step": 1070
+    },
+    {
+      "epoch": 2.5874251497005987,
+      "grad_norm": 59045.21484375,
+      "learning_rate": 1.8525179856115108e-05,
+      "loss": 0.0039,
+      "step": 1080
+    },
+    {
+      "epoch": 2.611377245508982,
+      "grad_norm": 37528.83984375,
+      "learning_rate": 1.8465227817745805e-05,
+      "loss": 0.0015,
+      "step": 1090
+    },
+    {
+      "epoch": 2.635329341317365,
+      "grad_norm": 484952.59375,
+      "learning_rate": 1.8405275779376502e-05,
+      "loss": 0.0023,
+      "step": 1100
+    },
+    {
+      "epoch": 2.6592814371257485,
+      "grad_norm": 14117.0771484375,
+      "learning_rate": 1.8345323741007196e-05,
+      "loss": 0.0017,
+      "step": 1110
+    },
+    {
+      "epoch": 2.683233532934132,
+      "grad_norm": 276284.59375,
+      "learning_rate": 1.8285371702637892e-05,
+      "loss": 0.0016,
+      "step": 1120
+    },
+    {
+      "epoch": 2.707185628742515,
+      "grad_norm": 40982.03515625,
+      "learning_rate": 1.8225419664268586e-05,
+      "loss": 0.001,
+      "step": 1130
+    },
+    {
+      "epoch": 2.7311377245508983,
+      "grad_norm": 101166.234375,
+      "learning_rate": 1.8165467625899283e-05,
+      "loss": 0.0009,
+      "step": 1140
+    },
+    {
+      "epoch": 2.7550898203592813,
+      "grad_norm": 51041.46875,
+      "learning_rate": 1.810551558752998e-05,
+      "loss": 0.0013,
+      "step": 1150
+    },
+    {
+      "epoch": 2.7790419161676647,
+      "grad_norm": 204233.671875,
+      "learning_rate": 1.8045563549160673e-05,
+      "loss": 0.0016,
+      "step": 1160
+    },
+    {
+      "epoch": 2.8029940119760477,
+      "grad_norm": 143324.921875,
+      "learning_rate": 1.7985611510791367e-05,
+      "loss": 0.0026,
+      "step": 1170
+    },
+    {
+      "epoch": 2.826946107784431,
+      "grad_norm": 44342.2734375,
+      "learning_rate": 1.7925659472422064e-05,
+      "loss": 0.0023,
+      "step": 1180
+    },
+    {
+      "epoch": 2.8508982035928145,
+      "grad_norm": 75831.65625,
+      "learning_rate": 1.786570743405276e-05,
+      "loss": 0.0016,
+      "step": 1190
+    },
+    {
+      "epoch": 2.8748502994011975,
+      "grad_norm": 96995.328125,
+      "learning_rate": 1.7805755395683454e-05,
+      "loss": 0.0003,
+      "step": 1200
+    },
+    {
+      "epoch": 2.898802395209581,
+      "grad_norm": 110547.0546875,
+      "learning_rate": 1.774580335731415e-05,
+      "loss": 0.0003,
+      "step": 1210
+    },
+    {
+      "epoch": 2.922754491017964,
+      "grad_norm": 30139.875,
+      "learning_rate": 1.7685851318944848e-05,
+      "loss": 0.0018,
+      "step": 1220
+    },
+    {
+      "epoch": 2.9467065868263473,
+      "grad_norm": 42453.53515625,
+      "learning_rate": 1.762589928057554e-05,
+      "loss": 0.0002,
+      "step": 1230
+    },
+    {
+      "epoch": 2.9706586826347303,
+      "grad_norm": 49538.61328125,
+      "learning_rate": 1.7565947242206235e-05,
+      "loss": 0.001,
+      "step": 1240
+    },
+    {
+      "epoch": 2.9946107784431137,
+      "grad_norm": 121568.9453125,
+      "learning_rate": 1.750599520383693e-05,
+      "loss": 0.0007,
+      "step": 1250
+    },
+    {
+      "epoch": 2.9994011976047905,
+      "eval_accuracy": 0.9823143001515917,
+      "eval_f1": 0.9830398966241318,
+      "eval_loss": 0.0027729119174182415,
+      "eval_precision": 0.9666454891994918,
+      "eval_recall": 1.0,
+      "eval_runtime": 156.768,
+      "eval_samples_per_second": 37.871,
+      "eval_steps_per_second": 2.373,
+      "step": 1252
+    },
+    {
+      "epoch": 3.0188622754491017,
+      "grad_norm": 54773.9140625,
+      "learning_rate": 1.744604316546763e-05,
+      "loss": 0.0008,
+      "step": 1260
+    },
+    {
+      "epoch": 3.042814371257485,
+      "grad_norm": 130051.09375,
+      "learning_rate": 1.7386091127098322e-05,
+      "loss": 0.0004,
+      "step": 1270
+    },
+    {
+      "epoch": 3.066766467065868,
+      "grad_norm": 38819.87109375,
+      "learning_rate": 1.732613908872902e-05,
+      "loss": 0.0012,
+      "step": 1280
+    },
+    {
+      "epoch": 3.0907185628742515,
+      "grad_norm": 36351.796875,
+      "learning_rate": 1.7266187050359712e-05,
+      "loss": 0.0009,
+      "step": 1290
+    },
+    {
+      "epoch": 3.114670658682635,
+      "grad_norm": 71120.03125,
+      "learning_rate": 1.720623501199041e-05,
+      "loss": 0.0027,
+      "step": 1300
+    },
+    {
+      "epoch": 3.138622754491018,
+      "grad_norm": 50392.36328125,
+      "learning_rate": 1.7146282973621106e-05,
+      "loss": 0.0045,
+      "step": 1310
+    },
+    {
+      "epoch": 3.1625748502994013,
+      "grad_norm": 25353.765625,
+      "learning_rate": 1.70863309352518e-05,
+      "loss": 0.0016,
+      "step": 1320
+    },
+    {
+      "epoch": 3.1865269461077843,
+      "grad_norm": 55808.15625,
+      "learning_rate": 1.7026378896882496e-05,
+      "loss": 0.002,
+      "step": 1330
+    },
+    {
+      "epoch": 3.2104790419161677,
+      "grad_norm": 35924.5078125,
+      "learning_rate": 1.6966426858513193e-05,
+      "loss": 0.0002,
+      "step": 1340
+    },
+    {
+      "epoch": 3.2344311377245507,
+      "grad_norm": 8763.8037109375,
+      "learning_rate": 1.6906474820143887e-05,
+      "loss": 0.0005,
+      "step": 1350
+    },
+    {
+      "epoch": 3.258383233532934,
+      "grad_norm": 3576.546875,
+      "learning_rate": 1.684652278177458e-05,
+      "loss": 0.0004,
+      "step": 1360
+    },
+    {
+      "epoch": 3.2823353293413176,
+      "grad_norm": 53395.13671875,
+      "learning_rate": 1.6786570743405277e-05,
+      "loss": 0.0001,
+      "step": 1370
+    },
+    {
+      "epoch": 3.3062874251497005,
+      "grad_norm": 14970.0224609375,
+      "learning_rate": 1.6726618705035974e-05,
+      "loss": 0.0001,
+      "step": 1380
+    },
+    {
+      "epoch": 3.330239520958084,
+      "grad_norm": 62940.63671875,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.0008,
+      "step": 1390
+    },
+    {
+      "epoch": 3.354191616766467,
+      "grad_norm": 66767.4453125,
+      "learning_rate": 1.660671462829736e-05,
+      "loss": 0.001,
+      "step": 1400
+    },
+    {
+      "epoch": 3.3781437125748504,
+      "grad_norm": 62226.42578125,
+      "learning_rate": 1.6546762589928058e-05,
+      "loss": 0.0015,
+      "step": 1410
+    },
+    {
+      "epoch": 3.4020958083832333,
+      "grad_norm": 43982.64453125,
+      "learning_rate": 1.6486810551558755e-05,
+      "loss": 0.0018,
+      "step": 1420
+    },
+    {
+      "epoch": 3.4260479041916168,
+      "grad_norm": 114928.5390625,
+      "learning_rate": 1.6426858513189448e-05,
+      "loss": 0.0036,
+      "step": 1430
+    },
+    {
+      "epoch": 3.45,
+      "grad_norm": 101840.3515625,
+      "learning_rate": 1.6366906474820145e-05,
+      "loss": 0.0011,
+      "step": 1440
+    },
+    {
+      "epoch": 3.473952095808383,
+      "grad_norm": 59594.5078125,
+      "learning_rate": 1.6306954436450842e-05,
+      "loss": 0.0033,
+      "step": 1450
+    },
+    {
+      "epoch": 3.4979041916167666,
+      "grad_norm": 18318.77734375,
+      "learning_rate": 1.6247002398081535e-05,
+      "loss": 0.0004,
+      "step": 1460
+    },
+    {
+      "epoch": 3.5218562874251496,
+      "grad_norm": 18281.22265625,
+      "learning_rate": 1.6187050359712232e-05,
+      "loss": 0.0003,
+      "step": 1470
+    },
+    {
+      "epoch": 3.545808383233533,
+      "grad_norm": 13549.404296875,
+      "learning_rate": 1.6127098321342926e-05,
+      "loss": 0.0009,
+      "step": 1480
+    },
+    {
+      "epoch": 3.569760479041916,
+      "grad_norm": 13404.5205078125,
+      "learning_rate": 1.6067146282973623e-05,
+      "loss": 0.0007,
+      "step": 1490
+    },
+    {
+      "epoch": 3.5937125748502994,
+      "grad_norm": 36192.89453125,
+      "learning_rate": 1.600719424460432e-05,
+      "loss": 0.0002,
+      "step": 1500
+    },
+    {
+      "epoch": 3.617664670658683,
+      "grad_norm": 38474.55859375,
+      "learning_rate": 1.5947242206235013e-05,
+      "loss": 0.0017,
+      "step": 1510
+    },
+    {
+      "epoch": 3.641616766467066,
+      "grad_norm": 36329.5,
+      "learning_rate": 1.5887290167865707e-05,
+      "loss": 0.0006,
+      "step": 1520
+    },
+    {
+      "epoch": 3.665568862275449,
+      "grad_norm": 839098.25,
+      "learning_rate": 1.5827338129496403e-05,
+      "loss": 0.0004,
+      "step": 1530
+    },
+    {
+      "epoch": 3.689520958083832,
+      "grad_norm": 29361.619140625,
+      "learning_rate": 1.57673860911271e-05,
+      "loss": 0.001,
+      "step": 1540
+    },
+    {
+      "epoch": 3.7134730538922156,
+      "grad_norm": 14026.3037109375,
+      "learning_rate": 1.5707434052757794e-05,
+      "loss": 0.0009,
+      "step": 1550
+    },
+    {
+      "epoch": 3.7374251497005986,
+      "grad_norm": 17467.173828125,
+      "learning_rate": 1.564748201438849e-05,
+      "loss": 0.0016,
+      "step": 1560
+    },
+    {
+      "epoch": 3.761377245508982,
+      "grad_norm": 27251.703125,
+      "learning_rate": 1.5587529976019188e-05,
+      "loss": 0.001,
+      "step": 1570
+    },
+    {
+      "epoch": 3.7853293413173654,
+      "grad_norm": 54221.84765625,
+      "learning_rate": 1.552757793764988e-05,
+      "loss": 0.0002,
+      "step": 1580
+    },
+    {
+      "epoch": 3.8092814371257484,
+      "grad_norm": 22032.01953125,
+      "learning_rate": 1.5467625899280575e-05,
+      "loss": 0.001,
+      "step": 1590
+    },
+    {
+      "epoch": 3.833233532934132,
+      "grad_norm": 32913.08203125,
+      "learning_rate": 1.540767386091127e-05,
+      "loss": 0.0019,
+      "step": 1600
+    },
+    {
+      "epoch": 3.857185628742515,
+      "grad_norm": 16616.380859375,
+      "learning_rate": 1.534772182254197e-05,
+      "loss": 0.0009,
+      "step": 1610
+    },
+    {
+      "epoch": 3.8811377245508982,
+      "grad_norm": 80629.234375,
+      "learning_rate": 1.5287769784172665e-05,
+      "loss": 0.0001,
+      "step": 1620
+    },
+    {
+      "epoch": 3.905089820359281,
+      "grad_norm": 16487.8671875,
+      "learning_rate": 1.5227817745803359e-05,
+      "loss": 0.0007,
+      "step": 1630
+    },
+    {
+      "epoch": 3.9290419161676646,
+      "grad_norm": 11642.541015625,
+      "learning_rate": 1.5167865707434052e-05,
+      "loss": 0.0011,
+      "step": 1640
+    },
+    {
+      "epoch": 3.952994011976048,
+      "grad_norm": 37791.38671875,
+      "learning_rate": 1.5107913669064749e-05,
+      "loss": 0.0004,
+      "step": 1650
+    },
+    {
+      "epoch": 3.976946107784431,
+      "grad_norm": 16768.58203125,
+      "learning_rate": 1.5047961630695444e-05,
+      "loss": 0.0015,
+      "step": 1660
+    },
+    {
+      "epoch": 3.998502994011976,
+      "eval_accuracy": 0.9984840828701365,
+      "eval_f1": 0.9985233798195242,
+      "eval_loss": 0.0018289362778887153,
+      "eval_precision": 0.997051114023591,
+      "eval_recall": 1.0,
+      "eval_runtime": 157.3988,
+      "eval_samples_per_second": 37.719,
+      "eval_steps_per_second": 2.363,
+      "step": 1669
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 4170,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.478687595449549e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9615c5ea3e960e79d4b8cdc71a576ac4a577dbd92d77caf40421c0eddaea1df1
+size 5713