Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

config.json +29 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +1682 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "xlm-roberta-large",
+  "architectures": [
+    "XLMRobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2c4235e0122df8053e5b145ce022717e48f8b11f7ae6b8747405efa073ab860
+size 2239618672

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fe5a3a9c7de4dff2e4a0a958fda8ad34f65f134b8eb103f3940d2af4ef5b11
+size 4479472721

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb3ca69ddef97a508052d0fb90faf53edae00a390ad05f5f5e1d42c9e09606d9
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:261690907322abaddd2cd63061922e342503711f5208de9f0dbbbb4535b0bbd4
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1682 @@

+{
+  "best_metric": 0.20471014082431793,
+  "best_model_checkpoint": "./results_all/checkpoint-2000",
+  "epoch": 9.945750452079565,
+  "eval_steps": 1000,
+  "global_step": 11000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 5.776618480682373,
+      "learning_rate": 9.954792043399639e-06,
+      "loss": 0.4562,
+      "step": 50
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 4.7439141273498535,
+      "learning_rate": 9.909584086799278e-06,
+      "loss": 0.2825,
+      "step": 100
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 8.098787307739258,
+      "learning_rate": 9.864376130198916e-06,
+      "loss": 0.248,
+      "step": 150
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 4.512809753417969,
+      "learning_rate": 9.819168173598554e-06,
+      "loss": 0.2309,
+      "step": 200
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 4.11778450012207,
+      "learning_rate": 9.773960216998193e-06,
+      "loss": 0.2357,
+      "step": 250
+    },
+    {
+      "epoch": 0.27,
+      "grad_norm": 2.7208287715911865,
+      "learning_rate": 9.728752260397831e-06,
+      "loss": 0.2285,
+      "step": 300
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 5.254836082458496,
+      "learning_rate": 9.68354430379747e-06,
+      "loss": 0.2189,
+      "step": 350
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 6.350110054016113,
+      "learning_rate": 9.638336347197107e-06,
+      "loss": 0.2197,
+      "step": 400
+    },
+    {
+      "epoch": 0.41,
+      "grad_norm": 3.0524585247039795,
+      "learning_rate": 9.593128390596746e-06,
+      "loss": 0.2108,
+      "step": 450
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 3.113680362701416,
+      "learning_rate": 9.547920433996384e-06,
+      "loss": 0.206,
+      "step": 500
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 3.020354986190796,
+      "learning_rate": 9.502712477396022e-06,
+      "loss": 0.2123,
+      "step": 550
+    },
+    {
+      "epoch": 0.54,
+      "grad_norm": 3.340101480484009,
+      "learning_rate": 9.457504520795661e-06,
+      "loss": 0.2137,
+      "step": 600
+    },
+    {
+      "epoch": 0.59,
+      "grad_norm": 4.386758804321289,
+      "learning_rate": 9.412296564195299e-06,
+      "loss": 0.2071,
+      "step": 650
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 4.073632717132568,
+      "learning_rate": 9.367088607594937e-06,
+      "loss": 0.2063,
+      "step": 700
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 2.6999106407165527,
+      "learning_rate": 9.321880650994576e-06,
+      "loss": 0.2089,
+      "step": 750
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 2.527625322341919,
+      "learning_rate": 9.276672694394214e-06,
+      "loss": 0.2025,
+      "step": 800
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 3.8473989963531494,
+      "learning_rate": 9.231464737793852e-06,
+      "loss": 0.2059,
+      "step": 850
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 2.255798816680908,
+      "learning_rate": 9.186256781193491e-06,
+      "loss": 0.2018,
+      "step": 900
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 3.915482521057129,
+      "learning_rate": 9.14104882459313e-06,
+      "loss": 0.1985,
+      "step": 950
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 2.375387191772461,
+      "learning_rate": 9.095840867992769e-06,
+      "loss": 0.2014,
+      "step": 1000
+    },
+    {
+      "epoch": 0.9,
+      "eval_accuracy": 0.9132575757575757,
+      "eval_auc": 0.9716989722609015,
+      "eval_f1": 0.875638101444553,
+      "eval_loss": 0.20884743332862854,
+      "eval_runtime": 53.4002,
+      "eval_samples_per_second": 247.19,
+      "eval_steps_per_second": 3.876,
+      "step": 1000
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 3.186845302581787,
+      "learning_rate": 9.050632911392407e-06,
+      "loss": 0.1916,
+      "step": 1050
+    },
+    {
+      "epoch": 0.99,
+      "grad_norm": 3.6988000869750977,
+      "learning_rate": 9.005424954792044e-06,
+      "loss": 0.1913,
+      "step": 1100
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 3.2452192306518555,
+      "learning_rate": 8.960216998191682e-06,
+      "loss": 0.1836,
+      "step": 1150
+    },
+    {
+      "epoch": 1.08,
+      "grad_norm": 3.898977041244507,
+      "learning_rate": 8.91500904159132e-06,
+      "loss": 0.1848,
+      "step": 1200
+    },
+    {
+      "epoch": 1.13,
+      "grad_norm": 1.8791488409042358,
+      "learning_rate": 8.86980108499096e-06,
+      "loss": 0.1812,
+      "step": 1250
+    },
+    {
+      "epoch": 1.18,
+      "grad_norm": 3.3532564640045166,
+      "learning_rate": 8.824593128390597e-06,
+      "loss": 0.1785,
+      "step": 1300
+    },
+    {
+      "epoch": 1.22,
+      "grad_norm": 3.177605390548706,
+      "learning_rate": 8.779385171790235e-06,
+      "loss": 0.1888,
+      "step": 1350
+    },
+    {
+      "epoch": 1.27,
+      "grad_norm": 4.205639362335205,
+      "learning_rate": 8.734177215189874e-06,
+      "loss": 0.1807,
+      "step": 1400
+    },
+    {
+      "epoch": 1.31,
+      "grad_norm": 2.800039052963257,
+      "learning_rate": 8.688969258589512e-06,
+      "loss": 0.1851,
+      "step": 1450
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 3.714484930038452,
+      "learning_rate": 8.64376130198915e-06,
+      "loss": 0.1854,
+      "step": 1500
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 3.9688687324523926,
+      "learning_rate": 8.59855334538879e-06,
+      "loss": 0.1781,
+      "step": 1550
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 3.0902647972106934,
+      "learning_rate": 8.553345388788427e-06,
+      "loss": 0.1789,
+      "step": 1600
+    },
+    {
+      "epoch": 1.49,
+      "grad_norm": 2.457174301147461,
+      "learning_rate": 8.508137432188067e-06,
+      "loss": 0.1743,
+      "step": 1650
+    },
+    {
+      "epoch": 1.54,
+      "grad_norm": 2.403961420059204,
+      "learning_rate": 8.462929475587705e-06,
+      "loss": 0.1854,
+      "step": 1700
+    },
+    {
+      "epoch": 1.58,
+      "grad_norm": 4.13264274597168,
+      "learning_rate": 8.417721518987342e-06,
+      "loss": 0.1833,
+      "step": 1750
+    },
+    {
+      "epoch": 1.63,
+      "grad_norm": 3.0983669757843018,
+      "learning_rate": 8.372513562386982e-06,
+      "loss": 0.1796,
+      "step": 1800
+    },
+    {
+      "epoch": 1.67,
+      "grad_norm": 2.797614812850952,
+      "learning_rate": 8.327305605786618e-06,
+      "loss": 0.1745,
+      "step": 1850
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 2.1761841773986816,
+      "learning_rate": 8.282097649186258e-06,
+      "loss": 0.1738,
+      "step": 1900
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 2.973947286605835,
+      "learning_rate": 8.236889692585895e-06,
+      "loss": 0.1756,
+      "step": 1950
+    },
+    {
+      "epoch": 1.81,
+      "grad_norm": 3.152864694595337,
+      "learning_rate": 8.191681735985533e-06,
+      "loss": 0.1755,
+      "step": 2000
+    },
+    {
+      "epoch": 1.81,
+      "eval_accuracy": 0.9200757575757575,
+      "eval_auc": 0.974728335511236,
+      "eval_f1": 0.8865225341508013,
+      "eval_loss": 0.20471014082431793,
+      "eval_runtime": 53.1495,
+      "eval_samples_per_second": 248.356,
+      "eval_steps_per_second": 3.895,
+      "step": 2000
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 2.979734420776367,
+      "learning_rate": 8.146473779385173e-06,
+      "loss": 0.1762,
+      "step": 2050
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 2.802384853363037,
+      "learning_rate": 8.10126582278481e-06,
+      "loss": 0.1764,
+      "step": 2100
+    },
+    {
+      "epoch": 1.94,
+      "grad_norm": 4.056249618530273,
+      "learning_rate": 8.056057866184448e-06,
+      "loss": 0.1786,
+      "step": 2150
+    },
+    {
+      "epoch": 1.99,
+      "grad_norm": 3.7627487182617188,
+      "learning_rate": 8.010849909584088e-06,
+      "loss": 0.1755,
+      "step": 2200
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 2.634819984436035,
+      "learning_rate": 7.965641952983726e-06,
+      "loss": 0.1642,
+      "step": 2250
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 3.3615341186523438,
+      "learning_rate": 7.920433996383365e-06,
+      "loss": 0.1581,
+      "step": 2300
+    },
+    {
+      "epoch": 2.12,
+      "grad_norm": 3.702347993850708,
+      "learning_rate": 7.875226039783003e-06,
+      "loss": 0.1597,
+      "step": 2350
+    },
+    {
+      "epoch": 2.17,
+      "grad_norm": 11.943075180053711,
+      "learning_rate": 7.83001808318264e-06,
+      "loss": 0.1565,
+      "step": 2400
+    },
+    {
+      "epoch": 2.22,
+      "grad_norm": 4.544873237609863,
+      "learning_rate": 7.78481012658228e-06,
+      "loss": 0.1607,
+      "step": 2450
+    },
+    {
+      "epoch": 2.26,
+      "grad_norm": 3.886201858520508,
+      "learning_rate": 7.739602169981918e-06,
+      "loss": 0.155,
+      "step": 2500
+    },
+    {
+      "epoch": 2.31,
+      "grad_norm": 2.688396692276001,
+      "learning_rate": 7.694394213381556e-06,
+      "loss": 0.1637,
+      "step": 2550
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 4.257401466369629,
+      "learning_rate": 7.649186256781194e-06,
+      "loss": 0.1518,
+      "step": 2600
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 3.5023510456085205,
+      "learning_rate": 7.603978300180832e-06,
+      "loss": 0.1545,
+      "step": 2650
+    },
+    {
+      "epoch": 2.44,
+      "grad_norm": 3.3911523818969727,
+      "learning_rate": 7.558770343580471e-06,
+      "loss": 0.1587,
+      "step": 2700
+    },
+    {
+      "epoch": 2.49,
+      "grad_norm": 3.0427675247192383,
+      "learning_rate": 7.513562386980109e-06,
+      "loss": 0.1592,
+      "step": 2750
+    },
+    {
+      "epoch": 2.53,
+      "grad_norm": 3.7051773071289062,
+      "learning_rate": 7.468354430379747e-06,
+      "loss": 0.1643,
+      "step": 2800
+    },
+    {
+      "epoch": 2.58,
+      "grad_norm": 3.8492684364318848,
+      "learning_rate": 7.423146473779386e-06,
+      "loss": 0.1554,
+      "step": 2850
+    },
+    {
+      "epoch": 2.62,
+      "grad_norm": 2.938525676727295,
+      "learning_rate": 7.377938517179024e-06,
+      "loss": 0.1612,
+      "step": 2900
+    },
+    {
+      "epoch": 2.67,
+      "grad_norm": 2.807177782058716,
+      "learning_rate": 7.3327305605786624e-06,
+      "loss": 0.1559,
+      "step": 2950
+    },
+    {
+      "epoch": 2.71,
+      "grad_norm": 2.838815212249756,
+      "learning_rate": 7.287522603978301e-06,
+      "loss": 0.1586,
+      "step": 3000
+    },
+    {
+      "epoch": 2.71,
+      "eval_accuracy": 0.9209848484848485,
+      "eval_auc": 0.9746563484736904,
+      "eval_f1": 0.885447556287754,
+      "eval_loss": 0.2061145007610321,
+      "eval_runtime": 53.3625,
+      "eval_samples_per_second": 247.365,
+      "eval_steps_per_second": 3.879,
+      "step": 3000
+    },
+    {
+      "epoch": 2.76,
+      "grad_norm": 3.3024938106536865,
+      "learning_rate": 7.24231464737794e-06,
+      "loss": 0.1613,
+      "step": 3050
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 3.188176393508911,
+      "learning_rate": 7.1971066907775775e-06,
+      "loss": 0.1613,
+      "step": 3100
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 2.8831701278686523,
+      "learning_rate": 7.151898734177216e-06,
+      "loss": 0.152,
+      "step": 3150
+    },
+    {
+      "epoch": 2.89,
+      "grad_norm": 3.3415660858154297,
+      "learning_rate": 7.106690777576855e-06,
+      "loss": 0.1606,
+      "step": 3200
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 3.208709478378296,
+      "learning_rate": 7.061482820976493e-06,
+      "loss": 0.1569,
+      "step": 3250
+    },
+    {
+      "epoch": 2.98,
+      "grad_norm": 3.284241199493408,
+      "learning_rate": 7.01627486437613e-06,
+      "loss": 0.1586,
+      "step": 3300
+    },
+    {
+      "epoch": 3.03,
+      "grad_norm": 4.061830520629883,
+      "learning_rate": 6.971066907775769e-06,
+      "loss": 0.1494,
+      "step": 3350
+    },
+    {
+      "epoch": 3.07,
+      "grad_norm": 4.065818786621094,
+      "learning_rate": 6.925858951175407e-06,
+      "loss": 0.142,
+      "step": 3400
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 4.541987895965576,
+      "learning_rate": 6.8806509945750455e-06,
+      "loss": 0.1436,
+      "step": 3450
+    },
+    {
+      "epoch": 3.16,
+      "grad_norm": 3.972423791885376,
+      "learning_rate": 6.835443037974684e-06,
+      "loss": 0.1323,
+      "step": 3500
+    },
+    {
+      "epoch": 3.21,
+      "grad_norm": 6.7635579109191895,
+      "learning_rate": 6.790235081374322e-06,
+      "loss": 0.1398,
+      "step": 3550
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 3.5160984992980957,
+      "learning_rate": 6.745027124773961e-06,
+      "loss": 0.1418,
+      "step": 3600
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 2.7026939392089844,
+      "learning_rate": 6.699819168173599e-06,
+      "loss": 0.1352,
+      "step": 3650
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 3.9507486820220947,
+      "learning_rate": 6.654611211573238e-06,
+      "loss": 0.1406,
+      "step": 3700
+    },
+    {
+      "epoch": 3.39,
+      "grad_norm": 3.7145042419433594,
+      "learning_rate": 6.609403254972876e-06,
+      "loss": 0.1355,
+      "step": 3750
+    },
+    {
+      "epoch": 3.44,
+      "grad_norm": 3.2809576988220215,
+      "learning_rate": 6.564195298372514e-06,
+      "loss": 0.1406,
+      "step": 3800
+    },
+    {
+      "epoch": 3.48,
+      "grad_norm": 3.0519375801086426,
+      "learning_rate": 6.518987341772153e-06,
+      "loss": 0.1352,
+      "step": 3850
+    },
+    {
+      "epoch": 3.53,
+      "grad_norm": 3.6294689178466797,
+      "learning_rate": 6.473779385171791e-06,
+      "loss": 0.1396,
+      "step": 3900
+    },
+    {
+      "epoch": 3.57,
+      "grad_norm": 3.4787845611572266,
+      "learning_rate": 6.4285714285714295e-06,
+      "loss": 0.1409,
+      "step": 3950
+    },
+    {
+      "epoch": 3.62,
+      "grad_norm": 3.510904550552368,
+      "learning_rate": 6.383363471971068e-06,
+      "loss": 0.141,
+      "step": 4000
+    },
+    {
+      "epoch": 3.62,
+      "eval_accuracy": 0.9195454545454546,
+      "eval_auc": 0.9743002838306037,
+      "eval_f1": 0.8847406121120035,
+      "eval_loss": 0.22910813987255096,
+      "eval_runtime": 53.2967,
+      "eval_samples_per_second": 247.67,
+      "eval_steps_per_second": 3.884,
+      "step": 4000
+    },
+    {
+      "epoch": 3.66,
+      "grad_norm": 3.591719150543213,
+      "learning_rate": 6.338155515370705e-06,
+      "loss": 0.1368,
+      "step": 4050
+    },
+    {
+      "epoch": 3.71,
+      "grad_norm": 4.009765625,
+      "learning_rate": 6.292947558770344e-06,
+      "loss": 0.1344,
+      "step": 4100
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 3.1466119289398193,
+      "learning_rate": 6.247739602169982e-06,
+      "loss": 0.1424,
+      "step": 4150
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 3.8974554538726807,
+      "learning_rate": 6.20253164556962e-06,
+      "loss": 0.1374,
+      "step": 4200
+    },
+    {
+      "epoch": 3.84,
+      "grad_norm": 3.8210461139678955,
+      "learning_rate": 6.157323688969259e-06,
+      "loss": 0.1365,
+      "step": 4250
+    },
+    {
+      "epoch": 3.89,
+      "grad_norm": 3.865953207015991,
+      "learning_rate": 6.1121157323688975e-06,
+      "loss": 0.1416,
+      "step": 4300
+    },
+    {
+      "epoch": 3.93,
+      "grad_norm": 3.4419188499450684,
+      "learning_rate": 6.066907775768536e-06,
+      "loss": 0.1394,
+      "step": 4350
+    },
+    {
+      "epoch": 3.98,
+      "grad_norm": 3.2424869537353516,
+      "learning_rate": 6.021699819168174e-06,
+      "loss": 0.1379,
+      "step": 4400
+    },
+    {
+      "epoch": 4.02,
+      "grad_norm": 4.413413047790527,
+      "learning_rate": 5.9764918625678126e-06,
+      "loss": 0.1305,
+      "step": 4450
+    },
+    {
+      "epoch": 4.07,
+      "grad_norm": 5.382500648498535,
+      "learning_rate": 5.931283905967451e-06,
+      "loss": 0.1246,
+      "step": 4500
+    },
+    {
+      "epoch": 4.11,
+      "grad_norm": 3.337273120880127,
+      "learning_rate": 5.886075949367089e-06,
+      "loss": 0.121,
+      "step": 4550
+    },
+    {
+      "epoch": 4.16,
+      "grad_norm": 4.4842705726623535,
+      "learning_rate": 5.840867992766728e-06,
+      "loss": 0.12,
+      "step": 4600
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 3.6167163848876953,
+      "learning_rate": 5.795660036166366e-06,
+      "loss": 0.1223,
+      "step": 4650
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 3.679234027862549,
+      "learning_rate": 5.750452079566005e-06,
+      "loss": 0.123,
+      "step": 4700
+    },
+    {
+      "epoch": 4.29,
+      "grad_norm": 4.960000991821289,
+      "learning_rate": 5.705244122965642e-06,
+      "loss": 0.1175,
+      "step": 4750
+    },
+    {
+      "epoch": 4.34,
+      "grad_norm": 5.214667320251465,
+      "learning_rate": 5.6600361663652806e-06,
+      "loss": 0.1237,
+      "step": 4800
+    },
+    {
+      "epoch": 4.39,
+      "grad_norm": 3.636551856994629,
+      "learning_rate": 5.614828209764918e-06,
+      "loss": 0.121,
+      "step": 4850
+    },
+    {
+      "epoch": 4.43,
+      "grad_norm": 4.095456123352051,
+      "learning_rate": 5.569620253164557e-06,
+      "loss": 0.118,
+      "step": 4900
+    },
+    {
+      "epoch": 4.48,
+      "grad_norm": 3.4457345008850098,
+      "learning_rate": 5.524412296564196e-06,
+      "loss": 0.1226,
+      "step": 4950
+    },
+    {
+      "epoch": 4.52,
+      "grad_norm": 4.705195903778076,
+      "learning_rate": 5.479204339963834e-06,
+      "loss": 0.1232,
+      "step": 5000
+    },
+    {
+      "epoch": 4.52,
+      "eval_accuracy": 0.9215909090909091,
+      "eval_auc": 0.9733304450242736,
+      "eval_f1": 0.8873653281096964,
+      "eval_loss": 0.26092976331710815,
+      "eval_runtime": 52.7152,
+      "eval_samples_per_second": 250.402,
+      "eval_steps_per_second": 3.927,
+      "step": 5000
+    },
+    {
+      "epoch": 4.57,
+      "grad_norm": 5.460992336273193,
+      "learning_rate": 5.433996383363472e-06,
+      "loss": 0.1191,
+      "step": 5050
+    },
+    {
+      "epoch": 4.61,
+      "grad_norm": 4.26489782333374,
+      "learning_rate": 5.388788426763111e-06,
+      "loss": 0.1206,
+      "step": 5100
+    },
+    {
+      "epoch": 4.66,
+      "grad_norm": 5.637485027313232,
+      "learning_rate": 5.343580470162749e-06,
+      "loss": 0.1237,
+      "step": 5150
+    },
+    {
+      "epoch": 4.7,
+      "grad_norm": 3.848707675933838,
+      "learning_rate": 5.298372513562387e-06,
+      "loss": 0.1204,
+      "step": 5200
+    },
+    {
+      "epoch": 4.75,
+      "grad_norm": 3.9218106269836426,
+      "learning_rate": 5.253164556962026e-06,
+      "loss": 0.1226,
+      "step": 5250
+    },
+    {
+      "epoch": 4.79,
+      "grad_norm": 6.16193151473999,
+      "learning_rate": 5.2079566003616645e-06,
+      "loss": 0.1267,
+      "step": 5300
+    },
+    {
+      "epoch": 4.84,
+      "grad_norm": 3.432070016860962,
+      "learning_rate": 5.162748643761303e-06,
+      "loss": 0.1232,
+      "step": 5350
+    },
+    {
+      "epoch": 4.88,
+      "grad_norm": 4.200852394104004,
+      "learning_rate": 5.117540687160941e-06,
+      "loss": 0.1219,
+      "step": 5400
+    },
+    {
+      "epoch": 4.93,
+      "grad_norm": 5.724985599517822,
+      "learning_rate": 5.07233273056058e-06,
+      "loss": 0.1215,
+      "step": 5450
+    },
+    {
+      "epoch": 4.97,
+      "grad_norm": 5.0651445388793945,
+      "learning_rate": 5.0271247739602165e-06,
+      "loss": 0.1236,
+      "step": 5500
+    },
+    {
+      "epoch": 5.02,
+      "grad_norm": 4.59727144241333,
+      "learning_rate": 4.981916817359856e-06,
+      "loss": 0.1177,
+      "step": 5550
+    },
+    {
+      "epoch": 5.06,
+      "grad_norm": 4.37638521194458,
+      "learning_rate": 4.936708860759495e-06,
+      "loss": 0.1035,
+      "step": 5600
+    },
+    {
+      "epoch": 5.11,
+      "grad_norm": 4.5621337890625,
+      "learning_rate": 4.8915009041591325e-06,
+      "loss": 0.108,
+      "step": 5650
+    },
+    {
+      "epoch": 5.15,
+      "grad_norm": 3.9966683387756348,
+      "learning_rate": 4.84629294755877e-06,
+      "loss": 0.1057,
+      "step": 5700
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 3.630200147628784,
+      "learning_rate": 4.801084990958409e-06,
+      "loss": 0.1043,
+      "step": 5750
+    },
+    {
+      "epoch": 5.24,
+      "grad_norm": 4.338815689086914,
+      "learning_rate": 4.755877034358048e-06,
+      "loss": 0.1066,
+      "step": 5800
+    },
+    {
+      "epoch": 5.29,
+      "grad_norm": 2.695749282836914,
+      "learning_rate": 4.710669077757685e-06,
+      "loss": 0.1059,
+      "step": 5850
+    },
+    {
+      "epoch": 5.33,
+      "grad_norm": 7.642387866973877,
+      "learning_rate": 4.665461121157324e-06,
+      "loss": 0.1073,
+      "step": 5900
+    },
+    {
+      "epoch": 5.38,
+      "grad_norm": 6.227341651916504,
+      "learning_rate": 4.620253164556963e-06,
+      "loss": 0.1115,
+      "step": 5950
+    },
+    {
+      "epoch": 5.42,
+      "grad_norm": 5.479009628295898,
+      "learning_rate": 4.575045207956601e-06,
+      "loss": 0.1058,
+      "step": 6000
+    },
+    {
+      "epoch": 5.42,
+      "eval_accuracy": 0.913409090909091,
+      "eval_auc": 0.9717929602890524,
+      "eval_f1": 0.8788553259141495,
+      "eval_loss": 0.2780343294143677,
+      "eval_runtime": 53.9647,
+      "eval_samples_per_second": 244.604,
+      "eval_steps_per_second": 3.836,
+      "step": 6000
+    },
+    {
+      "epoch": 5.47,
+      "grad_norm": 4.2524871826171875,
+      "learning_rate": 4.529837251356239e-06,
+      "loss": 0.1053,
+      "step": 6050
+    },
+    {
+      "epoch": 5.52,
+      "grad_norm": 3.475076675415039,
+      "learning_rate": 4.484629294755877e-06,
+      "loss": 0.1068,
+      "step": 6100
+    },
+    {
+      "epoch": 5.56,
+      "grad_norm": 4.190853118896484,
+      "learning_rate": 4.439421338155516e-06,
+      "loss": 0.1067,
+      "step": 6150
+    },
+    {
+      "epoch": 5.61,
+      "grad_norm": 4.308005332946777,
+      "learning_rate": 4.394213381555154e-06,
+      "loss": 0.104,
+      "step": 6200
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 3.767345666885376,
+      "learning_rate": 4.349005424954793e-06,
+      "loss": 0.1056,
+      "step": 6250
+    },
+    {
+      "epoch": 5.7,
+      "grad_norm": 3.9116408824920654,
+      "learning_rate": 4.303797468354431e-06,
+      "loss": 0.1123,
+      "step": 6300
+    },
+    {
+      "epoch": 5.74,
+      "grad_norm": 5.004114151000977,
+      "learning_rate": 4.258589511754069e-06,
+      "loss": 0.1099,
+      "step": 6350
+    },
+    {
+      "epoch": 5.79,
+      "grad_norm": 4.2018208503723145,
+      "learning_rate": 4.213381555153707e-06,
+      "loss": 0.11,
+      "step": 6400
+    },
+    {
+      "epoch": 5.83,
+      "grad_norm": 3.213355779647827,
+      "learning_rate": 4.168173598553346e-06,
+      "loss": 0.1067,
+      "step": 6450
+    },
+    {
+      "epoch": 5.88,
+      "grad_norm": 6.60046911239624,
+      "learning_rate": 4.122965641952984e-06,
+      "loss": 0.1056,
+      "step": 6500
+    },
+    {
+      "epoch": 5.92,
+      "grad_norm": 3.6781232357025146,
+      "learning_rate": 4.077757685352622e-06,
+      "loss": 0.1053,
+      "step": 6550
+    },
+    {
+      "epoch": 5.97,
+      "grad_norm": 3.4061131477355957,
+      "learning_rate": 4.032549728752261e-06,
+      "loss": 0.1084,
+      "step": 6600
+    },
+    {
+      "epoch": 6.01,
+      "grad_norm": 5.02764892578125,
+      "learning_rate": 3.9873417721518995e-06,
+      "loss": 0.1054,
+      "step": 6650
+    },
+    {
+      "epoch": 6.06,
+      "grad_norm": 4.55075740814209,
+      "learning_rate": 3.942133815551537e-06,
+      "loss": 0.0913,
+      "step": 6700
+    },
+    {
+      "epoch": 6.1,
+      "grad_norm": 4.121731758117676,
+      "learning_rate": 3.896925858951175e-06,
+      "loss": 0.0928,
+      "step": 6750
+    },
+    {
+      "epoch": 6.15,
+      "grad_norm": 4.887127876281738,
+      "learning_rate": 3.851717902350814e-06,
+      "loss": 0.092,
+      "step": 6800
+    },
+    {
+      "epoch": 6.19,
+      "grad_norm": 6.114282131195068,
+      "learning_rate": 3.8065099457504524e-06,
+      "loss": 0.091,
+      "step": 6850
+    },
+    {
+      "epoch": 6.24,
+      "grad_norm": 3.952768564224243,
+      "learning_rate": 3.7613019891500906e-06,
+      "loss": 0.093,
+      "step": 6900
+    },
+    {
+      "epoch": 6.28,
+      "grad_norm": 4.0614800453186035,
+      "learning_rate": 3.7160940325497293e-06,
+      "loss": 0.0871,
+      "step": 6950
+    },
+    {
+      "epoch": 6.33,
+      "grad_norm": 5.143472671508789,
+      "learning_rate": 3.6708860759493675e-06,
+      "loss": 0.093,
+      "step": 7000
+    },
+    {
+      "epoch": 6.33,
+      "eval_accuracy": 0.9113636363636364,
+      "eval_auc": 0.9692261016594985,
+      "eval_f1": 0.8748127541194094,
+      "eval_loss": 0.2975885272026062,
+      "eval_runtime": 53.1958,
+      "eval_samples_per_second": 248.14,
+      "eval_steps_per_second": 3.891,
+      "step": 7000
+    },
+    {
+      "epoch": 6.37,
+      "grad_norm": 3.874722957611084,
+      "learning_rate": 3.6256781193490057e-06,
+      "loss": 0.0949,
+      "step": 7050
+    },
+    {
+      "epoch": 6.42,
+      "grad_norm": 3.9388911724090576,
+      "learning_rate": 3.580470162748644e-06,
+      "loss": 0.0963,
+      "step": 7100
+    },
+    {
+      "epoch": 6.46,
+      "grad_norm": 3.774689197540283,
+      "learning_rate": 3.535262206148282e-06,
+      "loss": 0.0915,
+      "step": 7150
+    },
+    {
+      "epoch": 6.51,
+      "grad_norm": 5.495402812957764,
+      "learning_rate": 3.4900542495479204e-06,
+      "loss": 0.0935,
+      "step": 7200
+    },
+    {
+      "epoch": 6.56,
+      "grad_norm": 5.229218006134033,
+      "learning_rate": 3.444846292947559e-06,
+      "loss": 0.0955,
+      "step": 7250
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 8.326276779174805,
+      "learning_rate": 3.3996383363471973e-06,
+      "loss": 0.0905,
+      "step": 7300
+    },
+    {
+      "epoch": 6.65,
+      "grad_norm": 3.566875457763672,
+      "learning_rate": 3.354430379746836e-06,
+      "loss": 0.0988,
+      "step": 7350
+    },
+    {
+      "epoch": 6.69,
+      "grad_norm": 4.5591559410095215,
+      "learning_rate": 3.309222423146474e-06,
+      "loss": 0.0902,
+      "step": 7400
+    },
+    {
+      "epoch": 6.74,
+      "grad_norm": 4.157392978668213,
+      "learning_rate": 3.264014466546113e-06,
+      "loss": 0.0937,
+      "step": 7450
+    },
+    {
+      "epoch": 6.78,
+      "grad_norm": 3.770798444747925,
+      "learning_rate": 3.2188065099457506e-06,
+      "loss": 0.0928,
+      "step": 7500
+    },
+    {
+      "epoch": 6.83,
+      "grad_norm": 4.334375858306885,
+      "learning_rate": 3.173598553345389e-06,
+      "loss": 0.0929,
+      "step": 7550
+    },
+    {
+      "epoch": 6.87,
+      "grad_norm": 4.411478519439697,
+      "learning_rate": 3.1283905967450275e-06,
+      "loss": 0.0921,
+      "step": 7600
+    },
+    {
+      "epoch": 6.92,
+      "grad_norm": 3.9217774868011475,
+      "learning_rate": 3.0831826401446657e-06,
+      "loss": 0.0907,
+      "step": 7650
+    },
+    {
+      "epoch": 6.96,
+      "grad_norm": 4.253012657165527,
+      "learning_rate": 3.037974683544304e-06,
+      "loss": 0.0965,
+      "step": 7700
+    },
+    {
+      "epoch": 7.01,
+      "grad_norm": 4.301031112670898,
+      "learning_rate": 2.9927667269439426e-06,
+      "loss": 0.0916,
+      "step": 7750
+    },
+    {
+      "epoch": 7.05,
+      "grad_norm": 4.984923362731934,
+      "learning_rate": 2.947558770343581e-06,
+      "loss": 0.079,
+      "step": 7800
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 6.965585231781006,
+      "learning_rate": 2.9023508137432186e-06,
+      "loss": 0.0856,
+      "step": 7850
+    },
+    {
+      "epoch": 7.14,
+      "grad_norm": 5.262626647949219,
+      "learning_rate": 2.8571428571428573e-06,
+      "loss": 0.0814,
+      "step": 7900
+    },
+    {
+      "epoch": 7.19,
+      "grad_norm": 6.5270771980285645,
+      "learning_rate": 2.8119349005424955e-06,
+      "loss": 0.084,
+      "step": 7950
+    },
+    {
+      "epoch": 7.23,
+      "grad_norm": 4.8120808601379395,
+      "learning_rate": 2.766726943942134e-06,
+      "loss": 0.0811,
+      "step": 8000
+    },
+    {
+      "epoch": 7.23,
+      "eval_accuracy": 0.915530303030303,
+      "eval_auc": 0.969414026787581,
+      "eval_f1": 0.8789753609030717,
+      "eval_loss": 0.3245289623737335,
+      "eval_runtime": 52.4229,
+      "eval_samples_per_second": 251.799,
+      "eval_steps_per_second": 3.949,
+      "step": 8000
+    },
+    {
+      "epoch": 7.28,
+      "grad_norm": 6.172023773193359,
+      "learning_rate": 2.7215189873417724e-06,
+      "loss": 0.0845,
+      "step": 8050
+    },
+    {
+      "epoch": 7.32,
+      "grad_norm": 4.027543544769287,
+      "learning_rate": 2.676311030741411e-06,
+      "loss": 0.081,
+      "step": 8100
+    },
+    {
+      "epoch": 7.37,
+      "grad_norm": 4.544090747833252,
+      "learning_rate": 2.6311030741410492e-06,
+      "loss": 0.0885,
+      "step": 8150
+    },
+    {
+      "epoch": 7.41,
+      "grad_norm": 5.340161323547363,
+      "learning_rate": 2.585895117540687e-06,
+      "loss": 0.0856,
+      "step": 8200
+    },
+    {
+      "epoch": 7.46,
+      "grad_norm": 7.733421325683594,
+      "learning_rate": 2.5406871609403257e-06,
+      "loss": 0.0815,
+      "step": 8250
+    },
+    {
+      "epoch": 7.5,
+      "grad_norm": 5.390356063842773,
+      "learning_rate": 2.495479204339964e-06,
+      "loss": 0.0812,
+      "step": 8300
+    },
+    {
+      "epoch": 7.55,
+      "grad_norm": 4.908448696136475,
+      "learning_rate": 2.4502712477396025e-06,
+      "loss": 0.0814,
+      "step": 8350
+    },
+    {
+      "epoch": 7.59,
+      "grad_norm": 7.446599960327148,
+      "learning_rate": 2.4050632911392408e-06,
+      "loss": 0.0833,
+      "step": 8400
+    },
+    {
+      "epoch": 7.64,
+      "grad_norm": 4.445633888244629,
+      "learning_rate": 2.359855334538879e-06,
+      "loss": 0.0804,
+      "step": 8450
+    },
+    {
+      "epoch": 7.69,
+      "grad_norm": 3.859055757522583,
+      "learning_rate": 2.3146473779385172e-06,
+      "loss": 0.0813,
+      "step": 8500
+    },
+    {
+      "epoch": 7.73,
+      "grad_norm": 8.008405685424805,
+      "learning_rate": 2.269439421338156e-06,
+      "loss": 0.0835,
+      "step": 8550
+    },
+    {
+      "epoch": 7.78,
+      "grad_norm": 4.7624711990356445,
+      "learning_rate": 2.224231464737794e-06,
+      "loss": 0.0827,
+      "step": 8600
+    },
+    {
+      "epoch": 7.82,
+      "grad_norm": 4.379971504211426,
+      "learning_rate": 2.1790235081374323e-06,
+      "loss": 0.079,
+      "step": 8650
+    },
+    {
+      "epoch": 7.87,
+      "grad_norm": 5.949471473693848,
+      "learning_rate": 2.1338155515370705e-06,
+      "loss": 0.0789,
+      "step": 8700
+    },
+    {
+      "epoch": 7.91,
+      "grad_norm": 5.32230281829834,
+      "learning_rate": 2.088607594936709e-06,
+      "loss": 0.0799,
+      "step": 8750
+    },
+    {
+      "epoch": 7.96,
+      "grad_norm": 8.27724552154541,
+      "learning_rate": 2.0433996383363474e-06,
+      "loss": 0.0842,
+      "step": 8800
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 6.394008159637451,
+      "learning_rate": 1.9981916817359856e-06,
+      "loss": 0.0808,
+      "step": 8850
+    },
+    {
+      "epoch": 8.05,
+      "grad_norm": 4.033376693725586,
+      "learning_rate": 1.952983725135624e-06,
+      "loss": 0.0687,
+      "step": 8900
+    },
+    {
+      "epoch": 8.09,
+      "grad_norm": 5.514036178588867,
+      "learning_rate": 1.9077757685352625e-06,
+      "loss": 0.0757,
+      "step": 8950
+    },
+    {
+      "epoch": 8.14,
+      "grad_norm": 3.8214776515960693,
+      "learning_rate": 1.8625678119349007e-06,
+      "loss": 0.0721,
+      "step": 9000
+    },
+    {
+      "epoch": 8.14,
+      "eval_accuracy": 0.9152272727272728,
+      "eval_auc": 0.9681984211284476,
+      "eval_f1": 0.8782239634345413,
+      "eval_loss": 0.34035009145736694,
+      "eval_runtime": 52.8174,
+      "eval_samples_per_second": 249.918,
+      "eval_steps_per_second": 3.919,
+      "step": 9000
+    },
+    {
+      "epoch": 8.18,
+      "grad_norm": 5.467037200927734,
+      "learning_rate": 1.817359855334539e-06,
+      "loss": 0.0761,
+      "step": 9050
+    },
+    {
+      "epoch": 8.23,
+      "grad_norm": 5.79172945022583,
+      "learning_rate": 1.7721518987341774e-06,
+      "loss": 0.074,
+      "step": 9100
+    },
+    {
+      "epoch": 8.27,
+      "grad_norm": 3.8891448974609375,
+      "learning_rate": 1.7269439421338158e-06,
+      "loss": 0.0767,
+      "step": 9150
+    },
+    {
+      "epoch": 8.32,
+      "grad_norm": 4.498980522155762,
+      "learning_rate": 1.6817359855334538e-06,
+      "loss": 0.074,
+      "step": 9200
+    },
+    {
+      "epoch": 8.36,
+      "grad_norm": 5.459845542907715,
+      "learning_rate": 1.6365280289330923e-06,
+      "loss": 0.075,
+      "step": 9250
+    },
+    {
+      "epoch": 8.41,
+      "grad_norm": 6.381141662597656,
+      "learning_rate": 1.5913200723327307e-06,
+      "loss": 0.0767,
+      "step": 9300
+    },
+    {
+      "epoch": 8.45,
+      "grad_norm": 5.742413520812988,
+      "learning_rate": 1.5461121157323692e-06,
+      "loss": 0.0763,
+      "step": 9350
+    },
+    {
+      "epoch": 8.5,
+      "grad_norm": 5.142722129821777,
+      "learning_rate": 1.5009041591320072e-06,
+      "loss": 0.0747,
+      "step": 9400
+    },
+    {
+      "epoch": 8.54,
+      "grad_norm": 5.048556804656982,
+      "learning_rate": 1.4556962025316456e-06,
+      "loss": 0.0748,
+      "step": 9450
+    },
+    {
+      "epoch": 8.59,
+      "grad_norm": 5.201704502105713,
+      "learning_rate": 1.410488245931284e-06,
+      "loss": 0.0748,
+      "step": 9500
+    },
+    {
+      "epoch": 8.63,
+      "grad_norm": 5.143392086029053,
+      "learning_rate": 1.3652802893309225e-06,
+      "loss": 0.076,
+      "step": 9550
+    },
+    {
+      "epoch": 8.68,
+      "grad_norm": 5.493886470794678,
+      "learning_rate": 1.3200723327305607e-06,
+      "loss": 0.0718,
+      "step": 9600
+    },
+    {
+      "epoch": 8.73,
+      "grad_norm": 3.705559492111206,
+      "learning_rate": 1.274864376130199e-06,
+      "loss": 0.0712,
+      "step": 9650
+    },
+    {
+      "epoch": 8.77,
+      "grad_norm": 5.664219856262207,
+      "learning_rate": 1.2296564195298374e-06,
+      "loss": 0.0729,
+      "step": 9700
+    },
+    {
+      "epoch": 8.82,
+      "grad_norm": 5.6803812980651855,
+      "learning_rate": 1.1844484629294758e-06,
+      "loss": 0.0719,
+      "step": 9750
+    },
+    {
+      "epoch": 8.86,
+      "grad_norm": 4.449408054351807,
+      "learning_rate": 1.139240506329114e-06,
+      "loss": 0.0732,
+      "step": 9800
+    },
+    {
+      "epoch": 8.91,
+      "grad_norm": 6.274992942810059,
+      "learning_rate": 1.0940325497287525e-06,
+      "loss": 0.0764,
+      "step": 9850
+    },
+    {
+      "epoch": 8.95,
+      "grad_norm": 6.5285797119140625,
+      "learning_rate": 1.0488245931283907e-06,
+      "loss": 0.072,
+      "step": 9900
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 10.324697494506836,
+      "learning_rate": 1.0036166365280291e-06,
+      "loss": 0.073,
+      "step": 9950
+    },
+    {
+      "epoch": 9.04,
+      "grad_norm": 12.908041954040527,
+      "learning_rate": 9.584086799276674e-07,
+      "loss": 0.0689,
+      "step": 10000
+    },
+    {
+      "epoch": 9.04,
+      "eval_accuracy": 0.915530303030303,
+      "eval_auc": 0.9687716182332089,
+      "eval_f1": 0.8797843665768194,
+      "eval_loss": 0.3541575074195862,
+      "eval_runtime": 52.8789,
+      "eval_samples_per_second": 249.627,
+      "eval_steps_per_second": 3.915,
+      "step": 10000
+    },
+    {
+      "epoch": 9.09,
+      "grad_norm": 4.13251256942749,
+      "learning_rate": 9.132007233273058e-07,
+      "loss": 0.0687,
+      "step": 10050
+    },
+    {
+      "epoch": 9.13,
+      "grad_norm": 6.25527286529541,
+      "learning_rate": 8.67992766726944e-07,
+      "loss": 0.0731,
+      "step": 10100
+    },
+    {
+      "epoch": 9.18,
+      "grad_norm": 5.105614185333252,
+      "learning_rate": 8.227848101265823e-07,
+      "loss": 0.0715,
+      "step": 10150
+    },
+    {
+      "epoch": 9.22,
+      "grad_norm": 4.943532943725586,
+      "learning_rate": 7.775768535262207e-07,
+      "loss": 0.0653,
+      "step": 10200
+    },
+    {
+      "epoch": 9.27,
+      "grad_norm": 5.668446063995361,
+      "learning_rate": 7.32368896925859e-07,
+      "loss": 0.0694,
+      "step": 10250
+    },
+    {
+      "epoch": 9.31,
+      "grad_norm": 5.078279972076416,
+      "learning_rate": 6.871609403254973e-07,
+      "loss": 0.0659,
+      "step": 10300
+    },
+    {
+      "epoch": 9.36,
+      "grad_norm": 4.663586616516113,
+      "learning_rate": 6.419529837251357e-07,
+      "loss": 0.0661,
+      "step": 10350
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 4.528202533721924,
+      "learning_rate": 5.96745027124774e-07,
+      "loss": 0.0701,
+      "step": 10400
+    },
+    {
+      "epoch": 9.45,
+      "grad_norm": 5.685123920440674,
+      "learning_rate": 5.515370705244123e-07,
+      "loss": 0.069,
+      "step": 10450
+    },
+    {
+      "epoch": 9.49,
+      "grad_norm": 5.798079967498779,
+      "learning_rate": 5.063291139240507e-07,
+      "loss": 0.0628,
+      "step": 10500
+    },
+    {
+      "epoch": 9.54,
+      "grad_norm": 4.510618686676025,
+      "learning_rate": 4.61121157323689e-07,
+      "loss": 0.0677,
+      "step": 10550
+    },
+    {
+      "epoch": 9.58,
+      "grad_norm": 5.0236382484436035,
+      "learning_rate": 4.1591320072332737e-07,
+      "loss": 0.0685,
+      "step": 10600
+    },
+    {
+      "epoch": 9.63,
+      "grad_norm": 5.243931770324707,
+      "learning_rate": 3.707052441229657e-07,
+      "loss": 0.0687,
+      "step": 10650
+    },
+    {
+      "epoch": 9.67,
+      "grad_norm": 4.094812870025635,
+      "learning_rate": 3.2549728752260403e-07,
+      "loss": 0.0663,
+      "step": 10700
+    },
+    {
+      "epoch": 9.72,
+      "grad_norm": 4.533218860626221,
+      "learning_rate": 2.802893309222423e-07,
+      "loss": 0.0685,
+      "step": 10750
+    },
+    {
+      "epoch": 9.76,
+      "grad_norm": 6.6322021484375,
+      "learning_rate": 2.3508137432188067e-07,
+      "loss": 0.0664,
+      "step": 10800
+    },
+    {
+      "epoch": 9.81,
+      "grad_norm": 6.374199390411377,
+      "learning_rate": 1.89873417721519e-07,
+      "loss": 0.0688,
+      "step": 10850
+    },
+    {
+      "epoch": 9.86,
+      "grad_norm": 5.66272497177124,
+      "learning_rate": 1.4466546112115733e-07,
+      "loss": 0.0696,
+      "step": 10900
+    },
+    {
+      "epoch": 9.9,
+      "grad_norm": 7.157235622406006,
+      "learning_rate": 9.945750452079567e-08,
+      "loss": 0.0654,
+      "step": 10950
+    },
+    {
+      "epoch": 9.95,
+      "grad_norm": 6.200794696807861,
+      "learning_rate": 5.4249547920434e-08,
+      "loss": 0.0712,
+      "step": 11000
+    },
+    {
+      "epoch": 9.95,
+      "eval_accuracy": 0.9162121212121213,
+      "eval_auc": 0.9685840496026594,
+      "eval_f1": 0.8797826086956522,
+      "eval_loss": 0.35418105125427246,
+      "eval_runtime": 52.517,
+      "eval_samples_per_second": 251.347,
+      "eval_steps_per_second": 3.942,
+      "step": 11000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 11060,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 1000,
+  "total_flos": 1.3113541708679086e+18,
+  "train_batch_size": 512,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01b338da7d13b5aaf587413f60c59186112df880ca73bd369438d7398966e3a0
+size 4920