Training in progress, epoch 1

Browse files

Files changed (14) hide show

model.safetensors +1 -1
run-2/checkpoint-1359/config.json +36 -0
run-2/checkpoint-1359/model.safetensors +3 -0
run-2/checkpoint-1359/optimizer.pt +3 -0
run-2/checkpoint-1359/rng_state.pth +3 -0
run-2/checkpoint-1359/scaler.pt +3 -0
run-2/checkpoint-1359/scheduler.pt +3 -0
run-2/checkpoint-1359/special_tokens_map.json +37 -0
run-2/checkpoint-1359/tokenizer.json +0 -0
run-2/checkpoint-1359/tokenizer_config.json +63 -0
run-2/checkpoint-1359/trainer_state.json +1013 -0
run-2/checkpoint-1359/training_args.bin +3 -0
run-2/checkpoint-1359/vocab.txt +0 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f222110934988e114609565e5e8aba9fc71a59a451caae54b923b9c47a443599
 size 437961724

 version https://git-lfs.github.com/spec/v1
+oid sha256:17a9d1a7c0af34b2662d912bca96b4323fc85be50bfcd854f08488170d35f605
 size 437961724

run-2/checkpoint-1359/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.57.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-2/checkpoint-1359/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17a9d1a7c0af34b2662d912bca96b4323fc85be50bfcd854f08488170d35f605
+size 437961724

run-2/checkpoint-1359/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d46c33389283e208e1decfa83b3515a6379f814c2ff81822e1e0b958a0c8ec9b
+size 876047755

run-2/checkpoint-1359/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bf99a97e55e37df1321c689e62a0643f71031f8b594e08a09e50b16de6a73e3
+size 14709

run-2/checkpoint-1359/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be3415804e0baef86602afc2f77976eabebf5e078b8f5f63b855d510c163c36b
+size 1383

run-2/checkpoint-1359/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8e0bbbf384b81bbdda3c52b04df0b53dfa70b7e88d22ea4d6fa3aa455f7a45d
+size 1465

run-2/checkpoint-1359/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-2/checkpoint-1359/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-1359/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 256,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-1359/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1013 @@

+{
+  "best_global_step": 1359,
+  "best_metric": 0.7880794701986755,
+  "best_model_checkpoint": "bert-finetuned-sentiment/run-2/checkpoint-1359",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1359,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.007358351729212656,
+      "grad_norm": 15.73861026763916,
+      "learning_rate": 7.661248088138319e-07,
+      "loss": 0.1318,
+      "step": 10
+    },
+    {
+      "epoch": 0.014716703458425313,
+      "grad_norm": 0.6841095685958862,
+      "learning_rate": 1.6173745963847562e-06,
+      "loss": 0.2228,
+      "step": 20
+    },
+    {
+      "epoch": 0.02207505518763797,
+      "grad_norm": 20.415630340576172,
+      "learning_rate": 2.46862438395568e-06,
+      "loss": 0.2499,
+      "step": 30
+    },
+    {
+      "epoch": 0.029433406916850625,
+      "grad_norm": 2.7358345985412598,
+      "learning_rate": 3.319874171526605e-06,
+      "loss": 0.2024,
+      "step": 40
+    },
+    {
+      "epoch": 0.03679175864606328,
+      "grad_norm": 6.356612205505371,
+      "learning_rate": 4.171123959097529e-06,
+      "loss": 0.1746,
+      "step": 50
+    },
+    {
+      "epoch": 0.04415011037527594,
+      "grad_norm": 0.4042581021785736,
+      "learning_rate": 5.022373746668453e-06,
+      "loss": 0.1,
+      "step": 60
+    },
+    {
+      "epoch": 0.051508462104488596,
+      "grad_norm": 1.1148451566696167,
+      "learning_rate": 5.8736235342393774e-06,
+      "loss": 0.1023,
+      "step": 70
+    },
+    {
+      "epoch": 0.05886681383370125,
+      "grad_norm": 39.592811584472656,
+      "learning_rate": 6.724873321810302e-06,
+      "loss": 0.0813,
+      "step": 80
+    },
+    {
+      "epoch": 0.06622516556291391,
+      "grad_norm": 19.681055068969727,
+      "learning_rate": 7.576123109381227e-06,
+      "loss": 0.0931,
+      "step": 90
+    },
+    {
+      "epoch": 0.07358351729212656,
+      "grad_norm": 10.252055168151855,
+      "learning_rate": 8.42737289695215e-06,
+      "loss": 0.2333,
+      "step": 100
+    },
+    {
+      "epoch": 0.08094186902133922,
+      "grad_norm": 33.362030029296875,
+      "learning_rate": 8.49814021437465e-06,
+      "loss": 0.1284,
+      "step": 110
+    },
+    {
+      "epoch": 0.08830022075055188,
+      "grad_norm": 20.02435874938965,
+      "learning_rate": 8.482187257336215e-06,
+      "loss": 0.1234,
+      "step": 120
+    },
+    {
+      "epoch": 0.09565857247976453,
+      "grad_norm": 66.22127532958984,
+      "learning_rate": 8.46623430029778e-06,
+      "loss": 0.0565,
+      "step": 130
+    },
+    {
+      "epoch": 0.10301692420897719,
+      "grad_norm": 0.9394611120223999,
+      "learning_rate": 8.450281343259344e-06,
+      "loss": 0.0571,
+      "step": 140
+    },
+    {
+      "epoch": 0.11037527593818984,
+      "grad_norm": 32.92816162109375,
+      "learning_rate": 8.434328386220908e-06,
+      "loss": 0.202,
+      "step": 150
+    },
+    {
+      "epoch": 0.1177336276674025,
+      "grad_norm": 30.171003341674805,
+      "learning_rate": 8.418375429182472e-06,
+      "loss": 0.2109,
+      "step": 160
+    },
+    {
+      "epoch": 0.12509197939661515,
+      "grad_norm": 0.9405515193939209,
+      "learning_rate": 8.402422472144037e-06,
+      "loss": 0.1541,
+      "step": 170
+    },
+    {
+      "epoch": 0.13245033112582782,
+      "grad_norm": 13.217094421386719,
+      "learning_rate": 8.386469515105601e-06,
+      "loss": 0.206,
+      "step": 180
+    },
+    {
+      "epoch": 0.13980868285504047,
+      "grad_norm": 0.7528594136238098,
+      "learning_rate": 8.370516558067165e-06,
+      "loss": 0.0698,
+      "step": 190
+    },
+    {
+      "epoch": 0.14716703458425312,
+      "grad_norm": 6.840025424957275,
+      "learning_rate": 8.35456360102873e-06,
+      "loss": 0.1986,
+      "step": 200
+    },
+    {
+      "epoch": 0.1545253863134658,
+      "grad_norm": 1.212363362312317,
+      "learning_rate": 8.338610643990296e-06,
+      "loss": 0.1699,
+      "step": 210
+    },
+    {
+      "epoch": 0.16188373804267844,
+      "grad_norm": 3.122361183166504,
+      "learning_rate": 8.322657686951858e-06,
+      "loss": 0.1288,
+      "step": 220
+    },
+    {
+      "epoch": 0.1692420897718911,
+      "grad_norm": 0.28292983770370483,
+      "learning_rate": 8.306704729913423e-06,
+      "loss": 0.007,
+      "step": 230
+    },
+    {
+      "epoch": 0.17660044150110377,
+      "grad_norm": 0.11904435604810715,
+      "learning_rate": 8.290751772874989e-06,
+      "loss": 0.1291,
+      "step": 240
+    },
+    {
+      "epoch": 0.18395879323031641,
+      "grad_norm": 62.53517532348633,
+      "learning_rate": 8.274798815836553e-06,
+      "loss": 0.0704,
+      "step": 250
+    },
+    {
+      "epoch": 0.19131714495952906,
+      "grad_norm": 34.67837142944336,
+      "learning_rate": 8.258845858798116e-06,
+      "loss": 0.1849,
+      "step": 260
+    },
+    {
+      "epoch": 0.1986754966887417,
+      "grad_norm": 0.05347510427236557,
+      "learning_rate": 8.242892901759682e-06,
+      "loss": 0.1584,
+      "step": 270
+    },
+    {
+      "epoch": 0.20603384841795438,
+      "grad_norm": 15.957563400268555,
+      "learning_rate": 8.226939944721246e-06,
+      "loss": 0.1044,
+      "step": 280
+    },
+    {
+      "epoch": 0.21339220014716703,
+      "grad_norm": 0.15385572612285614,
+      "learning_rate": 8.21098698768281e-06,
+      "loss": 0.0807,
+      "step": 290
+    },
+    {
+      "epoch": 0.22075055187637968,
+      "grad_norm": 0.08834321796894073,
+      "learning_rate": 8.195034030644375e-06,
+      "loss": 0.1526,
+      "step": 300
+    },
+    {
+      "epoch": 0.22810890360559236,
+      "grad_norm": 5.847803115844727,
+      "learning_rate": 8.179081073605939e-06,
+      "loss": 0.2518,
+      "step": 310
+    },
+    {
+      "epoch": 0.235467255334805,
+      "grad_norm": 76.60889434814453,
+      "learning_rate": 8.163128116567503e-06,
+      "loss": 0.3841,
+      "step": 320
+    },
+    {
+      "epoch": 0.24282560706401765,
+      "grad_norm": 5.798557758331299,
+      "learning_rate": 8.147175159529068e-06,
+      "loss": 0.2359,
+      "step": 330
+    },
+    {
+      "epoch": 0.2501839587932303,
+      "grad_norm": 0.20405276119709015,
+      "learning_rate": 8.131222202490632e-06,
+      "loss": 0.1507,
+      "step": 340
+    },
+    {
+      "epoch": 0.257542310522443,
+      "grad_norm": 0.35858389735221863,
+      "learning_rate": 8.115269245452196e-06,
+      "loss": 0.286,
+      "step": 350
+    },
+    {
+      "epoch": 0.26490066225165565,
+      "grad_norm": 0.10192416608333588,
+      "learning_rate": 8.09931628841376e-06,
+      "loss": 0.0885,
+      "step": 360
+    },
+    {
+      "epoch": 0.27225901398086827,
+      "grad_norm": 3.3536927700042725,
+      "learning_rate": 8.083363331375325e-06,
+      "loss": 0.2359,
+      "step": 370
+    },
+    {
+      "epoch": 0.27961736571008095,
+      "grad_norm": 0.36128467321395874,
+      "learning_rate": 8.06741037433689e-06,
+      "loss": 0.2289,
+      "step": 380
+    },
+    {
+      "epoch": 0.2869757174392936,
+      "grad_norm": 35.29405975341797,
+      "learning_rate": 8.051457417298454e-06,
+      "loss": 0.1705,
+      "step": 390
+    },
+    {
+      "epoch": 0.29433406916850624,
+      "grad_norm": 13.290261268615723,
+      "learning_rate": 8.035504460260018e-06,
+      "loss": 0.1391,
+      "step": 400
+    },
+    {
+      "epoch": 0.3016924208977189,
+      "grad_norm": 4.086803436279297,
+      "learning_rate": 8.019551503221582e-06,
+      "loss": 0.3115,
+      "step": 410
+    },
+    {
+      "epoch": 0.3090507726269316,
+      "grad_norm": 85.40064239501953,
+      "learning_rate": 8.003598546183147e-06,
+      "loss": 0.1905,
+      "step": 420
+    },
+    {
+      "epoch": 0.3164091243561442,
+      "grad_norm": 3.496741533279419,
+      "learning_rate": 7.987645589144711e-06,
+      "loss": 0.1074,
+      "step": 430
+    },
+    {
+      "epoch": 0.3237674760853569,
+      "grad_norm": 32.27187728881836,
+      "learning_rate": 7.971692632106275e-06,
+      "loss": 0.1561,
+      "step": 440
+    },
+    {
+      "epoch": 0.33112582781456956,
+      "grad_norm": 8.242842674255371,
+      "learning_rate": 7.95573967506784e-06,
+      "loss": 0.2492,
+      "step": 450
+    },
+    {
+      "epoch": 0.3384841795437822,
+      "grad_norm": 29.650043487548828,
+      "learning_rate": 7.939786718029404e-06,
+      "loss": 0.2973,
+      "step": 460
+    },
+    {
+      "epoch": 0.34584253127299486,
+      "grad_norm": 79.97142791748047,
+      "learning_rate": 7.923833760990968e-06,
+      "loss": 0.1544,
+      "step": 470
+    },
+    {
+      "epoch": 0.35320088300220753,
+      "grad_norm": 29.046892166137695,
+      "learning_rate": 7.907880803952533e-06,
+      "loss": 0.1099,
+      "step": 480
+    },
+    {
+      "epoch": 0.36055923473142015,
+      "grad_norm": 0.17732320725917816,
+      "learning_rate": 7.891927846914099e-06,
+      "loss": 0.1003,
+      "step": 490
+    },
+    {
+      "epoch": 0.36791758646063283,
+      "grad_norm": 0.7188624143600464,
+      "learning_rate": 7.875974889875661e-06,
+      "loss": 0.3034,
+      "step": 500
+    },
+    {
+      "epoch": 0.37527593818984545,
+      "grad_norm": 21.04668617248535,
+      "learning_rate": 7.860021932837226e-06,
+      "loss": 0.2042,
+      "step": 510
+    },
+    {
+      "epoch": 0.3826342899190581,
+      "grad_norm": 5.541590690612793,
+      "learning_rate": 7.844068975798792e-06,
+      "loss": 0.2426,
+      "step": 520
+    },
+    {
+      "epoch": 0.3899926416482708,
+      "grad_norm": 0.21457338333129883,
+      "learning_rate": 7.828116018760356e-06,
+      "loss": 0.2613,
+      "step": 530
+    },
+    {
+      "epoch": 0.3973509933774834,
+      "grad_norm": 6.815304279327393,
+      "learning_rate": 7.812163061721919e-06,
+      "loss": 0.1203,
+      "step": 540
+    },
+    {
+      "epoch": 0.4047093451066961,
+      "grad_norm": 1.4606707096099854,
+      "learning_rate": 7.796210104683485e-06,
+      "loss": 0.1307,
+      "step": 550
+    },
+    {
+      "epoch": 0.41206769683590877,
+      "grad_norm": 170.10000610351562,
+      "learning_rate": 7.780257147645049e-06,
+      "loss": 0.2187,
+      "step": 560
+    },
+    {
+      "epoch": 0.4194260485651214,
+      "grad_norm": 52.976402282714844,
+      "learning_rate": 7.764304190606613e-06,
+      "loss": 0.1297,
+      "step": 570
+    },
+    {
+      "epoch": 0.42678440029433407,
+      "grad_norm": 1.0618321895599365,
+      "learning_rate": 7.748351233568176e-06,
+      "loss": 0.1972,
+      "step": 580
+    },
+    {
+      "epoch": 0.43414275202354674,
+      "grad_norm": 0.2524672746658325,
+      "learning_rate": 7.732398276529742e-06,
+      "loss": 0.2513,
+      "step": 590
+    },
+    {
+      "epoch": 0.44150110375275936,
+      "grad_norm": 0.7126919627189636,
+      "learning_rate": 7.716445319491306e-06,
+      "loss": 0.2055,
+      "step": 600
+    },
+    {
+      "epoch": 0.44885945548197204,
+      "grad_norm": 6.286591529846191,
+      "learning_rate": 7.70049236245287e-06,
+      "loss": 0.0236,
+      "step": 610
+    },
+    {
+      "epoch": 0.4562178072111847,
+      "grad_norm": 0.11959370225667953,
+      "learning_rate": 7.684539405414435e-06,
+      "loss": 0.2016,
+      "step": 620
+    },
+    {
+      "epoch": 0.46357615894039733,
+      "grad_norm": 6.085186004638672,
+      "learning_rate": 7.668586448376e-06,
+      "loss": 0.3102,
+      "step": 630
+    },
+    {
+      "epoch": 0.47093451066961,
+      "grad_norm": 96.2499008178711,
+      "learning_rate": 7.652633491337564e-06,
+      "loss": 0.2181,
+      "step": 640
+    },
+    {
+      "epoch": 0.4782928623988227,
+      "grad_norm": 14.058534622192383,
+      "learning_rate": 7.636680534299128e-06,
+      "loss": 0.2536,
+      "step": 650
+    },
+    {
+      "epoch": 0.4856512141280353,
+      "grad_norm": 33.89338684082031,
+      "learning_rate": 7.620727577260692e-06,
+      "loss": 0.2979,
+      "step": 660
+    },
+    {
+      "epoch": 0.493009565857248,
+      "grad_norm": 0.733513355255127,
+      "learning_rate": 7.604774620222257e-06,
+      "loss": 0.1619,
+      "step": 670
+    },
+    {
+      "epoch": 0.5003679175864606,
+      "grad_norm": 0.18061041831970215,
+      "learning_rate": 7.588821663183821e-06,
+      "loss": 0.1896,
+      "step": 680
+    },
+    {
+      "epoch": 0.5077262693156733,
+      "grad_norm": 0.1485278606414795,
+      "learning_rate": 7.572868706145385e-06,
+      "loss": 0.0868,
+      "step": 690
+    },
+    {
+      "epoch": 0.515084621044886,
+      "grad_norm": 65.20437622070312,
+      "learning_rate": 7.55691574910695e-06,
+      "loss": 0.1969,
+      "step": 700
+    },
+    {
+      "epoch": 0.5224429727740986,
+      "grad_norm": 33.507415771484375,
+      "learning_rate": 7.540962792068515e-06,
+      "loss": 0.2309,
+      "step": 710
+    },
+    {
+      "epoch": 0.5298013245033113,
+      "grad_norm": 7.292962551116943,
+      "learning_rate": 7.525009835030078e-06,
+      "loss": 0.3215,
+      "step": 720
+    },
+    {
+      "epoch": 0.5371596762325239,
+      "grad_norm": 0.141510471701622,
+      "learning_rate": 7.509056877991643e-06,
+      "loss": 0.1861,
+      "step": 730
+    },
+    {
+      "epoch": 0.5445180279617365,
+      "grad_norm": 10.76659107208252,
+      "learning_rate": 7.493103920953208e-06,
+      "loss": 0.1563,
+      "step": 740
+    },
+    {
+      "epoch": 0.5518763796909493,
+      "grad_norm": 0.535017192363739,
+      "learning_rate": 7.477150963914772e-06,
+      "loss": 0.1037,
+      "step": 750
+    },
+    {
+      "epoch": 0.5592347314201619,
+      "grad_norm": 0.12771788239479065,
+      "learning_rate": 7.461198006876336e-06,
+      "loss": 0.0714,
+      "step": 760
+    },
+    {
+      "epoch": 0.5665930831493745,
+      "grad_norm": 0.13390083611011505,
+      "learning_rate": 7.4452450498379e-06,
+      "loss": 0.2959,
+      "step": 770
+    },
+    {
+      "epoch": 0.5739514348785872,
+      "grad_norm": 56.0723991394043,
+      "learning_rate": 7.429292092799465e-06,
+      "loss": 0.1132,
+      "step": 780
+    },
+    {
+      "epoch": 0.5813097866077999,
+      "grad_norm": 0.137408047914505,
+      "learning_rate": 7.4133391357610295e-06,
+      "loss": 0.2051,
+      "step": 790
+    },
+    {
+      "epoch": 0.5886681383370125,
+      "grad_norm": 12.899972915649414,
+      "learning_rate": 7.397386178722593e-06,
+      "loss": 0.3972,
+      "step": 800
+    },
+    {
+      "epoch": 0.5960264900662252,
+      "grad_norm": 11.240960121154785,
+      "learning_rate": 7.381433221684158e-06,
+      "loss": 0.3619,
+      "step": 810
+    },
+    {
+      "epoch": 0.6033848417954378,
+      "grad_norm": 8.278285026550293,
+      "learning_rate": 7.3654802646457225e-06,
+      "loss": 0.1447,
+      "step": 820
+    },
+    {
+      "epoch": 0.6107431935246505,
+      "grad_norm": 2.9699766635894775,
+      "learning_rate": 7.349527307607287e-06,
+      "loss": 0.1212,
+      "step": 830
+    },
+    {
+      "epoch": 0.6181015452538632,
+      "grad_norm": 12.325569152832031,
+      "learning_rate": 7.333574350568851e-06,
+      "loss": 0.2392,
+      "step": 840
+    },
+    {
+      "epoch": 0.6254598969830758,
+      "grad_norm": 1.178353190422058,
+      "learning_rate": 7.3176213935304155e-06,
+      "loss": 0.193,
+      "step": 850
+    },
+    {
+      "epoch": 0.6328182487122884,
+      "grad_norm": 42.010711669921875,
+      "learning_rate": 7.30166843649198e-06,
+      "loss": 0.178,
+      "step": 860
+    },
+    {
+      "epoch": 0.6401766004415012,
+      "grad_norm": 0.831745445728302,
+      "learning_rate": 7.285715479453545e-06,
+      "loss": 0.2348,
+      "step": 870
+    },
+    {
+      "epoch": 0.6475349521707138,
+      "grad_norm": 68.13945007324219,
+      "learning_rate": 7.2697625224151084e-06,
+      "loss": 0.0788,
+      "step": 880
+    },
+    {
+      "epoch": 0.6548933038999264,
+      "grad_norm": 33.832366943359375,
+      "learning_rate": 7.253809565376673e-06,
+      "loss": 0.1783,
+      "step": 890
+    },
+    {
+      "epoch": 0.6622516556291391,
+      "grad_norm": 1.357936978340149,
+      "learning_rate": 7.237856608338238e-06,
+      "loss": 0.1938,
+      "step": 900
+    },
+    {
+      "epoch": 0.6696100073583517,
+      "grad_norm": 53.46693420410156,
+      "learning_rate": 7.221903651299802e-06,
+      "loss": 0.4283,
+      "step": 910
+    },
+    {
+      "epoch": 0.6769683590875644,
+      "grad_norm": 4.254789352416992,
+      "learning_rate": 7.205950694261366e-06,
+      "loss": 0.2616,
+      "step": 920
+    },
+    {
+      "epoch": 0.6843267108167771,
+      "grad_norm": 75.10453796386719,
+      "learning_rate": 7.189997737222931e-06,
+      "loss": 0.1924,
+      "step": 930
+    },
+    {
+      "epoch": 0.6916850625459897,
+      "grad_norm": 0.7422951459884644,
+      "learning_rate": 7.174044780184495e-06,
+      "loss": 0.13,
+      "step": 940
+    },
+    {
+      "epoch": 0.6990434142752023,
+      "grad_norm": 18.9062557220459,
+      "learning_rate": 7.15809182314606e-06,
+      "loss": 0.1283,
+      "step": 950
+    },
+    {
+      "epoch": 0.7064017660044151,
+      "grad_norm": 6.750290393829346,
+      "learning_rate": 7.142138866107623e-06,
+      "loss": 0.3134,
+      "step": 960
+    },
+    {
+      "epoch": 0.7137601177336277,
+      "grad_norm": 16.308557510375977,
+      "learning_rate": 7.126185909069188e-06,
+      "loss": 0.2575,
+      "step": 970
+    },
+    {
+      "epoch": 0.7211184694628403,
+      "grad_norm": 67.54432678222656,
+      "learning_rate": 7.110232952030753e-06,
+      "loss": 0.3646,
+      "step": 980
+    },
+    {
+      "epoch": 0.7284768211920529,
+      "grad_norm": 20.565406799316406,
+      "learning_rate": 7.094279994992317e-06,
+      "loss": 0.1607,
+      "step": 990
+    },
+    {
+      "epoch": 0.7358351729212657,
+      "grad_norm": 9.457584381103516,
+      "learning_rate": 7.078327037953881e-06,
+      "loss": 0.4157,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7431935246504783,
+      "grad_norm": 1.9208470582962036,
+      "learning_rate": 7.062374080915446e-06,
+      "loss": 0.288,
+      "step": 1010
+    },
+    {
+      "epoch": 0.7505518763796909,
+      "grad_norm": 0.18186113238334656,
+      "learning_rate": 7.04642112387701e-06,
+      "loss": 0.1683,
+      "step": 1020
+    },
+    {
+      "epoch": 0.7579102281089036,
+      "grad_norm": 56.17685317993164,
+      "learning_rate": 7.030468166838575e-06,
+      "loss": 0.1789,
+      "step": 1030
+    },
+    {
+      "epoch": 0.7652685798381162,
+      "grad_norm": 22.048622131347656,
+      "learning_rate": 7.014515209800139e-06,
+      "loss": 0.2506,
+      "step": 1040
+    },
+    {
+      "epoch": 0.7726269315673289,
+      "grad_norm": 3.697582244873047,
+      "learning_rate": 6.998562252761703e-06,
+      "loss": 0.1667,
+      "step": 1050
+    },
+    {
+      "epoch": 0.7799852832965416,
+      "grad_norm": 6.335505962371826,
+      "learning_rate": 6.982609295723268e-06,
+      "loss": 0.1866,
+      "step": 1060
+    },
+    {
+      "epoch": 0.7873436350257542,
+      "grad_norm": 18.40208625793457,
+      "learning_rate": 6.9666563386848324e-06,
+      "loss": 0.2035,
+      "step": 1070
+    },
+    {
+      "epoch": 0.7947019867549668,
+      "grad_norm": 0.3842657506465912,
+      "learning_rate": 6.950703381646396e-06,
+      "loss": 0.3858,
+      "step": 1080
+    },
+    {
+      "epoch": 0.8020603384841796,
+      "grad_norm": 0.10957049578428268,
+      "learning_rate": 6.934750424607961e-06,
+      "loss": 0.1044,
+      "step": 1090
+    },
+    {
+      "epoch": 0.8094186902133922,
+      "grad_norm": 52.896461486816406,
+      "learning_rate": 6.9187974675695254e-06,
+      "loss": 0.0542,
+      "step": 1100
+    },
+    {
+      "epoch": 0.8167770419426048,
+      "grad_norm": 77.98719787597656,
+      "learning_rate": 6.90284451053109e-06,
+      "loss": 0.2078,
+      "step": 1110
+    },
+    {
+      "epoch": 0.8241353936718175,
+      "grad_norm": 103.59429931640625,
+      "learning_rate": 6.886891553492654e-06,
+      "loss": 0.1757,
+      "step": 1120
+    },
+    {
+      "epoch": 0.8314937454010302,
+      "grad_norm": 14.622530937194824,
+      "learning_rate": 6.8709385964542184e-06,
+      "loss": 0.1741,
+      "step": 1130
+    },
+    {
+      "epoch": 0.8388520971302428,
+      "grad_norm": 0.8220998048782349,
+      "learning_rate": 6.854985639415783e-06,
+      "loss": 0.1864,
+      "step": 1140
+    },
+    {
+      "epoch": 0.8462104488594555,
+      "grad_norm": 44.293025970458984,
+      "learning_rate": 6.839032682377347e-06,
+      "loss": 0.3249,
+      "step": 1150
+    },
+    {
+      "epoch": 0.8535688005886681,
+      "grad_norm": 45.35190200805664,
+      "learning_rate": 6.823079725338911e-06,
+      "loss": 0.2564,
+      "step": 1160
+    },
+    {
+      "epoch": 0.8609271523178808,
+      "grad_norm": 92.81620025634766,
+      "learning_rate": 6.807126768300476e-06,
+      "loss": 0.1886,
+      "step": 1170
+    },
+    {
+      "epoch": 0.8682855040470935,
+      "grad_norm": 110.62460327148438,
+      "learning_rate": 6.79117381126204e-06,
+      "loss": 0.2484,
+      "step": 1180
+    },
+    {
+      "epoch": 0.8756438557763061,
+      "grad_norm": 25.606109619140625,
+      "learning_rate": 6.775220854223605e-06,
+      "loss": 0.1544,
+      "step": 1190
+    },
+    {
+      "epoch": 0.8830022075055187,
+      "grad_norm": 0.10603518784046173,
+      "learning_rate": 6.759267897185169e-06,
+      "loss": 0.3421,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8903605592347315,
+      "grad_norm": 0.8049039840698242,
+      "learning_rate": 6.743314940146733e-06,
+      "loss": 0.3171,
+      "step": 1210
+    },
+    {
+      "epoch": 0.8977189109639441,
+      "grad_norm": 0.1788051277399063,
+      "learning_rate": 6.727361983108298e-06,
+      "loss": 0.2324,
+      "step": 1220
+    },
+    {
+      "epoch": 0.9050772626931567,
+      "grad_norm": 4.411452293395996,
+      "learning_rate": 6.711409026069863e-06,
+      "loss": 0.0503,
+      "step": 1230
+    },
+    {
+      "epoch": 0.9124356144223694,
+      "grad_norm": 0.08730533719062805,
+      "learning_rate": 6.695456069031426e-06,
+      "loss": 0.1698,
+      "step": 1240
+    },
+    {
+      "epoch": 0.919793966151582,
+      "grad_norm": 14.45964527130127,
+      "learning_rate": 6.679503111992991e-06,
+      "loss": 0.1356,
+      "step": 1250
+    },
+    {
+      "epoch": 0.9271523178807947,
+      "grad_norm": 99.87224578857422,
+      "learning_rate": 6.663550154954556e-06,
+      "loss": 0.0962,
+      "step": 1260
+    },
+    {
+      "epoch": 0.9345106696100074,
+      "grad_norm": 0.4108155369758606,
+      "learning_rate": 6.64759719791612e-06,
+      "loss": 0.1787,
+      "step": 1270
+    },
+    {
+      "epoch": 0.94186902133922,
+      "grad_norm": 108.73641204833984,
+      "learning_rate": 6.631644240877684e-06,
+      "loss": 0.279,
+      "step": 1280
+    },
+    {
+      "epoch": 0.9492273730684326,
+      "grad_norm": 131.88836669921875,
+      "learning_rate": 6.6156912838392486e-06,
+      "loss": 0.1746,
+      "step": 1290
+    },
+    {
+      "epoch": 0.9565857247976454,
+      "grad_norm": 0.547515869140625,
+      "learning_rate": 6.599738326800813e-06,
+      "loss": 0.0208,
+      "step": 1300
+    },
+    {
+      "epoch": 0.963944076526858,
+      "grad_norm": 19.8458251953125,
+      "learning_rate": 6.583785369762378e-06,
+      "loss": 0.1389,
+      "step": 1310
+    },
+    {
+      "epoch": 0.9713024282560706,
+      "grad_norm": 0.5687592625617981,
+      "learning_rate": 6.5678324127239416e-06,
+      "loss": 0.1856,
+      "step": 1320
+    },
+    {
+      "epoch": 0.9786607799852833,
+      "grad_norm": 0.06650309264659882,
+      "learning_rate": 6.551879455685506e-06,
+      "loss": 0.2146,
+      "step": 1330
+    },
+    {
+      "epoch": 0.986019131714496,
+      "grad_norm": 38.21836471557617,
+      "learning_rate": 6.53592649864707e-06,
+      "loss": 0.2825,
+      "step": 1340
+    },
+    {
+      "epoch": 0.9933774834437086,
+      "grad_norm": 2.0989203453063965,
+      "learning_rate": 6.519973541608635e-06,
+      "loss": 0.341,
+      "step": 1350
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7880794701986755,
+      "eval_confusion_matrix": [
+        [
+          676,
+          8,
+          78
+        ],
+        [
+          7,
+          70,
+          34
+        ],
+        [
+          110,
+          19,
+          206
+        ]
+      ],
+      "eval_f1": 0.7852632048062911,
+      "eval_loss": 1.1113489866256714,
+      "eval_precision": 0.7836833128242224,
+      "eval_recall": 0.7880794701986755,
+      "eval_runtime": 10.673,
+      "eval_samples_per_second": 113.183,
+      "eval_steps_per_second": 3.56,
+      "step": 1359
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 5436,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1158870714941520.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 8.512497875709243e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 4,
+    "seed": 14
+  }
+}

run-2/checkpoint-1359/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ed213c84afc0c72cb8266b9163d38ffb171814d89789d606f80a887cc280d4
+size 5777

run-2/checkpoint-1359/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b658592e23f2a4cd1a0d40f6dc93783b89faf26708f63097721fc7f888ffd853
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1ed213c84afc0c72cb8266b9163d38ffb171814d89789d606f80a887cc280d4
 size 5777