Upload folder using huggingface_hub
Browse files- best_model/config.json +37 -0
- best_model/model.safetensors +3 -0
- best_model/special_tokens_map.json +7 -0
- best_model/tokenizer.json +0 -0
- best_model/tokenizer_config.json +59 -0
- best_model/training_args.bin +3 -0
- best_model/vocab.txt +0 -0
- checkpoint-7604/config.json +37 -0
- checkpoint-7604/model.safetensors +3 -0
- checkpoint-7604/optimizer.pt +3 -0
- checkpoint-7604/rng_state.pth +3 -0
- checkpoint-7604/scaler.pt +3 -0
- checkpoint-7604/scheduler.pt +3 -0
- checkpoint-7604/special_tokens_map.json +7 -0
- checkpoint-7604/tokenizer.json +0 -0
- checkpoint-7604/tokenizer_config.json +59 -0
- checkpoint-7604/trainer_state.json +1163 -0
- checkpoint-7604/training_args.bin +3 -0
- checkpoint-7604/vocab.txt +0 -0
- checkpoint-9505/config.json +37 -0
- checkpoint-9505/model.safetensors +3 -0
- checkpoint-9505/optimizer.pt +3 -0
- checkpoint-9505/rng_state.pth +3 -0
- checkpoint-9505/scaler.pt +3 -0
- checkpoint-9505/scheduler.pt +3 -0
- checkpoint-9505/special_tokens_map.json +7 -0
- checkpoint-9505/tokenizer.json +0 -0
- checkpoint-9505/tokenizer_config.json +59 -0
- checkpoint-9505/trainer_state.json +1443 -0
- checkpoint-9505/training_args.bin +3 -0
- checkpoint-9505/vocab.txt +0 -0
- confusion_matrix_val.csv +5 -0
- metrics.txt +1 -0
- val_predictions.csv +0 -0
best_model/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "aeb_Arab",
|
| 14 |
+
"1": "arb_Arab",
|
| 15 |
+
"2": "ars_Arab",
|
| 16 |
+
"3": "arz_Arab"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"label2id": {
|
| 21 |
+
"aeb_Arab": 0,
|
| 22 |
+
"arb_Arab": 1,
|
| 23 |
+
"ars_Arab": 2,
|
| 24 |
+
"arz_Arab": 3
|
| 25 |
+
},
|
| 26 |
+
"layer_norm_eps": 1e-12,
|
| 27 |
+
"max_position_embeddings": 512,
|
| 28 |
+
"model_type": "bert",
|
| 29 |
+
"num_attention_heads": 12,
|
| 30 |
+
"num_hidden_layers": 12,
|
| 31 |
+
"pad_token_id": 0,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"transformers_version": "4.56.1",
|
| 34 |
+
"type_vocab_size": 2,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 30000
|
| 37 |
+
}
|
best_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:496bb5c365c925f55ceae6595a8e715b409815cc8b6ff91706c35fe83c24f363
|
| 3 |
+
size 436361208
|
best_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
best_model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
best_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"full_tokenizer_file": null,
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_token": "[PAD]",
|
| 54 |
+
"sep_token": "[SEP]",
|
| 55 |
+
"strip_accents": null,
|
| 56 |
+
"tokenize_chinese_chars": true,
|
| 57 |
+
"tokenizer_class": "BertTokenizer",
|
| 58 |
+
"unk_token": "[UNK]"
|
| 59 |
+
}
|
best_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
|
| 3 |
+
size 5777
|
best_model/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-7604/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"id2label": {
|
| 13 |
+
"0": "aeb_Arab",
|
| 14 |
+
"1": "arb_Arab",
|
| 15 |
+
"2": "ars_Arab",
|
| 16 |
+
"3": "arz_Arab"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"label2id": {
|
| 21 |
+
"aeb_Arab": 0,
|
| 22 |
+
"arb_Arab": 1,
|
| 23 |
+
"ars_Arab": 2,
|
| 24 |
+
"arz_Arab": 3
|
| 25 |
+
},
|
| 26 |
+
"layer_norm_eps": 1e-12,
|
| 27 |
+
"max_position_embeddings": 512,
|
| 28 |
+
"model_type": "bert",
|
| 29 |
+
"num_attention_heads": 12,
|
| 30 |
+
"num_hidden_layers": 12,
|
| 31 |
+
"pad_token_id": 0,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"transformers_version": "4.56.1",
|
| 34 |
+
"type_vocab_size": 2,
|
| 35 |
+
"use_cache": true,
|
| 36 |
+
"vocab_size": 30000
|
| 37 |
+
}
|
checkpoint-7604/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea97c9f54116987a4efce5003c237fe9ad14dba6bc8df6581b8e1b335acb132a
|
| 3 |
+
size 436361208
|
checkpoint-7604/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b377722c6852e6cbba0ccadd8c6526da27ed024558775d8e5aef375283656145
|
| 3 |
+
size 872846731
|
checkpoint-7604/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf843fc7bcd1af4ab3293b3c088707a5f43c002295ebaf38c7b73a37d8b8b6e2
|
| 3 |
+
size 14645
|
checkpoint-7604/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cff504aea448cd91d3c9132365ba5a6351fe5df0ebe126e322b53a9d87fdfb29
|
| 3 |
+
size 1383
|
checkpoint-7604/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df49cd2afd04bc85cb2837d2c80f823cd41bf926f70505d505372ff3c1aead5c
|
| 3 |
+
size 1465
|
checkpoint-7604/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
checkpoint-7604/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-7604/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"4": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"full_tokenizer_file": null,
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_token": "[PAD]",
|
| 54 |
+
"sep_token": "[SEP]",
|
| 55 |
+
"strip_accents": null,
|
| 56 |
+
"tokenize_chinese_chars": true,
|
| 57 |
+
"tokenizer_class": "BertTokenizer",
|
| 58 |
+
"unk_token": "[UNK]"
|
| 59 |
+
}
|
checkpoint-7604/trainer_state.json
ADDED
|
@@ -0,0 +1,1163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 7604,
|
| 3 |
+
"best_metric": 0.9633717243752477,
|
| 4 |
+
"best_model_checkpoint": "camelbert_madar_task5/checkpoint-7604",
|
| 5 |
+
"epoch": 4.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 7604,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.026301946344029457,
|
| 14 |
+
"grad_norm": 21.061479568481445,
|
| 15 |
+
"learning_rate": 1.9896896370331405e-05,
|
| 16 |
+
"loss": 0.9381,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.052603892688058915,
|
| 21 |
+
"grad_norm": 4.199251651763916,
|
| 22 |
+
"learning_rate": 1.9791688584955288e-05,
|
| 23 |
+
"loss": 0.5182,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.07890583903208838,
|
| 28 |
+
"grad_norm": 22.227828979492188,
|
| 29 |
+
"learning_rate": 1.968648079957917e-05,
|
| 30 |
+
"loss": 0.4486,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.10520778537611783,
|
| 35 |
+
"grad_norm": 7.481734275817871,
|
| 36 |
+
"learning_rate": 1.9581273014203053e-05,
|
| 37 |
+
"loss": 0.4422,
|
| 38 |
+
"step": 200
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.1315097317201473,
|
| 42 |
+
"grad_norm": 9.7647705078125,
|
| 43 |
+
"learning_rate": 1.9476065228826936e-05,
|
| 44 |
+
"loss": 0.4304,
|
| 45 |
+
"step": 250
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.15781167806417676,
|
| 49 |
+
"grad_norm": 12.080931663513184,
|
| 50 |
+
"learning_rate": 1.9370857443450818e-05,
|
| 51 |
+
"loss": 0.3672,
|
| 52 |
+
"step": 300
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.1841136244082062,
|
| 56 |
+
"grad_norm": 11.353347778320312,
|
| 57 |
+
"learning_rate": 1.9265649658074697e-05,
|
| 58 |
+
"loss": 0.3771,
|
| 59 |
+
"step": 350
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.21041557075223566,
|
| 63 |
+
"grad_norm": 3.3302719593048096,
|
| 64 |
+
"learning_rate": 1.916044187269858e-05,
|
| 65 |
+
"loss": 0.4053,
|
| 66 |
+
"step": 400
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.23671751709626512,
|
| 70 |
+
"grad_norm": 11.869136810302734,
|
| 71 |
+
"learning_rate": 1.9055234087322463e-05,
|
| 72 |
+
"loss": 0.3754,
|
| 73 |
+
"step": 450
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.2630194634402946,
|
| 77 |
+
"grad_norm": 19.71166229248047,
|
| 78 |
+
"learning_rate": 1.8950026301946345e-05,
|
| 79 |
+
"loss": 0.3909,
|
| 80 |
+
"step": 500
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.289321409784324,
|
| 84 |
+
"grad_norm": 82.08606719970703,
|
| 85 |
+
"learning_rate": 1.8844818516570228e-05,
|
| 86 |
+
"loss": 0.3945,
|
| 87 |
+
"step": 550
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.3156233561283535,
|
| 91 |
+
"grad_norm": 5.5329389572143555,
|
| 92 |
+
"learning_rate": 1.873961073119411e-05,
|
| 93 |
+
"loss": 0.3182,
|
| 94 |
+
"step": 600
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.34192530247238295,
|
| 98 |
+
"grad_norm": 10.177448272705078,
|
| 99 |
+
"learning_rate": 1.8634402945817993e-05,
|
| 100 |
+
"loss": 0.3294,
|
| 101 |
+
"step": 650
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.3682272488164124,
|
| 105 |
+
"grad_norm": 11.596871376037598,
|
| 106 |
+
"learning_rate": 1.8529195160441876e-05,
|
| 107 |
+
"loss": 0.3445,
|
| 108 |
+
"step": 700
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.3945291951604419,
|
| 112 |
+
"grad_norm": 5.0095319747924805,
|
| 113 |
+
"learning_rate": 1.8423987375065758e-05,
|
| 114 |
+
"loss": 0.3403,
|
| 115 |
+
"step": 750
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.4208311415044713,
|
| 119 |
+
"grad_norm": 6.569547653198242,
|
| 120 |
+
"learning_rate": 1.8318779589689638e-05,
|
| 121 |
+
"loss": 0.2767,
|
| 122 |
+
"step": 800
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.4471330878485008,
|
| 126 |
+
"grad_norm": 7.269279956817627,
|
| 127 |
+
"learning_rate": 1.821357180431352e-05,
|
| 128 |
+
"loss": 0.3391,
|
| 129 |
+
"step": 850
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.47343503419253025,
|
| 133 |
+
"grad_norm": 6.403675079345703,
|
| 134 |
+
"learning_rate": 1.8108364018937403e-05,
|
| 135 |
+
"loss": 0.2729,
|
| 136 |
+
"step": 900
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.4997369805365597,
|
| 140 |
+
"grad_norm": 18.03633689880371,
|
| 141 |
+
"learning_rate": 1.8003156233561285e-05,
|
| 142 |
+
"loss": 0.2912,
|
| 143 |
+
"step": 950
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.5260389268805892,
|
| 147 |
+
"grad_norm": 11.609797477722168,
|
| 148 |
+
"learning_rate": 1.7897948448185168e-05,
|
| 149 |
+
"loss": 0.3678,
|
| 150 |
+
"step": 1000
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.5523408732246187,
|
| 154 |
+
"grad_norm": 8.587767601013184,
|
| 155 |
+
"learning_rate": 1.779274066280905e-05,
|
| 156 |
+
"loss": 0.2789,
|
| 157 |
+
"step": 1050
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.578642819568648,
|
| 161 |
+
"grad_norm": 2.894766092300415,
|
| 162 |
+
"learning_rate": 1.768753287743293e-05,
|
| 163 |
+
"loss": 0.2515,
|
| 164 |
+
"step": 1100
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.6049447659126775,
|
| 168 |
+
"grad_norm": 14.84619140625,
|
| 169 |
+
"learning_rate": 1.7582325092056812e-05,
|
| 170 |
+
"loss": 0.2817,
|
| 171 |
+
"step": 1150
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.631246712256707,
|
| 175 |
+
"grad_norm": 14.3659029006958,
|
| 176 |
+
"learning_rate": 1.7477117306680695e-05,
|
| 177 |
+
"loss": 0.2819,
|
| 178 |
+
"step": 1200
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.6575486586007364,
|
| 182 |
+
"grad_norm": 24.962841033935547,
|
| 183 |
+
"learning_rate": 1.7371909521304578e-05,
|
| 184 |
+
"loss": 0.275,
|
| 185 |
+
"step": 1250
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.6838506049447659,
|
| 189 |
+
"grad_norm": 2.1663622856140137,
|
| 190 |
+
"learning_rate": 1.726670173592846e-05,
|
| 191 |
+
"loss": 0.2513,
|
| 192 |
+
"step": 1300
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.7101525512887954,
|
| 196 |
+
"grad_norm": 20.324939727783203,
|
| 197 |
+
"learning_rate": 1.7161493950552343e-05,
|
| 198 |
+
"loss": 0.2862,
|
| 199 |
+
"step": 1350
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.7364544976328248,
|
| 203 |
+
"grad_norm": 12.115033149719238,
|
| 204 |
+
"learning_rate": 1.7056286165176222e-05,
|
| 205 |
+
"loss": 0.2489,
|
| 206 |
+
"step": 1400
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.7627564439768543,
|
| 210 |
+
"grad_norm": 9.39247989654541,
|
| 211 |
+
"learning_rate": 1.6951078379800105e-05,
|
| 212 |
+
"loss": 0.2199,
|
| 213 |
+
"step": 1450
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.7890583903208838,
|
| 217 |
+
"grad_norm": 11.820609092712402,
|
| 218 |
+
"learning_rate": 1.684587059442399e-05,
|
| 219 |
+
"loss": 0.2334,
|
| 220 |
+
"step": 1500
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.8153603366649133,
|
| 224 |
+
"grad_norm": 5.685638427734375,
|
| 225 |
+
"learning_rate": 1.6740662809047873e-05,
|
| 226 |
+
"loss": 0.2859,
|
| 227 |
+
"step": 1550
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.8416622830089426,
|
| 231 |
+
"grad_norm": 1.4263566732406616,
|
| 232 |
+
"learning_rate": 1.6635455023671752e-05,
|
| 233 |
+
"loss": 0.2712,
|
| 234 |
+
"step": 1600
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.8679642293529721,
|
| 238 |
+
"grad_norm": 43.12693786621094,
|
| 239 |
+
"learning_rate": 1.6530247238295635e-05,
|
| 240 |
+
"loss": 0.2236,
|
| 241 |
+
"step": 1650
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.8942661756970016,
|
| 245 |
+
"grad_norm": 18.322067260742188,
|
| 246 |
+
"learning_rate": 1.6425039452919518e-05,
|
| 247 |
+
"loss": 0.2176,
|
| 248 |
+
"step": 1700
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.920568122041031,
|
| 252 |
+
"grad_norm": 8.125885009765625,
|
| 253 |
+
"learning_rate": 1.63198316675434e-05,
|
| 254 |
+
"loss": 0.2344,
|
| 255 |
+
"step": 1750
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.9468700683850605,
|
| 259 |
+
"grad_norm": 4.2774457931518555,
|
| 260 |
+
"learning_rate": 1.6214623882167283e-05,
|
| 261 |
+
"loss": 0.2173,
|
| 262 |
+
"step": 1800
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.97317201472909,
|
| 266 |
+
"grad_norm": 8.311309814453125,
|
| 267 |
+
"learning_rate": 1.6109416096791165e-05,
|
| 268 |
+
"loss": 0.207,
|
| 269 |
+
"step": 1850
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.9994739610731194,
|
| 273 |
+
"grad_norm": 18.770065307617188,
|
| 274 |
+
"learning_rate": 1.6004208311415045e-05,
|
| 275 |
+
"loss": 0.2261,
|
| 276 |
+
"step": 1900
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 1.0,
|
| 280 |
+
"eval_FPR_aeb_Arab": 0.007114016533913859,
|
| 281 |
+
"eval_FPR_arb_Arab": 0.007306287251046666,
|
| 282 |
+
"eval_FPR_ars_Arab": 0.0274961810821419,
|
| 283 |
+
"eval_FPR_arz_Arab": 0.03769230768505917,
|
| 284 |
+
"eval_accuracy": 0.9382975924220497,
|
| 285 |
+
"eval_loss": 0.2227914035320282,
|
| 286 |
+
"eval_macro_f1": 0.8929982487077235,
|
| 287 |
+
"eval_runtime": 3.3475,
|
| 288 |
+
"eval_samples_per_second": 2270.666,
|
| 289 |
+
"eval_steps_per_second": 35.549,
|
| 290 |
+
"step": 1901
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 1.0257759074171489,
|
| 294 |
+
"grad_norm": 7.249199390411377,
|
| 295 |
+
"learning_rate": 1.5899000526038927e-05,
|
| 296 |
+
"loss": 0.1908,
|
| 297 |
+
"step": 1950
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 1.0520778537611783,
|
| 301 |
+
"grad_norm": 16.18492889404297,
|
| 302 |
+
"learning_rate": 1.579379274066281e-05,
|
| 303 |
+
"loss": 0.1919,
|
| 304 |
+
"step": 2000
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 1.0783798001052078,
|
| 308 |
+
"grad_norm": 6.383620262145996,
|
| 309 |
+
"learning_rate": 1.5688584955286692e-05,
|
| 310 |
+
"loss": 0.1662,
|
| 311 |
+
"step": 2050
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 1.1046817464492373,
|
| 315 |
+
"grad_norm": 2.7821247577667236,
|
| 316 |
+
"learning_rate": 1.5583377169910575e-05,
|
| 317 |
+
"loss": 0.1832,
|
| 318 |
+
"step": 2100
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 1.1309836927932668,
|
| 322 |
+
"grad_norm": 0.20694231986999512,
|
| 323 |
+
"learning_rate": 1.5478169384534458e-05,
|
| 324 |
+
"loss": 0.1277,
|
| 325 |
+
"step": 2150
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 1.157285639137296,
|
| 329 |
+
"grad_norm": 66.66133880615234,
|
| 330 |
+
"learning_rate": 1.5372961599158337e-05,
|
| 331 |
+
"loss": 0.1896,
|
| 332 |
+
"step": 2200
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 1.1835875854813256,
|
| 336 |
+
"grad_norm": 5.3264055252075195,
|
| 337 |
+
"learning_rate": 1.526775381378222e-05,
|
| 338 |
+
"loss": 0.1535,
|
| 339 |
+
"step": 2250
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 1.209889531825355,
|
| 343 |
+
"grad_norm": 3.480900526046753,
|
| 344 |
+
"learning_rate": 1.5162546028406104e-05,
|
| 345 |
+
"loss": 0.1767,
|
| 346 |
+
"step": 2300
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 1.2361914781693846,
|
| 350 |
+
"grad_norm": 2.1541006565093994,
|
| 351 |
+
"learning_rate": 1.5057338243029986e-05,
|
| 352 |
+
"loss": 0.2361,
|
| 353 |
+
"step": 2350
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 1.262493424513414,
|
| 357 |
+
"grad_norm": 13.037530899047852,
|
| 358 |
+
"learning_rate": 1.4952130457653869e-05,
|
| 359 |
+
"loss": 0.1733,
|
| 360 |
+
"step": 2400
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 1.2887953708574433,
|
| 364 |
+
"grad_norm": 6.1545281410217285,
|
| 365 |
+
"learning_rate": 1.484692267227775e-05,
|
| 366 |
+
"loss": 0.1608,
|
| 367 |
+
"step": 2450
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 1.3150973172014728,
|
| 371 |
+
"grad_norm": 1.8223601579666138,
|
| 372 |
+
"learning_rate": 1.4741714886901633e-05,
|
| 373 |
+
"loss": 0.1746,
|
| 374 |
+
"step": 2500
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 1.3413992635455023,
|
| 378 |
+
"grad_norm": 3.253241777420044,
|
| 379 |
+
"learning_rate": 1.4636507101525515e-05,
|
| 380 |
+
"loss": 0.1466,
|
| 381 |
+
"step": 2550
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 1.3677012098895318,
|
| 385 |
+
"grad_norm": 3.3945982456207275,
|
| 386 |
+
"learning_rate": 1.4531299316149396e-05,
|
| 387 |
+
"loss": 0.1732,
|
| 388 |
+
"step": 2600
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 1.3940031562335613,
|
| 392 |
+
"grad_norm": 6.702133655548096,
|
| 393 |
+
"learning_rate": 1.4426091530773279e-05,
|
| 394 |
+
"loss": 0.2324,
|
| 395 |
+
"step": 2650
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 1.4203051025775908,
|
| 399 |
+
"grad_norm": 3.2291910648345947,
|
| 400 |
+
"learning_rate": 1.4320883745397161e-05,
|
| 401 |
+
"loss": 0.1615,
|
| 402 |
+
"step": 2700
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 1.4466070489216203,
|
| 406 |
+
"grad_norm": 8.065141677856445,
|
| 407 |
+
"learning_rate": 1.4215675960021042e-05,
|
| 408 |
+
"loss": 0.1668,
|
| 409 |
+
"step": 2750
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 1.4729089952656498,
|
| 413 |
+
"grad_norm": 8.395434379577637,
|
| 414 |
+
"learning_rate": 1.4110468174644925e-05,
|
| 415 |
+
"loss": 0.2002,
|
| 416 |
+
"step": 2800
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 1.499210941609679,
|
| 420 |
+
"grad_norm": 5.985948085784912,
|
| 421 |
+
"learning_rate": 1.4005260389268807e-05,
|
| 422 |
+
"loss": 0.1338,
|
| 423 |
+
"step": 2850
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 1.5255128879537085,
|
| 427 |
+
"grad_norm": 4.8504791259765625,
|
| 428 |
+
"learning_rate": 1.3900052603892688e-05,
|
| 429 |
+
"loss": 0.1493,
|
| 430 |
+
"step": 2900
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 1.551814834297738,
|
| 434 |
+
"grad_norm": 30.86811637878418,
|
| 435 |
+
"learning_rate": 1.3794844818516571e-05,
|
| 436 |
+
"loss": 0.1653,
|
| 437 |
+
"step": 2950
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 1.5781167806417675,
|
| 441 |
+
"grad_norm": 8.025301933288574,
|
| 442 |
+
"learning_rate": 1.3689637033140453e-05,
|
| 443 |
+
"loss": 0.195,
|
| 444 |
+
"step": 3000
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 1.6044187269857968,
|
| 448 |
+
"grad_norm": 2.7844748497009277,
|
| 449 |
+
"learning_rate": 1.3584429247764334e-05,
|
| 450 |
+
"loss": 0.1513,
|
| 451 |
+
"step": 3050
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 1.6307206733298263,
|
| 455 |
+
"grad_norm": 15.212594032287598,
|
| 456 |
+
"learning_rate": 1.3479221462388219e-05,
|
| 457 |
+
"loss": 0.1311,
|
| 458 |
+
"step": 3100
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 1.6570226196738558,
|
| 462 |
+
"grad_norm": 7.984399795532227,
|
| 463 |
+
"learning_rate": 1.3374013677012101e-05,
|
| 464 |
+
"loss": 0.1699,
|
| 465 |
+
"step": 3150
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 1.6833245660178853,
|
| 469 |
+
"grad_norm": 2.66343092918396,
|
| 470 |
+
"learning_rate": 1.3268805891635982e-05,
|
| 471 |
+
"loss": 0.0987,
|
| 472 |
+
"step": 3200
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 1.7096265123619148,
|
| 476 |
+
"grad_norm": 1.7281841039657593,
|
| 477 |
+
"learning_rate": 1.3163598106259865e-05,
|
| 478 |
+
"loss": 0.1468,
|
| 479 |
+
"step": 3250
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 1.7359284587059443,
|
| 483 |
+
"grad_norm": 80.2880859375,
|
| 484 |
+
"learning_rate": 1.3058390320883747e-05,
|
| 485 |
+
"loss": 0.1225,
|
| 486 |
+
"step": 3300
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 1.7622304050499737,
|
| 490 |
+
"grad_norm": 3.2839515209198,
|
| 491 |
+
"learning_rate": 1.2953182535507628e-05,
|
| 492 |
+
"loss": 0.1612,
|
| 493 |
+
"step": 3350
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 1.7885323513940032,
|
| 497 |
+
"grad_norm": 6.35798978805542,
|
| 498 |
+
"learning_rate": 1.2847974750131511e-05,
|
| 499 |
+
"loss": 0.1319,
|
| 500 |
+
"step": 3400
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 1.8148342977380327,
|
| 504 |
+
"grad_norm": 17.910255432128906,
|
| 505 |
+
"learning_rate": 1.2742766964755394e-05,
|
| 506 |
+
"loss": 0.2161,
|
| 507 |
+
"step": 3450
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 1.8411362440820622,
|
| 511 |
+
"grad_norm": 2.275036573410034,
|
| 512 |
+
"learning_rate": 1.2637559179379274e-05,
|
| 513 |
+
"loss": 0.1118,
|
| 514 |
+
"step": 3500
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 1.8674381904260915,
|
| 518 |
+
"grad_norm": 20.091514587402344,
|
| 519 |
+
"learning_rate": 1.2532351394003157e-05,
|
| 520 |
+
"loss": 0.1463,
|
| 521 |
+
"step": 3550
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 1.893740136770121,
|
| 525 |
+
"grad_norm": 0.5615454912185669,
|
| 526 |
+
"learning_rate": 1.242714360862704e-05,
|
| 527 |
+
"loss": 0.1648,
|
| 528 |
+
"step": 3600
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 1.9200420831141505,
|
| 532 |
+
"grad_norm": 3.871091604232788,
|
| 533 |
+
"learning_rate": 1.232193582325092e-05,
|
| 534 |
+
"loss": 0.1325,
|
| 535 |
+
"step": 3650
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 1.9463440294581797,
|
| 539 |
+
"grad_norm": 1.768117904663086,
|
| 540 |
+
"learning_rate": 1.2216728037874803e-05,
|
| 541 |
+
"loss": 0.1664,
|
| 542 |
+
"step": 3700
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 1.9726459758022092,
|
| 546 |
+
"grad_norm": 5.8534393310546875,
|
| 547 |
+
"learning_rate": 1.2111520252498686e-05,
|
| 548 |
+
"loss": 0.1578,
|
| 549 |
+
"step": 3750
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 1.9989479221462387,
|
| 553 |
+
"grad_norm": 3.766312837600708,
|
| 554 |
+
"learning_rate": 1.2006312467122567e-05,
|
| 555 |
+
"loss": 0.1393,
|
| 556 |
+
"step": 3800
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 2.0,
|
| 560 |
+
"eval_FPR_aeb_Arab": 0.00384541434265614,
|
| 561 |
+
"eval_FPR_arb_Arab": 0.02134204960174158,
|
| 562 |
+
"eval_FPR_ars_Arab": 0.01041522010687193,
|
| 563 |
+
"eval_FPR_arz_Arab": 0.020192307688424557,
|
| 564 |
+
"eval_accuracy": 0.9590843310090778,
|
| 565 |
+
"eval_loss": 0.16003794968128204,
|
| 566 |
+
"eval_macro_f1": 0.937683933464698,
|
| 567 |
+
"eval_runtime": 3.3754,
|
| 568 |
+
"eval_samples_per_second": 2251.882,
|
| 569 |
+
"eval_steps_per_second": 35.255,
|
| 570 |
+
"step": 3802
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 2.0252498684902682,
|
| 574 |
+
"grad_norm": 14.620624542236328,
|
| 575 |
+
"learning_rate": 1.190110468174645e-05,
|
| 576 |
+
"loss": 0.073,
|
| 577 |
+
"step": 3850
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 2.0515518148342977,
|
| 581 |
+
"grad_norm": 1.2938824892044067,
|
| 582 |
+
"learning_rate": 1.1795896896370332e-05,
|
| 583 |
+
"loss": 0.1148,
|
| 584 |
+
"step": 3900
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 2.077853761178327,
|
| 588 |
+
"grad_norm": 3.313081979751587,
|
| 589 |
+
"learning_rate": 1.1690689110994216e-05,
|
| 590 |
+
"loss": 0.0746,
|
| 591 |
+
"step": 3950
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 2.1041557075223567,
|
| 595 |
+
"grad_norm": 2.0338821411132812,
|
| 596 |
+
"learning_rate": 1.1585481325618097e-05,
|
| 597 |
+
"loss": 0.0977,
|
| 598 |
+
"step": 4000
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 2.130457653866386,
|
| 602 |
+
"grad_norm": 0.055320367217063904,
|
| 603 |
+
"learning_rate": 1.148027354024198e-05,
|
| 604 |
+
"loss": 0.096,
|
| 605 |
+
"step": 4050
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 2.1567596002104157,
|
| 609 |
+
"grad_norm": 1.0964843034744263,
|
| 610 |
+
"learning_rate": 1.1375065754865862e-05,
|
| 611 |
+
"loss": 0.0642,
|
| 612 |
+
"step": 4100
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 2.183061546554445,
|
| 616 |
+
"grad_norm": 1.0340650081634521,
|
| 617 |
+
"learning_rate": 1.1269857969489743e-05,
|
| 618 |
+
"loss": 0.1007,
|
| 619 |
+
"step": 4150
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 2.2093634928984747,
|
| 623 |
+
"grad_norm": 4.971868515014648,
|
| 624 |
+
"learning_rate": 1.1164650184113626e-05,
|
| 625 |
+
"loss": 0.1083,
|
| 626 |
+
"step": 4200
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 2.2356654392425037,
|
| 630 |
+
"grad_norm": 0.49501538276672363,
|
| 631 |
+
"learning_rate": 1.1059442398737508e-05,
|
| 632 |
+
"loss": 0.1068,
|
| 633 |
+
"step": 4250
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 2.2619673855865337,
|
| 637 |
+
"grad_norm": 6.13097620010376,
|
| 638 |
+
"learning_rate": 1.095423461336139e-05,
|
| 639 |
+
"loss": 0.0946,
|
| 640 |
+
"step": 4300
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 2.2882693319305627,
|
| 644 |
+
"grad_norm": 5.904395580291748,
|
| 645 |
+
"learning_rate": 1.0849026827985272e-05,
|
| 646 |
+
"loss": 0.0758,
|
| 647 |
+
"step": 4350
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 2.314571278274592,
|
| 651 |
+
"grad_norm": 4.2567138671875,
|
| 652 |
+
"learning_rate": 1.0743819042609155e-05,
|
| 653 |
+
"loss": 0.111,
|
| 654 |
+
"step": 4400
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 2.3408732246186217,
|
| 658 |
+
"grad_norm": 0.1440172791481018,
|
| 659 |
+
"learning_rate": 1.0638611257233035e-05,
|
| 660 |
+
"loss": 0.1104,
|
| 661 |
+
"step": 4450
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 2.367175170962651,
|
| 665 |
+
"grad_norm": 7.970292091369629,
|
| 666 |
+
"learning_rate": 1.0533403471856918e-05,
|
| 667 |
+
"loss": 0.0891,
|
| 668 |
+
"step": 4500
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 2.3934771173066807,
|
| 672 |
+
"grad_norm": 2.4047350883483887,
|
| 673 |
+
"learning_rate": 1.04281956864808e-05,
|
| 674 |
+
"loss": 0.1242,
|
| 675 |
+
"step": 4550
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 2.41977906365071,
|
| 679 |
+
"grad_norm": 14.3352689743042,
|
| 680 |
+
"learning_rate": 1.0322987901104682e-05,
|
| 681 |
+
"loss": 0.0649,
|
| 682 |
+
"step": 4600
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 2.4460810099947397,
|
| 686 |
+
"grad_norm": 25.1345157623291,
|
| 687 |
+
"learning_rate": 1.0217780115728564e-05,
|
| 688 |
+
"loss": 0.0712,
|
| 689 |
+
"step": 4650
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 2.472382956338769,
|
| 693 |
+
"grad_norm": 1.9517714977264404,
|
| 694 |
+
"learning_rate": 1.0112572330352445e-05,
|
| 695 |
+
"loss": 0.1032,
|
| 696 |
+
"step": 4700
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 2.4986849026827986,
|
| 700 |
+
"grad_norm": 1.327062726020813,
|
| 701 |
+
"learning_rate": 1.000736454497633e-05,
|
| 702 |
+
"loss": 0.0962,
|
| 703 |
+
"step": 4750
|
| 704 |
+
},
|
| 705 |
+
{
|
| 706 |
+
"epoch": 2.524986849026828,
|
| 707 |
+
"grad_norm": 10.327136993408203,
|
| 708 |
+
"learning_rate": 9.90215675960021e-06,
|
| 709 |
+
"loss": 0.1092,
|
| 710 |
+
"step": 4800
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 2.5512887953708576,
|
| 714 |
+
"grad_norm": 3.8997962474823,
|
| 715 |
+
"learning_rate": 9.796948974224093e-06,
|
| 716 |
+
"loss": 0.0681,
|
| 717 |
+
"step": 4850
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 2.5775907417148867,
|
| 721 |
+
"grad_norm": 0.270841121673584,
|
| 722 |
+
"learning_rate": 9.691741188847975e-06,
|
| 723 |
+
"loss": 0.1265,
|
| 724 |
+
"step": 4900
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 2.6038926880589166,
|
| 728 |
+
"grad_norm": 0.8220506906509399,
|
| 729 |
+
"learning_rate": 9.586533403471858e-06,
|
| 730 |
+
"loss": 0.0726,
|
| 731 |
+
"step": 4950
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 2.6301946344029457,
|
| 735 |
+
"grad_norm": 1.4264813661575317,
|
| 736 |
+
"learning_rate": 9.48132561809574e-06,
|
| 737 |
+
"loss": 0.0707,
|
| 738 |
+
"step": 5000
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 2.656496580746975,
|
| 742 |
+
"grad_norm": 5.427404880523682,
|
| 743 |
+
"learning_rate": 9.376117832719622e-06,
|
| 744 |
+
"loss": 0.0762,
|
| 745 |
+
"step": 5050
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 2.6827985270910046,
|
| 749 |
+
"grad_norm": 39.103004455566406,
|
| 750 |
+
"learning_rate": 9.270910047343504e-06,
|
| 751 |
+
"loss": 0.0733,
|
| 752 |
+
"step": 5100
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 2.709100473435034,
|
| 756 |
+
"grad_norm": 2.8170275688171387,
|
| 757 |
+
"learning_rate": 9.165702261967387e-06,
|
| 758 |
+
"loss": 0.105,
|
| 759 |
+
"step": 5150
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 2.7354024197790636,
|
| 763 |
+
"grad_norm": 6.285243034362793,
|
| 764 |
+
"learning_rate": 9.060494476591268e-06,
|
| 765 |
+
"loss": 0.1054,
|
| 766 |
+
"step": 5200
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 2.761704366123093,
|
| 770 |
+
"grad_norm": 34.959102630615234,
|
| 771 |
+
"learning_rate": 8.95528669121515e-06,
|
| 772 |
+
"loss": 0.1168,
|
| 773 |
+
"step": 5250
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 2.7880063124671226,
|
| 777 |
+
"grad_norm": 2.698047399520874,
|
| 778 |
+
"learning_rate": 8.850078905839033e-06,
|
| 779 |
+
"loss": 0.0664,
|
| 780 |
+
"step": 5300
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 2.814308258811152,
|
| 784 |
+
"grad_norm": 6.107056617736816,
|
| 785 |
+
"learning_rate": 8.744871120462914e-06,
|
| 786 |
+
"loss": 0.0866,
|
| 787 |
+
"step": 5350
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 2.8406102051551816,
|
| 791 |
+
"grad_norm": 6.0492634773254395,
|
| 792 |
+
"learning_rate": 8.639663335086798e-06,
|
| 793 |
+
"loss": 0.0921,
|
| 794 |
+
"step": 5400
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 2.866912151499211,
|
| 798 |
+
"grad_norm": 38.75687789916992,
|
| 799 |
+
"learning_rate": 8.534455549710679e-06,
|
| 800 |
+
"loss": 0.0932,
|
| 801 |
+
"step": 5450
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 2.8932140978432406,
|
| 805 |
+
"grad_norm": 5.730583190917969,
|
| 806 |
+
"learning_rate": 8.429247764334562e-06,
|
| 807 |
+
"loss": 0.0809,
|
| 808 |
+
"step": 5500
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 2.9195160441872696,
|
| 812 |
+
"grad_norm": 0.2023005187511444,
|
| 813 |
+
"learning_rate": 8.324039978958444e-06,
|
| 814 |
+
"loss": 0.0723,
|
| 815 |
+
"step": 5550
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 2.9458179905312996,
|
| 819 |
+
"grad_norm": 24.816850662231445,
|
| 820 |
+
"learning_rate": 8.218832193582325e-06,
|
| 821 |
+
"loss": 0.0758,
|
| 822 |
+
"step": 5600
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 2.9721199368753286,
|
| 826 |
+
"grad_norm": 0.10021505504846573,
|
| 827 |
+
"learning_rate": 8.113624408206208e-06,
|
| 828 |
+
"loss": 0.0787,
|
| 829 |
+
"step": 5650
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 2.998421883219358,
|
| 833 |
+
"grad_norm": 3.8389430046081543,
|
| 834 |
+
"learning_rate": 8.00841662283009e-06,
|
| 835 |
+
"loss": 0.1321,
|
| 836 |
+
"step": 5700
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 3.0,
|
| 840 |
+
"eval_FPR_aeb_Arab": 0.004037685059788947,
|
| 841 |
+
"eval_FPR_arb_Arab": 0.009421265139507543,
|
| 842 |
+
"eval_FPR_ars_Arab": 0.005971392861273241,
|
| 843 |
+
"eval_FPR_arz_Arab": 0.020192307688424557,
|
| 844 |
+
"eval_accuracy": 0.9713195632153664,
|
| 845 |
+
"eval_loss": 0.15336963534355164,
|
| 846 |
+
"eval_macro_f1": 0.9569564393242584,
|
| 847 |
+
"eval_runtime": 3.3689,
|
| 848 |
+
"eval_samples_per_second": 2256.259,
|
| 849 |
+
"eval_steps_per_second": 35.324,
|
| 850 |
+
"step": 5703
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 3.0247238295633876,
|
| 854 |
+
"grad_norm": 0.30554988980293274,
|
| 855 |
+
"learning_rate": 7.903208837453971e-06,
|
| 856 |
+
"loss": 0.0937,
|
| 857 |
+
"step": 5750
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 3.051025775907417,
|
| 861 |
+
"grad_norm": 37.439884185791016,
|
| 862 |
+
"learning_rate": 7.798001052077856e-06,
|
| 863 |
+
"loss": 0.0578,
|
| 864 |
+
"step": 5800
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 3.0773277222514466,
|
| 868 |
+
"grad_norm": 0.0822492390871048,
|
| 869 |
+
"learning_rate": 7.692793266701737e-06,
|
| 870 |
+
"loss": 0.0636,
|
| 871 |
+
"step": 5850
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 3.103629668595476,
|
| 875 |
+
"grad_norm": 2.7918007373809814,
|
| 876 |
+
"learning_rate": 7.587585481325619e-06,
|
| 877 |
+
"loss": 0.0378,
|
| 878 |
+
"step": 5900
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 3.1299316149395056,
|
| 882 |
+
"grad_norm": 32.899818420410156,
|
| 883 |
+
"learning_rate": 7.482377695949501e-06,
|
| 884 |
+
"loss": 0.0609,
|
| 885 |
+
"step": 5950
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 3.156233561283535,
|
| 889 |
+
"grad_norm": 0.06830895692110062,
|
| 890 |
+
"learning_rate": 7.377169910573383e-06,
|
| 891 |
+
"loss": 0.0433,
|
| 892 |
+
"step": 6000
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 3.1825355076275645,
|
| 896 |
+
"grad_norm": 54.685489654541016,
|
| 897 |
+
"learning_rate": 7.271962125197265e-06,
|
| 898 |
+
"loss": 0.056,
|
| 899 |
+
"step": 6050
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 3.208837453971594,
|
| 903 |
+
"grad_norm": 0.8175523281097412,
|
| 904 |
+
"learning_rate": 7.166754339821147e-06,
|
| 905 |
+
"loss": 0.0341,
|
| 906 |
+
"step": 6100
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 3.2351394003156235,
|
| 910 |
+
"grad_norm": 0.33226722478866577,
|
| 911 |
+
"learning_rate": 7.061546554445029e-06,
|
| 912 |
+
"loss": 0.0482,
|
| 913 |
+
"step": 6150
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 3.2614413466596526,
|
| 917 |
+
"grad_norm": 1.425661325454712,
|
| 918 |
+
"learning_rate": 6.956338769068912e-06,
|
| 919 |
+
"loss": 0.0673,
|
| 920 |
+
"step": 6200
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 3.2877432930036825,
|
| 924 |
+
"grad_norm": 0.18895921111106873,
|
| 925 |
+
"learning_rate": 6.851130983692794e-06,
|
| 926 |
+
"loss": 0.0359,
|
| 927 |
+
"step": 6250
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 3.3140452393477116,
|
| 931 |
+
"grad_norm": 0.6557305455207825,
|
| 932 |
+
"learning_rate": 6.7459231983166766e-06,
|
| 933 |
+
"loss": 0.0382,
|
| 934 |
+
"step": 6300
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 3.340347185691741,
|
| 938 |
+
"grad_norm": 0.008198770694434643,
|
| 939 |
+
"learning_rate": 6.640715412940558e-06,
|
| 940 |
+
"loss": 0.0566,
|
| 941 |
+
"step": 6350
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 3.3666491320357705,
|
| 945 |
+
"grad_norm": 0.4695976674556732,
|
| 946 |
+
"learning_rate": 6.53550762756444e-06,
|
| 947 |
+
"loss": 0.0654,
|
| 948 |
+
"step": 6400
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 3.3929510783798,
|
| 952 |
+
"grad_norm": 8.628214836120605,
|
| 953 |
+
"learning_rate": 6.430299842188323e-06,
|
| 954 |
+
"loss": 0.0427,
|
| 955 |
+
"step": 6450
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 3.4192530247238295,
|
| 959 |
+
"grad_norm": 0.9650713801383972,
|
| 960 |
+
"learning_rate": 6.3250920568122044e-06,
|
| 961 |
+
"loss": 0.0645,
|
| 962 |
+
"step": 6500
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 3.445554971067859,
|
| 966 |
+
"grad_norm": 5.836668968200684,
|
| 967 |
+
"learning_rate": 6.219884271436086e-06,
|
| 968 |
+
"loss": 0.0397,
|
| 969 |
+
"step": 6550
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 3.4718569174118885,
|
| 973 |
+
"grad_norm": 0.03976545110344887,
|
| 974 |
+
"learning_rate": 6.11467648605997e-06,
|
| 975 |
+
"loss": 0.0586,
|
| 976 |
+
"step": 6600
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 3.498158863755918,
|
| 980 |
+
"grad_norm": 19.784215927124023,
|
| 981 |
+
"learning_rate": 6.009468700683851e-06,
|
| 982 |
+
"loss": 0.033,
|
| 983 |
+
"step": 6650
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 3.5244608100999475,
|
| 987 |
+
"grad_norm": 2.075496196746826,
|
| 988 |
+
"learning_rate": 5.904260915307733e-06,
|
| 989 |
+
"loss": 0.0776,
|
| 990 |
+
"step": 6700
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 3.550762756443977,
|
| 994 |
+
"grad_norm": 7.05810022354126,
|
| 995 |
+
"learning_rate": 5.799053129931616e-06,
|
| 996 |
+
"loss": 0.0905,
|
| 997 |
+
"step": 6750
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 3.5770647027880065,
|
| 1001 |
+
"grad_norm": 0.012984913773834705,
|
| 1002 |
+
"learning_rate": 5.6938453445554975e-06,
|
| 1003 |
+
"loss": 0.0542,
|
| 1004 |
+
"step": 6800
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 3.6033666491320355,
|
| 1008 |
+
"grad_norm": 2.701481342315674,
|
| 1009 |
+
"learning_rate": 5.588637559179379e-06,
|
| 1010 |
+
"loss": 0.0625,
|
| 1011 |
+
"step": 6850
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 3.6296685954760655,
|
| 1015 |
+
"grad_norm": 0.41872379183769226,
|
| 1016 |
+
"learning_rate": 5.483429773803262e-06,
|
| 1017 |
+
"loss": 0.0795,
|
| 1018 |
+
"step": 6900
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 3.6559705418200945,
|
| 1022 |
+
"grad_norm": 0.13123294711112976,
|
| 1023 |
+
"learning_rate": 5.378221988427144e-06,
|
| 1024 |
+
"loss": 0.0296,
|
| 1025 |
+
"step": 6950
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 3.682272488164124,
|
| 1029 |
+
"grad_norm": 0.7190969586372375,
|
| 1030 |
+
"learning_rate": 5.273014203051027e-06,
|
| 1031 |
+
"loss": 0.0666,
|
| 1032 |
+
"step": 7000
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 3.7085744345081535,
|
| 1036 |
+
"grad_norm": 0.1744261384010315,
|
| 1037 |
+
"learning_rate": 5.167806417674909e-06,
|
| 1038 |
+
"loss": 0.0328,
|
| 1039 |
+
"step": 7050
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 3.734876380852183,
|
| 1043 |
+
"grad_norm": 0.5619340538978577,
|
| 1044 |
+
"learning_rate": 5.062598632298791e-06,
|
| 1045 |
+
"loss": 0.0755,
|
| 1046 |
+
"step": 7100
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 3.7611783271962125,
|
| 1050 |
+
"grad_norm": 40.665706634521484,
|
| 1051 |
+
"learning_rate": 4.957390846922673e-06,
|
| 1052 |
+
"loss": 0.1041,
|
| 1053 |
+
"step": 7150
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 3.787480273540242,
|
| 1057 |
+
"grad_norm": 0.06617475301027298,
|
| 1058 |
+
"learning_rate": 4.852183061546555e-06,
|
| 1059 |
+
"loss": 0.0264,
|
| 1060 |
+
"step": 7200
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 3.8137822198842715,
|
| 1064 |
+
"grad_norm": 5.0283966064453125,
|
| 1065 |
+
"learning_rate": 4.746975276170437e-06,
|
| 1066 |
+
"loss": 0.0789,
|
| 1067 |
+
"step": 7250
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 3.840084166228301,
|
| 1071 |
+
"grad_norm": 5.660898208618164,
|
| 1072 |
+
"learning_rate": 4.641767490794319e-06,
|
| 1073 |
+
"loss": 0.0582,
|
| 1074 |
+
"step": 7300
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 3.8663861125723304,
|
| 1078 |
+
"grad_norm": 0.8503484725952148,
|
| 1079 |
+
"learning_rate": 4.536559705418201e-06,
|
| 1080 |
+
"loss": 0.0862,
|
| 1081 |
+
"step": 7350
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 3.89268805891636,
|
| 1085 |
+
"grad_norm": 13.575056076049805,
|
| 1086 |
+
"learning_rate": 4.431351920042084e-06,
|
| 1087 |
+
"loss": 0.0554,
|
| 1088 |
+
"step": 7400
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 3.9189900052603894,
|
| 1092 |
+
"grad_norm": 0.25003504753112793,
|
| 1093 |
+
"learning_rate": 4.3261441346659654e-06,
|
| 1094 |
+
"loss": 0.0504,
|
| 1095 |
+
"step": 7450
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 3.9452919516044185,
|
| 1099 |
+
"grad_norm": 0.022247493267059326,
|
| 1100 |
+
"learning_rate": 4.220936349289847e-06,
|
| 1101 |
+
"loss": 0.0663,
|
| 1102 |
+
"step": 7500
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 3.9715938979484484,
|
| 1106 |
+
"grad_norm": 0.2591884136199951,
|
| 1107 |
+
"learning_rate": 4.11572856391373e-06,
|
| 1108 |
+
"loss": 0.0361,
|
| 1109 |
+
"step": 7550
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 3.9978958442924775,
|
| 1113 |
+
"grad_norm": 6.533713340759277,
|
| 1114 |
+
"learning_rate": 4.010520778537612e-06,
|
| 1115 |
+
"loss": 0.0293,
|
| 1116 |
+
"step": 7600
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 4.0,
|
| 1120 |
+
"eval_FPR_aeb_Arab": 0.004229955776921754,
|
| 1121 |
+
"eval_FPR_arb_Arab": 0.011343972310835613,
|
| 1122 |
+
"eval_FPR_ars_Arab": 0.00458269684702365,
|
| 1123 |
+
"eval_FPR_arz_Arab": 0.015576923073927515,
|
| 1124 |
+
"eval_accuracy": 0.9743454808577818,
|
| 1125 |
+
"eval_loss": 0.15085552632808685,
|
| 1126 |
+
"eval_macro_f1": 0.9633717243752477,
|
| 1127 |
+
"eval_runtime": 3.3689,
|
| 1128 |
+
"eval_samples_per_second": 2256.225,
|
| 1129 |
+
"eval_steps_per_second": 35.323,
|
| 1130 |
+
"step": 7604
|
| 1131 |
+
}
|
| 1132 |
+
],
|
| 1133 |
+
"logging_steps": 50,
|
| 1134 |
+
"max_steps": 9505,
|
| 1135 |
+
"num_input_tokens_seen": 0,
|
| 1136 |
+
"num_train_epochs": 5,
|
| 1137 |
+
"save_steps": 500,
|
| 1138 |
+
"stateful_callbacks": {
|
| 1139 |
+
"EarlyStoppingCallback": {
|
| 1140 |
+
"args": {
|
| 1141 |
+
"early_stopping_patience": 2,
|
| 1142 |
+
"early_stopping_threshold": 0.0
|
| 1143 |
+
},
|
| 1144 |
+
"attributes": {
|
| 1145 |
+
"early_stopping_patience_counter": 0
|
| 1146 |
+
}
|
| 1147 |
+
},
|
| 1148 |
+
"TrainerControl": {
|
| 1149 |
+
"args": {
|
| 1150 |
+
"should_epoch_stop": false,
|
| 1151 |
+
"should_evaluate": false,
|
| 1152 |
+
"should_log": false,
|
| 1153 |
+
"should_save": true,
|
| 1154 |
+
"should_training_stop": false
|
| 1155 |
+
},
|
| 1156 |
+
"attributes": {}
|
| 1157 |
+
}
|
| 1158 |
+
},
|
| 1159 |
+
"total_flos": 3440682832634112.0,
|
| 1160 |
+
"train_batch_size": 32,
|
| 1161 |
+
"trial_name": null,
|
| 1162 |
+
"trial_params": null
|
| 1163 |
+
}
|
checkpoint-7604/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
|
| 3 |
+
size 5777
|
checkpoint-7604/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-9505/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "aeb_Arab",
+    "1": "arb_Arab",
+    "2": "ars_Arab",
+    "3": "arz_Arab"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "aeb_Arab": 0,
+    "arb_Arab": 1,
+    "ars_Arab": 2,
+    "arz_Arab": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.56.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30000
+}
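The config above fixes the four dialect labels and their ids, so a checkpoint from this upload can be loaded with the standard transformers API and its logits mapped back to label names through id2label. A minimal sketch, assuming the files are available locally under a checkpoint-9505/ directory (substitute the actual repo id or path):

# Minimal sketch: load the uploaded BERT dialect classifier and map logits
# back to the label names defined in config.json (id2label).
# The local path "checkpoint-9505" is an assumption for illustration.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

path = "checkpoint-9505"
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(path)
model.eval()

def predict_dialect(text: str) -> str:
    # Tokenize, run a forward pass, and return the highest-scoring label name.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    pred_id = int(logits.argmax(dim=-1))
    return model.config.id2label[pred_id]  # e.g. "arz_Arab"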
checkpoint-9505/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:496bb5c365c925f55ceae6595a8e715b409815cc8b6ff91706c35fe83c24f363
+size 436361208
checkpoint-9505/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78fce8e6a78a3dd9090bb6fea2c41408a1f66de87245c884b78dcd7d0bf917fb
+size 872846731
checkpoint-9505/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c65e00000d546fd729719d9d823f0bbe6b2fad692c4b49b96b131bba6b22e84b
+size 14645
checkpoint-9505/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f176115dc4e83049fd3dff5a3ca9b4c02dc6d882e278af3ec1e96a2bcfccdaf
+size 1383
checkpoint-9505/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60e6a15eeffc5f00f2067bcaf3b35d9b2964df7e31ed9651c83e5191924ebd3b
+size 1465
checkpoint-9505/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
checkpoint-9505/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
checkpoint-9505/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "full_tokenizer_file": null,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
checkpoint-9505/trainer_state.json
ADDED
|
@@ -0,0 +1,1443 @@
| 1 |
+
{
|
| 2 |
+
"best_global_step": 9505,
|
| 3 |
+
"best_metric": 0.9641750099415428,
|
| 4 |
+
"best_model_checkpoint": "camelbert_madar_task5/checkpoint-9505",
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 9505,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.026301946344029457,
|
| 14 |
+
"grad_norm": 21.061479568481445,
|
| 15 |
+
"learning_rate": 1.9896896370331405e-05,
|
| 16 |
+
"loss": 0.9381,
|
| 17 |
+
"step": 50
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.052603892688058915,
|
| 21 |
+
"grad_norm": 4.199251651763916,
|
| 22 |
+
"learning_rate": 1.9791688584955288e-05,
|
| 23 |
+
"loss": 0.5182,
|
| 24 |
+
"step": 100
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.07890583903208838,
|
| 28 |
+
"grad_norm": 22.227828979492188,
|
| 29 |
+
"learning_rate": 1.968648079957917e-05,
|
| 30 |
+
"loss": 0.4486,
|
| 31 |
+
"step": 150
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.10520778537611783,
|
| 35 |
+
"grad_norm": 7.481734275817871,
|
| 36 |
+
"learning_rate": 1.9581273014203053e-05,
|
| 37 |
+
"loss": 0.4422,
|
| 38 |
+
"step": 200
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.1315097317201473,
|
| 42 |
+
"grad_norm": 9.7647705078125,
|
| 43 |
+
"learning_rate": 1.9476065228826936e-05,
|
| 44 |
+
"loss": 0.4304,
|
| 45 |
+
"step": 250
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.15781167806417676,
|
| 49 |
+
"grad_norm": 12.080931663513184,
|
| 50 |
+
"learning_rate": 1.9370857443450818e-05,
|
| 51 |
+
"loss": 0.3672,
|
| 52 |
+
"step": 300
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.1841136244082062,
|
| 56 |
+
"grad_norm": 11.353347778320312,
|
| 57 |
+
"learning_rate": 1.9265649658074697e-05,
|
| 58 |
+
"loss": 0.3771,
|
| 59 |
+
"step": 350
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.21041557075223566,
|
| 63 |
+
"grad_norm": 3.3302719593048096,
|
| 64 |
+
"learning_rate": 1.916044187269858e-05,
|
| 65 |
+
"loss": 0.4053,
|
| 66 |
+
"step": 400
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.23671751709626512,
|
| 70 |
+
"grad_norm": 11.869136810302734,
|
| 71 |
+
"learning_rate": 1.9055234087322463e-05,
|
| 72 |
+
"loss": 0.3754,
|
| 73 |
+
"step": 450
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.2630194634402946,
|
| 77 |
+
"grad_norm": 19.71166229248047,
|
| 78 |
+
"learning_rate": 1.8950026301946345e-05,
|
| 79 |
+
"loss": 0.3909,
|
| 80 |
+
"step": 500
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.289321409784324,
|
| 84 |
+
"grad_norm": 82.08606719970703,
|
| 85 |
+
"learning_rate": 1.8844818516570228e-05,
|
| 86 |
+
"loss": 0.3945,
|
| 87 |
+
"step": 550
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.3156233561283535,
|
| 91 |
+
"grad_norm": 5.5329389572143555,
|
| 92 |
+
"learning_rate": 1.873961073119411e-05,
|
| 93 |
+
"loss": 0.3182,
|
| 94 |
+
"step": 600
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 0.34192530247238295,
|
| 98 |
+
"grad_norm": 10.177448272705078,
|
| 99 |
+
"learning_rate": 1.8634402945817993e-05,
|
| 100 |
+
"loss": 0.3294,
|
| 101 |
+
"step": 650
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"epoch": 0.3682272488164124,
|
| 105 |
+
"grad_norm": 11.596871376037598,
|
| 106 |
+
"learning_rate": 1.8529195160441876e-05,
|
| 107 |
+
"loss": 0.3445,
|
| 108 |
+
"step": 700
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 0.3945291951604419,
|
| 112 |
+
"grad_norm": 5.0095319747924805,
|
| 113 |
+
"learning_rate": 1.8423987375065758e-05,
|
| 114 |
+
"loss": 0.3403,
|
| 115 |
+
"step": 750
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.4208311415044713,
|
| 119 |
+
"grad_norm": 6.569547653198242,
|
| 120 |
+
"learning_rate": 1.8318779589689638e-05,
|
| 121 |
+
"loss": 0.2767,
|
| 122 |
+
"step": 800
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 0.4471330878485008,
|
| 126 |
+
"grad_norm": 7.269279956817627,
|
| 127 |
+
"learning_rate": 1.821357180431352e-05,
|
| 128 |
+
"loss": 0.3391,
|
| 129 |
+
"step": 850
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 0.47343503419253025,
|
| 133 |
+
"grad_norm": 6.403675079345703,
|
| 134 |
+
"learning_rate": 1.8108364018937403e-05,
|
| 135 |
+
"loss": 0.2729,
|
| 136 |
+
"step": 900
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 0.4997369805365597,
|
| 140 |
+
"grad_norm": 18.03633689880371,
|
| 141 |
+
"learning_rate": 1.8003156233561285e-05,
|
| 142 |
+
"loss": 0.2912,
|
| 143 |
+
"step": 950
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 0.5260389268805892,
|
| 147 |
+
"grad_norm": 11.609797477722168,
|
| 148 |
+
"learning_rate": 1.7897948448185168e-05,
|
| 149 |
+
"loss": 0.3678,
|
| 150 |
+
"step": 1000
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 0.5523408732246187,
|
| 154 |
+
"grad_norm": 8.587767601013184,
|
| 155 |
+
"learning_rate": 1.779274066280905e-05,
|
| 156 |
+
"loss": 0.2789,
|
| 157 |
+
"step": 1050
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 0.578642819568648,
|
| 161 |
+
"grad_norm": 2.894766092300415,
|
| 162 |
+
"learning_rate": 1.768753287743293e-05,
|
| 163 |
+
"loss": 0.2515,
|
| 164 |
+
"step": 1100
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 0.6049447659126775,
|
| 168 |
+
"grad_norm": 14.84619140625,
|
| 169 |
+
"learning_rate": 1.7582325092056812e-05,
|
| 170 |
+
"loss": 0.2817,
|
| 171 |
+
"step": 1150
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 0.631246712256707,
|
| 175 |
+
"grad_norm": 14.3659029006958,
|
| 176 |
+
"learning_rate": 1.7477117306680695e-05,
|
| 177 |
+
"loss": 0.2819,
|
| 178 |
+
"step": 1200
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.6575486586007364,
|
| 182 |
+
"grad_norm": 24.962841033935547,
|
| 183 |
+
"learning_rate": 1.7371909521304578e-05,
|
| 184 |
+
"loss": 0.275,
|
| 185 |
+
"step": 1250
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.6838506049447659,
|
| 189 |
+
"grad_norm": 2.1663622856140137,
|
| 190 |
+
"learning_rate": 1.726670173592846e-05,
|
| 191 |
+
"loss": 0.2513,
|
| 192 |
+
"step": 1300
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.7101525512887954,
|
| 196 |
+
"grad_norm": 20.324939727783203,
|
| 197 |
+
"learning_rate": 1.7161493950552343e-05,
|
| 198 |
+
"loss": 0.2862,
|
| 199 |
+
"step": 1350
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.7364544976328248,
|
| 203 |
+
"grad_norm": 12.115033149719238,
|
| 204 |
+
"learning_rate": 1.7056286165176222e-05,
|
| 205 |
+
"loss": 0.2489,
|
| 206 |
+
"step": 1400
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.7627564439768543,
|
| 210 |
+
"grad_norm": 9.39247989654541,
|
| 211 |
+
"learning_rate": 1.6951078379800105e-05,
|
| 212 |
+
"loss": 0.2199,
|
| 213 |
+
"step": 1450
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.7890583903208838,
|
| 217 |
+
"grad_norm": 11.820609092712402,
|
| 218 |
+
"learning_rate": 1.684587059442399e-05,
|
| 219 |
+
"loss": 0.2334,
|
| 220 |
+
"step": 1500
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.8153603366649133,
|
| 224 |
+
"grad_norm": 5.685638427734375,
|
| 225 |
+
"learning_rate": 1.6740662809047873e-05,
|
| 226 |
+
"loss": 0.2859,
|
| 227 |
+
"step": 1550
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.8416622830089426,
|
| 231 |
+
"grad_norm": 1.4263566732406616,
|
| 232 |
+
"learning_rate": 1.6635455023671752e-05,
|
| 233 |
+
"loss": 0.2712,
|
| 234 |
+
"step": 1600
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 0.8679642293529721,
|
| 238 |
+
"grad_norm": 43.12693786621094,
|
| 239 |
+
"learning_rate": 1.6530247238295635e-05,
|
| 240 |
+
"loss": 0.2236,
|
| 241 |
+
"step": 1650
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 0.8942661756970016,
|
| 245 |
+
"grad_norm": 18.322067260742188,
|
| 246 |
+
"learning_rate": 1.6425039452919518e-05,
|
| 247 |
+
"loss": 0.2176,
|
| 248 |
+
"step": 1700
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 0.920568122041031,
|
| 252 |
+
"grad_norm": 8.125885009765625,
|
| 253 |
+
"learning_rate": 1.63198316675434e-05,
|
| 254 |
+
"loss": 0.2344,
|
| 255 |
+
"step": 1750
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 0.9468700683850605,
|
| 259 |
+
"grad_norm": 4.2774457931518555,
|
| 260 |
+
"learning_rate": 1.6214623882167283e-05,
|
| 261 |
+
"loss": 0.2173,
|
| 262 |
+
"step": 1800
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"epoch": 0.97317201472909,
|
| 266 |
+
"grad_norm": 8.311309814453125,
|
| 267 |
+
"learning_rate": 1.6109416096791165e-05,
|
| 268 |
+
"loss": 0.207,
|
| 269 |
+
"step": 1850
|
| 270 |
+
},
|
| 271 |
+
{
|
| 272 |
+
"epoch": 0.9994739610731194,
|
| 273 |
+
"grad_norm": 18.770065307617188,
|
| 274 |
+
"learning_rate": 1.6004208311415045e-05,
|
| 275 |
+
"loss": 0.2261,
|
| 276 |
+
"step": 1900
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"epoch": 1.0,
|
| 280 |
+
"eval_FPR_aeb_Arab": 0.007114016533913859,
|
| 281 |
+
"eval_FPR_arb_Arab": 0.007306287251046666,
|
| 282 |
+
"eval_FPR_ars_Arab": 0.0274961810821419,
|
| 283 |
+
"eval_FPR_arz_Arab": 0.03769230768505917,
|
| 284 |
+
"eval_accuracy": 0.9382975924220497,
|
| 285 |
+
"eval_loss": 0.2227914035320282,
|
| 286 |
+
"eval_macro_f1": 0.8929982487077235,
|
| 287 |
+
"eval_runtime": 3.3475,
|
| 288 |
+
"eval_samples_per_second": 2270.666,
|
| 289 |
+
"eval_steps_per_second": 35.549,
|
| 290 |
+
"step": 1901
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"epoch": 1.0257759074171489,
|
| 294 |
+
"grad_norm": 7.249199390411377,
|
| 295 |
+
"learning_rate": 1.5899000526038927e-05,
|
| 296 |
+
"loss": 0.1908,
|
| 297 |
+
"step": 1950
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 1.0520778537611783,
|
| 301 |
+
"grad_norm": 16.18492889404297,
|
| 302 |
+
"learning_rate": 1.579379274066281e-05,
|
| 303 |
+
"loss": 0.1919,
|
| 304 |
+
"step": 2000
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"epoch": 1.0783798001052078,
|
| 308 |
+
"grad_norm": 6.383620262145996,
|
| 309 |
+
"learning_rate": 1.5688584955286692e-05,
|
| 310 |
+
"loss": 0.1662,
|
| 311 |
+
"step": 2050
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 1.1046817464492373,
|
| 315 |
+
"grad_norm": 2.7821247577667236,
|
| 316 |
+
"learning_rate": 1.5583377169910575e-05,
|
| 317 |
+
"loss": 0.1832,
|
| 318 |
+
"step": 2100
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"epoch": 1.1309836927932668,
|
| 322 |
+
"grad_norm": 0.20694231986999512,
|
| 323 |
+
"learning_rate": 1.5478169384534458e-05,
|
| 324 |
+
"loss": 0.1277,
|
| 325 |
+
"step": 2150
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 1.157285639137296,
|
| 329 |
+
"grad_norm": 66.66133880615234,
|
| 330 |
+
"learning_rate": 1.5372961599158337e-05,
|
| 331 |
+
"loss": 0.1896,
|
| 332 |
+
"step": 2200
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"epoch": 1.1835875854813256,
|
| 336 |
+
"grad_norm": 5.3264055252075195,
|
| 337 |
+
"learning_rate": 1.526775381378222e-05,
|
| 338 |
+
"loss": 0.1535,
|
| 339 |
+
"step": 2250
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"epoch": 1.209889531825355,
|
| 343 |
+
"grad_norm": 3.480900526046753,
|
| 344 |
+
"learning_rate": 1.5162546028406104e-05,
|
| 345 |
+
"loss": 0.1767,
|
| 346 |
+
"step": 2300
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"epoch": 1.2361914781693846,
|
| 350 |
+
"grad_norm": 2.1541006565093994,
|
| 351 |
+
"learning_rate": 1.5057338243029986e-05,
|
| 352 |
+
"loss": 0.2361,
|
| 353 |
+
"step": 2350
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 1.262493424513414,
|
| 357 |
+
"grad_norm": 13.037530899047852,
|
| 358 |
+
"learning_rate": 1.4952130457653869e-05,
|
| 359 |
+
"loss": 0.1733,
|
| 360 |
+
"step": 2400
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"epoch": 1.2887953708574433,
|
| 364 |
+
"grad_norm": 6.1545281410217285,
|
| 365 |
+
"learning_rate": 1.484692267227775e-05,
|
| 366 |
+
"loss": 0.1608,
|
| 367 |
+
"step": 2450
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"epoch": 1.3150973172014728,
|
| 371 |
+
"grad_norm": 1.8223601579666138,
|
| 372 |
+
"learning_rate": 1.4741714886901633e-05,
|
| 373 |
+
"loss": 0.1746,
|
| 374 |
+
"step": 2500
|
| 375 |
+
},
|
| 376 |
+
{
|
| 377 |
+
"epoch": 1.3413992635455023,
|
| 378 |
+
"grad_norm": 3.253241777420044,
|
| 379 |
+
"learning_rate": 1.4636507101525515e-05,
|
| 380 |
+
"loss": 0.1466,
|
| 381 |
+
"step": 2550
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"epoch": 1.3677012098895318,
|
| 385 |
+
"grad_norm": 3.3945982456207275,
|
| 386 |
+
"learning_rate": 1.4531299316149396e-05,
|
| 387 |
+
"loss": 0.1732,
|
| 388 |
+
"step": 2600
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"epoch": 1.3940031562335613,
|
| 392 |
+
"grad_norm": 6.702133655548096,
|
| 393 |
+
"learning_rate": 1.4426091530773279e-05,
|
| 394 |
+
"loss": 0.2324,
|
| 395 |
+
"step": 2650
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"epoch": 1.4203051025775908,
|
| 399 |
+
"grad_norm": 3.2291910648345947,
|
| 400 |
+
"learning_rate": 1.4320883745397161e-05,
|
| 401 |
+
"loss": 0.1615,
|
| 402 |
+
"step": 2700
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 1.4466070489216203,
|
| 406 |
+
"grad_norm": 8.065141677856445,
|
| 407 |
+
"learning_rate": 1.4215675960021042e-05,
|
| 408 |
+
"loss": 0.1668,
|
| 409 |
+
"step": 2750
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 1.4729089952656498,
|
| 413 |
+
"grad_norm": 8.395434379577637,
|
| 414 |
+
"learning_rate": 1.4110468174644925e-05,
|
| 415 |
+
"loss": 0.2002,
|
| 416 |
+
"step": 2800
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 1.499210941609679,
|
| 420 |
+
"grad_norm": 5.985948085784912,
|
| 421 |
+
"learning_rate": 1.4005260389268807e-05,
|
| 422 |
+
"loss": 0.1338,
|
| 423 |
+
"step": 2850
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 1.5255128879537085,
|
| 427 |
+
"grad_norm": 4.8504791259765625,
|
| 428 |
+
"learning_rate": 1.3900052603892688e-05,
|
| 429 |
+
"loss": 0.1493,
|
| 430 |
+
"step": 2900
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 1.551814834297738,
|
| 434 |
+
"grad_norm": 30.86811637878418,
|
| 435 |
+
"learning_rate": 1.3794844818516571e-05,
|
| 436 |
+
"loss": 0.1653,
|
| 437 |
+
"step": 2950
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 1.5781167806417675,
|
| 441 |
+
"grad_norm": 8.025301933288574,
|
| 442 |
+
"learning_rate": 1.3689637033140453e-05,
|
| 443 |
+
"loss": 0.195,
|
| 444 |
+
"step": 3000
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 1.6044187269857968,
|
| 448 |
+
"grad_norm": 2.7844748497009277,
|
| 449 |
+
"learning_rate": 1.3584429247764334e-05,
|
| 450 |
+
"loss": 0.1513,
|
| 451 |
+
"step": 3050
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 1.6307206733298263,
|
| 455 |
+
"grad_norm": 15.212594032287598,
|
| 456 |
+
"learning_rate": 1.3479221462388219e-05,
|
| 457 |
+
"loss": 0.1311,
|
| 458 |
+
"step": 3100
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 1.6570226196738558,
|
| 462 |
+
"grad_norm": 7.984399795532227,
|
| 463 |
+
"learning_rate": 1.3374013677012101e-05,
|
| 464 |
+
"loss": 0.1699,
|
| 465 |
+
"step": 3150
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 1.6833245660178853,
|
| 469 |
+
"grad_norm": 2.66343092918396,
|
| 470 |
+
"learning_rate": 1.3268805891635982e-05,
|
| 471 |
+
"loss": 0.0987,
|
| 472 |
+
"step": 3200
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 1.7096265123619148,
|
| 476 |
+
"grad_norm": 1.7281841039657593,
|
| 477 |
+
"learning_rate": 1.3163598106259865e-05,
|
| 478 |
+
"loss": 0.1468,
|
| 479 |
+
"step": 3250
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 1.7359284587059443,
|
| 483 |
+
"grad_norm": 80.2880859375,
|
| 484 |
+
"learning_rate": 1.3058390320883747e-05,
|
| 485 |
+
"loss": 0.1225,
|
| 486 |
+
"step": 3300
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 1.7622304050499737,
|
| 490 |
+
"grad_norm": 3.2839515209198,
|
| 491 |
+
"learning_rate": 1.2953182535507628e-05,
|
| 492 |
+
"loss": 0.1612,
|
| 493 |
+
"step": 3350
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 1.7885323513940032,
|
| 497 |
+
"grad_norm": 6.35798978805542,
|
| 498 |
+
"learning_rate": 1.2847974750131511e-05,
|
| 499 |
+
"loss": 0.1319,
|
| 500 |
+
"step": 3400
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 1.8148342977380327,
|
| 504 |
+
"grad_norm": 17.910255432128906,
|
| 505 |
+
"learning_rate": 1.2742766964755394e-05,
|
| 506 |
+
"loss": 0.2161,
|
| 507 |
+
"step": 3450
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 1.8411362440820622,
|
| 511 |
+
"grad_norm": 2.275036573410034,
|
| 512 |
+
"learning_rate": 1.2637559179379274e-05,
|
| 513 |
+
"loss": 0.1118,
|
| 514 |
+
"step": 3500
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 1.8674381904260915,
|
| 518 |
+
"grad_norm": 20.091514587402344,
|
| 519 |
+
"learning_rate": 1.2532351394003157e-05,
|
| 520 |
+
"loss": 0.1463,
|
| 521 |
+
"step": 3550
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 1.893740136770121,
|
| 525 |
+
"grad_norm": 0.5615454912185669,
|
| 526 |
+
"learning_rate": 1.242714360862704e-05,
|
| 527 |
+
"loss": 0.1648,
|
| 528 |
+
"step": 3600
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 1.9200420831141505,
|
| 532 |
+
"grad_norm": 3.871091604232788,
|
| 533 |
+
"learning_rate": 1.232193582325092e-05,
|
| 534 |
+
"loss": 0.1325,
|
| 535 |
+
"step": 3650
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 1.9463440294581797,
|
| 539 |
+
"grad_norm": 1.768117904663086,
|
| 540 |
+
"learning_rate": 1.2216728037874803e-05,
|
| 541 |
+
"loss": 0.1664,
|
| 542 |
+
"step": 3700
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 1.9726459758022092,
|
| 546 |
+
"grad_norm": 5.8534393310546875,
|
| 547 |
+
"learning_rate": 1.2111520252498686e-05,
|
| 548 |
+
"loss": 0.1578,
|
| 549 |
+
"step": 3750
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 1.9989479221462387,
|
| 553 |
+
"grad_norm": 3.766312837600708,
|
| 554 |
+
"learning_rate": 1.2006312467122567e-05,
|
| 555 |
+
"loss": 0.1393,
|
| 556 |
+
"step": 3800
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 2.0,
|
| 560 |
+
"eval_FPR_aeb_Arab": 0.00384541434265614,
|
| 561 |
+
"eval_FPR_arb_Arab": 0.02134204960174158,
|
| 562 |
+
"eval_FPR_ars_Arab": 0.01041522010687193,
|
| 563 |
+
"eval_FPR_arz_Arab": 0.020192307688424557,
|
| 564 |
+
"eval_accuracy": 0.9590843310090778,
|
| 565 |
+
"eval_loss": 0.16003794968128204,
|
| 566 |
+
"eval_macro_f1": 0.937683933464698,
|
| 567 |
+
"eval_runtime": 3.3754,
|
| 568 |
+
"eval_samples_per_second": 2251.882,
|
| 569 |
+
"eval_steps_per_second": 35.255,
|
| 570 |
+
"step": 3802
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 2.0252498684902682,
|
| 574 |
+
"grad_norm": 14.620624542236328,
|
| 575 |
+
"learning_rate": 1.190110468174645e-05,
|
| 576 |
+
"loss": 0.073,
|
| 577 |
+
"step": 3850
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 2.0515518148342977,
|
| 581 |
+
"grad_norm": 1.2938824892044067,
|
| 582 |
+
"learning_rate": 1.1795896896370332e-05,
|
| 583 |
+
"loss": 0.1148,
|
| 584 |
+
"step": 3900
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 2.077853761178327,
|
| 588 |
+
"grad_norm": 3.313081979751587,
|
| 589 |
+
"learning_rate": 1.1690689110994216e-05,
|
| 590 |
+
"loss": 0.0746,
|
| 591 |
+
"step": 3950
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 2.1041557075223567,
|
| 595 |
+
"grad_norm": 2.0338821411132812,
|
| 596 |
+
"learning_rate": 1.1585481325618097e-05,
|
| 597 |
+
"loss": 0.0977,
|
| 598 |
+
"step": 4000
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 2.130457653866386,
|
| 602 |
+
"grad_norm": 0.055320367217063904,
|
| 603 |
+
"learning_rate": 1.148027354024198e-05,
|
| 604 |
+
"loss": 0.096,
|
| 605 |
+
"step": 4050
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 2.1567596002104157,
|
| 609 |
+
"grad_norm": 1.0964843034744263,
|
| 610 |
+
"learning_rate": 1.1375065754865862e-05,
|
| 611 |
+
"loss": 0.0642,
|
| 612 |
+
"step": 4100
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 2.183061546554445,
|
| 616 |
+
"grad_norm": 1.0340650081634521,
|
| 617 |
+
"learning_rate": 1.1269857969489743e-05,
|
| 618 |
+
"loss": 0.1007,
|
| 619 |
+
"step": 4150
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 2.2093634928984747,
|
| 623 |
+
"grad_norm": 4.971868515014648,
|
| 624 |
+
"learning_rate": 1.1164650184113626e-05,
|
| 625 |
+
"loss": 0.1083,
|
| 626 |
+
"step": 4200
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 2.2356654392425037,
|
| 630 |
+
"grad_norm": 0.49501538276672363,
|
| 631 |
+
"learning_rate": 1.1059442398737508e-05,
|
| 632 |
+
"loss": 0.1068,
|
| 633 |
+
"step": 4250
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 2.2619673855865337,
|
| 637 |
+
"grad_norm": 6.13097620010376,
|
| 638 |
+
"learning_rate": 1.095423461336139e-05,
|
| 639 |
+
"loss": 0.0946,
|
| 640 |
+
"step": 4300
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 2.2882693319305627,
|
| 644 |
+
"grad_norm": 5.904395580291748,
|
| 645 |
+
"learning_rate": 1.0849026827985272e-05,
|
| 646 |
+
"loss": 0.0758,
|
| 647 |
+
"step": 4350
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 2.314571278274592,
|
| 651 |
+
"grad_norm": 4.2567138671875,
|
| 652 |
+
"learning_rate": 1.0743819042609155e-05,
|
| 653 |
+
"loss": 0.111,
|
| 654 |
+
"step": 4400
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 2.3408732246186217,
|
| 658 |
+
"grad_norm": 0.1440172791481018,
|
| 659 |
+
"learning_rate": 1.0638611257233035e-05,
|
| 660 |
+
"loss": 0.1104,
|
| 661 |
+
"step": 4450
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 2.367175170962651,
|
| 665 |
+
"grad_norm": 7.970292091369629,
|
| 666 |
+
"learning_rate": 1.0533403471856918e-05,
|
| 667 |
+
"loss": 0.0891,
|
| 668 |
+
"step": 4500
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 2.3934771173066807,
|
| 672 |
+
"grad_norm": 2.4047350883483887,
|
| 673 |
+
"learning_rate": 1.04281956864808e-05,
|
| 674 |
+
"loss": 0.1242,
|
| 675 |
+
"step": 4550
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 2.41977906365071,
|
| 679 |
+
"grad_norm": 14.3352689743042,
|
| 680 |
+
"learning_rate": 1.0322987901104682e-05,
|
| 681 |
+
"loss": 0.0649,
|
| 682 |
+
"step": 4600
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 2.4460810099947397,
|
| 686 |
+
"grad_norm": 25.1345157623291,
|
| 687 |
+
"learning_rate": 1.0217780115728564e-05,
|
| 688 |
+
"loss": 0.0712,
|
| 689 |
+
"step": 4650
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 2.472382956338769,
|
| 693 |
+
"grad_norm": 1.9517714977264404,
|
| 694 |
+
"learning_rate": 1.0112572330352445e-05,
|
| 695 |
+
"loss": 0.1032,
|
| 696 |
+
"step": 4700
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 2.4986849026827986,
|
| 700 |
+
"grad_norm": 1.327062726020813,
|
| 701 |
+
"learning_rate": 1.000736454497633e-05,
|
| 702 |
+
"loss": 0.0962,
|
| 703 |
+
"step": 4750
|
| 704 |
+
},
|
| 705 |
+
{
|
| 706 |
+
"epoch": 2.524986849026828,
|
| 707 |
+
"grad_norm": 10.327136993408203,
|
| 708 |
+
"learning_rate": 9.90215675960021e-06,
|
| 709 |
+
"loss": 0.1092,
|
| 710 |
+
"step": 4800
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 2.5512887953708576,
|
| 714 |
+
"grad_norm": 3.8997962474823,
|
| 715 |
+
"learning_rate": 9.796948974224093e-06,
|
| 716 |
+
"loss": 0.0681,
|
| 717 |
+
"step": 4850
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 2.5775907417148867,
|
| 721 |
+
"grad_norm": 0.270841121673584,
|
| 722 |
+
"learning_rate": 9.691741188847975e-06,
|
| 723 |
+
"loss": 0.1265,
|
| 724 |
+
"step": 4900
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 2.6038926880589166,
|
| 728 |
+
"grad_norm": 0.8220506906509399,
|
| 729 |
+
"learning_rate": 9.586533403471858e-06,
|
| 730 |
+
"loss": 0.0726,
|
| 731 |
+
"step": 4950
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 2.6301946344029457,
|
| 735 |
+
"grad_norm": 1.4264813661575317,
|
| 736 |
+
"learning_rate": 9.48132561809574e-06,
|
| 737 |
+
"loss": 0.0707,
|
| 738 |
+
"step": 5000
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 2.656496580746975,
|
| 742 |
+
"grad_norm": 5.427404880523682,
|
| 743 |
+
"learning_rate": 9.376117832719622e-06,
|
| 744 |
+
"loss": 0.0762,
|
| 745 |
+
"step": 5050
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 2.6827985270910046,
|
| 749 |
+
"grad_norm": 39.103004455566406,
|
| 750 |
+
"learning_rate": 9.270910047343504e-06,
|
| 751 |
+
"loss": 0.0733,
|
| 752 |
+
"step": 5100
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 2.709100473435034,
|
| 756 |
+
"grad_norm": 2.8170275688171387,
|
| 757 |
+
"learning_rate": 9.165702261967387e-06,
|
| 758 |
+
"loss": 0.105,
|
| 759 |
+
"step": 5150
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 2.7354024197790636,
|
| 763 |
+
"grad_norm": 6.285243034362793,
|
| 764 |
+
"learning_rate": 9.060494476591268e-06,
|
| 765 |
+
"loss": 0.1054,
|
| 766 |
+
"step": 5200
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 2.761704366123093,
|
| 770 |
+
"grad_norm": 34.959102630615234,
|
| 771 |
+
"learning_rate": 8.95528669121515e-06,
|
| 772 |
+
"loss": 0.1168,
|
| 773 |
+
"step": 5250
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 2.7880063124671226,
|
| 777 |
+
"grad_norm": 2.698047399520874,
|
| 778 |
+
"learning_rate": 8.850078905839033e-06,
|
| 779 |
+
"loss": 0.0664,
|
| 780 |
+
"step": 5300
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 2.814308258811152,
|
| 784 |
+
"grad_norm": 6.107056617736816,
|
| 785 |
+
"learning_rate": 8.744871120462914e-06,
|
| 786 |
+
"loss": 0.0866,
|
| 787 |
+
"step": 5350
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 2.8406102051551816,
|
| 791 |
+
"grad_norm": 6.0492634773254395,
|
| 792 |
+
"learning_rate": 8.639663335086798e-06,
|
| 793 |
+
"loss": 0.0921,
|
| 794 |
+
"step": 5400
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 2.866912151499211,
|
| 798 |
+
"grad_norm": 38.75687789916992,
|
| 799 |
+
"learning_rate": 8.534455549710679e-06,
|
| 800 |
+
"loss": 0.0932,
|
| 801 |
+
"step": 5450
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 2.8932140978432406,
|
| 805 |
+
"grad_norm": 5.730583190917969,
|
| 806 |
+
"learning_rate": 8.429247764334562e-06,
|
| 807 |
+
"loss": 0.0809,
|
| 808 |
+
"step": 5500
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 2.9195160441872696,
|
| 812 |
+
"grad_norm": 0.2023005187511444,
|
| 813 |
+
"learning_rate": 8.324039978958444e-06,
|
| 814 |
+
"loss": 0.0723,
|
| 815 |
+
"step": 5550
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 2.9458179905312996,
|
| 819 |
+
"grad_norm": 24.816850662231445,
|
| 820 |
+
"learning_rate": 8.218832193582325e-06,
|
| 821 |
+
"loss": 0.0758,
|
| 822 |
+
"step": 5600
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 2.9721199368753286,
|
| 826 |
+
"grad_norm": 0.10021505504846573,
|
| 827 |
+
"learning_rate": 8.113624408206208e-06,
|
| 828 |
+
"loss": 0.0787,
|
| 829 |
+
"step": 5650
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 2.998421883219358,
|
| 833 |
+
"grad_norm": 3.8389430046081543,
|
| 834 |
+
"learning_rate": 8.00841662283009e-06,
|
| 835 |
+
"loss": 0.1321,
|
| 836 |
+
"step": 5700
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 3.0,
|
| 840 |
+
"eval_FPR_aeb_Arab": 0.004037685059788947,
|
| 841 |
+
"eval_FPR_arb_Arab": 0.009421265139507543,
|
| 842 |
+
"eval_FPR_ars_Arab": 0.005971392861273241,
|
| 843 |
+
"eval_FPR_arz_Arab": 0.020192307688424557,
|
| 844 |
+
"eval_accuracy": 0.9713195632153664,
|
| 845 |
+
"eval_loss": 0.15336963534355164,
|
| 846 |
+
"eval_macro_f1": 0.9569564393242584,
|
| 847 |
+
"eval_runtime": 3.3689,
|
| 848 |
+
"eval_samples_per_second": 2256.259,
|
| 849 |
+
"eval_steps_per_second": 35.324,
|
| 850 |
+
"step": 5703
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 3.0247238295633876,
|
| 854 |
+
"grad_norm": 0.30554988980293274,
|
| 855 |
+
"learning_rate": 7.903208837453971e-06,
|
| 856 |
+
"loss": 0.0937,
|
| 857 |
+
"step": 5750
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 3.051025775907417,
|
| 861 |
+
"grad_norm": 37.439884185791016,
|
| 862 |
+
"learning_rate": 7.798001052077856e-06,
|
| 863 |
+
"loss": 0.0578,
|
| 864 |
+
"step": 5800
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 3.0773277222514466,
|
| 868 |
+
"grad_norm": 0.0822492390871048,
|
| 869 |
+
"learning_rate": 7.692793266701737e-06,
|
| 870 |
+
"loss": 0.0636,
|
| 871 |
+
"step": 5850
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 3.103629668595476,
|
| 875 |
+
"grad_norm": 2.7918007373809814,
|
| 876 |
+
"learning_rate": 7.587585481325619e-06,
|
| 877 |
+
"loss": 0.0378,
|
| 878 |
+
"step": 5900
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 3.1299316149395056,
|
| 882 |
+
"grad_norm": 32.899818420410156,
|
| 883 |
+
"learning_rate": 7.482377695949501e-06,
|
| 884 |
+
"loss": 0.0609,
|
| 885 |
+
"step": 5950
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 3.156233561283535,
|
| 889 |
+
"grad_norm": 0.06830895692110062,
|
| 890 |
+
"learning_rate": 7.377169910573383e-06,
|
| 891 |
+
"loss": 0.0433,
|
| 892 |
+
"step": 6000
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 3.1825355076275645,
|
| 896 |
+
"grad_norm": 54.685489654541016,
|
| 897 |
+
"learning_rate": 7.271962125197265e-06,
|
| 898 |
+
"loss": 0.056,
|
| 899 |
+
"step": 6050
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 3.208837453971594,
|
| 903 |
+
"grad_norm": 0.8175523281097412,
|
| 904 |
+
"learning_rate": 7.166754339821147e-06,
|
| 905 |
+
"loss": 0.0341,
|
| 906 |
+
"step": 6100
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 3.2351394003156235,
|
| 910 |
+
"grad_norm": 0.33226722478866577,
|
| 911 |
+
"learning_rate": 7.061546554445029e-06,
|
| 912 |
+
"loss": 0.0482,
|
| 913 |
+
"step": 6150
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 3.2614413466596526,
|
| 917 |
+
"grad_norm": 1.425661325454712,
|
| 918 |
+
"learning_rate": 6.956338769068912e-06,
|
| 919 |
+
"loss": 0.0673,
|
| 920 |
+
"step": 6200
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 3.2877432930036825,
|
| 924 |
+
"grad_norm": 0.18895921111106873,
|
| 925 |
+
"learning_rate": 6.851130983692794e-06,
|
| 926 |
+
"loss": 0.0359,
|
| 927 |
+
"step": 6250
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 3.3140452393477116,
|
| 931 |
+
"grad_norm": 0.6557305455207825,
|
| 932 |
+
"learning_rate": 6.7459231983166766e-06,
|
| 933 |
+
"loss": 0.0382,
|
| 934 |
+
"step": 6300
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 3.340347185691741,
|
| 938 |
+
"grad_norm": 0.008198770694434643,
|
| 939 |
+
"learning_rate": 6.640715412940558e-06,
|
| 940 |
+
"loss": 0.0566,
|
| 941 |
+
"step": 6350
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 3.3666491320357705,
|
| 945 |
+
"grad_norm": 0.4695976674556732,
|
| 946 |
+
"learning_rate": 6.53550762756444e-06,
|
| 947 |
+
"loss": 0.0654,
|
| 948 |
+
"step": 6400
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 3.3929510783798,
|
| 952 |
+
"grad_norm": 8.628214836120605,
|
| 953 |
+
"learning_rate": 6.430299842188323e-06,
|
| 954 |
+
"loss": 0.0427,
|
| 955 |
+
"step": 6450
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 3.4192530247238295,
|
| 959 |
+
"grad_norm": 0.9650713801383972,
|
| 960 |
+
"learning_rate": 6.3250920568122044e-06,
|
| 961 |
+
"loss": 0.0645,
|
| 962 |
+
"step": 6500
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 3.445554971067859,
|
| 966 |
+
"grad_norm": 5.836668968200684,
|
| 967 |
+
"learning_rate": 6.219884271436086e-06,
|
| 968 |
+
"loss": 0.0397,
|
| 969 |
+
"step": 6550
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 3.4718569174118885,
|
| 973 |
+
"grad_norm": 0.03976545110344887,
|
| 974 |
+
"learning_rate": 6.11467648605997e-06,
|
| 975 |
+
"loss": 0.0586,
|
| 976 |
+
"step": 6600
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 3.498158863755918,
|
| 980 |
+
"grad_norm": 19.784215927124023,
|
| 981 |
+
"learning_rate": 6.009468700683851e-06,
|
| 982 |
+
"loss": 0.033,
|
| 983 |
+
"step": 6650
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 3.5244608100999475,
|
| 987 |
+
"grad_norm": 2.075496196746826,
|
| 988 |
+
"learning_rate": 5.904260915307733e-06,
|
| 989 |
+
"loss": 0.0776,
|
| 990 |
+
"step": 6700
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 3.550762756443977,
|
| 994 |
+
"grad_norm": 7.05810022354126,
|
| 995 |
+
"learning_rate": 5.799053129931616e-06,
|
| 996 |
+
"loss": 0.0905,
|
| 997 |
+
"step": 6750
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 3.5770647027880065,
|
| 1001 |
+
"grad_norm": 0.012984913773834705,
|
| 1002 |
+
"learning_rate": 5.6938453445554975e-06,
|
| 1003 |
+
"loss": 0.0542,
|
| 1004 |
+
"step": 6800
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 3.6033666491320355,
|
| 1008 |
+
"grad_norm": 2.701481342315674,
|
| 1009 |
+
"learning_rate": 5.588637559179379e-06,
|
| 1010 |
+
"loss": 0.0625,
|
| 1011 |
+
"step": 6850
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 3.6296685954760655,
|
| 1015 |
+
"grad_norm": 0.41872379183769226,
|
| 1016 |
+
"learning_rate": 5.483429773803262e-06,
|
| 1017 |
+
"loss": 0.0795,
|
| 1018 |
+
"step": 6900
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 3.6559705418200945,
|
| 1022 |
+
"grad_norm": 0.13123294711112976,
|
| 1023 |
+
"learning_rate": 5.378221988427144e-06,
|
| 1024 |
+
"loss": 0.0296,
|
| 1025 |
+
"step": 6950
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 3.682272488164124,
|
| 1029 |
+
"grad_norm": 0.7190969586372375,
|
| 1030 |
+
"learning_rate": 5.273014203051027e-06,
|
| 1031 |
+
"loss": 0.0666,
|
| 1032 |
+
"step": 7000
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 3.7085744345081535,
|
| 1036 |
+
"grad_norm": 0.1744261384010315,
|
| 1037 |
+
"learning_rate": 5.167806417674909e-06,
|
| 1038 |
+
"loss": 0.0328,
|
| 1039 |
+
"step": 7050
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 3.734876380852183,
|
| 1043 |
+
"grad_norm": 0.5619340538978577,
|
| 1044 |
+
"learning_rate": 5.062598632298791e-06,
|
| 1045 |
+
"loss": 0.0755,
|
| 1046 |
+
"step": 7100
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 3.7611783271962125,
|
| 1050 |
+
"grad_norm": 40.665706634521484,
|
| 1051 |
+
"learning_rate": 4.957390846922673e-06,
|
| 1052 |
+
"loss": 0.1041,
|
| 1053 |
+
"step": 7150
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 3.787480273540242,
|
| 1057 |
+
"grad_norm": 0.06617475301027298,
|
| 1058 |
+
"learning_rate": 4.852183061546555e-06,
|
| 1059 |
+
"loss": 0.0264,
|
| 1060 |
+
"step": 7200
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 3.8137822198842715,
|
| 1064 |
+
"grad_norm": 5.0283966064453125,
|
| 1065 |
+
"learning_rate": 4.746975276170437e-06,
|
| 1066 |
+
"loss": 0.0789,
|
| 1067 |
+
"step": 7250
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 3.840084166228301,
|
| 1071 |
+
"grad_norm": 5.660898208618164,
|
| 1072 |
+
"learning_rate": 4.641767490794319e-06,
|
| 1073 |
+
"loss": 0.0582,
|
| 1074 |
+
"step": 7300
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 3.8663861125723304,
|
| 1078 |
+
"grad_norm": 0.8503484725952148,
|
| 1079 |
+
"learning_rate": 4.536559705418201e-06,
|
| 1080 |
+
"loss": 0.0862,
|
| 1081 |
+
"step": 7350
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 3.89268805891636,
|
| 1085 |
+
"grad_norm": 13.575056076049805,
|
| 1086 |
+
"learning_rate": 4.431351920042084e-06,
|
| 1087 |
+
"loss": 0.0554,
|
| 1088 |
+
"step": 7400
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 3.9189900052603894,
|
| 1092 |
+
"grad_norm": 0.25003504753112793,
|
| 1093 |
+
"learning_rate": 4.3261441346659654e-06,
|
| 1094 |
+
"loss": 0.0504,
|
| 1095 |
+
"step": 7450
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 3.9452919516044185,
|
| 1099 |
+
"grad_norm": 0.022247493267059326,
|
| 1100 |
+
"learning_rate": 4.220936349289847e-06,
|
| 1101 |
+
"loss": 0.0663,
|
| 1102 |
+
"step": 7500
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 3.9715938979484484,
|
| 1106 |
+
"grad_norm": 0.2591884136199951,
|
| 1107 |
+
"learning_rate": 4.11572856391373e-06,
|
| 1108 |
+
"loss": 0.0361,
|
| 1109 |
+
"step": 7550
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 3.9978958442924775,
|
| 1113 |
+
"grad_norm": 6.533713340759277,
|
| 1114 |
+
"learning_rate": 4.010520778537612e-06,
|
| 1115 |
+
"loss": 0.0293,
|
| 1116 |
+
"step": 7600
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 4.0,
|
| 1120 |
+
"eval_FPR_aeb_Arab": 0.004229955776921754,
|
| 1121 |
+
"eval_FPR_arb_Arab": 0.011343972310835613,
|
| 1122 |
+
"eval_FPR_ars_Arab": 0.00458269684702365,
|
| 1123 |
+
"eval_FPR_arz_Arab": 0.015576923073927515,
|
| 1124 |
+
"eval_accuracy": 0.9743454808577818,
|
| 1125 |
+
"eval_loss": 0.15085552632808685,
|
| 1126 |
+
"eval_macro_f1": 0.9633717243752477,
|
| 1127 |
+
"eval_runtime": 3.3689,
|
| 1128 |
+
"eval_samples_per_second": 2256.225,
|
| 1129 |
+
"eval_steps_per_second": 35.323,
|
| 1130 |
+
"step": 7604
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 4.024197790636507,
|
| 1134 |
+
"grad_norm": 2.82965087890625,
|
| 1135 |
+
"learning_rate": 3.905312993161494e-06,
|
| 1136 |
+
"loss": 0.0275,
|
| 1137 |
+
"step": 7650
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 4.0504997369805364,
|
| 1141 |
+
"grad_norm": 0.05096087604761124,
|
| 1142 |
+
"learning_rate": 3.8001052077853763e-06,
|
| 1143 |
+
"loss": 0.0334,
|
| 1144 |
+
"step": 7700
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 4.076801683324566,
|
| 1148 |
+
"grad_norm": 0.017893170937895775,
|
| 1149 |
+
"learning_rate": 3.694897422409259e-06,
|
| 1150 |
+
"loss": 0.0435,
|
| 1151 |
+
"step": 7750
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 4.103103629668595,
|
| 1155 |
+
"grad_norm": 0.43649783730506897,
|
| 1156 |
+
"learning_rate": 3.5896896370331407e-06,
|
| 1157 |
+
"loss": 0.0297,
|
| 1158 |
+
"step": 7800
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 4.1294055760126245,
|
| 1162 |
+
"grad_norm": 0.1193922609090805,
|
| 1163 |
+
"learning_rate": 3.484481851657023e-06,
|
| 1164 |
+
"loss": 0.0308,
|
| 1165 |
+
"step": 7850
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 4.155707522356654,
|
| 1169 |
+
"grad_norm": 0.029588880017399788,
|
| 1170 |
+
"learning_rate": 3.3792740662809046e-06,
|
| 1171 |
+
"loss": 0.0338,
|
| 1172 |
+
"step": 7900
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 4.1820094687006835,
|
| 1176 |
+
"grad_norm": 9.658980369567871,
|
| 1177 |
+
"learning_rate": 3.2740662809047872e-06,
|
| 1178 |
+
"loss": 0.0431,
|
| 1179 |
+
"step": 7950
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 4.208311415044713,
|
| 1183 |
+
"grad_norm": 0.018898559734225273,
|
| 1184 |
+
"learning_rate": 3.1688584955286694e-06,
|
| 1185 |
+
"loss": 0.0436,
|
| 1186 |
+
"step": 8000
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 4.2346133613887424,
|
| 1190 |
+
"grad_norm": 0.33016514778137207,
|
| 1191 |
+
"learning_rate": 3.0636507101525516e-06,
|
| 1192 |
+
"loss": 0.0103,
|
| 1193 |
+
"step": 8050
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 4.260915307732772,
|
| 1197 |
+
"grad_norm": 4.2456583976745605,
|
| 1198 |
+
"learning_rate": 2.9584429247764334e-06,
|
| 1199 |
+
"loss": 0.0234,
|
| 1200 |
+
"step": 8100
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 4.287217254076801,
|
| 1204 |
+
"grad_norm": 7.066432476043701,
|
| 1205 |
+
"learning_rate": 2.853235139400316e-06,
|
| 1206 |
+
"loss": 0.0267,
|
| 1207 |
+
"step": 8150
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 4.313519200420831,
|
| 1211 |
+
"grad_norm": 0.010746636427938938,
|
| 1212 |
+
"learning_rate": 2.748027354024198e-06,
|
| 1213 |
+
"loss": 0.0257,
|
| 1214 |
+
"step": 8200
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 4.33982114676486,
|
| 1218 |
+
"grad_norm": 0.03323914483189583,
|
| 1219 |
+
"learning_rate": 2.64281956864808e-06,
|
| 1220 |
+
"loss": 0.0515,
|
| 1221 |
+
"step": 8250
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 4.36612309310889,
|
| 1225 |
+
"grad_norm": 0.2795711159706116,
|
| 1226 |
+
"learning_rate": 2.537611783271962e-06,
|
| 1227 |
+
"loss": 0.0489,
|
| 1228 |
+
"step": 8300
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 4.392425039452919,
|
| 1232 |
+
"grad_norm": 9.179369926452637,
|
| 1233 |
+
"learning_rate": 2.4324039978958443e-06,
|
| 1234 |
+
"loss": 0.0339,
|
| 1235 |
+
"step": 8350
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 4.418726985796949,
|
| 1239 |
+
"grad_norm": 3.0741329193115234,
|
| 1240 |
+
"learning_rate": 2.327196212519727e-06,
|
| 1241 |
+
"loss": 0.0203,
|
| 1242 |
+
"step": 8400
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 4.445028932140978,
|
| 1246 |
+
"grad_norm": 21.926807403564453,
|
| 1247 |
+
"learning_rate": 2.2219884271436086e-06,
|
| 1248 |
+
"loss": 0.0481,
|
| 1249 |
+
"step": 8450
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 4.471330878485007,
|
| 1253 |
+
"grad_norm": 1.929383635520935,
|
| 1254 |
+
"learning_rate": 2.1167806417674912e-06,
|
| 1255 |
+
"loss": 0.0231,
|
| 1256 |
+
"step": 8500
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 4.497632824829037,
|
| 1260 |
+
"grad_norm": 4.422601699829102,
|
| 1261 |
+
"learning_rate": 2.011572856391373e-06,
|
| 1262 |
+
"loss": 0.034,
|
| 1263 |
+
"step": 8550
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"epoch": 4.523934771173067,
|
| 1267 |
+
"grad_norm": 0.010013488121330738,
|
| 1268 |
+
"learning_rate": 1.9063650710152554e-06,
|
| 1269 |
+
"loss": 0.0114,
|
| 1270 |
+
"step": 8600
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 4.550236717517096,
|
| 1274 |
+
"grad_norm": 0.002675453433766961,
|
| 1275 |
+
"learning_rate": 1.8011572856391374e-06,
|
| 1276 |
+
"loss": 0.0575,
|
| 1277 |
+
"step": 8650
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"epoch": 4.576538663861125,
|
| 1281 |
+
"grad_norm": 0.04695653170347214,
|
| 1282 |
+
"learning_rate": 1.6959495002630197e-06,
|
| 1283 |
+
"loss": 0.0419,
|
| 1284 |
+
"step": 8700
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"epoch": 4.602840610205155,
|
| 1288 |
+
"grad_norm": 0.005277259275317192,
|
| 1289 |
+
"learning_rate": 1.5907417148869017e-06,
|
| 1290 |
+
"loss": 0.0465,
|
| 1291 |
+
"step": 8750
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"epoch": 4.629142556549184,
|
| 1295 |
+
"grad_norm": 0.03051823005080223,
|
| 1296 |
+
"learning_rate": 1.4855339295107841e-06,
|
| 1297 |
+
"loss": 0.0494,
|
| 1298 |
+
"step": 8800
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"epoch": 4.655444502893214,
|
| 1302 |
+
"grad_norm": 0.03725295141339302,
|
| 1303 |
+
"learning_rate": 1.380326144134666e-06,
|
| 1304 |
+
"loss": 0.0279,
|
| 1305 |
+
"step": 8850
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"epoch": 4.681746449237243,
|
| 1309 |
+
"grad_norm": 9.204965591430664,
|
| 1310 |
+
"learning_rate": 1.2751183587585483e-06,
|
| 1311 |
+
"loss": 0.0421,
|
| 1312 |
+
"step": 8900
|
| 1313 |
+
},
|
| 1314 |
+
{
|
| 1315 |
+
"epoch": 4.708048395581273,
|
| 1316 |
+
"grad_norm": 0.6778242588043213,
|
| 1317 |
+
"learning_rate": 1.1699105733824304e-06,
|
| 1318 |
+
"loss": 0.0275,
|
| 1319 |
+
"step": 8950
|
| 1320 |
+
},
|
| 1321 |
+
{
|
| 1322 |
+
"epoch": 4.734350341925302,
|
| 1323 |
+
"grad_norm": 0.02842475101351738,
|
| 1324 |
+
"learning_rate": 1.0647027880063126e-06,
|
| 1325 |
+
"loss": 0.0312,
|
| 1326 |
+
"step": 9000
|
| 1327 |
+
},
|
| 1328 |
+
{
|
| 1329 |
+
"epoch": 4.760652288269332,
|
| 1330 |
+
"grad_norm": 0.03680500015616417,
|
| 1331 |
+
"learning_rate": 9.594950026301946e-07,
|
| 1332 |
+
"loss": 0.0349,
|
| 1333 |
+
"step": 9050
|
| 1334 |
+
},
|
| 1335 |
+
{
|
| 1336 |
+
"epoch": 4.786954234613361,
|
| 1337 |
+
"grad_norm": 0.4056846797466278,
|
| 1338 |
+
"learning_rate": 8.542872172540769e-07,
|
| 1339 |
+
"loss": 0.0473,
|
| 1340 |
+
"step": 9100
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 4.81325618095739,
|
| 1344 |
+
"grad_norm": 0.005888829007744789,
|
| 1345 |
+
"learning_rate": 7.490794318779591e-07,
|
| 1346 |
+
"loss": 0.0418,
|
| 1347 |
+
"step": 9150
|
| 1348 |
+
},
|
| 1349 |
+
{
|
| 1350 |
+
"epoch": 4.83955812730142,
|
| 1351 |
+
"grad_norm": 1.9934979677200317,
|
| 1352 |
+
"learning_rate": 6.438716465018411e-07,
|
| 1353 |
+
"loss": 0.0282,
|
| 1354 |
+
"step": 9200
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 4.86586007364545,
|
| 1358 |
+
"grad_norm": 2.1665430068969727,
|
| 1359 |
+
"learning_rate": 5.386638611257233e-07,
|
| 1360 |
+
"loss": 0.0303,
|
| 1361 |
+
"step": 9250
|
| 1362 |
+
},
|
| 1363 |
+
{
|
| 1364 |
+
"epoch": 4.892162019989479,
|
| 1365 |
+
"grad_norm": 0.07777859270572662,
|
| 1366 |
+
"learning_rate": 4.334560757496055e-07,
|
| 1367 |
+
"loss": 0.0385,
|
| 1368 |
+
"step": 9300
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"epoch": 4.918463966333508,
|
| 1372 |
+
"grad_norm": 0.1822308897972107,
|
| 1373 |
+
"learning_rate": 3.2824829037348767e-07,
|
| 1374 |
+
"loss": 0.062,
|
| 1375 |
+
"step": 9350
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 4.944765912677538,
|
| 1379 |
+
"grad_norm": 0.13670164346694946,
|
| 1380 |
+
"learning_rate": 2.2304050499736983e-07,
|
| 1381 |
+
"loss": 0.0187,
|
| 1382 |
+
"step": 9400
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"epoch": 4.971067859021567,
|
| 1386 |
+
"grad_norm": 0.0051955850794911385,
|
| 1387 |
+
"learning_rate": 1.1783271962125198e-07,
|
| 1388 |
+
"loss": 0.0421,
|
| 1389 |
+
"step": 9450
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 4.997369805365597,
|
| 1393 |
+
"grad_norm": 0.008878646418452263,
|
| 1394 |
+
"learning_rate": 1.262493424513414e-08,
|
| 1395 |
+
"loss": 0.0477,
|
| 1396 |
+
"step": 9500
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"epoch": 5.0,
|
| 1400 |
+
"eval_FPR_aeb_Arab": 0.00384541434265614,
|
| 1401 |
+
"eval_FPR_arb_Arab": 0.011728513745101227,
|
| 1402 |
+
"eval_FPR_ars_Arab": 0.004721566448448609,
|
| 1403 |
+
"eval_FPR_arz_Arab": 0.013461538458949703,
|
| 1404 |
+
"eval_accuracy": 0.9756610972240495,
|
| 1405 |
+
"eval_loss": 0.15560674667358398,
|
| 1406 |
+
"eval_macro_f1": 0.9641750099415428,
|
| 1407 |
+
"eval_runtime": 3.3579,
|
| 1408 |
+
"eval_samples_per_second": 2263.594,
|
| 1409 |
+
"eval_steps_per_second": 35.438,
|
| 1410 |
+
"step": 9505
|
| 1411 |
+
}
|
| 1412 |
+
],
|
| 1413 |
+
"logging_steps": 50,
|
| 1414 |
+
"max_steps": 9505,
|
| 1415 |
+
"num_input_tokens_seen": 0,
|
| 1416 |
+
"num_train_epochs": 5,
|
| 1417 |
+
"save_steps": 500,
|
| 1418 |
+
"stateful_callbacks": {
|
| 1419 |
+
"EarlyStoppingCallback": {
|
| 1420 |
+
"args": {
|
| 1421 |
+
"early_stopping_patience": 2,
|
| 1422 |
+
"early_stopping_threshold": 0.0
|
| 1423 |
+
},
|
| 1424 |
+
"attributes": {
|
| 1425 |
+
"early_stopping_patience_counter": 0
|
| 1426 |
+
}
|
| 1427 |
+
},
|
| 1428 |
+
"TrainerControl": {
|
| 1429 |
+
"args": {
|
| 1430 |
+
"should_epoch_stop": false,
|
| 1431 |
+
"should_evaluate": false,
|
| 1432 |
+
"should_log": false,
|
| 1433 |
+
"should_save": true,
|
| 1434 |
+
"should_training_stop": true
|
| 1435 |
+
},
|
| 1436 |
+
"attributes": {}
|
| 1437 |
+
}
|
| 1438 |
+
},
|
| 1439 |
+
"total_flos": 4298818504680960.0,
|
| 1440 |
+
"train_batch_size": 32,
|
| 1441 |
+
"trial_name": null,
|
| 1442 |
+
"trial_params": null
|
| 1443 |
+
}
|
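The learning rates logged in the state above trace a plain linear decay with no warmup, starting from the 2e-5 peak and reaching zero at max_steps = 9505. The closed form below is inferred from the logged numbers rather than taken from the training script, so treat it as a sketch:

# Linear decay inferred from the logged learning rates: the value logged at
# step s matches peak_lr scaled by the remaining fraction of (s - 1) / max_steps.
peak_lr, max_steps = 2e-5, 9505

def lr_at(step: int) -> float:
    return peak_lr * (1 - (step - 1) / max_steps)

print(lr_at(50))    # ~1.98969e-05, matches the value logged at step 50
print(lr_at(9500))  # ~1.26249e-08, matches the value logged at step 9500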
checkpoint-9505/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
+size 5777
checkpoint-9505/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
confusion_matrix_val.csv
ADDED
|
@@ -0,0 +1,5 @@
+,aeb_Arab,arb_Arab,ars_Arab,arz_Arab
+aeb_Arab,2345,15,7,33
+arb_Arab,6,2352,17,25
+ars_Arab,3,15,370,12
+arz_Arab,11,31,10,2349
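Reading this matrix with rows as true labels and columns as predictions reproduces the accuracy and the per-class false-positive rates reported in metrics.txt below. A small sketch of that check (file name as above):

# Recompute accuracy and per-class FPR from the validation confusion matrix.
# Assumes rows are true labels and columns predictions, which is consistent
# with the eval_FPR_* values in metrics.txt (e.g. FPR for aeb_Arab ~ 0.003845).
import csv

with open("confusion_matrix_val.csv") as f:
    rows = list(csv.reader(f))
labels = rows[0][1:]
cm = [[int(x) for x in r[1:]] for r in rows[1:]]

total = sum(sum(r) for r in cm)
accuracy = sum(cm[i][i] for i in range(len(labels))) / total
print("accuracy:", accuracy)

for i, label in enumerate(labels):
    fp = sum(cm[r][i] for r in range(len(labels))) - cm[i][i]  # predicted i, true label differs
    negatives = total - sum(cm[i])                              # samples whose true label is not i
    print(label, "FPR:", fp / negatives)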
metrics.txt
ADDED
|
@@ -0,0 +1 @@
+{'eval_loss': 0.15560674667358398, 'eval_accuracy': 0.9756610972240495, 'eval_macro_f1': 0.9641750099415428, 'eval_FPR_aeb_Arab': 0.00384541434265614, 'eval_FPR_arb_Arab': 0.011728513745101227, 'eval_FPR_ars_Arab': 0.004721566448448609, 'eval_FPR_arz_Arab': 0.013461538458949703, 'eval_runtime': 3.397, 'eval_samples_per_second': 2237.548, 'eval_steps_per_second': 35.031, 'epoch': 5.0}
val_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|