NadiaGHEZAIEL commited on
Commit
53674dd
·
verified ·
1 Parent(s): 05f0c2f

Upload folder using huggingface_hub

Browse files
best_model/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "aeb_Arab",
14
+ "1": "arb_Arab",
15
+ "2": "ars_Arab",
16
+ "3": "arz_Arab"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "aeb_Arab": 0,
22
+ "arb_Arab": 1,
23
+ "ars_Arab": 2,
24
+ "arz_Arab": 3
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "model_type": "bert",
29
+ "num_attention_heads": 12,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "absolute",
33
+ "transformers_version": "4.56.1",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30000
37
+ }
best_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:496bb5c365c925f55ceae6595a8e715b409815cc8b6ff91706c35fe83c24f363
3
+ size 436361208
best_model/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
best_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
best_model/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "full_tokenizer_file": null,
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "never_split": null,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": null,
56
+ "tokenize_chinese_chars": true,
57
+ "tokenizer_class": "BertTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }
best_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
3
+ size 5777
best_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-7604/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "aeb_Arab",
14
+ "1": "arb_Arab",
15
+ "2": "ars_Arab",
16
+ "3": "arz_Arab"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "aeb_Arab": 0,
22
+ "arb_Arab": 1,
23
+ "ars_Arab": 2,
24
+ "arz_Arab": 3
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "model_type": "bert",
29
+ "num_attention_heads": 12,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "absolute",
33
+ "transformers_version": "4.56.1",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30000
37
+ }
checkpoint-7604/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea97c9f54116987a4efce5003c237fe9ad14dba6bc8df6581b8e1b335acb132a
3
+ size 436361208
checkpoint-7604/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b377722c6852e6cbba0ccadd8c6526da27ed024558775d8e5aef375283656145
3
+ size 872846731
checkpoint-7604/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf843fc7bcd1af4ab3293b3c088707a5f43c002295ebaf38c7b73a37d8b8b6e2
3
+ size 14645
checkpoint-7604/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff504aea448cd91d3c9132365ba5a6351fe5df0ebe126e322b53a9d87fdfb29
3
+ size 1383
checkpoint-7604/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df49cd2afd04bc85cb2837d2c80f823cd41bf926f70505d505372ff3c1aead5c
3
+ size 1465
checkpoint-7604/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-7604/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-7604/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "full_tokenizer_file": null,
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "never_split": null,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": null,
56
+ "tokenize_chinese_chars": true,
57
+ "tokenizer_class": "BertTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }
checkpoint-7604/trainer_state.json ADDED
@@ -0,0 +1,1163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 7604,
3
+ "best_metric": 0.9633717243752477,
4
+ "best_model_checkpoint": "camelbert_madar_task5/checkpoint-7604",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 7604,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.026301946344029457,
14
+ "grad_norm": 21.061479568481445,
15
+ "learning_rate": 1.9896896370331405e-05,
16
+ "loss": 0.9381,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.052603892688058915,
21
+ "grad_norm": 4.199251651763916,
22
+ "learning_rate": 1.9791688584955288e-05,
23
+ "loss": 0.5182,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.07890583903208838,
28
+ "grad_norm": 22.227828979492188,
29
+ "learning_rate": 1.968648079957917e-05,
30
+ "loss": 0.4486,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.10520778537611783,
35
+ "grad_norm": 7.481734275817871,
36
+ "learning_rate": 1.9581273014203053e-05,
37
+ "loss": 0.4422,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.1315097317201473,
42
+ "grad_norm": 9.7647705078125,
43
+ "learning_rate": 1.9476065228826936e-05,
44
+ "loss": 0.4304,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.15781167806417676,
49
+ "grad_norm": 12.080931663513184,
50
+ "learning_rate": 1.9370857443450818e-05,
51
+ "loss": 0.3672,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.1841136244082062,
56
+ "grad_norm": 11.353347778320312,
57
+ "learning_rate": 1.9265649658074697e-05,
58
+ "loss": 0.3771,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.21041557075223566,
63
+ "grad_norm": 3.3302719593048096,
64
+ "learning_rate": 1.916044187269858e-05,
65
+ "loss": 0.4053,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.23671751709626512,
70
+ "grad_norm": 11.869136810302734,
71
+ "learning_rate": 1.9055234087322463e-05,
72
+ "loss": 0.3754,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.2630194634402946,
77
+ "grad_norm": 19.71166229248047,
78
+ "learning_rate": 1.8950026301946345e-05,
79
+ "loss": 0.3909,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.289321409784324,
84
+ "grad_norm": 82.08606719970703,
85
+ "learning_rate": 1.8844818516570228e-05,
86
+ "loss": 0.3945,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.3156233561283535,
91
+ "grad_norm": 5.5329389572143555,
92
+ "learning_rate": 1.873961073119411e-05,
93
+ "loss": 0.3182,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.34192530247238295,
98
+ "grad_norm": 10.177448272705078,
99
+ "learning_rate": 1.8634402945817993e-05,
100
+ "loss": 0.3294,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.3682272488164124,
105
+ "grad_norm": 11.596871376037598,
106
+ "learning_rate": 1.8529195160441876e-05,
107
+ "loss": 0.3445,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.3945291951604419,
112
+ "grad_norm": 5.0095319747924805,
113
+ "learning_rate": 1.8423987375065758e-05,
114
+ "loss": 0.3403,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.4208311415044713,
119
+ "grad_norm": 6.569547653198242,
120
+ "learning_rate": 1.8318779589689638e-05,
121
+ "loss": 0.2767,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.4471330878485008,
126
+ "grad_norm": 7.269279956817627,
127
+ "learning_rate": 1.821357180431352e-05,
128
+ "loss": 0.3391,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 0.47343503419253025,
133
+ "grad_norm": 6.403675079345703,
134
+ "learning_rate": 1.8108364018937403e-05,
135
+ "loss": 0.2729,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 0.4997369805365597,
140
+ "grad_norm": 18.03633689880371,
141
+ "learning_rate": 1.8003156233561285e-05,
142
+ "loss": 0.2912,
143
+ "step": 950
144
+ },
145
+ {
146
+ "epoch": 0.5260389268805892,
147
+ "grad_norm": 11.609797477722168,
148
+ "learning_rate": 1.7897948448185168e-05,
149
+ "loss": 0.3678,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 0.5523408732246187,
154
+ "grad_norm": 8.587767601013184,
155
+ "learning_rate": 1.779274066280905e-05,
156
+ "loss": 0.2789,
157
+ "step": 1050
158
+ },
159
+ {
160
+ "epoch": 0.578642819568648,
161
+ "grad_norm": 2.894766092300415,
162
+ "learning_rate": 1.768753287743293e-05,
163
+ "loss": 0.2515,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 0.6049447659126775,
168
+ "grad_norm": 14.84619140625,
169
+ "learning_rate": 1.7582325092056812e-05,
170
+ "loss": 0.2817,
171
+ "step": 1150
172
+ },
173
+ {
174
+ "epoch": 0.631246712256707,
175
+ "grad_norm": 14.3659029006958,
176
+ "learning_rate": 1.7477117306680695e-05,
177
+ "loss": 0.2819,
178
+ "step": 1200
179
+ },
180
+ {
181
+ "epoch": 0.6575486586007364,
182
+ "grad_norm": 24.962841033935547,
183
+ "learning_rate": 1.7371909521304578e-05,
184
+ "loss": 0.275,
185
+ "step": 1250
186
+ },
187
+ {
188
+ "epoch": 0.6838506049447659,
189
+ "grad_norm": 2.1663622856140137,
190
+ "learning_rate": 1.726670173592846e-05,
191
+ "loss": 0.2513,
192
+ "step": 1300
193
+ },
194
+ {
195
+ "epoch": 0.7101525512887954,
196
+ "grad_norm": 20.324939727783203,
197
+ "learning_rate": 1.7161493950552343e-05,
198
+ "loss": 0.2862,
199
+ "step": 1350
200
+ },
201
+ {
202
+ "epoch": 0.7364544976328248,
203
+ "grad_norm": 12.115033149719238,
204
+ "learning_rate": 1.7056286165176222e-05,
205
+ "loss": 0.2489,
206
+ "step": 1400
207
+ },
208
+ {
209
+ "epoch": 0.7627564439768543,
210
+ "grad_norm": 9.39247989654541,
211
+ "learning_rate": 1.6951078379800105e-05,
212
+ "loss": 0.2199,
213
+ "step": 1450
214
+ },
215
+ {
216
+ "epoch": 0.7890583903208838,
217
+ "grad_norm": 11.820609092712402,
218
+ "learning_rate": 1.684587059442399e-05,
219
+ "loss": 0.2334,
220
+ "step": 1500
221
+ },
222
+ {
223
+ "epoch": 0.8153603366649133,
224
+ "grad_norm": 5.685638427734375,
225
+ "learning_rate": 1.6740662809047873e-05,
226
+ "loss": 0.2859,
227
+ "step": 1550
228
+ },
229
+ {
230
+ "epoch": 0.8416622830089426,
231
+ "grad_norm": 1.4263566732406616,
232
+ "learning_rate": 1.6635455023671752e-05,
233
+ "loss": 0.2712,
234
+ "step": 1600
235
+ },
236
+ {
237
+ "epoch": 0.8679642293529721,
238
+ "grad_norm": 43.12693786621094,
239
+ "learning_rate": 1.6530247238295635e-05,
240
+ "loss": 0.2236,
241
+ "step": 1650
242
+ },
243
+ {
244
+ "epoch": 0.8942661756970016,
245
+ "grad_norm": 18.322067260742188,
246
+ "learning_rate": 1.6425039452919518e-05,
247
+ "loss": 0.2176,
248
+ "step": 1700
249
+ },
250
+ {
251
+ "epoch": 0.920568122041031,
252
+ "grad_norm": 8.125885009765625,
253
+ "learning_rate": 1.63198316675434e-05,
254
+ "loss": 0.2344,
255
+ "step": 1750
256
+ },
257
+ {
258
+ "epoch": 0.9468700683850605,
259
+ "grad_norm": 4.2774457931518555,
260
+ "learning_rate": 1.6214623882167283e-05,
261
+ "loss": 0.2173,
262
+ "step": 1800
263
+ },
264
+ {
265
+ "epoch": 0.97317201472909,
266
+ "grad_norm": 8.311309814453125,
267
+ "learning_rate": 1.6109416096791165e-05,
268
+ "loss": 0.207,
269
+ "step": 1850
270
+ },
271
+ {
272
+ "epoch": 0.9994739610731194,
273
+ "grad_norm": 18.770065307617188,
274
+ "learning_rate": 1.6004208311415045e-05,
275
+ "loss": 0.2261,
276
+ "step": 1900
277
+ },
278
+ {
279
+ "epoch": 1.0,
280
+ "eval_FPR_aeb_Arab": 0.007114016533913859,
281
+ "eval_FPR_arb_Arab": 0.007306287251046666,
282
+ "eval_FPR_ars_Arab": 0.0274961810821419,
283
+ "eval_FPR_arz_Arab": 0.03769230768505917,
284
+ "eval_accuracy": 0.9382975924220497,
285
+ "eval_loss": 0.2227914035320282,
286
+ "eval_macro_f1": 0.8929982487077235,
287
+ "eval_runtime": 3.3475,
288
+ "eval_samples_per_second": 2270.666,
289
+ "eval_steps_per_second": 35.549,
290
+ "step": 1901
291
+ },
292
+ {
293
+ "epoch": 1.0257759074171489,
294
+ "grad_norm": 7.249199390411377,
295
+ "learning_rate": 1.5899000526038927e-05,
296
+ "loss": 0.1908,
297
+ "step": 1950
298
+ },
299
+ {
300
+ "epoch": 1.0520778537611783,
301
+ "grad_norm": 16.18492889404297,
302
+ "learning_rate": 1.579379274066281e-05,
303
+ "loss": 0.1919,
304
+ "step": 2000
305
+ },
306
+ {
307
+ "epoch": 1.0783798001052078,
308
+ "grad_norm": 6.383620262145996,
309
+ "learning_rate": 1.5688584955286692e-05,
310
+ "loss": 0.1662,
311
+ "step": 2050
312
+ },
313
+ {
314
+ "epoch": 1.1046817464492373,
315
+ "grad_norm": 2.7821247577667236,
316
+ "learning_rate": 1.5583377169910575e-05,
317
+ "loss": 0.1832,
318
+ "step": 2100
319
+ },
320
+ {
321
+ "epoch": 1.1309836927932668,
322
+ "grad_norm": 0.20694231986999512,
323
+ "learning_rate": 1.5478169384534458e-05,
324
+ "loss": 0.1277,
325
+ "step": 2150
326
+ },
327
+ {
328
+ "epoch": 1.157285639137296,
329
+ "grad_norm": 66.66133880615234,
330
+ "learning_rate": 1.5372961599158337e-05,
331
+ "loss": 0.1896,
332
+ "step": 2200
333
+ },
334
+ {
335
+ "epoch": 1.1835875854813256,
336
+ "grad_norm": 5.3264055252075195,
337
+ "learning_rate": 1.526775381378222e-05,
338
+ "loss": 0.1535,
339
+ "step": 2250
340
+ },
341
+ {
342
+ "epoch": 1.209889531825355,
343
+ "grad_norm": 3.480900526046753,
344
+ "learning_rate": 1.5162546028406104e-05,
345
+ "loss": 0.1767,
346
+ "step": 2300
347
+ },
348
+ {
349
+ "epoch": 1.2361914781693846,
350
+ "grad_norm": 2.1541006565093994,
351
+ "learning_rate": 1.5057338243029986e-05,
352
+ "loss": 0.2361,
353
+ "step": 2350
354
+ },
355
+ {
356
+ "epoch": 1.262493424513414,
357
+ "grad_norm": 13.037530899047852,
358
+ "learning_rate": 1.4952130457653869e-05,
359
+ "loss": 0.1733,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 1.2887953708574433,
364
+ "grad_norm": 6.1545281410217285,
365
+ "learning_rate": 1.484692267227775e-05,
366
+ "loss": 0.1608,
367
+ "step": 2450
368
+ },
369
+ {
370
+ "epoch": 1.3150973172014728,
371
+ "grad_norm": 1.8223601579666138,
372
+ "learning_rate": 1.4741714886901633e-05,
373
+ "loss": 0.1746,
374
+ "step": 2500
375
+ },
376
+ {
377
+ "epoch": 1.3413992635455023,
378
+ "grad_norm": 3.253241777420044,
379
+ "learning_rate": 1.4636507101525515e-05,
380
+ "loss": 0.1466,
381
+ "step": 2550
382
+ },
383
+ {
384
+ "epoch": 1.3677012098895318,
385
+ "grad_norm": 3.3945982456207275,
386
+ "learning_rate": 1.4531299316149396e-05,
387
+ "loss": 0.1732,
388
+ "step": 2600
389
+ },
390
+ {
391
+ "epoch": 1.3940031562335613,
392
+ "grad_norm": 6.702133655548096,
393
+ "learning_rate": 1.4426091530773279e-05,
394
+ "loss": 0.2324,
395
+ "step": 2650
396
+ },
397
+ {
398
+ "epoch": 1.4203051025775908,
399
+ "grad_norm": 3.2291910648345947,
400
+ "learning_rate": 1.4320883745397161e-05,
401
+ "loss": 0.1615,
402
+ "step": 2700
403
+ },
404
+ {
405
+ "epoch": 1.4466070489216203,
406
+ "grad_norm": 8.065141677856445,
407
+ "learning_rate": 1.4215675960021042e-05,
408
+ "loss": 0.1668,
409
+ "step": 2750
410
+ },
411
+ {
412
+ "epoch": 1.4729089952656498,
413
+ "grad_norm": 8.395434379577637,
414
+ "learning_rate": 1.4110468174644925e-05,
415
+ "loss": 0.2002,
416
+ "step": 2800
417
+ },
418
+ {
419
+ "epoch": 1.499210941609679,
420
+ "grad_norm": 5.985948085784912,
421
+ "learning_rate": 1.4005260389268807e-05,
422
+ "loss": 0.1338,
423
+ "step": 2850
424
+ },
425
+ {
426
+ "epoch": 1.5255128879537085,
427
+ "grad_norm": 4.8504791259765625,
428
+ "learning_rate": 1.3900052603892688e-05,
429
+ "loss": 0.1493,
430
+ "step": 2900
431
+ },
432
+ {
433
+ "epoch": 1.551814834297738,
434
+ "grad_norm": 30.86811637878418,
435
+ "learning_rate": 1.3794844818516571e-05,
436
+ "loss": 0.1653,
437
+ "step": 2950
438
+ },
439
+ {
440
+ "epoch": 1.5781167806417675,
441
+ "grad_norm": 8.025301933288574,
442
+ "learning_rate": 1.3689637033140453e-05,
443
+ "loss": 0.195,
444
+ "step": 3000
445
+ },
446
+ {
447
+ "epoch": 1.6044187269857968,
448
+ "grad_norm": 2.7844748497009277,
449
+ "learning_rate": 1.3584429247764334e-05,
450
+ "loss": 0.1513,
451
+ "step": 3050
452
+ },
453
+ {
454
+ "epoch": 1.6307206733298263,
455
+ "grad_norm": 15.212594032287598,
456
+ "learning_rate": 1.3479221462388219e-05,
457
+ "loss": 0.1311,
458
+ "step": 3100
459
+ },
460
+ {
461
+ "epoch": 1.6570226196738558,
462
+ "grad_norm": 7.984399795532227,
463
+ "learning_rate": 1.3374013677012101e-05,
464
+ "loss": 0.1699,
465
+ "step": 3150
466
+ },
467
+ {
468
+ "epoch": 1.6833245660178853,
469
+ "grad_norm": 2.66343092918396,
470
+ "learning_rate": 1.3268805891635982e-05,
471
+ "loss": 0.0987,
472
+ "step": 3200
473
+ },
474
+ {
475
+ "epoch": 1.7096265123619148,
476
+ "grad_norm": 1.7281841039657593,
477
+ "learning_rate": 1.3163598106259865e-05,
478
+ "loss": 0.1468,
479
+ "step": 3250
480
+ },
481
+ {
482
+ "epoch": 1.7359284587059443,
483
+ "grad_norm": 80.2880859375,
484
+ "learning_rate": 1.3058390320883747e-05,
485
+ "loss": 0.1225,
486
+ "step": 3300
487
+ },
488
+ {
489
+ "epoch": 1.7622304050499737,
490
+ "grad_norm": 3.2839515209198,
491
+ "learning_rate": 1.2953182535507628e-05,
492
+ "loss": 0.1612,
493
+ "step": 3350
494
+ },
495
+ {
496
+ "epoch": 1.7885323513940032,
497
+ "grad_norm": 6.35798978805542,
498
+ "learning_rate": 1.2847974750131511e-05,
499
+ "loss": 0.1319,
500
+ "step": 3400
501
+ },
502
+ {
503
+ "epoch": 1.8148342977380327,
504
+ "grad_norm": 17.910255432128906,
505
+ "learning_rate": 1.2742766964755394e-05,
506
+ "loss": 0.2161,
507
+ "step": 3450
508
+ },
509
+ {
510
+ "epoch": 1.8411362440820622,
511
+ "grad_norm": 2.275036573410034,
512
+ "learning_rate": 1.2637559179379274e-05,
513
+ "loss": 0.1118,
514
+ "step": 3500
515
+ },
516
+ {
517
+ "epoch": 1.8674381904260915,
518
+ "grad_norm": 20.091514587402344,
519
+ "learning_rate": 1.2532351394003157e-05,
520
+ "loss": 0.1463,
521
+ "step": 3550
522
+ },
523
+ {
524
+ "epoch": 1.893740136770121,
525
+ "grad_norm": 0.5615454912185669,
526
+ "learning_rate": 1.242714360862704e-05,
527
+ "loss": 0.1648,
528
+ "step": 3600
529
+ },
530
+ {
531
+ "epoch": 1.9200420831141505,
532
+ "grad_norm": 3.871091604232788,
533
+ "learning_rate": 1.232193582325092e-05,
534
+ "loss": 0.1325,
535
+ "step": 3650
536
+ },
537
+ {
538
+ "epoch": 1.9463440294581797,
539
+ "grad_norm": 1.768117904663086,
540
+ "learning_rate": 1.2216728037874803e-05,
541
+ "loss": 0.1664,
542
+ "step": 3700
543
+ },
544
+ {
545
+ "epoch": 1.9726459758022092,
546
+ "grad_norm": 5.8534393310546875,
547
+ "learning_rate": 1.2111520252498686e-05,
548
+ "loss": 0.1578,
549
+ "step": 3750
550
+ },
551
+ {
552
+ "epoch": 1.9989479221462387,
553
+ "grad_norm": 3.766312837600708,
554
+ "learning_rate": 1.2006312467122567e-05,
555
+ "loss": 0.1393,
556
+ "step": 3800
557
+ },
558
+ {
559
+ "epoch": 2.0,
560
+ "eval_FPR_aeb_Arab": 0.00384541434265614,
561
+ "eval_FPR_arb_Arab": 0.02134204960174158,
562
+ "eval_FPR_ars_Arab": 0.01041522010687193,
563
+ "eval_FPR_arz_Arab": 0.020192307688424557,
564
+ "eval_accuracy": 0.9590843310090778,
565
+ "eval_loss": 0.16003794968128204,
566
+ "eval_macro_f1": 0.937683933464698,
567
+ "eval_runtime": 3.3754,
568
+ "eval_samples_per_second": 2251.882,
569
+ "eval_steps_per_second": 35.255,
570
+ "step": 3802
571
+ },
572
+ {
573
+ "epoch": 2.0252498684902682,
574
+ "grad_norm": 14.620624542236328,
575
+ "learning_rate": 1.190110468174645e-05,
576
+ "loss": 0.073,
577
+ "step": 3850
578
+ },
579
+ {
580
+ "epoch": 2.0515518148342977,
581
+ "grad_norm": 1.2938824892044067,
582
+ "learning_rate": 1.1795896896370332e-05,
583
+ "loss": 0.1148,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 2.077853761178327,
588
+ "grad_norm": 3.313081979751587,
589
+ "learning_rate": 1.1690689110994216e-05,
590
+ "loss": 0.0746,
591
+ "step": 3950
592
+ },
593
+ {
594
+ "epoch": 2.1041557075223567,
595
+ "grad_norm": 2.0338821411132812,
596
+ "learning_rate": 1.1585481325618097e-05,
597
+ "loss": 0.0977,
598
+ "step": 4000
599
+ },
600
+ {
601
+ "epoch": 2.130457653866386,
602
+ "grad_norm": 0.055320367217063904,
603
+ "learning_rate": 1.148027354024198e-05,
604
+ "loss": 0.096,
605
+ "step": 4050
606
+ },
607
+ {
608
+ "epoch": 2.1567596002104157,
609
+ "grad_norm": 1.0964843034744263,
610
+ "learning_rate": 1.1375065754865862e-05,
611
+ "loss": 0.0642,
612
+ "step": 4100
613
+ },
614
+ {
615
+ "epoch": 2.183061546554445,
616
+ "grad_norm": 1.0340650081634521,
617
+ "learning_rate": 1.1269857969489743e-05,
618
+ "loss": 0.1007,
619
+ "step": 4150
620
+ },
621
+ {
622
+ "epoch": 2.2093634928984747,
623
+ "grad_norm": 4.971868515014648,
624
+ "learning_rate": 1.1164650184113626e-05,
625
+ "loss": 0.1083,
626
+ "step": 4200
627
+ },
628
+ {
629
+ "epoch": 2.2356654392425037,
630
+ "grad_norm": 0.49501538276672363,
631
+ "learning_rate": 1.1059442398737508e-05,
632
+ "loss": 0.1068,
633
+ "step": 4250
634
+ },
635
+ {
636
+ "epoch": 2.2619673855865337,
637
+ "grad_norm": 6.13097620010376,
638
+ "learning_rate": 1.095423461336139e-05,
639
+ "loss": 0.0946,
640
+ "step": 4300
641
+ },
642
+ {
643
+ "epoch": 2.2882693319305627,
644
+ "grad_norm": 5.904395580291748,
645
+ "learning_rate": 1.0849026827985272e-05,
646
+ "loss": 0.0758,
647
+ "step": 4350
648
+ },
649
+ {
650
+ "epoch": 2.314571278274592,
651
+ "grad_norm": 4.2567138671875,
652
+ "learning_rate": 1.0743819042609155e-05,
653
+ "loss": 0.111,
654
+ "step": 4400
655
+ },
656
+ {
657
+ "epoch": 2.3408732246186217,
658
+ "grad_norm": 0.1440172791481018,
659
+ "learning_rate": 1.0638611257233035e-05,
660
+ "loss": 0.1104,
661
+ "step": 4450
662
+ },
663
+ {
664
+ "epoch": 2.367175170962651,
665
+ "grad_norm": 7.970292091369629,
666
+ "learning_rate": 1.0533403471856918e-05,
667
+ "loss": 0.0891,
668
+ "step": 4500
669
+ },
670
+ {
671
+ "epoch": 2.3934771173066807,
672
+ "grad_norm": 2.4047350883483887,
673
+ "learning_rate": 1.04281956864808e-05,
674
+ "loss": 0.1242,
675
+ "step": 4550
676
+ },
677
+ {
678
+ "epoch": 2.41977906365071,
679
+ "grad_norm": 14.3352689743042,
680
+ "learning_rate": 1.0322987901104682e-05,
681
+ "loss": 0.0649,
682
+ "step": 4600
683
+ },
684
+ {
685
+ "epoch": 2.4460810099947397,
686
+ "grad_norm": 25.1345157623291,
687
+ "learning_rate": 1.0217780115728564e-05,
688
+ "loss": 0.0712,
689
+ "step": 4650
690
+ },
691
+ {
692
+ "epoch": 2.472382956338769,
693
+ "grad_norm": 1.9517714977264404,
694
+ "learning_rate": 1.0112572330352445e-05,
695
+ "loss": 0.1032,
696
+ "step": 4700
697
+ },
698
+ {
699
+ "epoch": 2.4986849026827986,
700
+ "grad_norm": 1.327062726020813,
701
+ "learning_rate": 1.000736454497633e-05,
702
+ "loss": 0.0962,
703
+ "step": 4750
704
+ },
705
+ {
706
+ "epoch": 2.524986849026828,
707
+ "grad_norm": 10.327136993408203,
708
+ "learning_rate": 9.90215675960021e-06,
709
+ "loss": 0.1092,
710
+ "step": 4800
711
+ },
712
+ {
713
+ "epoch": 2.5512887953708576,
714
+ "grad_norm": 3.8997962474823,
715
+ "learning_rate": 9.796948974224093e-06,
716
+ "loss": 0.0681,
717
+ "step": 4850
718
+ },
719
+ {
720
+ "epoch": 2.5775907417148867,
721
+ "grad_norm": 0.270841121673584,
722
+ "learning_rate": 9.691741188847975e-06,
723
+ "loss": 0.1265,
724
+ "step": 4900
725
+ },
726
+ {
727
+ "epoch": 2.6038926880589166,
728
+ "grad_norm": 0.8220506906509399,
729
+ "learning_rate": 9.586533403471858e-06,
730
+ "loss": 0.0726,
731
+ "step": 4950
732
+ },
733
+ {
734
+ "epoch": 2.6301946344029457,
735
+ "grad_norm": 1.4264813661575317,
736
+ "learning_rate": 9.48132561809574e-06,
737
+ "loss": 0.0707,
738
+ "step": 5000
739
+ },
740
+ {
741
+ "epoch": 2.656496580746975,
742
+ "grad_norm": 5.427404880523682,
743
+ "learning_rate": 9.376117832719622e-06,
744
+ "loss": 0.0762,
745
+ "step": 5050
746
+ },
747
+ {
748
+ "epoch": 2.6827985270910046,
749
+ "grad_norm": 39.103004455566406,
750
+ "learning_rate": 9.270910047343504e-06,
751
+ "loss": 0.0733,
752
+ "step": 5100
753
+ },
754
+ {
755
+ "epoch": 2.709100473435034,
756
+ "grad_norm": 2.8170275688171387,
757
+ "learning_rate": 9.165702261967387e-06,
758
+ "loss": 0.105,
759
+ "step": 5150
760
+ },
761
+ {
762
+ "epoch": 2.7354024197790636,
763
+ "grad_norm": 6.285243034362793,
764
+ "learning_rate": 9.060494476591268e-06,
765
+ "loss": 0.1054,
766
+ "step": 5200
767
+ },
768
+ {
769
+ "epoch": 2.761704366123093,
770
+ "grad_norm": 34.959102630615234,
771
+ "learning_rate": 8.95528669121515e-06,
772
+ "loss": 0.1168,
773
+ "step": 5250
774
+ },
775
+ {
776
+ "epoch": 2.7880063124671226,
777
+ "grad_norm": 2.698047399520874,
778
+ "learning_rate": 8.850078905839033e-06,
779
+ "loss": 0.0664,
780
+ "step": 5300
781
+ },
782
+ {
783
+ "epoch": 2.814308258811152,
784
+ "grad_norm": 6.107056617736816,
785
+ "learning_rate": 8.744871120462914e-06,
786
+ "loss": 0.0866,
787
+ "step": 5350
788
+ },
789
+ {
790
+ "epoch": 2.8406102051551816,
791
+ "grad_norm": 6.0492634773254395,
792
+ "learning_rate": 8.639663335086798e-06,
793
+ "loss": 0.0921,
794
+ "step": 5400
795
+ },
796
+ {
797
+ "epoch": 2.866912151499211,
798
+ "grad_norm": 38.75687789916992,
799
+ "learning_rate": 8.534455549710679e-06,
800
+ "loss": 0.0932,
801
+ "step": 5450
802
+ },
803
+ {
804
+ "epoch": 2.8932140978432406,
805
+ "grad_norm": 5.730583190917969,
806
+ "learning_rate": 8.429247764334562e-06,
807
+ "loss": 0.0809,
808
+ "step": 5500
809
+ },
810
+ {
811
+ "epoch": 2.9195160441872696,
812
+ "grad_norm": 0.2023005187511444,
813
+ "learning_rate": 8.324039978958444e-06,
814
+ "loss": 0.0723,
815
+ "step": 5550
816
+ },
817
+ {
818
+ "epoch": 2.9458179905312996,
819
+ "grad_norm": 24.816850662231445,
820
+ "learning_rate": 8.218832193582325e-06,
821
+ "loss": 0.0758,
822
+ "step": 5600
823
+ },
824
+ {
825
+ "epoch": 2.9721199368753286,
826
+ "grad_norm": 0.10021505504846573,
827
+ "learning_rate": 8.113624408206208e-06,
828
+ "loss": 0.0787,
829
+ "step": 5650
830
+ },
831
+ {
832
+ "epoch": 2.998421883219358,
833
+ "grad_norm": 3.8389430046081543,
834
+ "learning_rate": 8.00841662283009e-06,
835
+ "loss": 0.1321,
836
+ "step": 5700
837
+ },
838
+ {
839
+ "epoch": 3.0,
840
+ "eval_FPR_aeb_Arab": 0.004037685059788947,
841
+ "eval_FPR_arb_Arab": 0.009421265139507543,
842
+ "eval_FPR_ars_Arab": 0.005971392861273241,
843
+ "eval_FPR_arz_Arab": 0.020192307688424557,
844
+ "eval_accuracy": 0.9713195632153664,
845
+ "eval_loss": 0.15336963534355164,
846
+ "eval_macro_f1": 0.9569564393242584,
847
+ "eval_runtime": 3.3689,
848
+ "eval_samples_per_second": 2256.259,
849
+ "eval_steps_per_second": 35.324,
850
+ "step": 5703
851
+ },
852
+ {
853
+ "epoch": 3.0247238295633876,
854
+ "grad_norm": 0.30554988980293274,
855
+ "learning_rate": 7.903208837453971e-06,
856
+ "loss": 0.0937,
857
+ "step": 5750
858
+ },
859
+ {
860
+ "epoch": 3.051025775907417,
861
+ "grad_norm": 37.439884185791016,
862
+ "learning_rate": 7.798001052077856e-06,
863
+ "loss": 0.0578,
864
+ "step": 5800
865
+ },
866
+ {
867
+ "epoch": 3.0773277222514466,
868
+ "grad_norm": 0.0822492390871048,
869
+ "learning_rate": 7.692793266701737e-06,
870
+ "loss": 0.0636,
871
+ "step": 5850
872
+ },
873
+ {
874
+ "epoch": 3.103629668595476,
875
+ "grad_norm": 2.7918007373809814,
876
+ "learning_rate": 7.587585481325619e-06,
877
+ "loss": 0.0378,
878
+ "step": 5900
879
+ },
880
+ {
881
+ "epoch": 3.1299316149395056,
882
+ "grad_norm": 32.899818420410156,
883
+ "learning_rate": 7.482377695949501e-06,
884
+ "loss": 0.0609,
885
+ "step": 5950
886
+ },
887
+ {
888
+ "epoch": 3.156233561283535,
889
+ "grad_norm": 0.06830895692110062,
890
+ "learning_rate": 7.377169910573383e-06,
891
+ "loss": 0.0433,
892
+ "step": 6000
893
+ },
894
+ {
895
+ "epoch": 3.1825355076275645,
896
+ "grad_norm": 54.685489654541016,
897
+ "learning_rate": 7.271962125197265e-06,
898
+ "loss": 0.056,
899
+ "step": 6050
900
+ },
901
+ {
902
+ "epoch": 3.208837453971594,
903
+ "grad_norm": 0.8175523281097412,
904
+ "learning_rate": 7.166754339821147e-06,
905
+ "loss": 0.0341,
906
+ "step": 6100
907
+ },
908
+ {
909
+ "epoch": 3.2351394003156235,
910
+ "grad_norm": 0.33226722478866577,
911
+ "learning_rate": 7.061546554445029e-06,
912
+ "loss": 0.0482,
913
+ "step": 6150
914
+ },
915
+ {
916
+ "epoch": 3.2614413466596526,
917
+ "grad_norm": 1.425661325454712,
918
+ "learning_rate": 6.956338769068912e-06,
919
+ "loss": 0.0673,
920
+ "step": 6200
921
+ },
922
+ {
923
+ "epoch": 3.2877432930036825,
924
+ "grad_norm": 0.18895921111106873,
925
+ "learning_rate": 6.851130983692794e-06,
926
+ "loss": 0.0359,
927
+ "step": 6250
928
+ },
929
+ {
930
+ "epoch": 3.3140452393477116,
931
+ "grad_norm": 0.6557305455207825,
932
+ "learning_rate": 6.7459231983166766e-06,
933
+ "loss": 0.0382,
934
+ "step": 6300
935
+ },
936
+ {
937
+ "epoch": 3.340347185691741,
938
+ "grad_norm": 0.008198770694434643,
939
+ "learning_rate": 6.640715412940558e-06,
940
+ "loss": 0.0566,
941
+ "step": 6350
942
+ },
943
+ {
944
+ "epoch": 3.3666491320357705,
945
+ "grad_norm": 0.4695976674556732,
946
+ "learning_rate": 6.53550762756444e-06,
947
+ "loss": 0.0654,
948
+ "step": 6400
949
+ },
950
+ {
951
+ "epoch": 3.3929510783798,
952
+ "grad_norm": 8.628214836120605,
953
+ "learning_rate": 6.430299842188323e-06,
954
+ "loss": 0.0427,
955
+ "step": 6450
956
+ },
957
+ {
958
+ "epoch": 3.4192530247238295,
959
+ "grad_norm": 0.9650713801383972,
960
+ "learning_rate": 6.3250920568122044e-06,
961
+ "loss": 0.0645,
962
+ "step": 6500
963
+ },
964
+ {
965
+ "epoch": 3.445554971067859,
966
+ "grad_norm": 5.836668968200684,
967
+ "learning_rate": 6.219884271436086e-06,
968
+ "loss": 0.0397,
969
+ "step": 6550
970
+ },
971
+ {
972
+ "epoch": 3.4718569174118885,
973
+ "grad_norm": 0.03976545110344887,
974
+ "learning_rate": 6.11467648605997e-06,
975
+ "loss": 0.0586,
976
+ "step": 6600
977
+ },
978
+ {
979
+ "epoch": 3.498158863755918,
980
+ "grad_norm": 19.784215927124023,
981
+ "learning_rate": 6.009468700683851e-06,
982
+ "loss": 0.033,
983
+ "step": 6650
984
+ },
985
+ {
986
+ "epoch": 3.5244608100999475,
987
+ "grad_norm": 2.075496196746826,
988
+ "learning_rate": 5.904260915307733e-06,
989
+ "loss": 0.0776,
990
+ "step": 6700
991
+ },
992
+ {
993
+ "epoch": 3.550762756443977,
994
+ "grad_norm": 7.05810022354126,
995
+ "learning_rate": 5.799053129931616e-06,
996
+ "loss": 0.0905,
997
+ "step": 6750
998
+ },
999
+ {
1000
+ "epoch": 3.5770647027880065,
1001
+ "grad_norm": 0.012984913773834705,
1002
+ "learning_rate": 5.6938453445554975e-06,
1003
+ "loss": 0.0542,
1004
+ "step": 6800
1005
+ },
1006
+ {
1007
+ "epoch": 3.6033666491320355,
1008
+ "grad_norm": 2.701481342315674,
1009
+ "learning_rate": 5.588637559179379e-06,
1010
+ "loss": 0.0625,
1011
+ "step": 6850
1012
+ },
1013
+ {
1014
+ "epoch": 3.6296685954760655,
1015
+ "grad_norm": 0.41872379183769226,
1016
+ "learning_rate": 5.483429773803262e-06,
1017
+ "loss": 0.0795,
1018
+ "step": 6900
1019
+ },
1020
+ {
1021
+ "epoch": 3.6559705418200945,
1022
+ "grad_norm": 0.13123294711112976,
1023
+ "learning_rate": 5.378221988427144e-06,
1024
+ "loss": 0.0296,
1025
+ "step": 6950
1026
+ },
1027
+ {
1028
+ "epoch": 3.682272488164124,
1029
+ "grad_norm": 0.7190969586372375,
1030
+ "learning_rate": 5.273014203051027e-06,
1031
+ "loss": 0.0666,
1032
+ "step": 7000
1033
+ },
1034
+ {
1035
+ "epoch": 3.7085744345081535,
1036
+ "grad_norm": 0.1744261384010315,
1037
+ "learning_rate": 5.167806417674909e-06,
1038
+ "loss": 0.0328,
1039
+ "step": 7050
1040
+ },
1041
+ {
1042
+ "epoch": 3.734876380852183,
1043
+ "grad_norm": 0.5619340538978577,
1044
+ "learning_rate": 5.062598632298791e-06,
1045
+ "loss": 0.0755,
1046
+ "step": 7100
1047
+ },
1048
+ {
1049
+ "epoch": 3.7611783271962125,
1050
+ "grad_norm": 40.665706634521484,
1051
+ "learning_rate": 4.957390846922673e-06,
1052
+ "loss": 0.1041,
1053
+ "step": 7150
1054
+ },
1055
+ {
1056
+ "epoch": 3.787480273540242,
1057
+ "grad_norm": 0.06617475301027298,
1058
+ "learning_rate": 4.852183061546555e-06,
1059
+ "loss": 0.0264,
1060
+ "step": 7200
1061
+ },
1062
+ {
1063
+ "epoch": 3.8137822198842715,
1064
+ "grad_norm": 5.0283966064453125,
1065
+ "learning_rate": 4.746975276170437e-06,
1066
+ "loss": 0.0789,
1067
+ "step": 7250
1068
+ },
1069
+ {
1070
+ "epoch": 3.840084166228301,
1071
+ "grad_norm": 5.660898208618164,
1072
+ "learning_rate": 4.641767490794319e-06,
1073
+ "loss": 0.0582,
1074
+ "step": 7300
1075
+ },
1076
+ {
1077
+ "epoch": 3.8663861125723304,
1078
+ "grad_norm": 0.8503484725952148,
1079
+ "learning_rate": 4.536559705418201e-06,
1080
+ "loss": 0.0862,
1081
+ "step": 7350
1082
+ },
1083
+ {
1084
+ "epoch": 3.89268805891636,
1085
+ "grad_norm": 13.575056076049805,
1086
+ "learning_rate": 4.431351920042084e-06,
1087
+ "loss": 0.0554,
1088
+ "step": 7400
1089
+ },
1090
+ {
1091
+ "epoch": 3.9189900052603894,
1092
+ "grad_norm": 0.25003504753112793,
1093
+ "learning_rate": 4.3261441346659654e-06,
1094
+ "loss": 0.0504,
1095
+ "step": 7450
1096
+ },
1097
+ {
1098
+ "epoch": 3.9452919516044185,
1099
+ "grad_norm": 0.022247493267059326,
1100
+ "learning_rate": 4.220936349289847e-06,
1101
+ "loss": 0.0663,
1102
+ "step": 7500
1103
+ },
1104
+ {
1105
+ "epoch": 3.9715938979484484,
1106
+ "grad_norm": 0.2591884136199951,
1107
+ "learning_rate": 4.11572856391373e-06,
1108
+ "loss": 0.0361,
1109
+ "step": 7550
1110
+ },
1111
+ {
1112
+ "epoch": 3.9978958442924775,
1113
+ "grad_norm": 6.533713340759277,
1114
+ "learning_rate": 4.010520778537612e-06,
1115
+ "loss": 0.0293,
1116
+ "step": 7600
1117
+ },
1118
+ {
1119
+ "epoch": 4.0,
1120
+ "eval_FPR_aeb_Arab": 0.004229955776921754,
1121
+ "eval_FPR_arb_Arab": 0.011343972310835613,
1122
+ "eval_FPR_ars_Arab": 0.00458269684702365,
1123
+ "eval_FPR_arz_Arab": 0.015576923073927515,
1124
+ "eval_accuracy": 0.9743454808577818,
1125
+ "eval_loss": 0.15085552632808685,
1126
+ "eval_macro_f1": 0.9633717243752477,
1127
+ "eval_runtime": 3.3689,
1128
+ "eval_samples_per_second": 2256.225,
1129
+ "eval_steps_per_second": 35.323,
1130
+ "step": 7604
1131
+ }
1132
+ ],
1133
+ "logging_steps": 50,
1134
+ "max_steps": 9505,
1135
+ "num_input_tokens_seen": 0,
1136
+ "num_train_epochs": 5,
1137
+ "save_steps": 500,
1138
+ "stateful_callbacks": {
1139
+ "EarlyStoppingCallback": {
1140
+ "args": {
1141
+ "early_stopping_patience": 2,
1142
+ "early_stopping_threshold": 0.0
1143
+ },
1144
+ "attributes": {
1145
+ "early_stopping_patience_counter": 0
1146
+ }
1147
+ },
1148
+ "TrainerControl": {
1149
+ "args": {
1150
+ "should_epoch_stop": false,
1151
+ "should_evaluate": false,
1152
+ "should_log": false,
1153
+ "should_save": true,
1154
+ "should_training_stop": false
1155
+ },
1156
+ "attributes": {}
1157
+ }
1158
+ },
1159
+ "total_flos": 3440682832634112.0,
1160
+ "train_batch_size": 32,
1161
+ "trial_name": null,
1162
+ "trial_params": null
1163
+ }
checkpoint-7604/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
3
+ size 5777
checkpoint-7604/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-9505/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "aeb_Arab",
14
+ "1": "arb_Arab",
15
+ "2": "ars_Arab",
16
+ "3": "arz_Arab"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "aeb_Arab": 0,
22
+ "arb_Arab": 1,
23
+ "ars_Arab": 2,
24
+ "arz_Arab": 3
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "model_type": "bert",
29
+ "num_attention_heads": 12,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "absolute",
33
+ "transformers_version": "4.56.1",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 30000
37
+ }
checkpoint-9505/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:496bb5c365c925f55ceae6595a8e715b409815cc8b6ff91706c35fe83c24f363
3
+ size 436361208
checkpoint-9505/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78fce8e6a78a3dd9090bb6fea2c41408a1f66de87245c884b78dcd7d0bf917fb
3
+ size 872846731
checkpoint-9505/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c65e00000d546fd729719d9d823f0bbe6b2fad692c4b49b96b131bba6b22e84b
3
+ size 14645
checkpoint-9505/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f176115dc4e83049fd3dff5a3ca9b4c02dc6d882e278af3ec1e96a2bcfccdaf
3
+ size 1383
checkpoint-9505/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e6a15eeffc5f00f2067bcaf3b35d9b2964df7e31ed9651c83e5191924ebd3b
3
+ size 1465
checkpoint-9505/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-9505/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-9505/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "full_tokenizer_file": null,
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "never_split": null,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": null,
56
+ "tokenize_chinese_chars": true,
57
+ "tokenizer_class": "BertTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }
checkpoint-9505/trainer_state.json ADDED
@@ -0,0 +1,1443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 9505,
3
+ "best_metric": 0.9641750099415428,
4
+ "best_model_checkpoint": "camelbert_madar_task5/checkpoint-9505",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 9505,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.026301946344029457,
14
+ "grad_norm": 21.061479568481445,
15
+ "learning_rate": 1.9896896370331405e-05,
16
+ "loss": 0.9381,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.052603892688058915,
21
+ "grad_norm": 4.199251651763916,
22
+ "learning_rate": 1.9791688584955288e-05,
23
+ "loss": 0.5182,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.07890583903208838,
28
+ "grad_norm": 22.227828979492188,
29
+ "learning_rate": 1.968648079957917e-05,
30
+ "loss": 0.4486,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.10520778537611783,
35
+ "grad_norm": 7.481734275817871,
36
+ "learning_rate": 1.9581273014203053e-05,
37
+ "loss": 0.4422,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.1315097317201473,
42
+ "grad_norm": 9.7647705078125,
43
+ "learning_rate": 1.9476065228826936e-05,
44
+ "loss": 0.4304,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.15781167806417676,
49
+ "grad_norm": 12.080931663513184,
50
+ "learning_rate": 1.9370857443450818e-05,
51
+ "loss": 0.3672,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.1841136244082062,
56
+ "grad_norm": 11.353347778320312,
57
+ "learning_rate": 1.9265649658074697e-05,
58
+ "loss": 0.3771,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.21041557075223566,
63
+ "grad_norm": 3.3302719593048096,
64
+ "learning_rate": 1.916044187269858e-05,
65
+ "loss": 0.4053,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.23671751709626512,
70
+ "grad_norm": 11.869136810302734,
71
+ "learning_rate": 1.9055234087322463e-05,
72
+ "loss": 0.3754,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.2630194634402946,
77
+ "grad_norm": 19.71166229248047,
78
+ "learning_rate": 1.8950026301946345e-05,
79
+ "loss": 0.3909,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.289321409784324,
84
+ "grad_norm": 82.08606719970703,
85
+ "learning_rate": 1.8844818516570228e-05,
86
+ "loss": 0.3945,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.3156233561283535,
91
+ "grad_norm": 5.5329389572143555,
92
+ "learning_rate": 1.873961073119411e-05,
93
+ "loss": 0.3182,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.34192530247238295,
98
+ "grad_norm": 10.177448272705078,
99
+ "learning_rate": 1.8634402945817993e-05,
100
+ "loss": 0.3294,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.3682272488164124,
105
+ "grad_norm": 11.596871376037598,
106
+ "learning_rate": 1.8529195160441876e-05,
107
+ "loss": 0.3445,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.3945291951604419,
112
+ "grad_norm": 5.0095319747924805,
113
+ "learning_rate": 1.8423987375065758e-05,
114
+ "loss": 0.3403,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.4208311415044713,
119
+ "grad_norm": 6.569547653198242,
120
+ "learning_rate": 1.8318779589689638e-05,
121
+ "loss": 0.2767,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.4471330878485008,
126
+ "grad_norm": 7.269279956817627,
127
+ "learning_rate": 1.821357180431352e-05,
128
+ "loss": 0.3391,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 0.47343503419253025,
133
+ "grad_norm": 6.403675079345703,
134
+ "learning_rate": 1.8108364018937403e-05,
135
+ "loss": 0.2729,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 0.4997369805365597,
140
+ "grad_norm": 18.03633689880371,
141
+ "learning_rate": 1.8003156233561285e-05,
142
+ "loss": 0.2912,
143
+ "step": 950
144
+ },
145
+ {
146
+ "epoch": 0.5260389268805892,
147
+ "grad_norm": 11.609797477722168,
148
+ "learning_rate": 1.7897948448185168e-05,
149
+ "loss": 0.3678,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 0.5523408732246187,
154
+ "grad_norm": 8.587767601013184,
155
+ "learning_rate": 1.779274066280905e-05,
156
+ "loss": 0.2789,
157
+ "step": 1050
158
+ },
159
+ {
160
+ "epoch": 0.578642819568648,
161
+ "grad_norm": 2.894766092300415,
162
+ "learning_rate": 1.768753287743293e-05,
163
+ "loss": 0.2515,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 0.6049447659126775,
168
+ "grad_norm": 14.84619140625,
169
+ "learning_rate": 1.7582325092056812e-05,
170
+ "loss": 0.2817,
171
+ "step": 1150
172
+ },
173
+ {
174
+ "epoch": 0.631246712256707,
175
+ "grad_norm": 14.3659029006958,
176
+ "learning_rate": 1.7477117306680695e-05,
177
+ "loss": 0.2819,
178
+ "step": 1200
179
+ },
180
+ {
181
+ "epoch": 0.6575486586007364,
182
+ "grad_norm": 24.962841033935547,
183
+ "learning_rate": 1.7371909521304578e-05,
184
+ "loss": 0.275,
185
+ "step": 1250
186
+ },
187
+ {
188
+ "epoch": 0.6838506049447659,
189
+ "grad_norm": 2.1663622856140137,
190
+ "learning_rate": 1.726670173592846e-05,
191
+ "loss": 0.2513,
192
+ "step": 1300
193
+ },
194
+ {
195
+ "epoch": 0.7101525512887954,
196
+ "grad_norm": 20.324939727783203,
197
+ "learning_rate": 1.7161493950552343e-05,
198
+ "loss": 0.2862,
199
+ "step": 1350
200
+ },
201
+ {
202
+ "epoch": 0.7364544976328248,
203
+ "grad_norm": 12.115033149719238,
204
+ "learning_rate": 1.7056286165176222e-05,
205
+ "loss": 0.2489,
206
+ "step": 1400
207
+ },
208
+ {
209
+ "epoch": 0.7627564439768543,
210
+ "grad_norm": 9.39247989654541,
211
+ "learning_rate": 1.6951078379800105e-05,
212
+ "loss": 0.2199,
213
+ "step": 1450
214
+ },
215
+ {
216
+ "epoch": 0.7890583903208838,
217
+ "grad_norm": 11.820609092712402,
218
+ "learning_rate": 1.684587059442399e-05,
219
+ "loss": 0.2334,
220
+ "step": 1500
221
+ },
222
+ {
223
+ "epoch": 0.8153603366649133,
224
+ "grad_norm": 5.685638427734375,
225
+ "learning_rate": 1.6740662809047873e-05,
226
+ "loss": 0.2859,
227
+ "step": 1550
228
+ },
229
+ {
230
+ "epoch": 0.8416622830089426,
231
+ "grad_norm": 1.4263566732406616,
232
+ "learning_rate": 1.6635455023671752e-05,
233
+ "loss": 0.2712,
234
+ "step": 1600
235
+ },
236
+ {
237
+ "epoch": 0.8679642293529721,
238
+ "grad_norm": 43.12693786621094,
239
+ "learning_rate": 1.6530247238295635e-05,
240
+ "loss": 0.2236,
241
+ "step": 1650
242
+ },
243
+ {
244
+ "epoch": 0.8942661756970016,
245
+ "grad_norm": 18.322067260742188,
246
+ "learning_rate": 1.6425039452919518e-05,
247
+ "loss": 0.2176,
248
+ "step": 1700
249
+ },
250
+ {
251
+ "epoch": 0.920568122041031,
252
+ "grad_norm": 8.125885009765625,
253
+ "learning_rate": 1.63198316675434e-05,
254
+ "loss": 0.2344,
255
+ "step": 1750
256
+ },
257
+ {
258
+ "epoch": 0.9468700683850605,
259
+ "grad_norm": 4.2774457931518555,
260
+ "learning_rate": 1.6214623882167283e-05,
261
+ "loss": 0.2173,
262
+ "step": 1800
263
+ },
264
+ {
265
+ "epoch": 0.97317201472909,
266
+ "grad_norm": 8.311309814453125,
267
+ "learning_rate": 1.6109416096791165e-05,
268
+ "loss": 0.207,
269
+ "step": 1850
270
+ },
271
+ {
272
+ "epoch": 0.9994739610731194,
273
+ "grad_norm": 18.770065307617188,
274
+ "learning_rate": 1.6004208311415045e-05,
275
+ "loss": 0.2261,
276
+ "step": 1900
277
+ },
278
+ {
279
+ "epoch": 1.0,
280
+ "eval_FPR_aeb_Arab": 0.007114016533913859,
281
+ "eval_FPR_arb_Arab": 0.007306287251046666,
282
+ "eval_FPR_ars_Arab": 0.0274961810821419,
283
+ "eval_FPR_arz_Arab": 0.03769230768505917,
284
+ "eval_accuracy": 0.9382975924220497,
285
+ "eval_loss": 0.2227914035320282,
286
+ "eval_macro_f1": 0.8929982487077235,
287
+ "eval_runtime": 3.3475,
288
+ "eval_samples_per_second": 2270.666,
289
+ "eval_steps_per_second": 35.549,
290
+ "step": 1901
291
+ },
292
+ {
293
+ "epoch": 1.0257759074171489,
294
+ "grad_norm": 7.249199390411377,
295
+ "learning_rate": 1.5899000526038927e-05,
296
+ "loss": 0.1908,
297
+ "step": 1950
298
+ },
299
+ {
300
+ "epoch": 1.0520778537611783,
301
+ "grad_norm": 16.18492889404297,
302
+ "learning_rate": 1.579379274066281e-05,
303
+ "loss": 0.1919,
304
+ "step": 2000
305
+ },
306
+ {
307
+ "epoch": 1.0783798001052078,
308
+ "grad_norm": 6.383620262145996,
309
+ "learning_rate": 1.5688584955286692e-05,
310
+ "loss": 0.1662,
311
+ "step": 2050
312
+ },
313
+ {
314
+ "epoch": 1.1046817464492373,
315
+ "grad_norm": 2.7821247577667236,
316
+ "learning_rate": 1.5583377169910575e-05,
317
+ "loss": 0.1832,
318
+ "step": 2100
319
+ },
320
+ {
321
+ "epoch": 1.1309836927932668,
322
+ "grad_norm": 0.20694231986999512,
323
+ "learning_rate": 1.5478169384534458e-05,
324
+ "loss": 0.1277,
325
+ "step": 2150
326
+ },
327
+ {
328
+ "epoch": 1.157285639137296,
329
+ "grad_norm": 66.66133880615234,
330
+ "learning_rate": 1.5372961599158337e-05,
331
+ "loss": 0.1896,
332
+ "step": 2200
333
+ },
334
+ {
335
+ "epoch": 1.1835875854813256,
336
+ "grad_norm": 5.3264055252075195,
337
+ "learning_rate": 1.526775381378222e-05,
338
+ "loss": 0.1535,
339
+ "step": 2250
340
+ },
341
+ {
342
+ "epoch": 1.209889531825355,
343
+ "grad_norm": 3.480900526046753,
344
+ "learning_rate": 1.5162546028406104e-05,
345
+ "loss": 0.1767,
346
+ "step": 2300
347
+ },
348
+ {
349
+ "epoch": 1.2361914781693846,
350
+ "grad_norm": 2.1541006565093994,
351
+ "learning_rate": 1.5057338243029986e-05,
352
+ "loss": 0.2361,
353
+ "step": 2350
354
+ },
355
+ {
356
+ "epoch": 1.262493424513414,
357
+ "grad_norm": 13.037530899047852,
358
+ "learning_rate": 1.4952130457653869e-05,
359
+ "loss": 0.1733,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 1.2887953708574433,
364
+ "grad_norm": 6.1545281410217285,
365
+ "learning_rate": 1.484692267227775e-05,
366
+ "loss": 0.1608,
367
+ "step": 2450
368
+ },
369
+ {
370
+ "epoch": 1.3150973172014728,
371
+ "grad_norm": 1.8223601579666138,
372
+ "learning_rate": 1.4741714886901633e-05,
373
+ "loss": 0.1746,
374
+ "step": 2500
375
+ },
376
+ {
377
+ "epoch": 1.3413992635455023,
378
+ "grad_norm": 3.253241777420044,
379
+ "learning_rate": 1.4636507101525515e-05,
380
+ "loss": 0.1466,
381
+ "step": 2550
382
+ },
383
+ {
384
+ "epoch": 1.3677012098895318,
385
+ "grad_norm": 3.3945982456207275,
386
+ "learning_rate": 1.4531299316149396e-05,
387
+ "loss": 0.1732,
388
+ "step": 2600
389
+ },
390
+ {
391
+ "epoch": 1.3940031562335613,
392
+ "grad_norm": 6.702133655548096,
393
+ "learning_rate": 1.4426091530773279e-05,
394
+ "loss": 0.2324,
395
+ "step": 2650
396
+ },
397
+ {
398
+ "epoch": 1.4203051025775908,
399
+ "grad_norm": 3.2291910648345947,
400
+ "learning_rate": 1.4320883745397161e-05,
401
+ "loss": 0.1615,
402
+ "step": 2700
403
+ },
404
+ {
405
+ "epoch": 1.4466070489216203,
406
+ "grad_norm": 8.065141677856445,
407
+ "learning_rate": 1.4215675960021042e-05,
408
+ "loss": 0.1668,
409
+ "step": 2750
410
+ },
411
+ {
412
+ "epoch": 1.4729089952656498,
413
+ "grad_norm": 8.395434379577637,
414
+ "learning_rate": 1.4110468174644925e-05,
415
+ "loss": 0.2002,
416
+ "step": 2800
417
+ },
418
+ {
419
+ "epoch": 1.499210941609679,
420
+ "grad_norm": 5.985948085784912,
421
+ "learning_rate": 1.4005260389268807e-05,
422
+ "loss": 0.1338,
423
+ "step": 2850
424
+ },
425
+ {
426
+ "epoch": 1.5255128879537085,
427
+ "grad_norm": 4.8504791259765625,
428
+ "learning_rate": 1.3900052603892688e-05,
429
+ "loss": 0.1493,
430
+ "step": 2900
431
+ },
432
+ {
433
+ "epoch": 1.551814834297738,
434
+ "grad_norm": 30.86811637878418,
435
+ "learning_rate": 1.3794844818516571e-05,
436
+ "loss": 0.1653,
437
+ "step": 2950
438
+ },
439
+ {
440
+ "epoch": 1.5781167806417675,
441
+ "grad_norm": 8.025301933288574,
442
+ "learning_rate": 1.3689637033140453e-05,
443
+ "loss": 0.195,
444
+ "step": 3000
445
+ },
446
+ {
447
+ "epoch": 1.6044187269857968,
448
+ "grad_norm": 2.7844748497009277,
449
+ "learning_rate": 1.3584429247764334e-05,
450
+ "loss": 0.1513,
451
+ "step": 3050
452
+ },
453
+ {
454
+ "epoch": 1.6307206733298263,
455
+ "grad_norm": 15.212594032287598,
456
+ "learning_rate": 1.3479221462388219e-05,
457
+ "loss": 0.1311,
458
+ "step": 3100
459
+ },
460
+ {
461
+ "epoch": 1.6570226196738558,
462
+ "grad_norm": 7.984399795532227,
463
+ "learning_rate": 1.3374013677012101e-05,
464
+ "loss": 0.1699,
465
+ "step": 3150
466
+ },
467
+ {
468
+ "epoch": 1.6833245660178853,
469
+ "grad_norm": 2.66343092918396,
470
+ "learning_rate": 1.3268805891635982e-05,
471
+ "loss": 0.0987,
472
+ "step": 3200
473
+ },
474
+ {
475
+ "epoch": 1.7096265123619148,
476
+ "grad_norm": 1.7281841039657593,
477
+ "learning_rate": 1.3163598106259865e-05,
478
+ "loss": 0.1468,
479
+ "step": 3250
480
+ },
481
+ {
482
+ "epoch": 1.7359284587059443,
483
+ "grad_norm": 80.2880859375,
484
+ "learning_rate": 1.3058390320883747e-05,
485
+ "loss": 0.1225,
486
+ "step": 3300
487
+ },
488
+ {
489
+ "epoch": 1.7622304050499737,
490
+ "grad_norm": 3.2839515209198,
491
+ "learning_rate": 1.2953182535507628e-05,
492
+ "loss": 0.1612,
493
+ "step": 3350
494
+ },
495
+ {
496
+ "epoch": 1.7885323513940032,
497
+ "grad_norm": 6.35798978805542,
498
+ "learning_rate": 1.2847974750131511e-05,
499
+ "loss": 0.1319,
500
+ "step": 3400
501
+ },
502
+ {
503
+ "epoch": 1.8148342977380327,
504
+ "grad_norm": 17.910255432128906,
505
+ "learning_rate": 1.2742766964755394e-05,
506
+ "loss": 0.2161,
507
+ "step": 3450
508
+ },
509
+ {
510
+ "epoch": 1.8411362440820622,
511
+ "grad_norm": 2.275036573410034,
512
+ "learning_rate": 1.2637559179379274e-05,
513
+ "loss": 0.1118,
514
+ "step": 3500
515
+ },
516
+ {
517
+ "epoch": 1.8674381904260915,
518
+ "grad_norm": 20.091514587402344,
519
+ "learning_rate": 1.2532351394003157e-05,
520
+ "loss": 0.1463,
521
+ "step": 3550
522
+ },
523
+ {
524
+ "epoch": 1.893740136770121,
525
+ "grad_norm": 0.5615454912185669,
526
+ "learning_rate": 1.242714360862704e-05,
527
+ "loss": 0.1648,
528
+ "step": 3600
529
+ },
530
+ {
531
+ "epoch": 1.9200420831141505,
532
+ "grad_norm": 3.871091604232788,
533
+ "learning_rate": 1.232193582325092e-05,
534
+ "loss": 0.1325,
535
+ "step": 3650
536
+ },
537
+ {
538
+ "epoch": 1.9463440294581797,
539
+ "grad_norm": 1.768117904663086,
540
+ "learning_rate": 1.2216728037874803e-05,
541
+ "loss": 0.1664,
542
+ "step": 3700
543
+ },
544
+ {
545
+ "epoch": 1.9726459758022092,
546
+ "grad_norm": 5.8534393310546875,
547
+ "learning_rate": 1.2111520252498686e-05,
548
+ "loss": 0.1578,
549
+ "step": 3750
550
+ },
551
+ {
552
+ "epoch": 1.9989479221462387,
553
+ "grad_norm": 3.766312837600708,
554
+ "learning_rate": 1.2006312467122567e-05,
555
+ "loss": 0.1393,
556
+ "step": 3800
557
+ },
558
+ {
559
+ "epoch": 2.0,
560
+ "eval_FPR_aeb_Arab": 0.00384541434265614,
561
+ "eval_FPR_arb_Arab": 0.02134204960174158,
562
+ "eval_FPR_ars_Arab": 0.01041522010687193,
563
+ "eval_FPR_arz_Arab": 0.020192307688424557,
564
+ "eval_accuracy": 0.9590843310090778,
565
+ "eval_loss": 0.16003794968128204,
566
+ "eval_macro_f1": 0.937683933464698,
567
+ "eval_runtime": 3.3754,
568
+ "eval_samples_per_second": 2251.882,
569
+ "eval_steps_per_second": 35.255,
570
+ "step": 3802
571
+ },
572
+ {
573
+ "epoch": 2.0252498684902682,
574
+ "grad_norm": 14.620624542236328,
575
+ "learning_rate": 1.190110468174645e-05,
576
+ "loss": 0.073,
577
+ "step": 3850
578
+ },
579
+ {
580
+ "epoch": 2.0515518148342977,
581
+ "grad_norm": 1.2938824892044067,
582
+ "learning_rate": 1.1795896896370332e-05,
583
+ "loss": 0.1148,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 2.077853761178327,
588
+ "grad_norm": 3.313081979751587,
589
+ "learning_rate": 1.1690689110994216e-05,
590
+ "loss": 0.0746,
591
+ "step": 3950
592
+ },
593
+ {
594
+ "epoch": 2.1041557075223567,
595
+ "grad_norm": 2.0338821411132812,
596
+ "learning_rate": 1.1585481325618097e-05,
597
+ "loss": 0.0977,
598
+ "step": 4000
599
+ },
600
+ {
601
+ "epoch": 2.130457653866386,
602
+ "grad_norm": 0.055320367217063904,
603
+ "learning_rate": 1.148027354024198e-05,
604
+ "loss": 0.096,
605
+ "step": 4050
606
+ },
607
+ {
608
+ "epoch": 2.1567596002104157,
609
+ "grad_norm": 1.0964843034744263,
610
+ "learning_rate": 1.1375065754865862e-05,
611
+ "loss": 0.0642,
612
+ "step": 4100
613
+ },
614
+ {
615
+ "epoch": 2.183061546554445,
616
+ "grad_norm": 1.0340650081634521,
617
+ "learning_rate": 1.1269857969489743e-05,
618
+ "loss": 0.1007,
619
+ "step": 4150
620
+ },
621
+ {
622
+ "epoch": 2.2093634928984747,
623
+ "grad_norm": 4.971868515014648,
624
+ "learning_rate": 1.1164650184113626e-05,
625
+ "loss": 0.1083,
626
+ "step": 4200
627
+ },
628
+ {
629
+ "epoch": 2.2356654392425037,
630
+ "grad_norm": 0.49501538276672363,
631
+ "learning_rate": 1.1059442398737508e-05,
632
+ "loss": 0.1068,
633
+ "step": 4250
634
+ },
635
+ {
636
+ "epoch": 2.2619673855865337,
637
+ "grad_norm": 6.13097620010376,
638
+ "learning_rate": 1.095423461336139e-05,
639
+ "loss": 0.0946,
640
+ "step": 4300
641
+ },
642
+ {
643
+ "epoch": 2.2882693319305627,
644
+ "grad_norm": 5.904395580291748,
645
+ "learning_rate": 1.0849026827985272e-05,
646
+ "loss": 0.0758,
647
+ "step": 4350
648
+ },
649
+ {
650
+ "epoch": 2.314571278274592,
651
+ "grad_norm": 4.2567138671875,
652
+ "learning_rate": 1.0743819042609155e-05,
653
+ "loss": 0.111,
654
+ "step": 4400
655
+ },
656
+ {
657
+ "epoch": 2.3408732246186217,
658
+ "grad_norm": 0.1440172791481018,
659
+ "learning_rate": 1.0638611257233035e-05,
660
+ "loss": 0.1104,
661
+ "step": 4450
662
+ },
663
+ {
664
+ "epoch": 2.367175170962651,
665
+ "grad_norm": 7.970292091369629,
666
+ "learning_rate": 1.0533403471856918e-05,
667
+ "loss": 0.0891,
668
+ "step": 4500
669
+ },
670
+ {
671
+ "epoch": 2.3934771173066807,
672
+ "grad_norm": 2.4047350883483887,
673
+ "learning_rate": 1.04281956864808e-05,
674
+ "loss": 0.1242,
675
+ "step": 4550
676
+ },
677
+ {
678
+ "epoch": 2.41977906365071,
679
+ "grad_norm": 14.3352689743042,
680
+ "learning_rate": 1.0322987901104682e-05,
681
+ "loss": 0.0649,
682
+ "step": 4600
683
+ },
684
+ {
685
+ "epoch": 2.4460810099947397,
686
+ "grad_norm": 25.1345157623291,
687
+ "learning_rate": 1.0217780115728564e-05,
688
+ "loss": 0.0712,
689
+ "step": 4650
690
+ },
691
+ {
692
+ "epoch": 2.472382956338769,
693
+ "grad_norm": 1.9517714977264404,
694
+ "learning_rate": 1.0112572330352445e-05,
695
+ "loss": 0.1032,
696
+ "step": 4700
697
+ },
698
+ {
699
+ "epoch": 2.4986849026827986,
700
+ "grad_norm": 1.327062726020813,
701
+ "learning_rate": 1.000736454497633e-05,
702
+ "loss": 0.0962,
703
+ "step": 4750
704
+ },
705
+ {
706
+ "epoch": 2.524986849026828,
707
+ "grad_norm": 10.327136993408203,
708
+ "learning_rate": 9.90215675960021e-06,
709
+ "loss": 0.1092,
710
+ "step": 4800
711
+ },
712
+ {
713
+ "epoch": 2.5512887953708576,
714
+ "grad_norm": 3.8997962474823,
715
+ "learning_rate": 9.796948974224093e-06,
716
+ "loss": 0.0681,
717
+ "step": 4850
718
+ },
719
+ {
720
+ "epoch": 2.5775907417148867,
721
+ "grad_norm": 0.270841121673584,
722
+ "learning_rate": 9.691741188847975e-06,
723
+ "loss": 0.1265,
724
+ "step": 4900
725
+ },
726
+ {
727
+ "epoch": 2.6038926880589166,
728
+ "grad_norm": 0.8220506906509399,
729
+ "learning_rate": 9.586533403471858e-06,
730
+ "loss": 0.0726,
731
+ "step": 4950
732
+ },
733
+ {
734
+ "epoch": 2.6301946344029457,
735
+ "grad_norm": 1.4264813661575317,
736
+ "learning_rate": 9.48132561809574e-06,
737
+ "loss": 0.0707,
738
+ "step": 5000
739
+ },
740
+ {
741
+ "epoch": 2.656496580746975,
742
+ "grad_norm": 5.427404880523682,
743
+ "learning_rate": 9.376117832719622e-06,
744
+ "loss": 0.0762,
745
+ "step": 5050
746
+ },
747
+ {
748
+ "epoch": 2.6827985270910046,
749
+ "grad_norm": 39.103004455566406,
750
+ "learning_rate": 9.270910047343504e-06,
751
+ "loss": 0.0733,
752
+ "step": 5100
753
+ },
754
+ {
755
+ "epoch": 2.709100473435034,
756
+ "grad_norm": 2.8170275688171387,
757
+ "learning_rate": 9.165702261967387e-06,
758
+ "loss": 0.105,
759
+ "step": 5150
760
+ },
761
+ {
762
+ "epoch": 2.7354024197790636,
763
+ "grad_norm": 6.285243034362793,
764
+ "learning_rate": 9.060494476591268e-06,
765
+ "loss": 0.1054,
766
+ "step": 5200
767
+ },
768
+ {
769
+ "epoch": 2.761704366123093,
770
+ "grad_norm": 34.959102630615234,
771
+ "learning_rate": 8.95528669121515e-06,
772
+ "loss": 0.1168,
773
+ "step": 5250
774
+ },
775
+ {
776
+ "epoch": 2.7880063124671226,
777
+ "grad_norm": 2.698047399520874,
778
+ "learning_rate": 8.850078905839033e-06,
779
+ "loss": 0.0664,
780
+ "step": 5300
781
+ },
782
+ {
783
+ "epoch": 2.814308258811152,
784
+ "grad_norm": 6.107056617736816,
785
+ "learning_rate": 8.744871120462914e-06,
786
+ "loss": 0.0866,
787
+ "step": 5350
788
+ },
789
+ {
790
+ "epoch": 2.8406102051551816,
791
+ "grad_norm": 6.0492634773254395,
792
+ "learning_rate": 8.639663335086798e-06,
793
+ "loss": 0.0921,
794
+ "step": 5400
795
+ },
796
+ {
797
+ "epoch": 2.866912151499211,
798
+ "grad_norm": 38.75687789916992,
799
+ "learning_rate": 8.534455549710679e-06,
800
+ "loss": 0.0932,
801
+ "step": 5450
802
+ },
803
+ {
804
+ "epoch": 2.8932140978432406,
805
+ "grad_norm": 5.730583190917969,
806
+ "learning_rate": 8.429247764334562e-06,
807
+ "loss": 0.0809,
808
+ "step": 5500
809
+ },
810
+ {
811
+ "epoch": 2.9195160441872696,
812
+ "grad_norm": 0.2023005187511444,
813
+ "learning_rate": 8.324039978958444e-06,
814
+ "loss": 0.0723,
815
+ "step": 5550
816
+ },
817
+ {
818
+ "epoch": 2.9458179905312996,
819
+ "grad_norm": 24.816850662231445,
820
+ "learning_rate": 8.218832193582325e-06,
821
+ "loss": 0.0758,
822
+ "step": 5600
823
+ },
824
+ {
825
+ "epoch": 2.9721199368753286,
826
+ "grad_norm": 0.10021505504846573,
827
+ "learning_rate": 8.113624408206208e-06,
828
+ "loss": 0.0787,
829
+ "step": 5650
830
+ },
831
+ {
832
+ "epoch": 2.998421883219358,
833
+ "grad_norm": 3.8389430046081543,
834
+ "learning_rate": 8.00841662283009e-06,
835
+ "loss": 0.1321,
836
+ "step": 5700
837
+ },
838
+ {
839
+ "epoch": 3.0,
840
+ "eval_FPR_aeb_Arab": 0.004037685059788947,
841
+ "eval_FPR_arb_Arab": 0.009421265139507543,
842
+ "eval_FPR_ars_Arab": 0.005971392861273241,
843
+ "eval_FPR_arz_Arab": 0.020192307688424557,
844
+ "eval_accuracy": 0.9713195632153664,
845
+ "eval_loss": 0.15336963534355164,
846
+ "eval_macro_f1": 0.9569564393242584,
847
+ "eval_runtime": 3.3689,
848
+ "eval_samples_per_second": 2256.259,
849
+ "eval_steps_per_second": 35.324,
850
+ "step": 5703
851
+ },
852
+ {
853
+ "epoch": 3.0247238295633876,
854
+ "grad_norm": 0.30554988980293274,
855
+ "learning_rate": 7.903208837453971e-06,
856
+ "loss": 0.0937,
857
+ "step": 5750
858
+ },
859
+ {
860
+ "epoch": 3.051025775907417,
861
+ "grad_norm": 37.439884185791016,
862
+ "learning_rate": 7.798001052077856e-06,
863
+ "loss": 0.0578,
864
+ "step": 5800
865
+ },
866
+ {
867
+ "epoch": 3.0773277222514466,
868
+ "grad_norm": 0.0822492390871048,
869
+ "learning_rate": 7.692793266701737e-06,
870
+ "loss": 0.0636,
871
+ "step": 5850
872
+ },
873
+ {
874
+ "epoch": 3.103629668595476,
875
+ "grad_norm": 2.7918007373809814,
876
+ "learning_rate": 7.587585481325619e-06,
877
+ "loss": 0.0378,
878
+ "step": 5900
879
+ },
880
+ {
881
+ "epoch": 3.1299316149395056,
882
+ "grad_norm": 32.899818420410156,
883
+ "learning_rate": 7.482377695949501e-06,
884
+ "loss": 0.0609,
885
+ "step": 5950
886
+ },
887
+ {
888
+ "epoch": 3.156233561283535,
889
+ "grad_norm": 0.06830895692110062,
890
+ "learning_rate": 7.377169910573383e-06,
891
+ "loss": 0.0433,
892
+ "step": 6000
893
+ },
894
+ {
895
+ "epoch": 3.1825355076275645,
896
+ "grad_norm": 54.685489654541016,
897
+ "learning_rate": 7.271962125197265e-06,
898
+ "loss": 0.056,
899
+ "step": 6050
900
+ },
901
+ {
902
+ "epoch": 3.208837453971594,
903
+ "grad_norm": 0.8175523281097412,
904
+ "learning_rate": 7.166754339821147e-06,
905
+ "loss": 0.0341,
906
+ "step": 6100
907
+ },
908
+ {
909
+ "epoch": 3.2351394003156235,
910
+ "grad_norm": 0.33226722478866577,
911
+ "learning_rate": 7.061546554445029e-06,
912
+ "loss": 0.0482,
913
+ "step": 6150
914
+ },
915
+ {
916
+ "epoch": 3.2614413466596526,
917
+ "grad_norm": 1.425661325454712,
918
+ "learning_rate": 6.956338769068912e-06,
919
+ "loss": 0.0673,
920
+ "step": 6200
921
+ },
922
+ {
923
+ "epoch": 3.2877432930036825,
924
+ "grad_norm": 0.18895921111106873,
925
+ "learning_rate": 6.851130983692794e-06,
926
+ "loss": 0.0359,
927
+ "step": 6250
928
+ },
929
+ {
930
+ "epoch": 3.3140452393477116,
931
+ "grad_norm": 0.6557305455207825,
932
+ "learning_rate": 6.7459231983166766e-06,
933
+ "loss": 0.0382,
934
+ "step": 6300
935
+ },
936
+ {
937
+ "epoch": 3.340347185691741,
938
+ "grad_norm": 0.008198770694434643,
939
+ "learning_rate": 6.640715412940558e-06,
940
+ "loss": 0.0566,
941
+ "step": 6350
942
+ },
943
+ {
944
+ "epoch": 3.3666491320357705,
945
+ "grad_norm": 0.4695976674556732,
946
+ "learning_rate": 6.53550762756444e-06,
947
+ "loss": 0.0654,
948
+ "step": 6400
949
+ },
950
+ {
951
+ "epoch": 3.3929510783798,
952
+ "grad_norm": 8.628214836120605,
953
+ "learning_rate": 6.430299842188323e-06,
954
+ "loss": 0.0427,
955
+ "step": 6450
956
+ },
957
+ {
958
+ "epoch": 3.4192530247238295,
959
+ "grad_norm": 0.9650713801383972,
960
+ "learning_rate": 6.3250920568122044e-06,
961
+ "loss": 0.0645,
962
+ "step": 6500
963
+ },
964
+ {
965
+ "epoch": 3.445554971067859,
966
+ "grad_norm": 5.836668968200684,
967
+ "learning_rate": 6.219884271436086e-06,
968
+ "loss": 0.0397,
969
+ "step": 6550
970
+ },
971
+ {
972
+ "epoch": 3.4718569174118885,
973
+ "grad_norm": 0.03976545110344887,
974
+ "learning_rate": 6.11467648605997e-06,
975
+ "loss": 0.0586,
976
+ "step": 6600
977
+ },
978
+ {
979
+ "epoch": 3.498158863755918,
980
+ "grad_norm": 19.784215927124023,
981
+ "learning_rate": 6.009468700683851e-06,
982
+ "loss": 0.033,
983
+ "step": 6650
984
+ },
985
+ {
986
+ "epoch": 3.5244608100999475,
987
+ "grad_norm": 2.075496196746826,
988
+ "learning_rate": 5.904260915307733e-06,
989
+ "loss": 0.0776,
990
+ "step": 6700
991
+ },
992
+ {
993
+ "epoch": 3.550762756443977,
994
+ "grad_norm": 7.05810022354126,
995
+ "learning_rate": 5.799053129931616e-06,
996
+ "loss": 0.0905,
997
+ "step": 6750
998
+ },
999
+ {
1000
+ "epoch": 3.5770647027880065,
1001
+ "grad_norm": 0.012984913773834705,
1002
+ "learning_rate": 5.6938453445554975e-06,
1003
+ "loss": 0.0542,
1004
+ "step": 6800
1005
+ },
1006
+ {
1007
+ "epoch": 3.6033666491320355,
1008
+ "grad_norm": 2.701481342315674,
1009
+ "learning_rate": 5.588637559179379e-06,
1010
+ "loss": 0.0625,
1011
+ "step": 6850
1012
+ },
1013
+ {
1014
+ "epoch": 3.6296685954760655,
1015
+ "grad_norm": 0.41872379183769226,
1016
+ "learning_rate": 5.483429773803262e-06,
1017
+ "loss": 0.0795,
1018
+ "step": 6900
1019
+ },
1020
+ {
1021
+ "epoch": 3.6559705418200945,
1022
+ "grad_norm": 0.13123294711112976,
1023
+ "learning_rate": 5.378221988427144e-06,
1024
+ "loss": 0.0296,
1025
+ "step": 6950
1026
+ },
1027
+ {
1028
+ "epoch": 3.682272488164124,
1029
+ "grad_norm": 0.7190969586372375,
1030
+ "learning_rate": 5.273014203051027e-06,
1031
+ "loss": 0.0666,
1032
+ "step": 7000
1033
+ },
1034
+ {
1035
+ "epoch": 3.7085744345081535,
1036
+ "grad_norm": 0.1744261384010315,
1037
+ "learning_rate": 5.167806417674909e-06,
1038
+ "loss": 0.0328,
1039
+ "step": 7050
1040
+ },
1041
+ {
1042
+ "epoch": 3.734876380852183,
1043
+ "grad_norm": 0.5619340538978577,
1044
+ "learning_rate": 5.062598632298791e-06,
1045
+ "loss": 0.0755,
1046
+ "step": 7100
1047
+ },
1048
+ {
1049
+ "epoch": 3.7611783271962125,
1050
+ "grad_norm": 40.665706634521484,
1051
+ "learning_rate": 4.957390846922673e-06,
1052
+ "loss": 0.1041,
1053
+ "step": 7150
1054
+ },
1055
+ {
1056
+ "epoch": 3.787480273540242,
1057
+ "grad_norm": 0.06617475301027298,
1058
+ "learning_rate": 4.852183061546555e-06,
1059
+ "loss": 0.0264,
1060
+ "step": 7200
1061
+ },
1062
+ {
1063
+ "epoch": 3.8137822198842715,
1064
+ "grad_norm": 5.0283966064453125,
1065
+ "learning_rate": 4.746975276170437e-06,
1066
+ "loss": 0.0789,
1067
+ "step": 7250
1068
+ },
1069
+ {
1070
+ "epoch": 3.840084166228301,
1071
+ "grad_norm": 5.660898208618164,
1072
+ "learning_rate": 4.641767490794319e-06,
1073
+ "loss": 0.0582,
1074
+ "step": 7300
1075
+ },
1076
+ {
1077
+ "epoch": 3.8663861125723304,
1078
+ "grad_norm": 0.8503484725952148,
1079
+ "learning_rate": 4.536559705418201e-06,
1080
+ "loss": 0.0862,
1081
+ "step": 7350
1082
+ },
1083
+ {
1084
+ "epoch": 3.89268805891636,
1085
+ "grad_norm": 13.575056076049805,
1086
+ "learning_rate": 4.431351920042084e-06,
1087
+ "loss": 0.0554,
1088
+ "step": 7400
1089
+ },
1090
+ {
1091
+ "epoch": 3.9189900052603894,
1092
+ "grad_norm": 0.25003504753112793,
1093
+ "learning_rate": 4.3261441346659654e-06,
1094
+ "loss": 0.0504,
1095
+ "step": 7450
1096
+ },
1097
+ {
1098
+ "epoch": 3.9452919516044185,
1099
+ "grad_norm": 0.022247493267059326,
1100
+ "learning_rate": 4.220936349289847e-06,
1101
+ "loss": 0.0663,
1102
+ "step": 7500
1103
+ },
1104
+ {
1105
+ "epoch": 3.9715938979484484,
1106
+ "grad_norm": 0.2591884136199951,
1107
+ "learning_rate": 4.11572856391373e-06,
1108
+ "loss": 0.0361,
1109
+ "step": 7550
1110
+ },
1111
+ {
1112
+ "epoch": 3.9978958442924775,
1113
+ "grad_norm": 6.533713340759277,
1114
+ "learning_rate": 4.010520778537612e-06,
1115
+ "loss": 0.0293,
1116
+ "step": 7600
1117
+ },
1118
+ {
1119
+ "epoch": 4.0,
1120
+ "eval_FPR_aeb_Arab": 0.004229955776921754,
1121
+ "eval_FPR_arb_Arab": 0.011343972310835613,
1122
+ "eval_FPR_ars_Arab": 0.00458269684702365,
1123
+ "eval_FPR_arz_Arab": 0.015576923073927515,
1124
+ "eval_accuracy": 0.9743454808577818,
1125
+ "eval_loss": 0.15085552632808685,
1126
+ "eval_macro_f1": 0.9633717243752477,
1127
+ "eval_runtime": 3.3689,
1128
+ "eval_samples_per_second": 2256.225,
1129
+ "eval_steps_per_second": 35.323,
1130
+ "step": 7604
1131
+ },
1132
+ {
1133
+ "epoch": 4.024197790636507,
1134
+ "grad_norm": 2.82965087890625,
1135
+ "learning_rate": 3.905312993161494e-06,
1136
+ "loss": 0.0275,
1137
+ "step": 7650
1138
+ },
1139
+ {
1140
+ "epoch": 4.0504997369805364,
1141
+ "grad_norm": 0.05096087604761124,
1142
+ "learning_rate": 3.8001052077853763e-06,
1143
+ "loss": 0.0334,
1144
+ "step": 7700
1145
+ },
1146
+ {
1147
+ "epoch": 4.076801683324566,
1148
+ "grad_norm": 0.017893170937895775,
1149
+ "learning_rate": 3.694897422409259e-06,
1150
+ "loss": 0.0435,
1151
+ "step": 7750
1152
+ },
1153
+ {
1154
+ "epoch": 4.103103629668595,
1155
+ "grad_norm": 0.43649783730506897,
1156
+ "learning_rate": 3.5896896370331407e-06,
1157
+ "loss": 0.0297,
1158
+ "step": 7800
1159
+ },
1160
+ {
1161
+ "epoch": 4.1294055760126245,
1162
+ "grad_norm": 0.1193922609090805,
1163
+ "learning_rate": 3.484481851657023e-06,
1164
+ "loss": 0.0308,
1165
+ "step": 7850
1166
+ },
1167
+ {
1168
+ "epoch": 4.155707522356654,
1169
+ "grad_norm": 0.029588880017399788,
1170
+ "learning_rate": 3.3792740662809046e-06,
1171
+ "loss": 0.0338,
1172
+ "step": 7900
1173
+ },
1174
+ {
1175
+ "epoch": 4.1820094687006835,
1176
+ "grad_norm": 9.658980369567871,
1177
+ "learning_rate": 3.2740662809047872e-06,
1178
+ "loss": 0.0431,
1179
+ "step": 7950
1180
+ },
1181
+ {
1182
+ "epoch": 4.208311415044713,
1183
+ "grad_norm": 0.018898559734225273,
1184
+ "learning_rate": 3.1688584955286694e-06,
1185
+ "loss": 0.0436,
1186
+ "step": 8000
1187
+ },
1188
+ {
1189
+ "epoch": 4.2346133613887424,
1190
+ "grad_norm": 0.33016514778137207,
1191
+ "learning_rate": 3.0636507101525516e-06,
1192
+ "loss": 0.0103,
1193
+ "step": 8050
1194
+ },
1195
+ {
1196
+ "epoch": 4.260915307732772,
1197
+ "grad_norm": 4.2456583976745605,
1198
+ "learning_rate": 2.9584429247764334e-06,
1199
+ "loss": 0.0234,
1200
+ "step": 8100
1201
+ },
1202
+ {
1203
+ "epoch": 4.287217254076801,
1204
+ "grad_norm": 7.066432476043701,
1205
+ "learning_rate": 2.853235139400316e-06,
1206
+ "loss": 0.0267,
1207
+ "step": 8150
1208
+ },
1209
+ {
1210
+ "epoch": 4.313519200420831,
1211
+ "grad_norm": 0.010746636427938938,
1212
+ "learning_rate": 2.748027354024198e-06,
1213
+ "loss": 0.0257,
1214
+ "step": 8200
1215
+ },
1216
+ {
1217
+ "epoch": 4.33982114676486,
1218
+ "grad_norm": 0.03323914483189583,
1219
+ "learning_rate": 2.64281956864808e-06,
1220
+ "loss": 0.0515,
1221
+ "step": 8250
1222
+ },
1223
+ {
1224
+ "epoch": 4.36612309310889,
1225
+ "grad_norm": 0.2795711159706116,
1226
+ "learning_rate": 2.537611783271962e-06,
1227
+ "loss": 0.0489,
1228
+ "step": 8300
1229
+ },
1230
+ {
1231
+ "epoch": 4.392425039452919,
1232
+ "grad_norm": 9.179369926452637,
1233
+ "learning_rate": 2.4324039978958443e-06,
1234
+ "loss": 0.0339,
1235
+ "step": 8350
1236
+ },
1237
+ {
1238
+ "epoch": 4.418726985796949,
1239
+ "grad_norm": 3.0741329193115234,
1240
+ "learning_rate": 2.327196212519727e-06,
1241
+ "loss": 0.0203,
1242
+ "step": 8400
1243
+ },
1244
+ {
1245
+ "epoch": 4.445028932140978,
1246
+ "grad_norm": 21.926807403564453,
1247
+ "learning_rate": 2.2219884271436086e-06,
1248
+ "loss": 0.0481,
1249
+ "step": 8450
1250
+ },
1251
+ {
1252
+ "epoch": 4.471330878485007,
1253
+ "grad_norm": 1.929383635520935,
1254
+ "learning_rate": 2.1167806417674912e-06,
1255
+ "loss": 0.0231,
1256
+ "step": 8500
1257
+ },
1258
+ {
1259
+ "epoch": 4.497632824829037,
1260
+ "grad_norm": 4.422601699829102,
1261
+ "learning_rate": 2.011572856391373e-06,
1262
+ "loss": 0.034,
1263
+ "step": 8550
1264
+ },
1265
+ {
1266
+ "epoch": 4.523934771173067,
1267
+ "grad_norm": 0.010013488121330738,
1268
+ "learning_rate": 1.9063650710152554e-06,
1269
+ "loss": 0.0114,
1270
+ "step": 8600
1271
+ },
1272
+ {
1273
+ "epoch": 4.550236717517096,
1274
+ "grad_norm": 0.002675453433766961,
1275
+ "learning_rate": 1.8011572856391374e-06,
1276
+ "loss": 0.0575,
1277
+ "step": 8650
1278
+ },
1279
+ {
1280
+ "epoch": 4.576538663861125,
1281
+ "grad_norm": 0.04695653170347214,
1282
+ "learning_rate": 1.6959495002630197e-06,
1283
+ "loss": 0.0419,
1284
+ "step": 8700
1285
+ },
1286
+ {
1287
+ "epoch": 4.602840610205155,
1288
+ "grad_norm": 0.005277259275317192,
1289
+ "learning_rate": 1.5907417148869017e-06,
1290
+ "loss": 0.0465,
1291
+ "step": 8750
1292
+ },
1293
+ {
1294
+ "epoch": 4.629142556549184,
1295
+ "grad_norm": 0.03051823005080223,
1296
+ "learning_rate": 1.4855339295107841e-06,
1297
+ "loss": 0.0494,
1298
+ "step": 8800
1299
+ },
1300
+ {
1301
+ "epoch": 4.655444502893214,
1302
+ "grad_norm": 0.03725295141339302,
1303
+ "learning_rate": 1.380326144134666e-06,
1304
+ "loss": 0.0279,
1305
+ "step": 8850
1306
+ },
1307
+ {
1308
+ "epoch": 4.681746449237243,
1309
+ "grad_norm": 9.204965591430664,
1310
+ "learning_rate": 1.2751183587585483e-06,
1311
+ "loss": 0.0421,
1312
+ "step": 8900
1313
+ },
1314
+ {
1315
+ "epoch": 4.708048395581273,
1316
+ "grad_norm": 0.6778242588043213,
1317
+ "learning_rate": 1.1699105733824304e-06,
1318
+ "loss": 0.0275,
1319
+ "step": 8950
1320
+ },
1321
+ {
1322
+ "epoch": 4.734350341925302,
1323
+ "grad_norm": 0.02842475101351738,
1324
+ "learning_rate": 1.0647027880063126e-06,
1325
+ "loss": 0.0312,
1326
+ "step": 9000
1327
+ },
1328
+ {
1329
+ "epoch": 4.760652288269332,
1330
+ "grad_norm": 0.03680500015616417,
1331
+ "learning_rate": 9.594950026301946e-07,
1332
+ "loss": 0.0349,
1333
+ "step": 9050
1334
+ },
1335
+ {
1336
+ "epoch": 4.786954234613361,
1337
+ "grad_norm": 0.4056846797466278,
1338
+ "learning_rate": 8.542872172540769e-07,
1339
+ "loss": 0.0473,
1340
+ "step": 9100
1341
+ },
1342
+ {
1343
+ "epoch": 4.81325618095739,
1344
+ "grad_norm": 0.005888829007744789,
1345
+ "learning_rate": 7.490794318779591e-07,
1346
+ "loss": 0.0418,
1347
+ "step": 9150
1348
+ },
1349
+ {
1350
+ "epoch": 4.83955812730142,
1351
+ "grad_norm": 1.9934979677200317,
1352
+ "learning_rate": 6.438716465018411e-07,
1353
+ "loss": 0.0282,
1354
+ "step": 9200
1355
+ },
1356
+ {
1357
+ "epoch": 4.86586007364545,
1358
+ "grad_norm": 2.1665430068969727,
1359
+ "learning_rate": 5.386638611257233e-07,
1360
+ "loss": 0.0303,
1361
+ "step": 9250
1362
+ },
1363
+ {
1364
+ "epoch": 4.892162019989479,
1365
+ "grad_norm": 0.07777859270572662,
1366
+ "learning_rate": 4.334560757496055e-07,
1367
+ "loss": 0.0385,
1368
+ "step": 9300
1369
+ },
1370
+ {
1371
+ "epoch": 4.918463966333508,
1372
+ "grad_norm": 0.1822308897972107,
1373
+ "learning_rate": 3.2824829037348767e-07,
1374
+ "loss": 0.062,
1375
+ "step": 9350
1376
+ },
1377
+ {
1378
+ "epoch": 4.944765912677538,
1379
+ "grad_norm": 0.13670164346694946,
1380
+ "learning_rate": 2.2304050499736983e-07,
1381
+ "loss": 0.0187,
1382
+ "step": 9400
1383
+ },
1384
+ {
1385
+ "epoch": 4.971067859021567,
1386
+ "grad_norm": 0.0051955850794911385,
1387
+ "learning_rate": 1.1783271962125198e-07,
1388
+ "loss": 0.0421,
1389
+ "step": 9450
1390
+ },
1391
+ {
1392
+ "epoch": 4.997369805365597,
1393
+ "grad_norm": 0.008878646418452263,
1394
+ "learning_rate": 1.262493424513414e-08,
1395
+ "loss": 0.0477,
1396
+ "step": 9500
1397
+ },
1398
+ {
1399
+ "epoch": 5.0,
1400
+ "eval_FPR_aeb_Arab": 0.00384541434265614,
1401
+ "eval_FPR_arb_Arab": 0.011728513745101227,
1402
+ "eval_FPR_ars_Arab": 0.004721566448448609,
1403
+ "eval_FPR_arz_Arab": 0.013461538458949703,
1404
+ "eval_accuracy": 0.9756610972240495,
1405
+ "eval_loss": 0.15560674667358398,
1406
+ "eval_macro_f1": 0.9641750099415428,
1407
+ "eval_runtime": 3.3579,
1408
+ "eval_samples_per_second": 2263.594,
1409
+ "eval_steps_per_second": 35.438,
1410
+ "step": 9505
1411
+ }
1412
+ ],
1413
+ "logging_steps": 50,
1414
+ "max_steps": 9505,
1415
+ "num_input_tokens_seen": 0,
1416
+ "num_train_epochs": 5,
1417
+ "save_steps": 500,
1418
+ "stateful_callbacks": {
1419
+ "EarlyStoppingCallback": {
1420
+ "args": {
1421
+ "early_stopping_patience": 2,
1422
+ "early_stopping_threshold": 0.0
1423
+ },
1424
+ "attributes": {
1425
+ "early_stopping_patience_counter": 0
1426
+ }
1427
+ },
1428
+ "TrainerControl": {
1429
+ "args": {
1430
+ "should_epoch_stop": false,
1431
+ "should_evaluate": false,
1432
+ "should_log": false,
1433
+ "should_save": true,
1434
+ "should_training_stop": true
1435
+ },
1436
+ "attributes": {}
1437
+ }
1438
+ },
1439
+ "total_flos": 4298818504680960.0,
1440
+ "train_batch_size": 32,
1441
+ "trial_name": null,
1442
+ "trial_params": null
1443
+ }
checkpoint-9505/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa873479846449ff86b2d50d9e57056c48f72d07a9ffc1fb7f0012ac7d884f8
3
+ size 5777
checkpoint-9505/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
confusion_matrix_val.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ,aeb_Arab,arb_Arab,ars_Arab,arz_Arab
2
+ aeb_Arab,2345,15,7,33
3
+ arb_Arab,6,2352,17,25
4
+ ars_Arab,3,15,370,12
5
+ arz_Arab,11,31,10,2349
metrics.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {'eval_loss': 0.15560674667358398, 'eval_accuracy': 0.9756610972240495, 'eval_macro_f1': 0.9641750099415428, 'eval_FPR_aeb_Arab': 0.00384541434265614, 'eval_FPR_arb_Arab': 0.011728513745101227, 'eval_FPR_ars_Arab': 0.004721566448448609, 'eval_FPR_arz_Arab': 0.013461538458949703, 'eval_runtime': 3.397, 'eval_samples_per_second': 2237.548, 'eval_steps_per_second': 35.031, 'epoch': 5.0}
val_predictions.csv ADDED
The diff for this file is too large to render. See raw diff