YagiASAFAS commited on
Commit
08718b1
·
verified ·
1 Parent(s): d5c4a53

Add tokenizer files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. base/config.json +125 -0
  2. base/model.safetensors +3 -0
  3. base/run-0/checkpoint-12114/config.json +125 -0
  4. base/run-0/checkpoint-12114/model.safetensors +3 -0
  5. base/run-0/checkpoint-12114/optimizer.pt +3 -0
  6. base/run-0/checkpoint-12114/rng_state.pth +3 -0
  7. base/run-0/checkpoint-12114/scaler.pt +3 -0
  8. base/run-0/checkpoint-12114/scheduler.pt +3 -0
  9. base/run-0/checkpoint-12114/trainer_state.json +1200 -0
  10. base/run-0/checkpoint-12114/training_args.bin +3 -0
  11. base/run-0/checkpoint-13460/config.json +125 -0
  12. base/run-0/checkpoint-13460/model.safetensors +3 -0
  13. base/run-0/checkpoint-13460/optimizer.pt +3 -0
  14. base/run-0/checkpoint-13460/rng_state.pth +3 -0
  15. base/run-0/checkpoint-13460/scaler.pt +3 -0
  16. base/run-0/checkpoint-13460/scheduler.pt +3 -0
  17. base/run-0/checkpoint-13460/trainer_state.json +1325 -0
  18. base/run-0/checkpoint-13460/training_args.bin +3 -0
  19. base/training_args.bin +3 -0
  20. model.safetensors +1 -1
  21. run-0/checkpoint-1011/config.json +125 -0
  22. run-0/checkpoint-1011/model.safetensors +3 -0
  23. run-0/checkpoint-1011/optimizer.pt +3 -0
  24. run-0/checkpoint-1011/rng_state.pth +3 -0
  25. run-0/checkpoint-1011/scaler.pt +3 -0
  26. run-0/checkpoint-1011/scheduler.pt +3 -0
  27. run-0/checkpoint-1011/trainer_state.json +219 -0
  28. run-0/checkpoint-1011/training_args.bin +3 -0
  29. run-0/checkpoint-1348/config.json +125 -0
  30. run-0/checkpoint-1348/model.safetensors +3 -0
  31. run-0/checkpoint-1348/optimizer.pt +3 -0
  32. run-0/checkpoint-1348/rng_state.pth +3 -0
  33. run-0/checkpoint-1348/scaler.pt +3 -0
  34. run-0/checkpoint-1348/scheduler.pt +3 -0
  35. run-0/checkpoint-1348/trainer_state.json +274 -0
  36. run-0/checkpoint-1348/training_args.bin +3 -0
  37. run-0/checkpoint-1685/config.json +125 -0
  38. run-0/checkpoint-1685/model.safetensors +3 -0
  39. run-0/checkpoint-1685/optimizer.pt +3 -0
  40. run-0/checkpoint-1685/rng_state.pth +3 -0
  41. run-0/checkpoint-1685/scaler.pt +3 -0
  42. run-0/checkpoint-1685/scheduler.pt +3 -0
  43. run-0/checkpoint-1685/trainer_state.json +329 -0
  44. run-0/checkpoint-1685/training_args.bin +3 -0
  45. run-1/checkpoint-1011/config.json +125 -0
  46. run-1/checkpoint-1011/model.safetensors +3 -0
  47. run-1/checkpoint-1011/optimizer.pt +3 -0
  48. run-1/checkpoint-1011/rng_state.pth +3 -0
  49. run-1/checkpoint-1011/scaler.pt +3 -0
  50. run-1/checkpoint-1011/scheduler.pt +3 -0
base/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76674ecc6fc0919cc8f40d45de3432489dfaf9d2fdb70e34c7ccf5f8b9cec02
3
+ size 438100144
base/run-0/checkpoint-12114/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/run-0/checkpoint-12114/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb47c4a65913aad1c202a37a8f335f3f7341a98bc2771651d11ebada123a3a1
3
+ size 438100144
base/run-0/checkpoint-12114/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0576d4ec15f6667395da0c44d28110f02fe479d0e40f63fe43bf131d57c3eee
3
+ size 876324619
base/run-0/checkpoint-12114/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4304a38ad9b182a0a2a10944ac07c17d4feedf14cd480884bddd0b4e2598024c
3
+ size 14645
base/run-0/checkpoint-12114/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0a237feebe23fc2a2b21f74770edeb23806837ea34a8ebfe926f79f4333264
3
+ size 1383
base/run-0/checkpoint-12114/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cacb3542e3734fc040315ed400d039f8eda18c7df7a89867ea47973feb2f39c
3
+ size 1465
base/run-0/checkpoint-12114/trainer_state.json ADDED
@@ -0,0 +1,1200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 12114,
3
+ "best_metric": 0.9276388104924375,
4
+ "best_model_checkpoint": "./results/base/run-0/checkpoint-12114",
5
+ "epoch": 9.0,
6
+ "eval_steps": 500,
7
+ "global_step": 12114,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.07429420505200594,
14
+ "grad_norm": 3.0634188652038574,
15
+ "learning_rate": 9.900000000000002e-06,
16
+ "loss": 24.5432,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.1485884101040119,
21
+ "grad_norm": 1.4259201288223267,
22
+ "learning_rate": 1.9900000000000003e-05,
23
+ "loss": 0.4008,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.22288261515601784,
28
+ "grad_norm": 1.9871093034744263,
29
+ "learning_rate": 2.9900000000000002e-05,
30
+ "loss": 0.3389,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2971768202080238,
35
+ "grad_norm": 1.3319053649902344,
36
+ "learning_rate": 3.99e-05,
37
+ "loss": 0.3105,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.37147102526002973,
42
+ "grad_norm": 1.7693461179733276,
43
+ "learning_rate": 4.99e-05,
44
+ "loss": 0.2934,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.4457652303120357,
49
+ "grad_norm": 1.8536063432693481,
50
+ "learning_rate": 4.9618055555555556e-05,
51
+ "loss": 0.2794,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.5200594353640416,
56
+ "grad_norm": 0.8980482220649719,
57
+ "learning_rate": 4.9232253086419754e-05,
58
+ "loss": 0.2584,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.5943536404160475,
63
+ "grad_norm": 2.4894111156463623,
64
+ "learning_rate": 4.884645061728395e-05,
65
+ "loss": 0.257,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.6686478454680534,
70
+ "grad_norm": 2.6494672298431396,
71
+ "learning_rate": 4.846064814814815e-05,
72
+ "loss": 0.2434,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.7429420505200595,
77
+ "grad_norm": 1.483652114868164,
78
+ "learning_rate": 4.807484567901235e-05,
79
+ "loss": 0.2612,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.8172362555720654,
84
+ "grad_norm": 1.8052722215652466,
85
+ "learning_rate": 4.7689043209876544e-05,
86
+ "loss": 0.2339,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.8915304606240714,
91
+ "grad_norm": 1.347399115562439,
92
+ "learning_rate": 4.730324074074074e-05,
93
+ "loss": 0.2337,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.9658246656760773,
98
+ "grad_norm": 2.325223922729492,
99
+ "learning_rate": 4.691743827160494e-05,
100
+ "loss": 0.2313,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 1.0,
105
+ "eval_administration_accuracy": 0.8859583952451708,
106
+ "eval_administration_f1": 0.8647430840179304,
107
+ "eval_corruption_accuracy": 0.9409361069836553,
108
+ "eval_corruption_f1": 0.9344770310075945,
109
+ "eval_democracy_accuracy": 0.923848439821694,
110
+ "eval_democracy_f1": 0.9042551904669028,
111
+ "eval_development_accuracy": 0.8554977711738484,
112
+ "eval_development_f1": 0.841554343091632,
113
+ "eval_economy_accuracy": 0.912518573551263,
114
+ "eval_economy_f1": 0.9048608101573996,
115
+ "eval_education_accuracy": 0.9567236255572066,
116
+ "eval_education_f1": 0.9544184265074659,
117
+ "eval_environment_accuracy": 0.9702823179791976,
118
+ "eval_environment_f1": 0.9701205662520187,
119
+ "eval_leadership_accuracy": 0.799405646359584,
120
+ "eval_leadership_f1": 0.7721670287980339,
121
+ "eval_loss": 0.21857140958309174,
122
+ "eval_overall_accuracy": 0.9179513372956909,
123
+ "eval_overall_f1": 0.9086014006669906,
124
+ "eval_race_accuracy": 0.9448365527488856,
125
+ "eval_race_f1": 0.9432357218391869,
126
+ "eval_religion_accuracy": 0.937778603268945,
127
+ "eval_religion_f1": 0.9363911612570991,
128
+ "eval_runtime": 6.046,
129
+ "eval_safety_accuracy": 0.887630014858841,
130
+ "eval_safety_f1": 0.8769934446086213,
131
+ "eval_samples_per_second": 890.499,
132
+ "eval_stability_accuracy": 1.0,
133
+ "eval_stability_f1": 1.0,
134
+ "eval_steps_per_second": 55.739,
135
+ "step": 1346
136
+ },
137
+ {
138
+ "epoch": 1.0401188707280833,
139
+ "grad_norm": 1.5913788080215454,
140
+ "learning_rate": 4.653163580246914e-05,
141
+ "loss": 0.2066,
142
+ "step": 1400
143
+ },
144
+ {
145
+ "epoch": 1.1144130757800892,
146
+ "grad_norm": 1.1565213203430176,
147
+ "learning_rate": 4.614583333333334e-05,
148
+ "loss": 0.1776,
149
+ "step": 1500
150
+ },
151
+ {
152
+ "epoch": 1.188707280832095,
153
+ "grad_norm": 1.8071492910385132,
154
+ "learning_rate": 4.576003086419753e-05,
155
+ "loss": 0.1812,
156
+ "step": 1600
157
+ },
158
+ {
159
+ "epoch": 1.263001485884101,
160
+ "grad_norm": 0.9088364839553833,
161
+ "learning_rate": 4.537422839506173e-05,
162
+ "loss": 0.1714,
163
+ "step": 1700
164
+ },
165
+ {
166
+ "epoch": 1.3372956909361071,
167
+ "grad_norm": 2.001739263534546,
168
+ "learning_rate": 4.498842592592593e-05,
169
+ "loss": 0.1821,
170
+ "step": 1800
171
+ },
172
+ {
173
+ "epoch": 1.4115898959881128,
174
+ "grad_norm": 1.3958568572998047,
175
+ "learning_rate": 4.4602623456790125e-05,
176
+ "loss": 0.1766,
177
+ "step": 1900
178
+ },
179
+ {
180
+ "epoch": 1.485884101040119,
181
+ "grad_norm": 1.5210611820220947,
182
+ "learning_rate": 4.421682098765432e-05,
183
+ "loss": 0.1641,
184
+ "step": 2000
185
+ },
186
+ {
187
+ "epoch": 1.5601783060921248,
188
+ "grad_norm": 1.0359656810760498,
189
+ "learning_rate": 4.383101851851852e-05,
190
+ "loss": 0.1847,
191
+ "step": 2100
192
+ },
193
+ {
194
+ "epoch": 1.6344725111441307,
195
+ "grad_norm": 1.6115169525146484,
196
+ "learning_rate": 4.344521604938272e-05,
197
+ "loss": 0.1727,
198
+ "step": 2200
199
+ },
200
+ {
201
+ "epoch": 1.7087667161961368,
202
+ "grad_norm": 1.7786448001861572,
203
+ "learning_rate": 4.3059413580246916e-05,
204
+ "loss": 0.1713,
205
+ "step": 2300
206
+ },
207
+ {
208
+ "epoch": 1.7830609212481425,
209
+ "grad_norm": 1.320279598236084,
210
+ "learning_rate": 4.267361111111111e-05,
211
+ "loss": 0.1716,
212
+ "step": 2400
213
+ },
214
+ {
215
+ "epoch": 1.8573551263001487,
216
+ "grad_norm": 1.895094871520996,
217
+ "learning_rate": 4.228780864197531e-05,
218
+ "loss": 0.1703,
219
+ "step": 2500
220
+ },
221
+ {
222
+ "epoch": 1.9316493313521546,
223
+ "grad_norm": 3.2609572410583496,
224
+ "learning_rate": 4.190200617283951e-05,
225
+ "loss": 0.1712,
226
+ "step": 2600
227
+ },
228
+ {
229
+ "epoch": 2.0,
230
+ "eval_administration_accuracy": 0.8870728083209509,
231
+ "eval_administration_f1": 0.8829499518818239,
232
+ "eval_corruption_accuracy": 0.9452080237741456,
233
+ "eval_corruption_f1": 0.939301394284435,
234
+ "eval_democracy_accuracy": 0.9385215453194651,
235
+ "eval_democracy_f1": 0.9323158476933774,
236
+ "eval_development_accuracy": 0.8723997028231798,
237
+ "eval_development_f1": 0.8634790103771273,
238
+ "eval_economy_accuracy": 0.9141901931649331,
239
+ "eval_economy_f1": 0.9117841265429144,
240
+ "eval_education_accuracy": 0.9598811292719168,
241
+ "eval_education_f1": 0.9586754141781413,
242
+ "eval_environment_accuracy": 0.9723254086181278,
243
+ "eval_environment_f1": 0.9700404332564391,
244
+ "eval_leadership_accuracy": 0.8359955423476969,
245
+ "eval_leadership_f1": 0.8314878671023622,
246
+ "eval_loss": 0.19932226836681366,
247
+ "eval_overall_accuracy": 0.9257522288261515,
248
+ "eval_overall_f1": 0.9224257865223552,
249
+ "eval_race_accuracy": 0.9450222882615156,
250
+ "eval_race_f1": 0.9446551524293766,
251
+ "eval_religion_accuracy": 0.9418647845468053,
252
+ "eval_religion_f1": 0.9399027996250938,
253
+ "eval_runtime": 6.0661,
254
+ "eval_safety_accuracy": 0.8965453194650818,
255
+ "eval_safety_f1": 0.894517440897171,
256
+ "eval_samples_per_second": 887.559,
257
+ "eval_stability_accuracy": 1.0,
258
+ "eval_stability_f1": 1.0,
259
+ "eval_steps_per_second": 55.555,
260
+ "step": 2692
261
+ },
262
+ {
263
+ "epoch": 2.0059435364041605,
264
+ "grad_norm": 1.0471062660217285,
265
+ "learning_rate": 4.1516203703703706e-05,
266
+ "loss": 0.1676,
267
+ "step": 2700
268
+ },
269
+ {
270
+ "epoch": 2.0802377414561666,
271
+ "grad_norm": 1.2078527212142944,
272
+ "learning_rate": 4.1130401234567904e-05,
273
+ "loss": 0.1192,
274
+ "step": 2800
275
+ },
276
+ {
277
+ "epoch": 2.1545319465081723,
278
+ "grad_norm": 0.9038800001144409,
279
+ "learning_rate": 4.07445987654321e-05,
280
+ "loss": 0.119,
281
+ "step": 2900
282
+ },
283
+ {
284
+ "epoch": 2.2288261515601784,
285
+ "grad_norm": 2.5787911415100098,
286
+ "learning_rate": 4.03587962962963e-05,
287
+ "loss": 0.1195,
288
+ "step": 3000
289
+ },
290
+ {
291
+ "epoch": 2.303120356612184,
292
+ "grad_norm": 1.0092898607254028,
293
+ "learning_rate": 3.9972993827160496e-05,
294
+ "loss": 0.1197,
295
+ "step": 3100
296
+ },
297
+ {
298
+ "epoch": 2.37741456166419,
299
+ "grad_norm": 1.6934309005737305,
300
+ "learning_rate": 3.9587191358024694e-05,
301
+ "loss": 0.117,
302
+ "step": 3200
303
+ },
304
+ {
305
+ "epoch": 2.4517087667161963,
306
+ "grad_norm": 4.028139591217041,
307
+ "learning_rate": 3.920138888888889e-05,
308
+ "loss": 0.121,
309
+ "step": 3300
310
+ },
311
+ {
312
+ "epoch": 2.526002971768202,
313
+ "grad_norm": 1.1819978952407837,
314
+ "learning_rate": 3.881558641975309e-05,
315
+ "loss": 0.1163,
316
+ "step": 3400
317
+ },
318
+ {
319
+ "epoch": 2.600297176820208,
320
+ "grad_norm": 3.9428632259368896,
321
+ "learning_rate": 3.842978395061729e-05,
322
+ "loss": 0.1172,
323
+ "step": 3500
324
+ },
325
+ {
326
+ "epoch": 2.6745913818722142,
327
+ "grad_norm": 2.3489303588867188,
328
+ "learning_rate": 3.8043981481481484e-05,
329
+ "loss": 0.1149,
330
+ "step": 3600
331
+ },
332
+ {
333
+ "epoch": 2.74888558692422,
334
+ "grad_norm": 1.8367408514022827,
335
+ "learning_rate": 3.765817901234568e-05,
336
+ "loss": 0.1227,
337
+ "step": 3700
338
+ },
339
+ {
340
+ "epoch": 2.8231797919762256,
341
+ "grad_norm": 1.0681540966033936,
342
+ "learning_rate": 3.727237654320988e-05,
343
+ "loss": 0.1175,
344
+ "step": 3800
345
+ },
346
+ {
347
+ "epoch": 2.8974739970282317,
348
+ "grad_norm": 1.2929691076278687,
349
+ "learning_rate": 3.688657407407408e-05,
350
+ "loss": 0.131,
351
+ "step": 3900
352
+ },
353
+ {
354
+ "epoch": 2.971768202080238,
355
+ "grad_norm": 1.355788230895996,
356
+ "learning_rate": 3.6500771604938275e-05,
357
+ "loss": 0.1105,
358
+ "step": 4000
359
+ },
360
+ {
361
+ "epoch": 3.0,
362
+ "eval_administration_accuracy": 0.8723997028231798,
363
+ "eval_administration_f1": 0.8756871269056309,
364
+ "eval_corruption_accuracy": 0.9494799405646359,
365
+ "eval_corruption_f1": 0.9479134867161034,
366
+ "eval_democracy_accuracy": 0.9344353640416048,
367
+ "eval_democracy_f1": 0.9322999557168562,
368
+ "eval_development_accuracy": 0.8673848439821694,
369
+ "eval_development_f1": 0.8644377347103656,
370
+ "eval_economy_accuracy": 0.913261515601783,
371
+ "eval_economy_f1": 0.9130435047866681,
372
+ "eval_education_accuracy": 0.9546805349182764,
373
+ "eval_education_f1": 0.9543166975188848,
374
+ "eval_environment_accuracy": 0.9723254086181278,
375
+ "eval_environment_f1": 0.972495517419135,
376
+ "eval_leadership_accuracy": 0.8346953937592868,
377
+ "eval_leadership_f1": 0.8337596310104887,
378
+ "eval_loss": 0.20719276368618011,
379
+ "eval_overall_accuracy": 0.9245139920752847,
380
+ "eval_overall_f1": 0.9236679939166218,
381
+ "eval_race_accuracy": 0.9470653789004457,
382
+ "eval_race_f1": 0.9466285253518817,
383
+ "eval_religion_accuracy": 0.9470653789004457,
384
+ "eval_religion_f1": 0.9448810907660032,
385
+ "eval_runtime": 6.0863,
386
+ "eval_safety_accuracy": 0.9013744427934621,
387
+ "eval_safety_f1": 0.8985526560974422,
388
+ "eval_samples_per_second": 884.606,
389
+ "eval_stability_accuracy": 1.0,
390
+ "eval_stability_f1": 1.0,
391
+ "eval_steps_per_second": 55.37,
392
+ "step": 4038
393
+ },
394
+ {
395
+ "epoch": 3.0460624071322435,
396
+ "grad_norm": 1.7487143278121948,
397
+ "learning_rate": 3.611496913580247e-05,
398
+ "loss": 0.0922,
399
+ "step": 4100
400
+ },
401
+ {
402
+ "epoch": 3.1203566121842496,
403
+ "grad_norm": 3.5598196983337402,
404
+ "learning_rate": 3.572916666666667e-05,
405
+ "loss": 0.0834,
406
+ "step": 4200
407
+ },
408
+ {
409
+ "epoch": 3.1946508172362558,
410
+ "grad_norm": 1.6862306594848633,
411
+ "learning_rate": 3.534336419753087e-05,
412
+ "loss": 0.0829,
413
+ "step": 4300
414
+ },
415
+ {
416
+ "epoch": 3.2689450222882614,
417
+ "grad_norm": 1.7356230020523071,
418
+ "learning_rate": 3.495756172839506e-05,
419
+ "loss": 0.0793,
420
+ "step": 4400
421
+ },
422
+ {
423
+ "epoch": 3.3432392273402676,
424
+ "grad_norm": 1.658058524131775,
425
+ "learning_rate": 3.457175925925926e-05,
426
+ "loss": 0.0798,
427
+ "step": 4500
428
+ },
429
+ {
430
+ "epoch": 3.4175334323922733,
431
+ "grad_norm": 1.5869959592819214,
432
+ "learning_rate": 3.418595679012346e-05,
433
+ "loss": 0.0753,
434
+ "step": 4600
435
+ },
436
+ {
437
+ "epoch": 3.4918276374442794,
438
+ "grad_norm": 3.271198272705078,
439
+ "learning_rate": 3.380015432098765e-05,
440
+ "loss": 0.0853,
441
+ "step": 4700
442
+ },
443
+ {
444
+ "epoch": 3.566121842496285,
445
+ "grad_norm": 0.7939934730529785,
446
+ "learning_rate": 3.3414351851851856e-05,
447
+ "loss": 0.0798,
448
+ "step": 4800
449
+ },
450
+ {
451
+ "epoch": 3.640416047548291,
452
+ "grad_norm": 0.9978725910186768,
453
+ "learning_rate": 3.302854938271605e-05,
454
+ "loss": 0.0812,
455
+ "step": 4900
456
+ },
457
+ {
458
+ "epoch": 3.7147102526002973,
459
+ "grad_norm": 2.130408763885498,
460
+ "learning_rate": 3.2642746913580244e-05,
461
+ "loss": 0.0829,
462
+ "step": 5000
463
+ },
464
+ {
465
+ "epoch": 3.789004457652303,
466
+ "grad_norm": 0.9364782571792603,
467
+ "learning_rate": 3.225694444444445e-05,
468
+ "loss": 0.0826,
469
+ "step": 5100
470
+ },
471
+ {
472
+ "epoch": 3.863298662704309,
473
+ "grad_norm": 1.2428539991378784,
474
+ "learning_rate": 3.1871141975308646e-05,
475
+ "loss": 0.0842,
476
+ "step": 5200
477
+ },
478
+ {
479
+ "epoch": 3.9375928677563152,
480
+ "grad_norm": 2.34770131111145,
481
+ "learning_rate": 3.148533950617284e-05,
482
+ "loss": 0.086,
483
+ "step": 5300
484
+ },
485
+ {
486
+ "epoch": 4.0,
487
+ "eval_administration_accuracy": 0.8798291233283804,
488
+ "eval_administration_f1": 0.8794945259254936,
489
+ "eval_corruption_accuracy": 0.950780089153046,
490
+ "eval_corruption_f1": 0.9479334429046444,
491
+ "eval_democracy_accuracy": 0.937407132243685,
492
+ "eval_democracy_f1": 0.9365589706445473,
493
+ "eval_development_accuracy": 0.8764858841010401,
494
+ "eval_development_f1": 0.8698032455853882,
495
+ "eval_economy_accuracy": 0.9154903417533432,
496
+ "eval_economy_f1": 0.9143622167643529,
497
+ "eval_education_accuracy": 0.9583952451708767,
498
+ "eval_education_f1": 0.9569735073455015,
499
+ "eval_environment_accuracy": 0.9738112927191679,
500
+ "eval_environment_f1": 0.9731627143599846,
501
+ "eval_leadership_accuracy": 0.8411961367013373,
502
+ "eval_leadership_f1": 0.8406395571014088,
503
+ "eval_loss": 0.21541310846805573,
504
+ "eval_overall_accuracy": 0.9272381129271916,
505
+ "eval_overall_f1": 0.9257917541145674,
506
+ "eval_race_accuracy": 0.9476225854383358,
507
+ "eval_race_f1": 0.9465941365622704,
508
+ "eval_religion_accuracy": 0.9452080237741456,
509
+ "eval_religion_f1": 0.9437790471355962,
510
+ "eval_runtime": 6.0613,
511
+ "eval_safety_accuracy": 0.9006315007429421,
512
+ "eval_safety_f1": 0.9001996850456204,
513
+ "eval_samples_per_second": 888.26,
514
+ "eval_stability_accuracy": 1.0,
515
+ "eval_stability_f1": 1.0,
516
+ "eval_steps_per_second": 55.599,
517
+ "step": 5384
518
+ },
519
+ {
520
+ "epoch": 4.011887072808321,
521
+ "grad_norm": 1.4067624807357788,
522
+ "learning_rate": 3.109953703703704e-05,
523
+ "loss": 0.0752,
524
+ "step": 5400
525
+ },
526
+ {
527
+ "epoch": 4.086181277860327,
528
+ "grad_norm": 3.1831161975860596,
529
+ "learning_rate": 3.071373456790124e-05,
530
+ "loss": 0.0569,
531
+ "step": 5500
532
+ },
533
+ {
534
+ "epoch": 4.160475482912333,
535
+ "grad_norm": 0.7918180823326111,
536
+ "learning_rate": 3.0327932098765433e-05,
537
+ "loss": 0.055,
538
+ "step": 5600
539
+ },
540
+ {
541
+ "epoch": 4.234769687964339,
542
+ "grad_norm": 1.334572672843933,
543
+ "learning_rate": 2.9942129629629627e-05,
544
+ "loss": 0.0557,
545
+ "step": 5700
546
+ },
547
+ {
548
+ "epoch": 4.3090638930163445,
549
+ "grad_norm": 0.9439612030982971,
550
+ "learning_rate": 2.955632716049383e-05,
551
+ "loss": 0.0547,
552
+ "step": 5800
553
+ },
554
+ {
555
+ "epoch": 4.383358098068351,
556
+ "grad_norm": 1.7661114931106567,
557
+ "learning_rate": 2.9170524691358026e-05,
558
+ "loss": 0.0589,
559
+ "step": 5900
560
+ },
561
+ {
562
+ "epoch": 4.457652303120357,
563
+ "grad_norm": 1.5608975887298584,
564
+ "learning_rate": 2.878472222222222e-05,
565
+ "loss": 0.0604,
566
+ "step": 6000
567
+ },
568
+ {
569
+ "epoch": 4.531946508172362,
570
+ "grad_norm": 2.329026460647583,
571
+ "learning_rate": 2.839891975308642e-05,
572
+ "loss": 0.0556,
573
+ "step": 6100
574
+ },
575
+ {
576
+ "epoch": 4.606240713224368,
577
+ "grad_norm": 1.270369529724121,
578
+ "learning_rate": 2.801311728395062e-05,
579
+ "loss": 0.0559,
580
+ "step": 6200
581
+ },
582
+ {
583
+ "epoch": 4.680534918276375,
584
+ "grad_norm": 1.327057123184204,
585
+ "learning_rate": 2.7627314814814813e-05,
586
+ "loss": 0.0563,
587
+ "step": 6300
588
+ },
589
+ {
590
+ "epoch": 4.75482912332838,
591
+ "grad_norm": 1.4530967473983765,
592
+ "learning_rate": 2.7241512345679014e-05,
593
+ "loss": 0.0559,
594
+ "step": 6400
595
+ },
596
+ {
597
+ "epoch": 4.829123328380386,
598
+ "grad_norm": 1.368444800376892,
599
+ "learning_rate": 2.685570987654321e-05,
600
+ "loss": 0.0588,
601
+ "step": 6500
602
+ },
603
+ {
604
+ "epoch": 4.903417533432393,
605
+ "grad_norm": 0.904096782207489,
606
+ "learning_rate": 2.6469907407407406e-05,
607
+ "loss": 0.0558,
608
+ "step": 6600
609
+ },
610
+ {
611
+ "epoch": 4.977711738484398,
612
+ "grad_norm": 1.7562211751937866,
613
+ "learning_rate": 2.608410493827161e-05,
614
+ "loss": 0.0599,
615
+ "step": 6700
616
+ },
617
+ {
618
+ "epoch": 5.0,
619
+ "eval_administration_accuracy": 0.8802005943536404,
620
+ "eval_administration_f1": 0.8791286682112321,
621
+ "eval_corruption_accuracy": 0.9476225854383358,
622
+ "eval_corruption_f1": 0.9465024742317194,
623
+ "eval_democracy_accuracy": 0.9277488855869243,
624
+ "eval_democracy_f1": 0.9283028411081555,
625
+ "eval_development_accuracy": 0.8718424962852898,
626
+ "eval_development_f1": 0.8670635241370714,
627
+ "eval_economy_accuracy": 0.9186478454680534,
628
+ "eval_economy_f1": 0.9177402812083378,
629
+ "eval_education_accuracy": 0.9567236255572066,
630
+ "eval_education_f1": 0.9563906887924067,
631
+ "eval_environment_accuracy": 0.9717682020802377,
632
+ "eval_environment_f1": 0.9714559245580735,
633
+ "eval_leadership_accuracy": 0.8363670133729569,
634
+ "eval_leadership_f1": 0.8372830889258505,
635
+ "eval_loss": 0.23590339720249176,
636
+ "eval_overall_accuracy": 0.9249318969787024,
637
+ "eval_overall_f1": 0.9243598065937545,
638
+ "eval_race_accuracy": 0.9492942050520059,
639
+ "eval_race_f1": 0.9489608272042858,
640
+ "eval_religion_accuracy": 0.9414933135215453,
641
+ "eval_religion_f1": 0.9417116874083546,
642
+ "eval_runtime": 6.0832,
643
+ "eval_safety_accuracy": 0.8974739970282318,
644
+ "eval_safety_f1": 0.897777673339568,
645
+ "eval_samples_per_second": 885.064,
646
+ "eval_stability_accuracy": 1.0,
647
+ "eval_stability_f1": 1.0,
648
+ "eval_steps_per_second": 55.399,
649
+ "step": 6730
650
+ },
651
+ {
652
+ "epoch": 5.052005943536404,
653
+ "grad_norm": 1.0707114934921265,
654
+ "learning_rate": 2.5698302469135804e-05,
655
+ "loss": 0.0467,
656
+ "step": 6800
657
+ },
658
+ {
659
+ "epoch": 5.12630014858841,
660
+ "grad_norm": 1.0622857809066772,
661
+ "learning_rate": 2.53125e-05,
662
+ "loss": 0.0394,
663
+ "step": 6900
664
+ },
665
+ {
666
+ "epoch": 5.200594353640416,
667
+ "grad_norm": 1.1686086654663086,
668
+ "learning_rate": 2.49266975308642e-05,
669
+ "loss": 0.0416,
670
+ "step": 7000
671
+ },
672
+ {
673
+ "epoch": 5.274888558692422,
674
+ "grad_norm": 1.2564952373504639,
675
+ "learning_rate": 2.4540895061728397e-05,
676
+ "loss": 0.0427,
677
+ "step": 7100
678
+ },
679
+ {
680
+ "epoch": 5.349182763744428,
681
+ "grad_norm": 0.693988025188446,
682
+ "learning_rate": 2.415509259259259e-05,
683
+ "loss": 0.0433,
684
+ "step": 7200
685
+ },
686
+ {
687
+ "epoch": 5.423476968796434,
688
+ "grad_norm": 1.402803897857666,
689
+ "learning_rate": 2.3769290123456792e-05,
690
+ "loss": 0.0403,
691
+ "step": 7300
692
+ },
693
+ {
694
+ "epoch": 5.49777117384844,
695
+ "grad_norm": 0.35870230197906494,
696
+ "learning_rate": 2.3383487654320987e-05,
697
+ "loss": 0.0399,
698
+ "step": 7400
699
+ },
700
+ {
701
+ "epoch": 5.5720653789004455,
702
+ "grad_norm": 0.8109584450721741,
703
+ "learning_rate": 2.2997685185185188e-05,
704
+ "loss": 0.0404,
705
+ "step": 7500
706
+ },
707
+ {
708
+ "epoch": 5.646359583952452,
709
+ "grad_norm": 0.6922593116760254,
710
+ "learning_rate": 2.2611882716049385e-05,
711
+ "loss": 0.0415,
712
+ "step": 7600
713
+ },
714
+ {
715
+ "epoch": 5.720653789004458,
716
+ "grad_norm": 0.9838235378265381,
717
+ "learning_rate": 2.222608024691358e-05,
718
+ "loss": 0.0339,
719
+ "step": 7700
720
+ },
721
+ {
722
+ "epoch": 5.794947994056463,
723
+ "grad_norm": 0.4872437119483948,
724
+ "learning_rate": 2.184027777777778e-05,
725
+ "loss": 0.0417,
726
+ "step": 7800
727
+ },
728
+ {
729
+ "epoch": 5.86924219910847,
730
+ "grad_norm": 1.348196029663086,
731
+ "learning_rate": 2.1454475308641978e-05,
732
+ "loss": 0.0447,
733
+ "step": 7900
734
+ },
735
+ {
736
+ "epoch": 5.943536404160476,
737
+ "grad_norm": 1.4830248355865479,
738
+ "learning_rate": 2.1068672839506172e-05,
739
+ "loss": 0.0398,
740
+ "step": 8000
741
+ },
742
+ {
743
+ "epoch": 6.0,
744
+ "eval_administration_accuracy": 0.8893016344725111,
745
+ "eval_administration_f1": 0.8821384586084873,
746
+ "eval_corruption_accuracy": 0.9459509658246656,
747
+ "eval_corruption_f1": 0.9447882680066073,
748
+ "eval_democracy_accuracy": 0.9331352154531947,
749
+ "eval_democracy_f1": 0.9333919857361558,
750
+ "eval_development_accuracy": 0.8701708766716196,
751
+ "eval_development_f1": 0.8701330980665078,
752
+ "eval_economy_accuracy": 0.9171619613670133,
753
+ "eval_economy_f1": 0.9173996309698476,
754
+ "eval_education_accuracy": 0.9587667161961367,
755
+ "eval_education_f1": 0.958317820654174,
756
+ "eval_environment_accuracy": 0.9736255572065379,
757
+ "eval_environment_f1": 0.9732156963181571,
758
+ "eval_leadership_accuracy": 0.8417533432392273,
759
+ "eval_leadership_f1": 0.8369018544959057,
760
+ "eval_loss": 0.2435985654592514,
761
+ "eval_overall_accuracy": 0.9273774145616641,
762
+ "eval_overall_f1": 0.9260046966635366,
763
+ "eval_race_accuracy": 0.9474368499257058,
764
+ "eval_race_f1": 0.945994966362508,
765
+ "eval_religion_accuracy": 0.9474368499257058,
766
+ "eval_religion_f1": 0.9464647349324496,
767
+ "eval_runtime": 6.067,
768
+ "eval_safety_accuracy": 0.9037890044576523,
769
+ "eval_safety_f1": 0.9033098458116402,
770
+ "eval_samples_per_second": 887.42,
771
+ "eval_stability_accuracy": 1.0,
772
+ "eval_stability_f1": 1.0,
773
+ "eval_steps_per_second": 55.546,
774
+ "step": 8076
775
+ },
776
+ {
777
+ "epoch": 6.017830609212481,
778
+ "grad_norm": 1.2368193864822388,
779
+ "learning_rate": 2.0682870370370373e-05,
780
+ "loss": 0.0415,
781
+ "step": 8100
782
+ },
783
+ {
784
+ "epoch": 6.092124814264487,
785
+ "grad_norm": 0.41253241896629333,
786
+ "learning_rate": 2.0297067901234568e-05,
787
+ "loss": 0.0264,
788
+ "step": 8200
789
+ },
790
+ {
791
+ "epoch": 6.166419019316494,
792
+ "grad_norm": 1.4119162559509277,
793
+ "learning_rate": 1.9911265432098765e-05,
794
+ "loss": 0.029,
795
+ "step": 8300
796
+ },
797
+ {
798
+ "epoch": 6.240713224368499,
799
+ "grad_norm": 0.38587984442710876,
800
+ "learning_rate": 1.9525462962962966e-05,
801
+ "loss": 0.0293,
802
+ "step": 8400
803
+ },
804
+ {
805
+ "epoch": 6.315007429420505,
806
+ "grad_norm": 1.214526653289795,
807
+ "learning_rate": 1.913966049382716e-05,
808
+ "loss": 0.0286,
809
+ "step": 8500
810
+ },
811
+ {
812
+ "epoch": 6.3893016344725115,
813
+ "grad_norm": 2.2572736740112305,
814
+ "learning_rate": 1.8753858024691358e-05,
815
+ "loss": 0.0309,
816
+ "step": 8600
817
+ },
818
+ {
819
+ "epoch": 6.463595839524517,
820
+ "grad_norm": 0.7722110152244568,
821
+ "learning_rate": 1.836805555555556e-05,
822
+ "loss": 0.0316,
823
+ "step": 8700
824
+ },
825
+ {
826
+ "epoch": 6.537890044576523,
827
+ "grad_norm": 1.0314061641693115,
828
+ "learning_rate": 1.7982253086419753e-05,
829
+ "loss": 0.0292,
830
+ "step": 8800
831
+ },
832
+ {
833
+ "epoch": 6.612184249628529,
834
+ "grad_norm": 0.6228373050689697,
835
+ "learning_rate": 1.759645061728395e-05,
836
+ "loss": 0.0287,
837
+ "step": 8900
838
+ },
839
+ {
840
+ "epoch": 6.686478454680535,
841
+ "grad_norm": 0.3270525336265564,
842
+ "learning_rate": 1.721064814814815e-05,
843
+ "loss": 0.0307,
844
+ "step": 9000
845
+ },
846
+ {
847
+ "epoch": 6.760772659732541,
848
+ "grad_norm": 1.1799579858779907,
849
+ "learning_rate": 1.6824845679012346e-05,
850
+ "loss": 0.0277,
851
+ "step": 9100
852
+ },
853
+ {
854
+ "epoch": 6.8350668647845465,
855
+ "grad_norm": 3.962522029876709,
856
+ "learning_rate": 1.6439043209876544e-05,
857
+ "loss": 0.029,
858
+ "step": 9200
859
+ },
860
+ {
861
+ "epoch": 6.909361069836553,
862
+ "grad_norm": 0.8366211652755737,
863
+ "learning_rate": 1.605324074074074e-05,
864
+ "loss": 0.03,
865
+ "step": 9300
866
+ },
867
+ {
868
+ "epoch": 6.983655274888559,
869
+ "grad_norm": 1.2808723449707031,
870
+ "learning_rate": 1.566743827160494e-05,
871
+ "loss": 0.0284,
872
+ "step": 9400
873
+ },
874
+ {
875
+ "epoch": 7.0,
876
+ "eval_administration_accuracy": 0.8759286775631501,
877
+ "eval_administration_f1": 0.8768026075421302,
878
+ "eval_corruption_accuracy": 0.9476225854383358,
879
+ "eval_corruption_f1": 0.9471725413953416,
880
+ "eval_democracy_accuracy": 0.9351783060921248,
881
+ "eval_democracy_f1": 0.9350529993842286,
882
+ "eval_development_accuracy": 0.8688707280832095,
883
+ "eval_development_f1": 0.8719178288623293,
884
+ "eval_economy_accuracy": 0.9188335809806835,
885
+ "eval_economy_f1": 0.9195020015774136,
886
+ "eval_education_accuracy": 0.9546805349182764,
887
+ "eval_education_f1": 0.9557389094775754,
888
+ "eval_environment_accuracy": 0.9732540861812778,
889
+ "eval_environment_f1": 0.9731958552682995,
890
+ "eval_leadership_accuracy": 0.8395245170876672,
891
+ "eval_leadership_f1": 0.840487098777998,
892
+ "eval_loss": 0.25289186835289,
893
+ "eval_overall_accuracy": 0.9253652798415057,
894
+ "eval_overall_f1": 0.9258748564697528,
895
+ "eval_race_accuracy": 0.9491084695393759,
896
+ "eval_race_f1": 0.9488621247872209,
897
+ "eval_religion_accuracy": 0.9459509658246656,
898
+ "eval_religion_f1": 0.945886068594284,
899
+ "eval_runtime": 6.0881,
900
+ "eval_safety_accuracy": 0.8954309063893017,
901
+ "eval_safety_f1": 0.8958802419702108,
902
+ "eval_samples_per_second": 884.354,
903
+ "eval_stability_accuracy": 1.0,
904
+ "eval_stability_f1": 1.0,
905
+ "eval_steps_per_second": 55.354,
906
+ "step": 9422
907
+ },
908
+ {
909
+ "epoch": 7.057949479940564,
910
+ "grad_norm": 1.4738227128982544,
911
+ "learning_rate": 1.5281635802469136e-05,
912
+ "loss": 0.0245,
913
+ "step": 9500
914
+ },
915
+ {
916
+ "epoch": 7.132243684992571,
917
+ "grad_norm": 1.0899150371551514,
918
+ "learning_rate": 1.4895833333333334e-05,
919
+ "loss": 0.022,
920
+ "step": 9600
921
+ },
922
+ {
923
+ "epoch": 7.206537890044577,
924
+ "grad_norm": 0.9878177642822266,
925
+ "learning_rate": 1.4510030864197532e-05,
926
+ "loss": 0.0218,
927
+ "step": 9700
928
+ },
929
+ {
930
+ "epoch": 7.280832095096582,
931
+ "grad_norm": 0.37595194578170776,
932
+ "learning_rate": 1.4124228395061728e-05,
933
+ "loss": 0.0232,
934
+ "step": 9800
935
+ },
936
+ {
937
+ "epoch": 7.355126300148588,
938
+ "grad_norm": 0.7727621793746948,
939
+ "learning_rate": 1.3738425925925927e-05,
940
+ "loss": 0.0216,
941
+ "step": 9900
942
+ },
943
+ {
944
+ "epoch": 7.429420505200595,
945
+ "grad_norm": 0.3330998718738556,
946
+ "learning_rate": 1.3352623456790126e-05,
947
+ "loss": 0.0222,
948
+ "step": 10000
949
+ },
950
+ {
951
+ "epoch": 7.5037147102526,
952
+ "grad_norm": 0.5806456804275513,
953
+ "learning_rate": 1.296682098765432e-05,
954
+ "loss": 0.0219,
955
+ "step": 10100
956
+ },
957
+ {
958
+ "epoch": 7.578008915304606,
959
+ "grad_norm": 0.7578392028808594,
960
+ "learning_rate": 1.258101851851852e-05,
961
+ "loss": 0.0218,
962
+ "step": 10200
963
+ },
964
+ {
965
+ "epoch": 7.6523031203566125,
966
+ "grad_norm": 0.3120606541633606,
967
+ "learning_rate": 1.2195216049382717e-05,
968
+ "loss": 0.02,
969
+ "step": 10300
970
+ },
971
+ {
972
+ "epoch": 7.726597325408618,
973
+ "grad_norm": 0.637631893157959,
974
+ "learning_rate": 1.1809413580246915e-05,
975
+ "loss": 0.0196,
976
+ "step": 10400
977
+ },
978
+ {
979
+ "epoch": 7.800891530460624,
980
+ "grad_norm": 0.6013413071632385,
981
+ "learning_rate": 1.142361111111111e-05,
982
+ "loss": 0.0234,
983
+ "step": 10500
984
+ },
985
+ {
986
+ "epoch": 7.8751857355126305,
987
+ "grad_norm": 2.5481185913085938,
988
+ "learning_rate": 1.1037808641975308e-05,
989
+ "loss": 0.0218,
990
+ "step": 10600
991
+ },
992
+ {
993
+ "epoch": 7.949479940564636,
994
+ "grad_norm": 0.5179031491279602,
995
+ "learning_rate": 1.0652006172839508e-05,
996
+ "loss": 0.0208,
997
+ "step": 10700
998
+ },
999
+ {
1000
+ "epoch": 8.0,
1001
+ "eval_administration_accuracy": 0.8813150074294205,
1002
+ "eval_administration_f1": 0.8795800439008289,
1003
+ "eval_corruption_accuracy": 0.9455794947994056,
1004
+ "eval_corruption_f1": 0.9433562694219033,
1005
+ "eval_democracy_accuracy": 0.9366641901931649,
1006
+ "eval_democracy_f1": 0.9365011786875334,
1007
+ "eval_development_accuracy": 0.8694279346210996,
1008
+ "eval_development_f1": 0.8688370502444763,
1009
+ "eval_economy_accuracy": 0.9184621099554234,
1010
+ "eval_economy_f1": 0.918124164419619,
1011
+ "eval_education_accuracy": 0.9580237741456167,
1012
+ "eval_education_f1": 0.9574723795948901,
1013
+ "eval_environment_accuracy": 0.9738112927191679,
1014
+ "eval_environment_f1": 0.9736854631574718,
1015
+ "eval_leadership_accuracy": 0.8393387815750372,
1016
+ "eval_leadership_f1": 0.839741016757979,
1017
+ "eval_loss": 0.2596803605556488,
1018
+ "eval_overall_accuracy": 0.9265725606736006,
1019
+ "eval_overall_f1": 0.9260616678064877,
1020
+ "eval_race_accuracy": 0.9465081723625557,
1021
+ "eval_race_f1": 0.9461343616633416,
1022
+ "eval_religion_accuracy": 0.9446508172362555,
1023
+ "eval_religion_f1": 0.9445423673916528,
1024
+ "eval_runtime": 6.0851,
1025
+ "eval_safety_accuracy": 0.9050891530460624,
1026
+ "eval_safety_f1": 0.904765718438156,
1027
+ "eval_samples_per_second": 884.782,
1028
+ "eval_stability_accuracy": 1.0,
1029
+ "eval_stability_f1": 1.0,
1030
+ "eval_steps_per_second": 55.381,
1031
+ "step": 10768
1032
+ },
1033
+ {
1034
+ "epoch": 8.023774145616642,
1035
+ "grad_norm": 0.6615686416625977,
1036
+ "learning_rate": 1.0266203703703704e-05,
1037
+ "loss": 0.0205,
1038
+ "step": 10800
1039
+ },
1040
+ {
1041
+ "epoch": 8.098068350668647,
1042
+ "grad_norm": 2.661421060562134,
1043
+ "learning_rate": 9.880401234567901e-06,
1044
+ "loss": 0.0185,
1045
+ "step": 10900
1046
+ },
1047
+ {
1048
+ "epoch": 8.172362555720653,
1049
+ "grad_norm": 0.6415339112281799,
1050
+ "learning_rate": 9.494598765432099e-06,
1051
+ "loss": 0.0164,
1052
+ "step": 11000
1053
+ },
1054
+ {
1055
+ "epoch": 8.246656760772659,
1056
+ "grad_norm": 0.761589765548706,
1057
+ "learning_rate": 9.108796296296296e-06,
1058
+ "loss": 0.017,
1059
+ "step": 11100
1060
+ },
1061
+ {
1062
+ "epoch": 8.320950965824666,
1063
+ "grad_norm": 0.6105137467384338,
1064
+ "learning_rate": 8.722993827160494e-06,
1065
+ "loss": 0.0174,
1066
+ "step": 11200
1067
+ },
1068
+ {
1069
+ "epoch": 8.395245170876672,
1070
+ "grad_norm": 0.6517733931541443,
1071
+ "learning_rate": 8.337191358024692e-06,
1072
+ "loss": 0.0175,
1073
+ "step": 11300
1074
+ },
1075
+ {
1076
+ "epoch": 8.469539375928678,
1077
+ "grad_norm": 0.6607377529144287,
1078
+ "learning_rate": 7.95138888888889e-06,
1079
+ "loss": 0.0172,
1080
+ "step": 11400
1081
+ },
1082
+ {
1083
+ "epoch": 8.543833580980683,
1084
+ "grad_norm": 0.72126305103302,
1085
+ "learning_rate": 7.565586419753088e-06,
1086
+ "loss": 0.0155,
1087
+ "step": 11500
1088
+ },
1089
+ {
1090
+ "epoch": 8.618127786032689,
1091
+ "grad_norm": 0.4402889609336853,
1092
+ "learning_rate": 7.1797839506172844e-06,
1093
+ "loss": 0.0173,
1094
+ "step": 11600
1095
+ },
1096
+ {
1097
+ "epoch": 8.692421991084695,
1098
+ "grad_norm": 0.08074043691158295,
1099
+ "learning_rate": 6.793981481481482e-06,
1100
+ "loss": 0.014,
1101
+ "step": 11700
1102
+ },
1103
+ {
1104
+ "epoch": 8.766716196136702,
1105
+ "grad_norm": 0.1940842866897583,
1106
+ "learning_rate": 6.408179012345679e-06,
1107
+ "loss": 0.0145,
1108
+ "step": 11800
1109
+ },
1110
+ {
1111
+ "epoch": 8.841010401188708,
1112
+ "grad_norm": 0.4365254044532776,
1113
+ "learning_rate": 6.022376543209876e-06,
1114
+ "loss": 0.0152,
1115
+ "step": 11900
1116
+ },
1117
+ {
1118
+ "epoch": 8.915304606240714,
1119
+ "grad_norm": 0.4619617462158203,
1120
+ "learning_rate": 5.636574074074075e-06,
1121
+ "loss": 0.0166,
1122
+ "step": 12000
1123
+ },
1124
+ {
1125
+ "epoch": 8.98959881129272,
1126
+ "grad_norm": 0.22596906125545502,
1127
+ "learning_rate": 5.250771604938272e-06,
1128
+ "loss": 0.0156,
1129
+ "step": 12100
1130
+ },
1131
+ {
1132
+ "epoch": 9.0,
1133
+ "eval_administration_accuracy": 0.888001485884101,
1134
+ "eval_administration_f1": 0.8840369764257654,
1135
+ "eval_corruption_accuracy": 0.9474368499257058,
1136
+ "eval_corruption_f1": 0.9462275023250412,
1137
+ "eval_democracy_accuracy": 0.937035661218425,
1138
+ "eval_democracy_f1": 0.9349797337427957,
1139
+ "eval_development_accuracy": 0.8777860326894502,
1140
+ "eval_development_f1": 0.8758984879855292,
1141
+ "eval_economy_accuracy": 0.9188335809806835,
1142
+ "eval_economy_f1": 0.9183485908645431,
1143
+ "eval_education_accuracy": 0.9613670133729569,
1144
+ "eval_education_f1": 0.9605424054563209,
1145
+ "eval_environment_accuracy": 0.9745542347696879,
1146
+ "eval_environment_f1": 0.97406579666089,
1147
+ "eval_leadership_accuracy": 0.8447251114413076,
1148
+ "eval_leadership_f1": 0.8447097008519563,
1149
+ "eval_loss": 0.26206690073013306,
1150
+ "eval_overall_accuracy": 0.9286001733531449,
1151
+ "eval_overall_f1": 0.9276388104924375,
1152
+ "eval_race_accuracy": 0.9476225854383358,
1153
+ "eval_race_f1": 0.947246539022533,
1154
+ "eval_religion_accuracy": 0.9446508172362555,
1155
+ "eval_religion_f1": 0.9438393498317825,
1156
+ "eval_runtime": 6.0648,
1157
+ "eval_safety_accuracy": 0.9011887072808321,
1158
+ "eval_safety_f1": 0.9017706427420923,
1159
+ "eval_samples_per_second": 887.753,
1160
+ "eval_stability_accuracy": 1.0,
1161
+ "eval_stability_f1": 1.0,
1162
+ "eval_steps_per_second": 55.567,
1163
+ "step": 12114
1164
+ }
1165
+ ],
1166
+ "logging_steps": 100,
1167
+ "max_steps": 13460,
1168
+ "num_input_tokens_seen": 0,
1169
+ "num_train_epochs": 10,
1170
+ "save_steps": 500,
1171
+ "stateful_callbacks": {
1172
+ "EarlyStoppingCallback": {
1173
+ "args": {
1174
+ "early_stopping_patience": 2,
1175
+ "early_stopping_threshold": 0.0
1176
+ },
1177
+ "attributes": {
1178
+ "early_stopping_patience_counter": 0
1179
+ }
1180
+ },
1181
+ "TrainerControl": {
1182
+ "args": {
1183
+ "should_epoch_stop": false,
1184
+ "should_evaluate": false,
1185
+ "should_log": false,
1186
+ "should_save": true,
1187
+ "should_training_stop": false
1188
+ },
1189
+ "attributes": {}
1190
+ }
1191
+ },
1192
+ "total_flos": 5.095302134877389e+16,
1193
+ "train_batch_size": 16,
1194
+ "trial_name": null,
1195
+ "trial_params": {
1196
+ "gradient_accumulation_steps": 1,
1197
+ "learning_rate": 5e-05,
1198
+ "num_train_epochs": 10
1199
+ }
1200
+ }
base/run-0/checkpoint-12114/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f417d3b26827f3c2091fa30ae27025fb9a89bba46b9e16e8ce16798492c41b
3
+ size 5841
base/run-0/checkpoint-13460/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
base/run-0/checkpoint-13460/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76674ecc6fc0919cc8f40d45de3432489dfaf9d2fdb70e34c7ccf5f8b9cec02
3
+ size 438100144
base/run-0/checkpoint-13460/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c3b8fca9ea8a8edac76ca86d365899fa3e6b808ba90dcad9714eebdefe6424
3
+ size 876324619
base/run-0/checkpoint-13460/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eff495602e83b92f12f4acdaacae3ab373669aa51da2d8ae878591e05ceec87
3
+ size 14645
base/run-0/checkpoint-13460/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1cec2270e3eb9f1e8481eea08cc2a8e1fda802715fad23443fac1a661d52d19
3
+ size 1383
base/run-0/checkpoint-13460/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2b6ac0e53b00ed502f85792601c8ec35ed8fbb1644371aba2be316f64b7534a
3
+ size 1465
base/run-0/checkpoint-13460/trainer_state.json ADDED
@@ -0,0 +1,1325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 12114,
3
+ "best_metric": 0.9276388104924375,
4
+ "best_model_checkpoint": "./results/base/run-0/checkpoint-12114",
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 13460,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.07429420505200594,
14
+ "grad_norm": 3.0634188652038574,
15
+ "learning_rate": 9.900000000000002e-06,
16
+ "loss": 24.5432,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.1485884101040119,
21
+ "grad_norm": 1.4259201288223267,
22
+ "learning_rate": 1.9900000000000003e-05,
23
+ "loss": 0.4008,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.22288261515601784,
28
+ "grad_norm": 1.9871093034744263,
29
+ "learning_rate": 2.9900000000000002e-05,
30
+ "loss": 0.3389,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.2971768202080238,
35
+ "grad_norm": 1.3319053649902344,
36
+ "learning_rate": 3.99e-05,
37
+ "loss": 0.3105,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.37147102526002973,
42
+ "grad_norm": 1.7693461179733276,
43
+ "learning_rate": 4.99e-05,
44
+ "loss": 0.2934,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.4457652303120357,
49
+ "grad_norm": 1.8536063432693481,
50
+ "learning_rate": 4.9618055555555556e-05,
51
+ "loss": 0.2794,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.5200594353640416,
56
+ "grad_norm": 0.8980482220649719,
57
+ "learning_rate": 4.9232253086419754e-05,
58
+ "loss": 0.2584,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.5943536404160475,
63
+ "grad_norm": 2.4894111156463623,
64
+ "learning_rate": 4.884645061728395e-05,
65
+ "loss": 0.257,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.6686478454680534,
70
+ "grad_norm": 2.6494672298431396,
71
+ "learning_rate": 4.846064814814815e-05,
72
+ "loss": 0.2434,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.7429420505200595,
77
+ "grad_norm": 1.483652114868164,
78
+ "learning_rate": 4.807484567901235e-05,
79
+ "loss": 0.2612,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.8172362555720654,
84
+ "grad_norm": 1.8052722215652466,
85
+ "learning_rate": 4.7689043209876544e-05,
86
+ "loss": 0.2339,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.8915304606240714,
91
+ "grad_norm": 1.347399115562439,
92
+ "learning_rate": 4.730324074074074e-05,
93
+ "loss": 0.2337,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.9658246656760773,
98
+ "grad_norm": 2.325223922729492,
99
+ "learning_rate": 4.691743827160494e-05,
100
+ "loss": 0.2313,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 1.0,
105
+ "eval_administration_accuracy": 0.8859583952451708,
106
+ "eval_administration_f1": 0.8647430840179304,
107
+ "eval_corruption_accuracy": 0.9409361069836553,
108
+ "eval_corruption_f1": 0.9344770310075945,
109
+ "eval_democracy_accuracy": 0.923848439821694,
110
+ "eval_democracy_f1": 0.9042551904669028,
111
+ "eval_development_accuracy": 0.8554977711738484,
112
+ "eval_development_f1": 0.841554343091632,
113
+ "eval_economy_accuracy": 0.912518573551263,
114
+ "eval_economy_f1": 0.9048608101573996,
115
+ "eval_education_accuracy": 0.9567236255572066,
116
+ "eval_education_f1": 0.9544184265074659,
117
+ "eval_environment_accuracy": 0.9702823179791976,
118
+ "eval_environment_f1": 0.9701205662520187,
119
+ "eval_leadership_accuracy": 0.799405646359584,
120
+ "eval_leadership_f1": 0.7721670287980339,
121
+ "eval_loss": 0.21857140958309174,
122
+ "eval_overall_accuracy": 0.9179513372956909,
123
+ "eval_overall_f1": 0.9086014006669906,
124
+ "eval_race_accuracy": 0.9448365527488856,
125
+ "eval_race_f1": 0.9432357218391869,
126
+ "eval_religion_accuracy": 0.937778603268945,
127
+ "eval_religion_f1": 0.9363911612570991,
128
+ "eval_runtime": 6.046,
129
+ "eval_safety_accuracy": 0.887630014858841,
130
+ "eval_safety_f1": 0.8769934446086213,
131
+ "eval_samples_per_second": 890.499,
132
+ "eval_stability_accuracy": 1.0,
133
+ "eval_stability_f1": 1.0,
134
+ "eval_steps_per_second": 55.739,
135
+ "step": 1346
136
+ },
137
+ {
138
+ "epoch": 1.0401188707280833,
139
+ "grad_norm": 1.5913788080215454,
140
+ "learning_rate": 4.653163580246914e-05,
141
+ "loss": 0.2066,
142
+ "step": 1400
143
+ },
144
+ {
145
+ "epoch": 1.1144130757800892,
146
+ "grad_norm": 1.1565213203430176,
147
+ "learning_rate": 4.614583333333334e-05,
148
+ "loss": 0.1776,
149
+ "step": 1500
150
+ },
151
+ {
152
+ "epoch": 1.188707280832095,
153
+ "grad_norm": 1.8071492910385132,
154
+ "learning_rate": 4.576003086419753e-05,
155
+ "loss": 0.1812,
156
+ "step": 1600
157
+ },
158
+ {
159
+ "epoch": 1.263001485884101,
160
+ "grad_norm": 0.9088364839553833,
161
+ "learning_rate": 4.537422839506173e-05,
162
+ "loss": 0.1714,
163
+ "step": 1700
164
+ },
165
+ {
166
+ "epoch": 1.3372956909361071,
167
+ "grad_norm": 2.001739263534546,
168
+ "learning_rate": 4.498842592592593e-05,
169
+ "loss": 0.1821,
170
+ "step": 1800
171
+ },
172
+ {
173
+ "epoch": 1.4115898959881128,
174
+ "grad_norm": 1.3958568572998047,
175
+ "learning_rate": 4.4602623456790125e-05,
176
+ "loss": 0.1766,
177
+ "step": 1900
178
+ },
179
+ {
180
+ "epoch": 1.485884101040119,
181
+ "grad_norm": 1.5210611820220947,
182
+ "learning_rate": 4.421682098765432e-05,
183
+ "loss": 0.1641,
184
+ "step": 2000
185
+ },
186
+ {
187
+ "epoch": 1.5601783060921248,
188
+ "grad_norm": 1.0359656810760498,
189
+ "learning_rate": 4.383101851851852e-05,
190
+ "loss": 0.1847,
191
+ "step": 2100
192
+ },
193
+ {
194
+ "epoch": 1.6344725111441307,
195
+ "grad_norm": 1.6115169525146484,
196
+ "learning_rate": 4.344521604938272e-05,
197
+ "loss": 0.1727,
198
+ "step": 2200
199
+ },
200
+ {
201
+ "epoch": 1.7087667161961368,
202
+ "grad_norm": 1.7786448001861572,
203
+ "learning_rate": 4.3059413580246916e-05,
204
+ "loss": 0.1713,
205
+ "step": 2300
206
+ },
207
+ {
208
+ "epoch": 1.7830609212481425,
209
+ "grad_norm": 1.320279598236084,
210
+ "learning_rate": 4.267361111111111e-05,
211
+ "loss": 0.1716,
212
+ "step": 2400
213
+ },
214
+ {
215
+ "epoch": 1.8573551263001487,
216
+ "grad_norm": 1.895094871520996,
217
+ "learning_rate": 4.228780864197531e-05,
218
+ "loss": 0.1703,
219
+ "step": 2500
220
+ },
221
+ {
222
+ "epoch": 1.9316493313521546,
223
+ "grad_norm": 3.2609572410583496,
224
+ "learning_rate": 4.190200617283951e-05,
225
+ "loss": 0.1712,
226
+ "step": 2600
227
+ },
228
+ {
229
+ "epoch": 2.0,
230
+ "eval_administration_accuracy": 0.8870728083209509,
231
+ "eval_administration_f1": 0.8829499518818239,
232
+ "eval_corruption_accuracy": 0.9452080237741456,
233
+ "eval_corruption_f1": 0.939301394284435,
234
+ "eval_democracy_accuracy": 0.9385215453194651,
235
+ "eval_democracy_f1": 0.9323158476933774,
236
+ "eval_development_accuracy": 0.8723997028231798,
237
+ "eval_development_f1": 0.8634790103771273,
238
+ "eval_economy_accuracy": 0.9141901931649331,
239
+ "eval_economy_f1": 0.9117841265429144,
240
+ "eval_education_accuracy": 0.9598811292719168,
241
+ "eval_education_f1": 0.9586754141781413,
242
+ "eval_environment_accuracy": 0.9723254086181278,
243
+ "eval_environment_f1": 0.9700404332564391,
244
+ "eval_leadership_accuracy": 0.8359955423476969,
245
+ "eval_leadership_f1": 0.8314878671023622,
246
+ "eval_loss": 0.19932226836681366,
247
+ "eval_overall_accuracy": 0.9257522288261515,
248
+ "eval_overall_f1": 0.9224257865223552,
249
+ "eval_race_accuracy": 0.9450222882615156,
250
+ "eval_race_f1": 0.9446551524293766,
251
+ "eval_religion_accuracy": 0.9418647845468053,
252
+ "eval_religion_f1": 0.9399027996250938,
253
+ "eval_runtime": 6.0661,
254
+ "eval_safety_accuracy": 0.8965453194650818,
255
+ "eval_safety_f1": 0.894517440897171,
256
+ "eval_samples_per_second": 887.559,
257
+ "eval_stability_accuracy": 1.0,
258
+ "eval_stability_f1": 1.0,
259
+ "eval_steps_per_second": 55.555,
260
+ "step": 2692
261
+ },
262
+ {
263
+ "epoch": 2.0059435364041605,
264
+ "grad_norm": 1.0471062660217285,
265
+ "learning_rate": 4.1516203703703706e-05,
266
+ "loss": 0.1676,
267
+ "step": 2700
268
+ },
269
+ {
270
+ "epoch": 2.0802377414561666,
271
+ "grad_norm": 1.2078527212142944,
272
+ "learning_rate": 4.1130401234567904e-05,
273
+ "loss": 0.1192,
274
+ "step": 2800
275
+ },
276
+ {
277
+ "epoch": 2.1545319465081723,
278
+ "grad_norm": 0.9038800001144409,
279
+ "learning_rate": 4.07445987654321e-05,
280
+ "loss": 0.119,
281
+ "step": 2900
282
+ },
283
+ {
284
+ "epoch": 2.2288261515601784,
285
+ "grad_norm": 2.5787911415100098,
286
+ "learning_rate": 4.03587962962963e-05,
287
+ "loss": 0.1195,
288
+ "step": 3000
289
+ },
290
+ {
291
+ "epoch": 2.303120356612184,
292
+ "grad_norm": 1.0092898607254028,
293
+ "learning_rate": 3.9972993827160496e-05,
294
+ "loss": 0.1197,
295
+ "step": 3100
296
+ },
297
+ {
298
+ "epoch": 2.37741456166419,
299
+ "grad_norm": 1.6934309005737305,
300
+ "learning_rate": 3.9587191358024694e-05,
301
+ "loss": 0.117,
302
+ "step": 3200
303
+ },
304
+ {
305
+ "epoch": 2.4517087667161963,
306
+ "grad_norm": 4.028139591217041,
307
+ "learning_rate": 3.920138888888889e-05,
308
+ "loss": 0.121,
309
+ "step": 3300
310
+ },
311
+ {
312
+ "epoch": 2.526002971768202,
313
+ "grad_norm": 1.1819978952407837,
314
+ "learning_rate": 3.881558641975309e-05,
315
+ "loss": 0.1163,
316
+ "step": 3400
317
+ },
318
+ {
319
+ "epoch": 2.600297176820208,
320
+ "grad_norm": 3.9428632259368896,
321
+ "learning_rate": 3.842978395061729e-05,
322
+ "loss": 0.1172,
323
+ "step": 3500
324
+ },
325
+ {
326
+ "epoch": 2.6745913818722142,
327
+ "grad_norm": 2.3489303588867188,
328
+ "learning_rate": 3.8043981481481484e-05,
329
+ "loss": 0.1149,
330
+ "step": 3600
331
+ },
332
+ {
333
+ "epoch": 2.74888558692422,
334
+ "grad_norm": 1.8367408514022827,
335
+ "learning_rate": 3.765817901234568e-05,
336
+ "loss": 0.1227,
337
+ "step": 3700
338
+ },
339
+ {
340
+ "epoch": 2.8231797919762256,
341
+ "grad_norm": 1.0681540966033936,
342
+ "learning_rate": 3.727237654320988e-05,
343
+ "loss": 0.1175,
344
+ "step": 3800
345
+ },
346
+ {
347
+ "epoch": 2.8974739970282317,
348
+ "grad_norm": 1.2929691076278687,
349
+ "learning_rate": 3.688657407407408e-05,
350
+ "loss": 0.131,
351
+ "step": 3900
352
+ },
353
+ {
354
+ "epoch": 2.971768202080238,
355
+ "grad_norm": 1.355788230895996,
356
+ "learning_rate": 3.6500771604938275e-05,
357
+ "loss": 0.1105,
358
+ "step": 4000
359
+ },
360
+ {
361
+ "epoch": 3.0,
362
+ "eval_administration_accuracy": 0.8723997028231798,
363
+ "eval_administration_f1": 0.8756871269056309,
364
+ "eval_corruption_accuracy": 0.9494799405646359,
365
+ "eval_corruption_f1": 0.9479134867161034,
366
+ "eval_democracy_accuracy": 0.9344353640416048,
367
+ "eval_democracy_f1": 0.9322999557168562,
368
+ "eval_development_accuracy": 0.8673848439821694,
369
+ "eval_development_f1": 0.8644377347103656,
370
+ "eval_economy_accuracy": 0.913261515601783,
371
+ "eval_economy_f1": 0.9130435047866681,
372
+ "eval_education_accuracy": 0.9546805349182764,
373
+ "eval_education_f1": 0.9543166975188848,
374
+ "eval_environment_accuracy": 0.9723254086181278,
375
+ "eval_environment_f1": 0.972495517419135,
376
+ "eval_leadership_accuracy": 0.8346953937592868,
377
+ "eval_leadership_f1": 0.8337596310104887,
378
+ "eval_loss": 0.20719276368618011,
379
+ "eval_overall_accuracy": 0.9245139920752847,
380
+ "eval_overall_f1": 0.9236679939166218,
381
+ "eval_race_accuracy": 0.9470653789004457,
382
+ "eval_race_f1": 0.9466285253518817,
383
+ "eval_religion_accuracy": 0.9470653789004457,
384
+ "eval_religion_f1": 0.9448810907660032,
385
+ "eval_runtime": 6.0863,
386
+ "eval_safety_accuracy": 0.9013744427934621,
387
+ "eval_safety_f1": 0.8985526560974422,
388
+ "eval_samples_per_second": 884.606,
389
+ "eval_stability_accuracy": 1.0,
390
+ "eval_stability_f1": 1.0,
391
+ "eval_steps_per_second": 55.37,
392
+ "step": 4038
393
+ },
394
+ {
395
+ "epoch": 3.0460624071322435,
396
+ "grad_norm": 1.7487143278121948,
397
+ "learning_rate": 3.611496913580247e-05,
398
+ "loss": 0.0922,
399
+ "step": 4100
400
+ },
401
+ {
402
+ "epoch": 3.1203566121842496,
403
+ "grad_norm": 3.5598196983337402,
404
+ "learning_rate": 3.572916666666667e-05,
405
+ "loss": 0.0834,
406
+ "step": 4200
407
+ },
408
+ {
409
+ "epoch": 3.1946508172362558,
410
+ "grad_norm": 1.6862306594848633,
411
+ "learning_rate": 3.534336419753087e-05,
412
+ "loss": 0.0829,
413
+ "step": 4300
414
+ },
415
+ {
416
+ "epoch": 3.2689450222882614,
417
+ "grad_norm": 1.7356230020523071,
418
+ "learning_rate": 3.495756172839506e-05,
419
+ "loss": 0.0793,
420
+ "step": 4400
421
+ },
422
+ {
423
+ "epoch": 3.3432392273402676,
424
+ "grad_norm": 1.658058524131775,
425
+ "learning_rate": 3.457175925925926e-05,
426
+ "loss": 0.0798,
427
+ "step": 4500
428
+ },
429
+ {
430
+ "epoch": 3.4175334323922733,
431
+ "grad_norm": 1.5869959592819214,
432
+ "learning_rate": 3.418595679012346e-05,
433
+ "loss": 0.0753,
434
+ "step": 4600
435
+ },
436
+ {
437
+ "epoch": 3.4918276374442794,
438
+ "grad_norm": 3.271198272705078,
439
+ "learning_rate": 3.380015432098765e-05,
440
+ "loss": 0.0853,
441
+ "step": 4700
442
+ },
443
+ {
444
+ "epoch": 3.566121842496285,
445
+ "grad_norm": 0.7939934730529785,
446
+ "learning_rate": 3.3414351851851856e-05,
447
+ "loss": 0.0798,
448
+ "step": 4800
449
+ },
450
+ {
451
+ "epoch": 3.640416047548291,
452
+ "grad_norm": 0.9978725910186768,
453
+ "learning_rate": 3.302854938271605e-05,
454
+ "loss": 0.0812,
455
+ "step": 4900
456
+ },
457
+ {
458
+ "epoch": 3.7147102526002973,
459
+ "grad_norm": 2.130408763885498,
460
+ "learning_rate": 3.2642746913580244e-05,
461
+ "loss": 0.0829,
462
+ "step": 5000
463
+ },
464
+ {
465
+ "epoch": 3.789004457652303,
466
+ "grad_norm": 0.9364782571792603,
467
+ "learning_rate": 3.225694444444445e-05,
468
+ "loss": 0.0826,
469
+ "step": 5100
470
+ },
471
+ {
472
+ "epoch": 3.863298662704309,
473
+ "grad_norm": 1.2428539991378784,
474
+ "learning_rate": 3.1871141975308646e-05,
475
+ "loss": 0.0842,
476
+ "step": 5200
477
+ },
478
+ {
479
+ "epoch": 3.9375928677563152,
480
+ "grad_norm": 2.34770131111145,
481
+ "learning_rate": 3.148533950617284e-05,
482
+ "loss": 0.086,
483
+ "step": 5300
484
+ },
485
+ {
486
+ "epoch": 4.0,
487
+ "eval_administration_accuracy": 0.8798291233283804,
488
+ "eval_administration_f1": 0.8794945259254936,
489
+ "eval_corruption_accuracy": 0.950780089153046,
490
+ "eval_corruption_f1": 0.9479334429046444,
491
+ "eval_democracy_accuracy": 0.937407132243685,
492
+ "eval_democracy_f1": 0.9365589706445473,
493
+ "eval_development_accuracy": 0.8764858841010401,
494
+ "eval_development_f1": 0.8698032455853882,
495
+ "eval_economy_accuracy": 0.9154903417533432,
496
+ "eval_economy_f1": 0.9143622167643529,
497
+ "eval_education_accuracy": 0.9583952451708767,
498
+ "eval_education_f1": 0.9569735073455015,
499
+ "eval_environment_accuracy": 0.9738112927191679,
500
+ "eval_environment_f1": 0.9731627143599846,
501
+ "eval_leadership_accuracy": 0.8411961367013373,
502
+ "eval_leadership_f1": 0.8406395571014088,
503
+ "eval_loss": 0.21541310846805573,
504
+ "eval_overall_accuracy": 0.9272381129271916,
505
+ "eval_overall_f1": 0.9257917541145674,
506
+ "eval_race_accuracy": 0.9476225854383358,
507
+ "eval_race_f1": 0.9465941365622704,
508
+ "eval_religion_accuracy": 0.9452080237741456,
509
+ "eval_religion_f1": 0.9437790471355962,
510
+ "eval_runtime": 6.0613,
511
+ "eval_safety_accuracy": 0.9006315007429421,
512
+ "eval_safety_f1": 0.9001996850456204,
513
+ "eval_samples_per_second": 888.26,
514
+ "eval_stability_accuracy": 1.0,
515
+ "eval_stability_f1": 1.0,
516
+ "eval_steps_per_second": 55.599,
517
+ "step": 5384
518
+ },
519
+ {
520
+ "epoch": 4.011887072808321,
521
+ "grad_norm": 1.4067624807357788,
522
+ "learning_rate": 3.109953703703704e-05,
523
+ "loss": 0.0752,
524
+ "step": 5400
525
+ },
526
+ {
527
+ "epoch": 4.086181277860327,
528
+ "grad_norm": 3.1831161975860596,
529
+ "learning_rate": 3.071373456790124e-05,
530
+ "loss": 0.0569,
531
+ "step": 5500
532
+ },
533
+ {
534
+ "epoch": 4.160475482912333,
535
+ "grad_norm": 0.7918180823326111,
536
+ "learning_rate": 3.0327932098765433e-05,
537
+ "loss": 0.055,
538
+ "step": 5600
539
+ },
540
+ {
541
+ "epoch": 4.234769687964339,
542
+ "grad_norm": 1.334572672843933,
543
+ "learning_rate": 2.9942129629629627e-05,
544
+ "loss": 0.0557,
545
+ "step": 5700
546
+ },
547
+ {
548
+ "epoch": 4.3090638930163445,
549
+ "grad_norm": 0.9439612030982971,
550
+ "learning_rate": 2.955632716049383e-05,
551
+ "loss": 0.0547,
552
+ "step": 5800
553
+ },
554
+ {
555
+ "epoch": 4.383358098068351,
556
+ "grad_norm": 1.7661114931106567,
557
+ "learning_rate": 2.9170524691358026e-05,
558
+ "loss": 0.0589,
559
+ "step": 5900
560
+ },
561
+ {
562
+ "epoch": 4.457652303120357,
563
+ "grad_norm": 1.5608975887298584,
564
+ "learning_rate": 2.878472222222222e-05,
565
+ "loss": 0.0604,
566
+ "step": 6000
567
+ },
568
+ {
569
+ "epoch": 4.531946508172362,
570
+ "grad_norm": 2.329026460647583,
571
+ "learning_rate": 2.839891975308642e-05,
572
+ "loss": 0.0556,
573
+ "step": 6100
574
+ },
575
+ {
576
+ "epoch": 4.606240713224368,
577
+ "grad_norm": 1.270369529724121,
578
+ "learning_rate": 2.801311728395062e-05,
579
+ "loss": 0.0559,
580
+ "step": 6200
581
+ },
582
+ {
583
+ "epoch": 4.680534918276375,
584
+ "grad_norm": 1.327057123184204,
585
+ "learning_rate": 2.7627314814814813e-05,
586
+ "loss": 0.0563,
587
+ "step": 6300
588
+ },
589
+ {
590
+ "epoch": 4.75482912332838,
591
+ "grad_norm": 1.4530967473983765,
592
+ "learning_rate": 2.7241512345679014e-05,
593
+ "loss": 0.0559,
594
+ "step": 6400
595
+ },
596
+ {
597
+ "epoch": 4.829123328380386,
598
+ "grad_norm": 1.368444800376892,
599
+ "learning_rate": 2.685570987654321e-05,
600
+ "loss": 0.0588,
601
+ "step": 6500
602
+ },
603
+ {
604
+ "epoch": 4.903417533432393,
605
+ "grad_norm": 0.904096782207489,
606
+ "learning_rate": 2.6469907407407406e-05,
607
+ "loss": 0.0558,
608
+ "step": 6600
609
+ },
610
+ {
611
+ "epoch": 4.977711738484398,
612
+ "grad_norm": 1.7562211751937866,
613
+ "learning_rate": 2.608410493827161e-05,
614
+ "loss": 0.0599,
615
+ "step": 6700
616
+ },
617
+ {
618
+ "epoch": 5.0,
619
+ "eval_administration_accuracy": 0.8802005943536404,
620
+ "eval_administration_f1": 0.8791286682112321,
621
+ "eval_corruption_accuracy": 0.9476225854383358,
622
+ "eval_corruption_f1": 0.9465024742317194,
623
+ "eval_democracy_accuracy": 0.9277488855869243,
624
+ "eval_democracy_f1": 0.9283028411081555,
625
+ "eval_development_accuracy": 0.8718424962852898,
626
+ "eval_development_f1": 0.8670635241370714,
627
+ "eval_economy_accuracy": 0.9186478454680534,
628
+ "eval_economy_f1": 0.9177402812083378,
629
+ "eval_education_accuracy": 0.9567236255572066,
630
+ "eval_education_f1": 0.9563906887924067,
631
+ "eval_environment_accuracy": 0.9717682020802377,
632
+ "eval_environment_f1": 0.9714559245580735,
633
+ "eval_leadership_accuracy": 0.8363670133729569,
634
+ "eval_leadership_f1": 0.8372830889258505,
635
+ "eval_loss": 0.23590339720249176,
636
+ "eval_overall_accuracy": 0.9249318969787024,
637
+ "eval_overall_f1": 0.9243598065937545,
638
+ "eval_race_accuracy": 0.9492942050520059,
639
+ "eval_race_f1": 0.9489608272042858,
640
+ "eval_religion_accuracy": 0.9414933135215453,
641
+ "eval_religion_f1": 0.9417116874083546,
642
+ "eval_runtime": 6.0832,
643
+ "eval_safety_accuracy": 0.8974739970282318,
644
+ "eval_safety_f1": 0.897777673339568,
645
+ "eval_samples_per_second": 885.064,
646
+ "eval_stability_accuracy": 1.0,
647
+ "eval_stability_f1": 1.0,
648
+ "eval_steps_per_second": 55.399,
649
+ "step": 6730
650
+ },
651
+ {
652
+ "epoch": 5.052005943536404,
653
+ "grad_norm": 1.0707114934921265,
654
+ "learning_rate": 2.5698302469135804e-05,
655
+ "loss": 0.0467,
656
+ "step": 6800
657
+ },
658
+ {
659
+ "epoch": 5.12630014858841,
660
+ "grad_norm": 1.0622857809066772,
661
+ "learning_rate": 2.53125e-05,
662
+ "loss": 0.0394,
663
+ "step": 6900
664
+ },
665
+ {
666
+ "epoch": 5.200594353640416,
667
+ "grad_norm": 1.1686086654663086,
668
+ "learning_rate": 2.49266975308642e-05,
669
+ "loss": 0.0416,
670
+ "step": 7000
671
+ },
672
+ {
673
+ "epoch": 5.274888558692422,
674
+ "grad_norm": 1.2564952373504639,
675
+ "learning_rate": 2.4540895061728397e-05,
676
+ "loss": 0.0427,
677
+ "step": 7100
678
+ },
679
+ {
680
+ "epoch": 5.349182763744428,
681
+ "grad_norm": 0.693988025188446,
682
+ "learning_rate": 2.415509259259259e-05,
683
+ "loss": 0.0433,
684
+ "step": 7200
685
+ },
686
+ {
687
+ "epoch": 5.423476968796434,
688
+ "grad_norm": 1.402803897857666,
689
+ "learning_rate": 2.3769290123456792e-05,
690
+ "loss": 0.0403,
691
+ "step": 7300
692
+ },
693
+ {
694
+ "epoch": 5.49777117384844,
695
+ "grad_norm": 0.35870230197906494,
696
+ "learning_rate": 2.3383487654320987e-05,
697
+ "loss": 0.0399,
698
+ "step": 7400
699
+ },
700
+ {
701
+ "epoch": 5.5720653789004455,
702
+ "grad_norm": 0.8109584450721741,
703
+ "learning_rate": 2.2997685185185188e-05,
704
+ "loss": 0.0404,
705
+ "step": 7500
706
+ },
707
+ {
708
+ "epoch": 5.646359583952452,
709
+ "grad_norm": 0.6922593116760254,
710
+ "learning_rate": 2.2611882716049385e-05,
711
+ "loss": 0.0415,
712
+ "step": 7600
713
+ },
714
+ {
715
+ "epoch": 5.720653789004458,
716
+ "grad_norm": 0.9838235378265381,
717
+ "learning_rate": 2.222608024691358e-05,
718
+ "loss": 0.0339,
719
+ "step": 7700
720
+ },
721
+ {
722
+ "epoch": 5.794947994056463,
723
+ "grad_norm": 0.4872437119483948,
724
+ "learning_rate": 2.184027777777778e-05,
725
+ "loss": 0.0417,
726
+ "step": 7800
727
+ },
728
+ {
729
+ "epoch": 5.86924219910847,
730
+ "grad_norm": 1.348196029663086,
731
+ "learning_rate": 2.1454475308641978e-05,
732
+ "loss": 0.0447,
733
+ "step": 7900
734
+ },
735
+ {
736
+ "epoch": 5.943536404160476,
737
+ "grad_norm": 1.4830248355865479,
738
+ "learning_rate": 2.1068672839506172e-05,
739
+ "loss": 0.0398,
740
+ "step": 8000
741
+ },
742
+ {
743
+ "epoch": 6.0,
744
+ "eval_administration_accuracy": 0.8893016344725111,
745
+ "eval_administration_f1": 0.8821384586084873,
746
+ "eval_corruption_accuracy": 0.9459509658246656,
747
+ "eval_corruption_f1": 0.9447882680066073,
748
+ "eval_democracy_accuracy": 0.9331352154531947,
749
+ "eval_democracy_f1": 0.9333919857361558,
750
+ "eval_development_accuracy": 0.8701708766716196,
751
+ "eval_development_f1": 0.8701330980665078,
752
+ "eval_economy_accuracy": 0.9171619613670133,
753
+ "eval_economy_f1": 0.9173996309698476,
754
+ "eval_education_accuracy": 0.9587667161961367,
755
+ "eval_education_f1": 0.958317820654174,
756
+ "eval_environment_accuracy": 0.9736255572065379,
757
+ "eval_environment_f1": 0.9732156963181571,
758
+ "eval_leadership_accuracy": 0.8417533432392273,
759
+ "eval_leadership_f1": 0.8369018544959057,
760
+ "eval_loss": 0.2435985654592514,
761
+ "eval_overall_accuracy": 0.9273774145616641,
762
+ "eval_overall_f1": 0.9260046966635366,
763
+ "eval_race_accuracy": 0.9474368499257058,
764
+ "eval_race_f1": 0.945994966362508,
765
+ "eval_religion_accuracy": 0.9474368499257058,
766
+ "eval_religion_f1": 0.9464647349324496,
767
+ "eval_runtime": 6.067,
768
+ "eval_safety_accuracy": 0.9037890044576523,
769
+ "eval_safety_f1": 0.9033098458116402,
770
+ "eval_samples_per_second": 887.42,
771
+ "eval_stability_accuracy": 1.0,
772
+ "eval_stability_f1": 1.0,
773
+ "eval_steps_per_second": 55.546,
774
+ "step": 8076
775
+ },
776
+ {
777
+ "epoch": 6.017830609212481,
778
+ "grad_norm": 1.2368193864822388,
779
+ "learning_rate": 2.0682870370370373e-05,
780
+ "loss": 0.0415,
781
+ "step": 8100
782
+ },
783
+ {
784
+ "epoch": 6.092124814264487,
785
+ "grad_norm": 0.41253241896629333,
786
+ "learning_rate": 2.0297067901234568e-05,
787
+ "loss": 0.0264,
788
+ "step": 8200
789
+ },
790
+ {
791
+ "epoch": 6.166419019316494,
792
+ "grad_norm": 1.4119162559509277,
793
+ "learning_rate": 1.9911265432098765e-05,
794
+ "loss": 0.029,
795
+ "step": 8300
796
+ },
797
+ {
798
+ "epoch": 6.240713224368499,
799
+ "grad_norm": 0.38587984442710876,
800
+ "learning_rate": 1.9525462962962966e-05,
801
+ "loss": 0.0293,
802
+ "step": 8400
803
+ },
804
+ {
805
+ "epoch": 6.315007429420505,
806
+ "grad_norm": 1.214526653289795,
807
+ "learning_rate": 1.913966049382716e-05,
808
+ "loss": 0.0286,
809
+ "step": 8500
810
+ },
811
+ {
812
+ "epoch": 6.3893016344725115,
813
+ "grad_norm": 2.2572736740112305,
814
+ "learning_rate": 1.8753858024691358e-05,
815
+ "loss": 0.0309,
816
+ "step": 8600
817
+ },
818
+ {
819
+ "epoch": 6.463595839524517,
820
+ "grad_norm": 0.7722110152244568,
821
+ "learning_rate": 1.836805555555556e-05,
822
+ "loss": 0.0316,
823
+ "step": 8700
824
+ },
825
+ {
826
+ "epoch": 6.537890044576523,
827
+ "grad_norm": 1.0314061641693115,
828
+ "learning_rate": 1.7982253086419753e-05,
829
+ "loss": 0.0292,
830
+ "step": 8800
831
+ },
832
+ {
833
+ "epoch": 6.612184249628529,
834
+ "grad_norm": 0.6228373050689697,
835
+ "learning_rate": 1.759645061728395e-05,
836
+ "loss": 0.0287,
837
+ "step": 8900
838
+ },
839
+ {
840
+ "epoch": 6.686478454680535,
841
+ "grad_norm": 0.3270525336265564,
842
+ "learning_rate": 1.721064814814815e-05,
843
+ "loss": 0.0307,
844
+ "step": 9000
845
+ },
846
+ {
847
+ "epoch": 6.760772659732541,
848
+ "grad_norm": 1.1799579858779907,
849
+ "learning_rate": 1.6824845679012346e-05,
850
+ "loss": 0.0277,
851
+ "step": 9100
852
+ },
853
+ {
854
+ "epoch": 6.8350668647845465,
855
+ "grad_norm": 3.962522029876709,
856
+ "learning_rate": 1.6439043209876544e-05,
857
+ "loss": 0.029,
858
+ "step": 9200
859
+ },
860
+ {
861
+ "epoch": 6.909361069836553,
862
+ "grad_norm": 0.8366211652755737,
863
+ "learning_rate": 1.605324074074074e-05,
864
+ "loss": 0.03,
865
+ "step": 9300
866
+ },
867
+ {
868
+ "epoch": 6.983655274888559,
869
+ "grad_norm": 1.2808723449707031,
870
+ "learning_rate": 1.566743827160494e-05,
871
+ "loss": 0.0284,
872
+ "step": 9400
873
+ },
874
+ {
875
+ "epoch": 7.0,
876
+ "eval_administration_accuracy": 0.8759286775631501,
877
+ "eval_administration_f1": 0.8768026075421302,
878
+ "eval_corruption_accuracy": 0.9476225854383358,
879
+ "eval_corruption_f1": 0.9471725413953416,
880
+ "eval_democracy_accuracy": 0.9351783060921248,
881
+ "eval_democracy_f1": 0.9350529993842286,
882
+ "eval_development_accuracy": 0.8688707280832095,
883
+ "eval_development_f1": 0.8719178288623293,
884
+ "eval_economy_accuracy": 0.9188335809806835,
885
+ "eval_economy_f1": 0.9195020015774136,
886
+ "eval_education_accuracy": 0.9546805349182764,
887
+ "eval_education_f1": 0.9557389094775754,
888
+ "eval_environment_accuracy": 0.9732540861812778,
889
+ "eval_environment_f1": 0.9731958552682995,
890
+ "eval_leadership_accuracy": 0.8395245170876672,
891
+ "eval_leadership_f1": 0.840487098777998,
892
+ "eval_loss": 0.25289186835289,
893
+ "eval_overall_accuracy": 0.9253652798415057,
894
+ "eval_overall_f1": 0.9258748564697528,
895
+ "eval_race_accuracy": 0.9491084695393759,
896
+ "eval_race_f1": 0.9488621247872209,
897
+ "eval_religion_accuracy": 0.9459509658246656,
898
+ "eval_religion_f1": 0.945886068594284,
899
+ "eval_runtime": 6.0881,
900
+ "eval_safety_accuracy": 0.8954309063893017,
901
+ "eval_safety_f1": 0.8958802419702108,
902
+ "eval_samples_per_second": 884.354,
903
+ "eval_stability_accuracy": 1.0,
904
+ "eval_stability_f1": 1.0,
905
+ "eval_steps_per_second": 55.354,
906
+ "step": 9422
907
+ },
908
+ {
909
+ "epoch": 7.057949479940564,
910
+ "grad_norm": 1.4738227128982544,
911
+ "learning_rate": 1.5281635802469136e-05,
912
+ "loss": 0.0245,
913
+ "step": 9500
914
+ },
915
+ {
916
+ "epoch": 7.132243684992571,
917
+ "grad_norm": 1.0899150371551514,
918
+ "learning_rate": 1.4895833333333334e-05,
919
+ "loss": 0.022,
920
+ "step": 9600
921
+ },
922
+ {
923
+ "epoch": 7.206537890044577,
924
+ "grad_norm": 0.9878177642822266,
925
+ "learning_rate": 1.4510030864197532e-05,
926
+ "loss": 0.0218,
927
+ "step": 9700
928
+ },
929
+ {
930
+ "epoch": 7.280832095096582,
931
+ "grad_norm": 0.37595194578170776,
932
+ "learning_rate": 1.4124228395061728e-05,
933
+ "loss": 0.0232,
934
+ "step": 9800
935
+ },
936
+ {
937
+ "epoch": 7.355126300148588,
938
+ "grad_norm": 0.7727621793746948,
939
+ "learning_rate": 1.3738425925925927e-05,
940
+ "loss": 0.0216,
941
+ "step": 9900
942
+ },
943
+ {
944
+ "epoch": 7.429420505200595,
945
+ "grad_norm": 0.3330998718738556,
946
+ "learning_rate": 1.3352623456790126e-05,
947
+ "loss": 0.0222,
948
+ "step": 10000
949
+ },
950
+ {
951
+ "epoch": 7.5037147102526,
952
+ "grad_norm": 0.5806456804275513,
953
+ "learning_rate": 1.296682098765432e-05,
954
+ "loss": 0.0219,
955
+ "step": 10100
956
+ },
957
+ {
958
+ "epoch": 7.578008915304606,
959
+ "grad_norm": 0.7578392028808594,
960
+ "learning_rate": 1.258101851851852e-05,
961
+ "loss": 0.0218,
962
+ "step": 10200
963
+ },
964
+ {
965
+ "epoch": 7.6523031203566125,
966
+ "grad_norm": 0.3120606541633606,
967
+ "learning_rate": 1.2195216049382717e-05,
968
+ "loss": 0.02,
969
+ "step": 10300
970
+ },
971
+ {
972
+ "epoch": 7.726597325408618,
973
+ "grad_norm": 0.637631893157959,
974
+ "learning_rate": 1.1809413580246915e-05,
975
+ "loss": 0.0196,
976
+ "step": 10400
977
+ },
978
+ {
979
+ "epoch": 7.800891530460624,
980
+ "grad_norm": 0.6013413071632385,
981
+ "learning_rate": 1.142361111111111e-05,
982
+ "loss": 0.0234,
983
+ "step": 10500
984
+ },
985
+ {
986
+ "epoch": 7.8751857355126305,
987
+ "grad_norm": 2.5481185913085938,
988
+ "learning_rate": 1.1037808641975308e-05,
989
+ "loss": 0.0218,
990
+ "step": 10600
991
+ },
992
+ {
993
+ "epoch": 7.949479940564636,
994
+ "grad_norm": 0.5179031491279602,
995
+ "learning_rate": 1.0652006172839508e-05,
996
+ "loss": 0.0208,
997
+ "step": 10700
998
+ },
999
+ {
1000
+ "epoch": 8.0,
1001
+ "eval_administration_accuracy": 0.8813150074294205,
1002
+ "eval_administration_f1": 0.8795800439008289,
1003
+ "eval_corruption_accuracy": 0.9455794947994056,
1004
+ "eval_corruption_f1": 0.9433562694219033,
1005
+ "eval_democracy_accuracy": 0.9366641901931649,
1006
+ "eval_democracy_f1": 0.9365011786875334,
1007
+ "eval_development_accuracy": 0.8694279346210996,
1008
+ "eval_development_f1": 0.8688370502444763,
1009
+ "eval_economy_accuracy": 0.9184621099554234,
1010
+ "eval_economy_f1": 0.918124164419619,
1011
+ "eval_education_accuracy": 0.9580237741456167,
1012
+ "eval_education_f1": 0.9574723795948901,
1013
+ "eval_environment_accuracy": 0.9738112927191679,
1014
+ "eval_environment_f1": 0.9736854631574718,
1015
+ "eval_leadership_accuracy": 0.8393387815750372,
1016
+ "eval_leadership_f1": 0.839741016757979,
1017
+ "eval_loss": 0.2596803605556488,
1018
+ "eval_overall_accuracy": 0.9265725606736006,
1019
+ "eval_overall_f1": 0.9260616678064877,
1020
+ "eval_race_accuracy": 0.9465081723625557,
1021
+ "eval_race_f1": 0.9461343616633416,
1022
+ "eval_religion_accuracy": 0.9446508172362555,
1023
+ "eval_religion_f1": 0.9445423673916528,
1024
+ "eval_runtime": 6.0851,
1025
+ "eval_safety_accuracy": 0.9050891530460624,
1026
+ "eval_safety_f1": 0.904765718438156,
1027
+ "eval_samples_per_second": 884.782,
1028
+ "eval_stability_accuracy": 1.0,
1029
+ "eval_stability_f1": 1.0,
1030
+ "eval_steps_per_second": 55.381,
1031
+ "step": 10768
1032
+ },
1033
+ {
1034
+ "epoch": 8.023774145616642,
1035
+ "grad_norm": 0.6615686416625977,
1036
+ "learning_rate": 1.0266203703703704e-05,
1037
+ "loss": 0.0205,
1038
+ "step": 10800
1039
+ },
1040
+ {
1041
+ "epoch": 8.098068350668647,
1042
+ "grad_norm": 2.661421060562134,
1043
+ "learning_rate": 9.880401234567901e-06,
1044
+ "loss": 0.0185,
1045
+ "step": 10900
1046
+ },
1047
+ {
1048
+ "epoch": 8.172362555720653,
1049
+ "grad_norm": 0.6415339112281799,
1050
+ "learning_rate": 9.494598765432099e-06,
1051
+ "loss": 0.0164,
1052
+ "step": 11000
1053
+ },
1054
+ {
1055
+ "epoch": 8.246656760772659,
1056
+ "grad_norm": 0.761589765548706,
1057
+ "learning_rate": 9.108796296296296e-06,
1058
+ "loss": 0.017,
1059
+ "step": 11100
1060
+ },
1061
+ {
1062
+ "epoch": 8.320950965824666,
1063
+ "grad_norm": 0.6105137467384338,
1064
+ "learning_rate": 8.722993827160494e-06,
1065
+ "loss": 0.0174,
1066
+ "step": 11200
1067
+ },
1068
+ {
1069
+ "epoch": 8.395245170876672,
1070
+ "grad_norm": 0.6517733931541443,
1071
+ "learning_rate": 8.337191358024692e-06,
1072
+ "loss": 0.0175,
1073
+ "step": 11300
1074
+ },
1075
+ {
1076
+ "epoch": 8.469539375928678,
1077
+ "grad_norm": 0.6607377529144287,
1078
+ "learning_rate": 7.95138888888889e-06,
1079
+ "loss": 0.0172,
1080
+ "step": 11400
1081
+ },
1082
+ {
1083
+ "epoch": 8.543833580980683,
1084
+ "grad_norm": 0.72126305103302,
1085
+ "learning_rate": 7.565586419753088e-06,
1086
+ "loss": 0.0155,
1087
+ "step": 11500
1088
+ },
1089
+ {
1090
+ "epoch": 8.618127786032689,
1091
+ "grad_norm": 0.4402889609336853,
1092
+ "learning_rate": 7.1797839506172844e-06,
1093
+ "loss": 0.0173,
1094
+ "step": 11600
1095
+ },
1096
+ {
1097
+ "epoch": 8.692421991084695,
1098
+ "grad_norm": 0.08074043691158295,
1099
+ "learning_rate": 6.793981481481482e-06,
1100
+ "loss": 0.014,
1101
+ "step": 11700
1102
+ },
1103
+ {
1104
+ "epoch": 8.766716196136702,
1105
+ "grad_norm": 0.1940842866897583,
1106
+ "learning_rate": 6.408179012345679e-06,
1107
+ "loss": 0.0145,
1108
+ "step": 11800
1109
+ },
1110
+ {
1111
+ "epoch": 8.841010401188708,
1112
+ "grad_norm": 0.4365254044532776,
1113
+ "learning_rate": 6.022376543209876e-06,
1114
+ "loss": 0.0152,
1115
+ "step": 11900
1116
+ },
1117
+ {
1118
+ "epoch": 8.915304606240714,
1119
+ "grad_norm": 0.4619617462158203,
1120
+ "learning_rate": 5.636574074074075e-06,
1121
+ "loss": 0.0166,
1122
+ "step": 12000
1123
+ },
1124
+ {
1125
+ "epoch": 8.98959881129272,
1126
+ "grad_norm": 0.22596906125545502,
1127
+ "learning_rate": 5.250771604938272e-06,
1128
+ "loss": 0.0156,
1129
+ "step": 12100
1130
+ },
1131
+ {
1132
+ "epoch": 9.0,
1133
+ "eval_administration_accuracy": 0.888001485884101,
1134
+ "eval_administration_f1": 0.8840369764257654,
1135
+ "eval_corruption_accuracy": 0.9474368499257058,
1136
+ "eval_corruption_f1": 0.9462275023250412,
1137
+ "eval_democracy_accuracy": 0.937035661218425,
1138
+ "eval_democracy_f1": 0.9349797337427957,
1139
+ "eval_development_accuracy": 0.8777860326894502,
1140
+ "eval_development_f1": 0.8758984879855292,
1141
+ "eval_economy_accuracy": 0.9188335809806835,
1142
+ "eval_economy_f1": 0.9183485908645431,
1143
+ "eval_education_accuracy": 0.9613670133729569,
1144
+ "eval_education_f1": 0.9605424054563209,
1145
+ "eval_environment_accuracy": 0.9745542347696879,
1146
+ "eval_environment_f1": 0.97406579666089,
1147
+ "eval_leadership_accuracy": 0.8447251114413076,
1148
+ "eval_leadership_f1": 0.8447097008519563,
1149
+ "eval_loss": 0.26206690073013306,
1150
+ "eval_overall_accuracy": 0.9286001733531449,
1151
+ "eval_overall_f1": 0.9276388104924375,
1152
+ "eval_race_accuracy": 0.9476225854383358,
1153
+ "eval_race_f1": 0.947246539022533,
1154
+ "eval_religion_accuracy": 0.9446508172362555,
1155
+ "eval_religion_f1": 0.9438393498317825,
1156
+ "eval_runtime": 6.0648,
1157
+ "eval_safety_accuracy": 0.9011887072808321,
1158
+ "eval_safety_f1": 0.9017706427420923,
1159
+ "eval_samples_per_second": 887.753,
1160
+ "eval_stability_accuracy": 1.0,
1161
+ "eval_stability_f1": 1.0,
1162
+ "eval_steps_per_second": 55.567,
1163
+ "step": 12114
1164
+ },
1165
+ {
1166
+ "epoch": 9.063893016344725,
1167
+ "grad_norm": 0.4198947846889496,
1168
+ "learning_rate": 4.864969135802469e-06,
1169
+ "loss": 0.0129,
1170
+ "step": 12200
1171
+ },
1172
+ {
1173
+ "epoch": 9.13818722139673,
1174
+ "grad_norm": 0.26229700446128845,
1175
+ "learning_rate": 4.479166666666667e-06,
1176
+ "loss": 0.013,
1177
+ "step": 12300
1178
+ },
1179
+ {
1180
+ "epoch": 9.212481426448736,
1181
+ "grad_norm": 1.2866747379302979,
1182
+ "learning_rate": 4.0933641975308644e-06,
1183
+ "loss": 0.014,
1184
+ "step": 12400
1185
+ },
1186
+ {
1187
+ "epoch": 9.286775631500744,
1188
+ "grad_norm": 0.4733564257621765,
1189
+ "learning_rate": 3.707561728395062e-06,
1190
+ "loss": 0.0116,
1191
+ "step": 12500
1192
+ },
1193
+ {
1194
+ "epoch": 9.36106983655275,
1195
+ "grad_norm": 0.26384237408638,
1196
+ "learning_rate": 3.3217592592592592e-06,
1197
+ "loss": 0.0141,
1198
+ "step": 12600
1199
+ },
1200
+ {
1201
+ "epoch": 9.435364041604755,
1202
+ "grad_norm": 0.34422609210014343,
1203
+ "learning_rate": 2.9359567901234573e-06,
1204
+ "loss": 0.0126,
1205
+ "step": 12700
1206
+ },
1207
+ {
1208
+ "epoch": 9.50965824665676,
1209
+ "grad_norm": 0.3310413360595703,
1210
+ "learning_rate": 2.5501543209876544e-06,
1211
+ "loss": 0.0137,
1212
+ "step": 12800
1213
+ },
1214
+ {
1215
+ "epoch": 9.583952451708766,
1216
+ "grad_norm": 0.6999865174293518,
1217
+ "learning_rate": 2.1643518518518516e-06,
1218
+ "loss": 0.0127,
1219
+ "step": 12900
1220
+ },
1221
+ {
1222
+ "epoch": 9.658246656760772,
1223
+ "grad_norm": 0.10555438697338104,
1224
+ "learning_rate": 1.7785493827160492e-06,
1225
+ "loss": 0.0126,
1226
+ "step": 13000
1227
+ },
1228
+ {
1229
+ "epoch": 9.732540861812778,
1230
+ "grad_norm": 0.3336666524410248,
1231
+ "learning_rate": 1.392746913580247e-06,
1232
+ "loss": 0.0132,
1233
+ "step": 13100
1234
+ },
1235
+ {
1236
+ "epoch": 9.806835066864785,
1237
+ "grad_norm": 0.6781222224235535,
1238
+ "learning_rate": 1.0069444444444447e-06,
1239
+ "loss": 0.013,
1240
+ "step": 13200
1241
+ },
1242
+ {
1243
+ "epoch": 9.881129271916791,
1244
+ "grad_norm": 0.07187670469284058,
1245
+ "learning_rate": 6.211419753086421e-07,
1246
+ "loss": 0.0127,
1247
+ "step": 13300
1248
+ },
1249
+ {
1250
+ "epoch": 9.955423476968797,
1251
+ "grad_norm": 0.46965786814689636,
1252
+ "learning_rate": 2.3533950617283953e-07,
1253
+ "loss": 0.0143,
1254
+ "step": 13400
1255
+ },
1256
+ {
1257
+ "epoch": 10.0,
1258
+ "eval_administration_accuracy": 0.8837295690936107,
1259
+ "eval_administration_f1": 0.8818033155613924,
1260
+ "eval_corruption_accuracy": 0.9481797919762258,
1261
+ "eval_corruption_f1": 0.9468526341391379,
1262
+ "eval_democracy_accuracy": 0.937221396731055,
1263
+ "eval_democracy_f1": 0.9362318536991674,
1264
+ "eval_development_accuracy": 0.8763001485884101,
1265
+ "eval_development_f1": 0.8755224543096787,
1266
+ "eval_economy_accuracy": 0.9192050520059435,
1267
+ "eval_economy_f1": 0.9186284798866873,
1268
+ "eval_education_accuracy": 0.9598811292719168,
1269
+ "eval_education_f1": 0.9592558180372266,
1270
+ "eval_environment_accuracy": 0.9741827637444279,
1271
+ "eval_environment_f1": 0.9737776195503588,
1272
+ "eval_leadership_accuracy": 0.8423105497771174,
1273
+ "eval_leadership_f1": 0.8434399815395168,
1274
+ "eval_loss": 0.26387640833854675,
1275
+ "eval_overall_accuracy": 0.9280894006934126,
1276
+ "eval_overall_f1": 0.9276201716738023,
1277
+ "eval_race_accuracy": 0.9479940564635958,
1278
+ "eval_race_f1": 0.9474766439680601,
1279
+ "eval_religion_accuracy": 0.9448365527488856,
1280
+ "eval_religion_f1": 0.9445715277991047,
1281
+ "eval_runtime": 6.0635,
1282
+ "eval_safety_accuracy": 0.9032317979197623,
1283
+ "eval_safety_f1": 0.9038817315952975,
1284
+ "eval_samples_per_second": 887.937,
1285
+ "eval_stability_accuracy": 1.0,
1286
+ "eval_stability_f1": 1.0,
1287
+ "eval_steps_per_second": 55.579,
1288
+ "step": 13460
1289
+ }
1290
+ ],
1291
+ "logging_steps": 100,
1292
+ "max_steps": 13460,
1293
+ "num_input_tokens_seen": 0,
1294
+ "num_train_epochs": 10,
1295
+ "save_steps": 500,
1296
+ "stateful_callbacks": {
1297
+ "EarlyStoppingCallback": {
1298
+ "args": {
1299
+ "early_stopping_patience": 2,
1300
+ "early_stopping_threshold": 0.0
1301
+ },
1302
+ "attributes": {
1303
+ "early_stopping_patience_counter": 1
1304
+ }
1305
+ },
1306
+ "TrainerControl": {
1307
+ "args": {
1308
+ "should_epoch_stop": false,
1309
+ "should_evaluate": false,
1310
+ "should_log": false,
1311
+ "should_save": true,
1312
+ "should_training_stop": true
1313
+ },
1314
+ "attributes": {}
1315
+ }
1316
+ },
1317
+ "total_flos": 5.642720195479142e+16,
1318
+ "train_batch_size": 16,
1319
+ "trial_name": null,
1320
+ "trial_params": {
1321
+ "gradient_accumulation_steps": 1,
1322
+ "learning_rate": 5e-05,
1323
+ "num_train_epochs": 10
1324
+ }
1325
+ }
base/run-0/checkpoint-13460/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f417d3b26827f3c2091fa30ae27025fb9a89bba46b9e16e8ce16798492c41b
3
+ size 5841
base/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f417d3b26827f3c2091fa30ae27025fb9a89bba46b9e16e8ce16798492c41b
3
+ size 5841
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1cb16e6d293cfcf0160e3255988e8f860aa54c37c35874fb33b2edc9aea21ff
3
  size 438100144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d31366fb92da1c2f70020227f8b0eb1357968071e3bcf2657a21622d292634
3
  size 438100144
run-0/checkpoint-1011/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
run-0/checkpoint-1011/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69d961b7c9c66dc43830278b1687db5149230d91ba6d80df5b8e0ae86624d9ec
3
+ size 438100144
run-0/checkpoint-1011/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83865090135d1e0a596e168764bdcfd0a3c5ce3c9043e89292f9bc82690a070a
3
+ size 876324619
run-0/checkpoint-1011/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2336cb0162aa649da80d774dd375fe462ee046c5de7100aea50899cab0033550
3
+ size 14645
run-0/checkpoint-1011/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ced76e4b26ff9c7e28969ae263d508804558ca5fb49575ed7c93389ef7e878
3
+ size 1383
run-0/checkpoint-1011/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b4f705378abe5bddc82dc3bbe7d53a58c7c8f3b0d2b6de843f4af480b4890f
3
+ size 1465
run-0/checkpoint-1011/trainer_state.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1011,
3
+ "best_metric": 0.9175833864600228,
4
+ "best_model_checkpoint": "./results/run-0/checkpoint-1011",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1011,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2969561989606533,
14
+ "grad_norm": 1.1367077827453613,
15
+ "learning_rate": 9.900000000000002e-06,
16
+ "loss": 0.4115,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.5939123979213066,
21
+ "grad_norm": 1.127586007118225,
22
+ "learning_rate": 1.9900000000000003e-05,
23
+ "loss": 0.3507,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.89086859688196,
28
+ "grad_norm": 1.15813148021698,
29
+ "learning_rate": 2.9900000000000002e-05,
30
+ "loss": 0.3184,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_administration_accuracy": 0.8622098421541319,
36
+ "eval_administration_f1": 0.8377043137649639,
37
+ "eval_corruption_accuracy": 0.9379758588672238,
38
+ "eval_corruption_f1": 0.9310385616534254,
39
+ "eval_democracy_accuracy": 0.9238625812441968,
40
+ "eval_democracy_f1": 0.907103895237725,
41
+ "eval_development_accuracy": 0.8462395543175487,
42
+ "eval_development_f1": 0.8229509269515034,
43
+ "eval_economy_accuracy": 0.887836583101207,
44
+ "eval_economy_f1": 0.8696896434372372,
45
+ "eval_education_accuracy": 0.9478180129990715,
46
+ "eval_education_f1": 0.9425187888090035,
47
+ "eval_environment_accuracy": 0.9569173630454968,
48
+ "eval_environment_f1": 0.9527187476104269,
49
+ "eval_instability_accuracy": 0.9093779015784587,
50
+ "eval_instability_f1": 0.8913980249894993,
51
+ "eval_leadership_accuracy": 0.7756731662024141,
52
+ "eval_leadership_f1": 0.7455137881899888,
53
+ "eval_loss": 0.29490533471107483,
54
+ "eval_overall_accuracy": 0.898963169297431,
55
+ "eval_overall_f1": 0.884602913882178,
56
+ "eval_race_accuracy": 0.9387186629526463,
57
+ "eval_race_f1": 0.933273316670087,
58
+ "eval_religion_accuracy": 0.9273909006499536,
59
+ "eval_religion_f1": 0.9189375034944831,
60
+ "eval_runtime": 6.0718,
61
+ "eval_safety_accuracy": 0.8735376044568245,
62
+ "eval_safety_f1": 0.862387455777795,
63
+ "eval_samples_per_second": 886.885,
64
+ "eval_steps_per_second": 55.502,
65
+ "step": 337
66
+ },
67
+ {
68
+ "epoch": 1.1870824053452116,
69
+ "grad_norm": 1.293520212173462,
70
+ "learning_rate": 3.99e-05,
71
+ "loss": 0.2893,
72
+ "step": 400
73
+ },
74
+ {
75
+ "epoch": 1.4840386043058649,
76
+ "grad_norm": 0.9718915224075317,
77
+ "learning_rate": 4.99e-05,
78
+ "loss": 0.2673,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 1.7809948032665182,
83
+ "grad_norm": 0.9016148447990417,
84
+ "learning_rate": 4.5822784810126584e-05,
85
+ "loss": 0.2499,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 2.0,
90
+ "eval_administration_accuracy": 0.8817084493964716,
91
+ "eval_administration_f1": 0.8581364741485047,
92
+ "eval_corruption_accuracy": 0.9504178272980501,
93
+ "eval_corruption_f1": 0.9481110730839337,
94
+ "eval_democracy_accuracy": 0.9364902506963788,
95
+ "eval_democracy_f1": 0.9256699878558502,
96
+ "eval_development_accuracy": 0.8683379758588672,
97
+ "eval_development_f1": 0.8524920643431206,
98
+ "eval_economy_accuracy": 0.8989786443825442,
99
+ "eval_economy_f1": 0.8968225936331777,
100
+ "eval_education_accuracy": 0.9569173630454968,
101
+ "eval_education_f1": 0.9525043175986375,
102
+ "eval_environment_accuracy": 0.9691736304549675,
103
+ "eval_environment_f1": 0.9696177677301354,
104
+ "eval_instability_accuracy": 0.9223769730733519,
105
+ "eval_instability_f1": 0.9182716939857773,
106
+ "eval_leadership_accuracy": 0.8053853296193129,
107
+ "eval_leadership_f1": 0.7803578231203789,
108
+ "eval_loss": 0.24944739043712616,
109
+ "eval_overall_accuracy": 0.9152274837511607,
110
+ "eval_overall_f1": 0.9074667582350125,
111
+ "eval_race_accuracy": 0.9513463324048282,
112
+ "eval_race_f1": 0.9475861405850222,
113
+ "eval_religion_accuracy": 0.9457753017641597,
114
+ "eval_religion_f1": 0.9461902504041225,
115
+ "eval_runtime": 6.0882,
116
+ "eval_safety_accuracy": 0.8958217270194986,
117
+ "eval_safety_f1": 0.8938409123314907,
118
+ "eval_samples_per_second": 884.502,
119
+ "eval_steps_per_second": 55.353,
120
+ "step": 674
121
+ },
122
+ {
123
+ "epoch": 2.0772086117297697,
124
+ "grad_norm": 0.7925682663917542,
125
+ "learning_rate": 4.1603375527426166e-05,
126
+ "loss": 0.2355,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 2.374164810690423,
131
+ "grad_norm": 1.0376709699630737,
132
+ "learning_rate": 3.7383966244725735e-05,
133
+ "loss": 0.1931,
134
+ "step": 800
135
+ },
136
+ {
137
+ "epoch": 2.6711210096510767,
138
+ "grad_norm": 1.358044981956482,
139
+ "learning_rate": 3.316455696202532e-05,
140
+ "loss": 0.1818,
141
+ "step": 900
142
+ },
143
+ {
144
+ "epoch": 2.9680772086117297,
145
+ "grad_norm": 0.9971312284469604,
146
+ "learning_rate": 2.8945147679324896e-05,
147
+ "loss": 0.1774,
148
+ "step": 1000
149
+ },
150
+ {
151
+ "epoch": 3.0,
152
+ "eval_administration_accuracy": 0.8861652739090065,
153
+ "eval_administration_f1": 0.8738444277192003,
154
+ "eval_corruption_accuracy": 0.9513463324048282,
155
+ "eval_corruption_f1": 0.9491062362635426,
156
+ "eval_democracy_accuracy": 0.9420612813370474,
157
+ "eval_democracy_f1": 0.9372178375746801,
158
+ "eval_development_accuracy": 0.8729805013927576,
159
+ "eval_development_f1": 0.8645602610410228,
160
+ "eval_economy_accuracy": 0.9065923862581244,
161
+ "eval_economy_f1": 0.9052374047877222,
162
+ "eval_education_accuracy": 0.962116991643454,
163
+ "eval_education_f1": 0.9605337374058789,
164
+ "eval_environment_accuracy": 0.9673166202414113,
165
+ "eval_environment_f1": 0.9674105228824886,
166
+ "eval_instability_accuracy": 0.9257195914577531,
167
+ "eval_instability_f1": 0.9245614298805196,
168
+ "eval_leadership_accuracy": 0.8341689879294336,
169
+ "eval_leadership_f1": 0.8290965652136555,
170
+ "eval_loss": 0.23423036932945251,
171
+ "eval_overall_accuracy": 0.9208604147322809,
172
+ "eval_overall_f1": 0.9175833864600228,
173
+ "eval_race_accuracy": 0.9502321262766945,
174
+ "eval_race_f1": 0.9495917574024151,
175
+ "eval_religion_accuracy": 0.9455896007428041,
176
+ "eval_religion_f1": 0.9459064433064331,
177
+ "eval_runtime": 6.0882,
178
+ "eval_safety_accuracy": 0.9060352831940576,
179
+ "eval_safety_f1": 0.903934014042716,
180
+ "eval_samples_per_second": 884.503,
181
+ "eval_steps_per_second": 55.353,
182
+ "step": 1011
183
+ }
184
+ ],
185
+ "logging_steps": 100,
186
+ "max_steps": 1685,
187
+ "num_input_tokens_seen": 0,
188
+ "num_train_epochs": 5,
189
+ "save_steps": 500,
190
+ "stateful_callbacks": {
191
+ "EarlyStoppingCallback": {
192
+ "args": {
193
+ "early_stopping_patience": 2,
194
+ "early_stopping_threshold": 0.0
195
+ },
196
+ "attributes": {
197
+ "early_stopping_patience_counter": 0
198
+ }
199
+ },
200
+ "TrainerControl": {
201
+ "args": {
202
+ "should_epoch_stop": false,
203
+ "should_evaluate": false,
204
+ "should_log": false,
205
+ "should_save": true,
206
+ "should_training_stop": false
207
+ },
208
+ "attributes": {}
209
+ }
210
+ },
211
+ "total_flos": 1.683026917097472e+16,
212
+ "train_batch_size": 16,
213
+ "trial_name": null,
214
+ "trial_params": {
215
+ "gradient_accumulation_steps": 4,
216
+ "learning_rate": 5e-05,
217
+ "num_train_epochs": 5
218
+ }
219
+ }
run-0/checkpoint-1011/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2467658aede69590024c2472029c6010ad84b62a00af5819a2894048e9c36494
3
+ size 5905
run-0/checkpoint-1348/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
run-0/checkpoint-1348/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c069aea40b3b11e6ff0ffeec0bd06957831d2f30e502b444b4ed179574323110
3
+ size 438100144
run-0/checkpoint-1348/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe04eec1eea4f1e4ea614efe18d176c924fbec07e5a92ffcde6d67b9db40d09
3
+ size 876324619
run-0/checkpoint-1348/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b3e48c92222d3c4733090c0bda97c1bc5936e6e546f2d8ff55b5c89c8888a8
3
+ size 14645
run-0/checkpoint-1348/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb958c8c4611c8d2f0af86b177ec35a226bf63f6b0cc54e6d7e35dac9363c1c7
3
+ size 1383
run-0/checkpoint-1348/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80fb557f3d1d8bb3e724b15e454fd44b3814d365d80e2d6a13aace0eba46757e
3
+ size 1465
run-0/checkpoint-1348/trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1348,
3
+ "best_metric": 0.9203680470994868,
4
+ "best_model_checkpoint": "./results/run-0/checkpoint-1348",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1348,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2969561989606533,
14
+ "grad_norm": 1.1367077827453613,
15
+ "learning_rate": 9.900000000000002e-06,
16
+ "loss": 0.4115,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.5939123979213066,
21
+ "grad_norm": 1.127586007118225,
22
+ "learning_rate": 1.9900000000000003e-05,
23
+ "loss": 0.3507,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.89086859688196,
28
+ "grad_norm": 1.15813148021698,
29
+ "learning_rate": 2.9900000000000002e-05,
30
+ "loss": 0.3184,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_administration_accuracy": 0.8622098421541319,
36
+ "eval_administration_f1": 0.8377043137649639,
37
+ "eval_corruption_accuracy": 0.9379758588672238,
38
+ "eval_corruption_f1": 0.9310385616534254,
39
+ "eval_democracy_accuracy": 0.9238625812441968,
40
+ "eval_democracy_f1": 0.907103895237725,
41
+ "eval_development_accuracy": 0.8462395543175487,
42
+ "eval_development_f1": 0.8229509269515034,
43
+ "eval_economy_accuracy": 0.887836583101207,
44
+ "eval_economy_f1": 0.8696896434372372,
45
+ "eval_education_accuracy": 0.9478180129990715,
46
+ "eval_education_f1": 0.9425187888090035,
47
+ "eval_environment_accuracy": 0.9569173630454968,
48
+ "eval_environment_f1": 0.9527187476104269,
49
+ "eval_instability_accuracy": 0.9093779015784587,
50
+ "eval_instability_f1": 0.8913980249894993,
51
+ "eval_leadership_accuracy": 0.7756731662024141,
52
+ "eval_leadership_f1": 0.7455137881899888,
53
+ "eval_loss": 0.29490533471107483,
54
+ "eval_overall_accuracy": 0.898963169297431,
55
+ "eval_overall_f1": 0.884602913882178,
56
+ "eval_race_accuracy": 0.9387186629526463,
57
+ "eval_race_f1": 0.933273316670087,
58
+ "eval_religion_accuracy": 0.9273909006499536,
59
+ "eval_religion_f1": 0.9189375034944831,
60
+ "eval_runtime": 6.0718,
61
+ "eval_safety_accuracy": 0.8735376044568245,
62
+ "eval_safety_f1": 0.862387455777795,
63
+ "eval_samples_per_second": 886.885,
64
+ "eval_steps_per_second": 55.502,
65
+ "step": 337
66
+ },
67
+ {
68
+ "epoch": 1.1870824053452116,
69
+ "grad_norm": 1.293520212173462,
70
+ "learning_rate": 3.99e-05,
71
+ "loss": 0.2893,
72
+ "step": 400
73
+ },
74
+ {
75
+ "epoch": 1.4840386043058649,
76
+ "grad_norm": 0.9718915224075317,
77
+ "learning_rate": 4.99e-05,
78
+ "loss": 0.2673,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 1.7809948032665182,
83
+ "grad_norm": 0.9016148447990417,
84
+ "learning_rate": 4.5822784810126584e-05,
85
+ "loss": 0.2499,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 2.0,
90
+ "eval_administration_accuracy": 0.8817084493964716,
91
+ "eval_administration_f1": 0.8581364741485047,
92
+ "eval_corruption_accuracy": 0.9504178272980501,
93
+ "eval_corruption_f1": 0.9481110730839337,
94
+ "eval_democracy_accuracy": 0.9364902506963788,
95
+ "eval_democracy_f1": 0.9256699878558502,
96
+ "eval_development_accuracy": 0.8683379758588672,
97
+ "eval_development_f1": 0.8524920643431206,
98
+ "eval_economy_accuracy": 0.8989786443825442,
99
+ "eval_economy_f1": 0.8968225936331777,
100
+ "eval_education_accuracy": 0.9569173630454968,
101
+ "eval_education_f1": 0.9525043175986375,
102
+ "eval_environment_accuracy": 0.9691736304549675,
103
+ "eval_environment_f1": 0.9696177677301354,
104
+ "eval_instability_accuracy": 0.9223769730733519,
105
+ "eval_instability_f1": 0.9182716939857773,
106
+ "eval_leadership_accuracy": 0.8053853296193129,
107
+ "eval_leadership_f1": 0.7803578231203789,
108
+ "eval_loss": 0.24944739043712616,
109
+ "eval_overall_accuracy": 0.9152274837511607,
110
+ "eval_overall_f1": 0.9074667582350125,
111
+ "eval_race_accuracy": 0.9513463324048282,
112
+ "eval_race_f1": 0.9475861405850222,
113
+ "eval_religion_accuracy": 0.9457753017641597,
114
+ "eval_religion_f1": 0.9461902504041225,
115
+ "eval_runtime": 6.0882,
116
+ "eval_safety_accuracy": 0.8958217270194986,
117
+ "eval_safety_f1": 0.8938409123314907,
118
+ "eval_samples_per_second": 884.502,
119
+ "eval_steps_per_second": 55.353,
120
+ "step": 674
121
+ },
122
+ {
123
+ "epoch": 2.0772086117297697,
124
+ "grad_norm": 0.7925682663917542,
125
+ "learning_rate": 4.1603375527426166e-05,
126
+ "loss": 0.2355,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 2.374164810690423,
131
+ "grad_norm": 1.0376709699630737,
132
+ "learning_rate": 3.7383966244725735e-05,
133
+ "loss": 0.1931,
134
+ "step": 800
135
+ },
136
+ {
137
+ "epoch": 2.6711210096510767,
138
+ "grad_norm": 1.358044981956482,
139
+ "learning_rate": 3.316455696202532e-05,
140
+ "loss": 0.1818,
141
+ "step": 900
142
+ },
143
+ {
144
+ "epoch": 2.9680772086117297,
145
+ "grad_norm": 0.9971312284469604,
146
+ "learning_rate": 2.8945147679324896e-05,
147
+ "loss": 0.1774,
148
+ "step": 1000
149
+ },
150
+ {
151
+ "epoch": 3.0,
152
+ "eval_administration_accuracy": 0.8861652739090065,
153
+ "eval_administration_f1": 0.8738444277192003,
154
+ "eval_corruption_accuracy": 0.9513463324048282,
155
+ "eval_corruption_f1": 0.9491062362635426,
156
+ "eval_democracy_accuracy": 0.9420612813370474,
157
+ "eval_democracy_f1": 0.9372178375746801,
158
+ "eval_development_accuracy": 0.8729805013927576,
159
+ "eval_development_f1": 0.8645602610410228,
160
+ "eval_economy_accuracy": 0.9065923862581244,
161
+ "eval_economy_f1": 0.9052374047877222,
162
+ "eval_education_accuracy": 0.962116991643454,
163
+ "eval_education_f1": 0.9605337374058789,
164
+ "eval_environment_accuracy": 0.9673166202414113,
165
+ "eval_environment_f1": 0.9674105228824886,
166
+ "eval_instability_accuracy": 0.9257195914577531,
167
+ "eval_instability_f1": 0.9245614298805196,
168
+ "eval_leadership_accuracy": 0.8341689879294336,
169
+ "eval_leadership_f1": 0.8290965652136555,
170
+ "eval_loss": 0.23423036932945251,
171
+ "eval_overall_accuracy": 0.9208604147322809,
172
+ "eval_overall_f1": 0.9175833864600228,
173
+ "eval_race_accuracy": 0.9502321262766945,
174
+ "eval_race_f1": 0.9495917574024151,
175
+ "eval_religion_accuracy": 0.9455896007428041,
176
+ "eval_religion_f1": 0.9459064433064331,
177
+ "eval_runtime": 6.0882,
178
+ "eval_safety_accuracy": 0.9060352831940576,
179
+ "eval_safety_f1": 0.903934014042716,
180
+ "eval_samples_per_second": 884.503,
181
+ "eval_steps_per_second": 55.353,
182
+ "step": 1011
183
+ },
184
+ {
185
+ "epoch": 3.2642910170749815,
186
+ "grad_norm": 0.8202979564666748,
187
+ "learning_rate": 2.4725738396624472e-05,
188
+ "loss": 0.1398,
189
+ "step": 1100
190
+ },
191
+ {
192
+ "epoch": 3.5612472160356345,
193
+ "grad_norm": 0.8103386759757996,
194
+ "learning_rate": 2.0506329113924054e-05,
195
+ "loss": 0.1362,
196
+ "step": 1200
197
+ },
198
+ {
199
+ "epoch": 3.858203414996288,
200
+ "grad_norm": 0.7706596851348877,
201
+ "learning_rate": 1.628691983122363e-05,
202
+ "loss": 0.132,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 4.0,
207
+ "eval_administration_accuracy": 0.8891364902506964,
208
+ "eval_administration_f1": 0.8799307094225493,
209
+ "eval_corruption_accuracy": 0.9506035283194058,
210
+ "eval_corruption_f1": 0.9484943116838682,
211
+ "eval_democracy_accuracy": 0.9441039925719591,
212
+ "eval_democracy_f1": 0.9386490275754458,
213
+ "eval_development_accuracy": 0.8789229340761374,
214
+ "eval_development_f1": 0.8726999754002892,
215
+ "eval_economy_accuracy": 0.9149489322191272,
216
+ "eval_economy_f1": 0.9115444384683159,
217
+ "eval_education_accuracy": 0.9600742804085423,
218
+ "eval_education_f1": 0.9589725262235815,
219
+ "eval_environment_accuracy": 0.9704735376044569,
220
+ "eval_environment_f1": 0.9698588908694847,
221
+ "eval_instability_accuracy": 0.9301764159702879,
222
+ "eval_instability_f1": 0.9264928998102437,
223
+ "eval_leadership_accuracy": 0.8358402971216342,
224
+ "eval_leadership_f1": 0.8320914070689603,
225
+ "eval_loss": 0.23071259260177612,
226
+ "eval_overall_accuracy": 0.9235995047972764,
227
+ "eval_overall_f1": 0.9203680470994868,
228
+ "eval_race_accuracy": 0.9515320334261839,
229
+ "eval_race_f1": 0.9496886745661435,
230
+ "eval_religion_accuracy": 0.9468895078922934,
231
+ "eval_religion_f1": 0.9473765040380958,
232
+ "eval_runtime": 6.0642,
233
+ "eval_safety_accuracy": 0.9104921077065924,
234
+ "eval_safety_f1": 0.9086172000668638,
235
+ "eval_samples_per_second": 887.991,
236
+ "eval_steps_per_second": 55.572,
237
+ "step": 1348
238
+ }
239
+ ],
240
+ "logging_steps": 100,
241
+ "max_steps": 1685,
242
+ "num_input_tokens_seen": 0,
243
+ "num_train_epochs": 5,
244
+ "save_steps": 500,
245
+ "stateful_callbacks": {
246
+ "EarlyStoppingCallback": {
247
+ "args": {
248
+ "early_stopping_patience": 2,
249
+ "early_stopping_threshold": 0.0
250
+ },
251
+ "attributes": {
252
+ "early_stopping_patience_counter": 0
253
+ }
254
+ },
255
+ "TrainerControl": {
256
+ "args": {
257
+ "should_epoch_stop": false,
258
+ "should_evaluate": false,
259
+ "should_log": false,
260
+ "should_save": true,
261
+ "should_training_stop": false
262
+ },
263
+ "attributes": {}
264
+ }
265
+ },
266
+ "total_flos": 2.187619128557568e+16,
267
+ "train_batch_size": 16,
268
+ "trial_name": null,
269
+ "trial_params": {
270
+ "gradient_accumulation_steps": 4,
271
+ "learning_rate": 5e-05,
272
+ "num_train_epochs": 5
273
+ }
274
+ }
run-0/checkpoint-1348/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2467658aede69590024c2472029c6010ad84b62a00af5819a2894048e9c36494
3
+ size 5905
run-0/checkpoint-1685/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
run-0/checkpoint-1685/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ae34050d7ecb9ccc6a9f73d3aee219b589d51e507cef46c578586d4383aa682
3
+ size 438100144
run-0/checkpoint-1685/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f174efa39e8742d13a588732165e7d530f9aa68121c9ada20334765012c4b5a7
3
+ size 876324619
run-0/checkpoint-1685/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e590151069bb6bc505f9ec54912ea73b0b5ce98ecdee7e1bacb0e2f6ae75b28d
3
+ size 14645
run-0/checkpoint-1685/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0d3fe37ce74cd7a18b7a55f336180d86659b85cd3b4a13d201a3636feda476
3
+ size 1383
run-0/checkpoint-1685/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4178bb040d3eca68d2f150c151af1ae58b42f1c1fbc30c5aaf7e95f32879b2dc
3
+ size 1465
run-0/checkpoint-1685/trainer_state.json ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1348,
3
+ "best_metric": 0.9203680470994868,
4
+ "best_model_checkpoint": "./results/run-0/checkpoint-1348",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1685,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2969561989606533,
14
+ "grad_norm": 1.1367077827453613,
15
+ "learning_rate": 9.900000000000002e-06,
16
+ "loss": 0.4115,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.5939123979213066,
21
+ "grad_norm": 1.127586007118225,
22
+ "learning_rate": 1.9900000000000003e-05,
23
+ "loss": 0.3507,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.89086859688196,
28
+ "grad_norm": 1.15813148021698,
29
+ "learning_rate": 2.9900000000000002e-05,
30
+ "loss": 0.3184,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_administration_accuracy": 0.8622098421541319,
36
+ "eval_administration_f1": 0.8377043137649639,
37
+ "eval_corruption_accuracy": 0.9379758588672238,
38
+ "eval_corruption_f1": 0.9310385616534254,
39
+ "eval_democracy_accuracy": 0.9238625812441968,
40
+ "eval_democracy_f1": 0.907103895237725,
41
+ "eval_development_accuracy": 0.8462395543175487,
42
+ "eval_development_f1": 0.8229509269515034,
43
+ "eval_economy_accuracy": 0.887836583101207,
44
+ "eval_economy_f1": 0.8696896434372372,
45
+ "eval_education_accuracy": 0.9478180129990715,
46
+ "eval_education_f1": 0.9425187888090035,
47
+ "eval_environment_accuracy": 0.9569173630454968,
48
+ "eval_environment_f1": 0.9527187476104269,
49
+ "eval_instability_accuracy": 0.9093779015784587,
50
+ "eval_instability_f1": 0.8913980249894993,
51
+ "eval_leadership_accuracy": 0.7756731662024141,
52
+ "eval_leadership_f1": 0.7455137881899888,
53
+ "eval_loss": 0.29490533471107483,
54
+ "eval_overall_accuracy": 0.898963169297431,
55
+ "eval_overall_f1": 0.884602913882178,
56
+ "eval_race_accuracy": 0.9387186629526463,
57
+ "eval_race_f1": 0.933273316670087,
58
+ "eval_religion_accuracy": 0.9273909006499536,
59
+ "eval_religion_f1": 0.9189375034944831,
60
+ "eval_runtime": 6.0718,
61
+ "eval_safety_accuracy": 0.8735376044568245,
62
+ "eval_safety_f1": 0.862387455777795,
63
+ "eval_samples_per_second": 886.885,
64
+ "eval_steps_per_second": 55.502,
65
+ "step": 337
66
+ },
67
+ {
68
+ "epoch": 1.1870824053452116,
69
+ "grad_norm": 1.293520212173462,
70
+ "learning_rate": 3.99e-05,
71
+ "loss": 0.2893,
72
+ "step": 400
73
+ },
74
+ {
75
+ "epoch": 1.4840386043058649,
76
+ "grad_norm": 0.9718915224075317,
77
+ "learning_rate": 4.99e-05,
78
+ "loss": 0.2673,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 1.7809948032665182,
83
+ "grad_norm": 0.9016148447990417,
84
+ "learning_rate": 4.5822784810126584e-05,
85
+ "loss": 0.2499,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 2.0,
90
+ "eval_administration_accuracy": 0.8817084493964716,
91
+ "eval_administration_f1": 0.8581364741485047,
92
+ "eval_corruption_accuracy": 0.9504178272980501,
93
+ "eval_corruption_f1": 0.9481110730839337,
94
+ "eval_democracy_accuracy": 0.9364902506963788,
95
+ "eval_democracy_f1": 0.9256699878558502,
96
+ "eval_development_accuracy": 0.8683379758588672,
97
+ "eval_development_f1": 0.8524920643431206,
98
+ "eval_economy_accuracy": 0.8989786443825442,
99
+ "eval_economy_f1": 0.8968225936331777,
100
+ "eval_education_accuracy": 0.9569173630454968,
101
+ "eval_education_f1": 0.9525043175986375,
102
+ "eval_environment_accuracy": 0.9691736304549675,
103
+ "eval_environment_f1": 0.9696177677301354,
104
+ "eval_instability_accuracy": 0.9223769730733519,
105
+ "eval_instability_f1": 0.9182716939857773,
106
+ "eval_leadership_accuracy": 0.8053853296193129,
107
+ "eval_leadership_f1": 0.7803578231203789,
108
+ "eval_loss": 0.24944739043712616,
109
+ "eval_overall_accuracy": 0.9152274837511607,
110
+ "eval_overall_f1": 0.9074667582350125,
111
+ "eval_race_accuracy": 0.9513463324048282,
112
+ "eval_race_f1": 0.9475861405850222,
113
+ "eval_religion_accuracy": 0.9457753017641597,
114
+ "eval_religion_f1": 0.9461902504041225,
115
+ "eval_runtime": 6.0882,
116
+ "eval_safety_accuracy": 0.8958217270194986,
117
+ "eval_safety_f1": 0.8938409123314907,
118
+ "eval_samples_per_second": 884.502,
119
+ "eval_steps_per_second": 55.353,
120
+ "step": 674
121
+ },
122
+ {
123
+ "epoch": 2.0772086117297697,
124
+ "grad_norm": 0.7925682663917542,
125
+ "learning_rate": 4.1603375527426166e-05,
126
+ "loss": 0.2355,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 2.374164810690423,
131
+ "grad_norm": 1.0376709699630737,
132
+ "learning_rate": 3.7383966244725735e-05,
133
+ "loss": 0.1931,
134
+ "step": 800
135
+ },
136
+ {
137
+ "epoch": 2.6711210096510767,
138
+ "grad_norm": 1.358044981956482,
139
+ "learning_rate": 3.316455696202532e-05,
140
+ "loss": 0.1818,
141
+ "step": 900
142
+ },
143
+ {
144
+ "epoch": 2.9680772086117297,
145
+ "grad_norm": 0.9971312284469604,
146
+ "learning_rate": 2.8945147679324896e-05,
147
+ "loss": 0.1774,
148
+ "step": 1000
149
+ },
150
+ {
151
+ "epoch": 3.0,
152
+ "eval_administration_accuracy": 0.8861652739090065,
153
+ "eval_administration_f1": 0.8738444277192003,
154
+ "eval_corruption_accuracy": 0.9513463324048282,
155
+ "eval_corruption_f1": 0.9491062362635426,
156
+ "eval_democracy_accuracy": 0.9420612813370474,
157
+ "eval_democracy_f1": 0.9372178375746801,
158
+ "eval_development_accuracy": 0.8729805013927576,
159
+ "eval_development_f1": 0.8645602610410228,
160
+ "eval_economy_accuracy": 0.9065923862581244,
161
+ "eval_economy_f1": 0.9052374047877222,
162
+ "eval_education_accuracy": 0.962116991643454,
163
+ "eval_education_f1": 0.9605337374058789,
164
+ "eval_environment_accuracy": 0.9673166202414113,
165
+ "eval_environment_f1": 0.9674105228824886,
166
+ "eval_instability_accuracy": 0.9257195914577531,
167
+ "eval_instability_f1": 0.9245614298805196,
168
+ "eval_leadership_accuracy": 0.8341689879294336,
169
+ "eval_leadership_f1": 0.8290965652136555,
170
+ "eval_loss": 0.23423036932945251,
171
+ "eval_overall_accuracy": 0.9208604147322809,
172
+ "eval_overall_f1": 0.9175833864600228,
173
+ "eval_race_accuracy": 0.9502321262766945,
174
+ "eval_race_f1": 0.9495917574024151,
175
+ "eval_religion_accuracy": 0.9455896007428041,
176
+ "eval_religion_f1": 0.9459064433064331,
177
+ "eval_runtime": 6.0882,
178
+ "eval_safety_accuracy": 0.9060352831940576,
179
+ "eval_safety_f1": 0.903934014042716,
180
+ "eval_samples_per_second": 884.503,
181
+ "eval_steps_per_second": 55.353,
182
+ "step": 1011
183
+ },
184
+ {
185
+ "epoch": 3.2642910170749815,
186
+ "grad_norm": 0.8202979564666748,
187
+ "learning_rate": 2.4725738396624472e-05,
188
+ "loss": 0.1398,
189
+ "step": 1100
190
+ },
191
+ {
192
+ "epoch": 3.5612472160356345,
193
+ "grad_norm": 0.8103386759757996,
194
+ "learning_rate": 2.0506329113924054e-05,
195
+ "loss": 0.1362,
196
+ "step": 1200
197
+ },
198
+ {
199
+ "epoch": 3.858203414996288,
200
+ "grad_norm": 0.7706596851348877,
201
+ "learning_rate": 1.628691983122363e-05,
202
+ "loss": 0.132,
203
+ "step": 1300
204
+ },
205
+ {
206
+ "epoch": 4.0,
207
+ "eval_administration_accuracy": 0.8891364902506964,
208
+ "eval_administration_f1": 0.8799307094225493,
209
+ "eval_corruption_accuracy": 0.9506035283194058,
210
+ "eval_corruption_f1": 0.9484943116838682,
211
+ "eval_democracy_accuracy": 0.9441039925719591,
212
+ "eval_democracy_f1": 0.9386490275754458,
213
+ "eval_development_accuracy": 0.8789229340761374,
214
+ "eval_development_f1": 0.8726999754002892,
215
+ "eval_economy_accuracy": 0.9149489322191272,
216
+ "eval_economy_f1": 0.9115444384683159,
217
+ "eval_education_accuracy": 0.9600742804085423,
218
+ "eval_education_f1": 0.9589725262235815,
219
+ "eval_environment_accuracy": 0.9704735376044569,
220
+ "eval_environment_f1": 0.9698588908694847,
221
+ "eval_instability_accuracy": 0.9301764159702879,
222
+ "eval_instability_f1": 0.9264928998102437,
223
+ "eval_leadership_accuracy": 0.8358402971216342,
224
+ "eval_leadership_f1": 0.8320914070689603,
225
+ "eval_loss": 0.23071259260177612,
226
+ "eval_overall_accuracy": 0.9235995047972764,
227
+ "eval_overall_f1": 0.9203680470994868,
228
+ "eval_race_accuracy": 0.9515320334261839,
229
+ "eval_race_f1": 0.9496886745661435,
230
+ "eval_religion_accuracy": 0.9468895078922934,
231
+ "eval_religion_f1": 0.9473765040380958,
232
+ "eval_runtime": 6.0642,
233
+ "eval_safety_accuracy": 0.9104921077065924,
234
+ "eval_safety_f1": 0.9086172000668638,
235
+ "eval_samples_per_second": 887.991,
236
+ "eval_steps_per_second": 55.572,
237
+ "step": 1348
238
+ },
239
+ {
240
+ "epoch": 4.154417223459539,
241
+ "grad_norm": 0.759730339050293,
242
+ "learning_rate": 1.2067510548523207e-05,
243
+ "loss": 0.1193,
244
+ "step": 1400
245
+ },
246
+ {
247
+ "epoch": 4.451373422420193,
248
+ "grad_norm": 0.895845353603363,
249
+ "learning_rate": 7.848101265822786e-06,
250
+ "loss": 0.1074,
251
+ "step": 1500
252
+ },
253
+ {
254
+ "epoch": 4.748329621380846,
255
+ "grad_norm": 0.889885425567627,
256
+ "learning_rate": 3.6286919831223627e-06,
257
+ "loss": 0.1074,
258
+ "step": 1600
259
+ },
260
+ {
261
+ "epoch": 5.0,
262
+ "eval_administration_accuracy": 0.8870937790157846,
263
+ "eval_administration_f1": 0.8800187021336342,
264
+ "eval_corruption_accuracy": 0.949489322191272,
265
+ "eval_corruption_f1": 0.9478600433858793,
266
+ "eval_democracy_accuracy": 0.9403899721448468,
267
+ "eval_democracy_f1": 0.9368950511711482,
268
+ "eval_development_accuracy": 0.8794800371402043,
269
+ "eval_development_f1": 0.8729188521267028,
270
+ "eval_economy_accuracy": 0.9112349117920149,
271
+ "eval_economy_f1": 0.9084007954063611,
272
+ "eval_education_accuracy": 0.9595171773444754,
273
+ "eval_education_f1": 0.9586986398725112,
274
+ "eval_environment_accuracy": 0.9693593314763231,
275
+ "eval_environment_f1": 0.9688217857436425,
276
+ "eval_instability_accuracy": 0.9283194057567317,
277
+ "eval_instability_f1": 0.9249472293024158,
278
+ "eval_leadership_accuracy": 0.8395543175487465,
279
+ "eval_leadership_f1": 0.8365072895157208,
280
+ "eval_loss": 0.23093268275260925,
281
+ "eval_overall_accuracy": 0.9227329000309501,
282
+ "eval_overall_f1": 0.9201225505567104,
283
+ "eval_race_accuracy": 0.952460538532962,
284
+ "eval_race_f1": 0.9514489266576783,
285
+ "eval_religion_accuracy": 0.9467038068709378,
286
+ "eval_religion_f1": 0.9472204828765739,
287
+ "eval_runtime": 6.0594,
288
+ "eval_safety_accuracy": 0.909192200557103,
289
+ "eval_safety_f1": 0.9077328084882567,
290
+ "eval_samples_per_second": 888.699,
291
+ "eval_steps_per_second": 55.616,
292
+ "step": 1685
293
+ }
294
+ ],
295
+ "logging_steps": 100,
296
+ "max_steps": 1685,
297
+ "num_input_tokens_seen": 0,
298
+ "num_train_epochs": 5,
299
+ "save_steps": 500,
300
+ "stateful_callbacks": {
301
+ "EarlyStoppingCallback": {
302
+ "args": {
303
+ "early_stopping_patience": 2,
304
+ "early_stopping_threshold": 0.0
305
+ },
306
+ "attributes": {
307
+ "early_stopping_patience_counter": 1
308
+ }
309
+ },
310
+ "TrainerControl": {
311
+ "args": {
312
+ "should_epoch_stop": false,
313
+ "should_evaluate": false,
314
+ "should_log": false,
315
+ "should_save": true,
316
+ "should_training_stop": true
317
+ },
318
+ "attributes": {}
319
+ }
320
+ },
321
+ "total_flos": 2.692211340017664e+16,
322
+ "train_batch_size": 16,
323
+ "trial_name": null,
324
+ "trial_params": {
325
+ "gradient_accumulation_steps": 4,
326
+ "learning_rate": 5e-05,
327
+ "num_train_epochs": 5
328
+ }
329
+ }
run-0/checkpoint-1685/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2467658aede69590024c2472029c6010ad84b62a00af5819a2894048e9c36494
3
+ size 5905
run-1/checkpoint-1011/config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Democracy Unknown",
14
+ "1": "Democracy Negative",
15
+ "2": "Democracy Neutral",
16
+ "3": "Democracy Positive",
17
+ "4": "Economy Unknown",
18
+ "5": "Economy Negative",
19
+ "6": "Economy Neutral",
20
+ "7": "Economy Positive",
21
+ "8": "Race Unknown",
22
+ "9": "Race Negative",
23
+ "10": "Race Neutral",
24
+ "11": "Race Positive",
25
+ "12": "Leadership Unknown",
26
+ "13": "Leadership Negative",
27
+ "14": "Leadership Neutral",
28
+ "15": "Leadership Positive",
29
+ "16": "Development Unknown",
30
+ "17": "Development Negative",
31
+ "18": "Development Neutral",
32
+ "19": "Development Positive",
33
+ "20": "Corruption Unknown",
34
+ "21": "Corruption Negative",
35
+ "22": "Corruption Neutral",
36
+ "23": "Corruption Positive",
37
+ "24": "Instability Unknown",
38
+ "25": "Instability Negative",
39
+ "26": "Instability Neutral",
40
+ "27": "Instability Positive",
41
+ "28": "Safety Unknown",
42
+ "29": "Safety Negative",
43
+ "30": "Safety Neutral",
44
+ "31": "Safety Positive",
45
+ "32": "Administration Unknown",
46
+ "33": "Administration Negative",
47
+ "34": "Administration Neutral",
48
+ "35": "Administration Positive",
49
+ "36": "Education Unknown",
50
+ "37": "Education Negative",
51
+ "38": "Education Neutral",
52
+ "39": "Education Positive",
53
+ "40": "Religion Unknown",
54
+ "41": "Religion Negative",
55
+ "42": "Religion Neutral",
56
+ "43": "Religion Positive",
57
+ "44": "Environment Unknown",
58
+ "45": "Environment Negative",
59
+ "46": "Environment Neutral",
60
+ "47": "Environment Positive"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "Administration Negative": 33,
66
+ "Administration Neutral": 34,
67
+ "Administration Positive": 35,
68
+ "Administration Unknown": 32,
69
+ "Corruption Negative": 21,
70
+ "Corruption Neutral": 22,
71
+ "Corruption Positive": 23,
72
+ "Corruption Unknown": 20,
73
+ "Democracy Negative": 1,
74
+ "Democracy Neutral": 2,
75
+ "Democracy Positive": 3,
76
+ "Democracy Unknown": 0,
77
+ "Development Negative": 17,
78
+ "Development Neutral": 18,
79
+ "Development Positive": 19,
80
+ "Development Unknown": 16,
81
+ "Economy Negative": 5,
82
+ "Economy Neutral": 6,
83
+ "Economy Positive": 7,
84
+ "Economy Unknown": 4,
85
+ "Education Negative": 37,
86
+ "Education Neutral": 38,
87
+ "Education Positive": 39,
88
+ "Education Unknown": 36,
89
+ "Environment Negative": 45,
90
+ "Environment Neutral": 46,
91
+ "Environment Positive": 47,
92
+ "Environment Unknown": 44,
93
+ "Instability Negative": 25,
94
+ "Instability Neutral": 26,
95
+ "Instability Positive": 27,
96
+ "Instability Unknown": 24,
97
+ "Leadership Negative": 13,
98
+ "Leadership Neutral": 14,
99
+ "Leadership Positive": 15,
100
+ "Leadership Unknown": 12,
101
+ "Race Negative": 9,
102
+ "Race Neutral": 10,
103
+ "Race Positive": 11,
104
+ "Race Unknown": 8,
105
+ "Religion Negative": 41,
106
+ "Religion Neutral": 42,
107
+ "Religion Positive": 43,
108
+ "Religion Unknown": 40,
109
+ "Safety Negative": 29,
110
+ "Safety Neutral": 30,
111
+ "Safety Positive": 31,
112
+ "Safety Unknown": 28
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "transformers_version": "4.57.0",
122
+ "type_vocab_size": 2,
123
+ "use_cache": true,
124
+ "vocab_size": 30522
125
+ }
run-1/checkpoint-1011/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24e627fb0f34f3734ca3162e3cdc3d91455714d3acb8708f7944124019813da
3
+ size 438100144
run-1/checkpoint-1011/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3fb6acf24a00816f010d5a96d61239c6c184de7d6393f1305dcc3251a45a098
3
+ size 876324619
run-1/checkpoint-1011/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2336cb0162aa649da80d774dd375fe462ee046c5de7100aea50899cab0033550
3
+ size 14645
run-1/checkpoint-1011/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ced76e4b26ff9c7e28969ae263d508804558ca5fb49575ed7c93389ef7e878
3
+ size 1383
run-1/checkpoint-1011/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b4f705378abe5bddc82dc3bbe7d53a58c7c8f3b0d2b6de843f4af480b4890f
3
+ size 1465