YagiASAFAS commited on
Commit
e04e294
·
1 Parent(s): 0efbf92

Add tokenizer files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-1348/config.json +126 -0
  2. checkpoint-1348/model.safetensors +3 -0
  3. checkpoint-1348/optimizer.pt +3 -0
  4. checkpoint-1348/rng_state.pth +3 -0
  5. checkpoint-1348/scheduler.pt +3 -0
  6. checkpoint-1348/trainer_state.json +124 -0
  7. checkpoint-1348/training_args.bin +3 -0
  8. checkpoint-2022/config.json +126 -0
  9. checkpoint-2022/model.safetensors +3 -0
  10. checkpoint-2022/optimizer.pt +3 -0
  11. checkpoint-2022/rng_state.pth +3 -0
  12. checkpoint-2022/scheduler.pt +3 -0
  13. checkpoint-2022/trainer_state.json +172 -0
  14. checkpoint-2022/training_args.bin +3 -0
  15. checkpoint-2696/config.json +126 -0
  16. checkpoint-2696/model.safetensors +3 -0
  17. checkpoint-2696/optimizer.pt +3 -0
  18. checkpoint-2696/rng_state.pth +3 -0
  19. checkpoint-2696/scheduler.pt +3 -0
  20. checkpoint-2696/trainer_state.json +213 -0
  21. checkpoint-2696/training_args.bin +3 -0
  22. checkpoint-3370/config.json +126 -0
  23. checkpoint-3370/model.safetensors +3 -0
  24. checkpoint-3370/optimizer.pt +3 -0
  25. checkpoint-3370/rng_state.pth +3 -0
  26. checkpoint-3370/scheduler.pt +3 -0
  27. checkpoint-3370/trainer_state.json +254 -0
  28. checkpoint-3370/training_args.bin +3 -0
  29. checkpoint-4044/config.json +126 -0
  30. checkpoint-4044/model.safetensors +3 -0
  31. checkpoint-4044/optimizer.pt +3 -0
  32. checkpoint-4044/rng_state.pth +3 -0
  33. checkpoint-4044/scheduler.pt +3 -0
  34. checkpoint-4044/trainer_state.json +302 -0
  35. checkpoint-4044/training_args.bin +3 -0
  36. checkpoint-4718/config.json +126 -0
  37. checkpoint-4718/model.safetensors +3 -0
  38. checkpoint-4718/optimizer.pt +3 -0
  39. checkpoint-4718/rng_state.pth +3 -0
  40. checkpoint-4718/scheduler.pt +3 -0
  41. checkpoint-4718/trainer_state.json +343 -0
  42. checkpoint-4718/training_args.bin +3 -0
  43. checkpoint-674/config.json +126 -0
  44. checkpoint-674/model.safetensors +3 -0
  45. checkpoint-674/optimizer.pt +3 -0
  46. checkpoint-674/rng_state.pth +3 -0
  47. checkpoint-674/scheduler.pt +3 -0
  48. checkpoint-674/trainer_state.json +83 -0
  49. checkpoint-674/training_args.bin +3 -0
  50. special_tokens_map.json +7 -0
checkpoint-1348/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-1348/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fce8f95d3cf8aff1a50c809fe98dd604891e8de87b59f6120aa33197dca0a66
3
+ size 438100144
checkpoint-1348/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9187dde97cea8c1cf9cb99c4687e70c49adae5398710ee1afd363eb067ec64e5
3
+ size 876321402
checkpoint-1348/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15d59ec259156654b7d699d265c49f473bc80117753dbc660df0ce861bf6667
3
+ size 14244
checkpoint-1348/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b6ecb4f315a32a13883fe1ef1036339d6775c6db67dc4dd1bd600ddb6aece2b
3
+ size 1064
checkpoint-1348/trainer_state.json ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9156647148374422,
3
+ "best_model_checkpoint": "./results/checkpoint-1348",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1348,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ }
93
+ ],
94
+ "logging_steps": 500,
95
+ "max_steps": 10768,
96
+ "num_input_tokens_seen": 0,
97
+ "num_train_epochs": 16,
98
+ "save_steps": 500,
99
+ "stateful_callbacks": {
100
+ "EarlyStoppingCallback": {
101
+ "args": {
102
+ "early_stopping_patience": 2,
103
+ "early_stopping_threshold": 0.0
104
+ },
105
+ "attributes": {
106
+ "early_stopping_patience_counter": 0
107
+ }
108
+ },
109
+ "TrainerControl": {
110
+ "args": {
111
+ "should_epoch_stop": false,
112
+ "should_evaluate": false,
113
+ "should_log": false,
114
+ "should_save": true,
115
+ "should_training_stop": false
116
+ },
117
+ "attributes": {}
118
+ }
119
+ },
120
+ "total_flos": 1.1338452843429888e+16,
121
+ "train_batch_size": 16,
122
+ "trial_name": null,
123
+ "trial_params": null
124
+ }
checkpoint-1348/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-2022/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-2022/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da5bcf984a34091cb18e050126d51750543e1ed6c4a773905c484ffd83d8b4b
3
+ size 438100144
checkpoint-2022/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dbdd9a56f01c2cecf497957017972c09ca424ef5659d8fe692bfc125ef6f57
3
+ size 876321402
checkpoint-2022/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b73932204efe53704e932ab7c4720df6118415e4984d139269a56ea57da1c48a
3
+ size 14244
checkpoint-2022/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc65d5c1626b5099811d02a9fb91c5737fc97e0195b3258c9705ce310425541a
3
+ size 1064
checkpoint-2022/trainer_state.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9198927823234619,
3
+ "best_model_checkpoint": "./results/checkpoint-2022",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2022,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ },
93
+ {
94
+ "epoch": 2.2256867112100966,
95
+ "grad_norm": 0.9178161025047302,
96
+ "learning_rate": 2.582095096582467e-05,
97
+ "loss": 0.2085,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 2.9680772086117297,
102
+ "grad_norm": 1.1747676134109497,
103
+ "learning_rate": 2.4427934621099553e-05,
104
+ "loss": 0.1696,
105
+ "step": 2000
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_administration_accuracy": 0.8919220055710306,
110
+ "eval_administration_f1": 0.8805907698259323,
111
+ "eval_corruption_accuracy": 0.9454038997214484,
112
+ "eval_corruption_f1": 0.9422755927443824,
113
+ "eval_democracy_accuracy": 0.9333333333333333,
114
+ "eval_democracy_f1": 0.9277380787973176,
115
+ "eval_development_accuracy": 0.8846796657381616,
116
+ "eval_development_f1": 0.8767801428288444,
117
+ "eval_economy_accuracy": 0.9177344475394614,
118
+ "eval_economy_f1": 0.913190124488594,
119
+ "eval_education_accuracy": 0.959702878365831,
120
+ "eval_education_f1": 0.9592018840910198,
121
+ "eval_environment_accuracy": 0.9766016713091922,
122
+ "eval_environment_f1": 0.9753052667455842,
123
+ "eval_instability_accuracy": 0.9255338904363974,
124
+ "eval_instability_f1": 0.9219441146038561,
125
+ "eval_leadership_accuracy": 0.8503249767873723,
126
+ "eval_leadership_f1": 0.8464660955301658,
127
+ "eval_loss": 0.22912514209747314,
128
+ "eval_overall_accuracy": 0.923785205818632,
129
+ "eval_overall_f1": 0.9198927823234619,
130
+ "eval_race_accuracy": 0.9468895078922934,
131
+ "eval_race_f1": 0.9441406664359246,
132
+ "eval_religion_accuracy": 0.9418755803156917,
133
+ "eval_religion_f1": 0.9406664102976392,
134
+ "eval_runtime": 6.3983,
135
+ "eval_safety_accuracy": 0.9114206128133705,
136
+ "eval_safety_f1": 0.9104142414922826,
137
+ "eval_samples_per_second": 841.634,
138
+ "eval_steps_per_second": 52.67,
139
+ "step": 2022
140
+ }
141
+ ],
142
+ "logging_steps": 500,
143
+ "max_steps": 10768,
144
+ "num_input_tokens_seen": 0,
145
+ "num_train_epochs": 16,
146
+ "save_steps": 500,
147
+ "stateful_callbacks": {
148
+ "EarlyStoppingCallback": {
149
+ "args": {
150
+ "early_stopping_patience": 2,
151
+ "early_stopping_threshold": 0.0
152
+ },
153
+ "attributes": {
154
+ "early_stopping_patience_counter": 0
155
+ }
156
+ },
157
+ "TrainerControl": {
158
+ "args": {
159
+ "should_epoch_stop": false,
160
+ "should_evaluate": false,
161
+ "should_log": false,
162
+ "should_save": true,
163
+ "should_training_stop": false
164
+ },
165
+ "attributes": {}
166
+ }
167
+ },
168
+ "total_flos": 1.7007679265144832e+16,
169
+ "train_batch_size": 16,
170
+ "trial_name": null,
171
+ "trial_params": null
172
+ }
checkpoint-2022/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-2696/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-2696/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386ff4bc5faac38b06b3db23ca121834b04cf585a5b7e049d1ae21f0791260be
3
+ size 438100144
checkpoint-2696/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e99fe4775ef593f4ecc30c37478b3a3aba9d37aa2d50779e0c69a2e5f94d9c1
3
+ size 876321402
checkpoint-2696/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64dc7c8d35ce0619ad041d92905273538a85dfb6cdd09fa753450f6ca183ddd5
3
+ size 14244
checkpoint-2696/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a874b7ba73317e7750ce0ec4ee79350af46c2657c894e4dfbe16f8ec62ccdbaf
3
+ size 1064
checkpoint-2696/trainer_state.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9198927823234619,
3
+ "best_model_checkpoint": "./results/checkpoint-2022",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2696,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ },
93
+ {
94
+ "epoch": 2.2256867112100966,
95
+ "grad_norm": 0.9178161025047302,
96
+ "learning_rate": 2.582095096582467e-05,
97
+ "loss": 0.2085,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 2.9680772086117297,
102
+ "grad_norm": 1.1747676134109497,
103
+ "learning_rate": 2.4427934621099553e-05,
104
+ "loss": 0.1696,
105
+ "step": 2000
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_administration_accuracy": 0.8919220055710306,
110
+ "eval_administration_f1": 0.8805907698259323,
111
+ "eval_corruption_accuracy": 0.9454038997214484,
112
+ "eval_corruption_f1": 0.9422755927443824,
113
+ "eval_democracy_accuracy": 0.9333333333333333,
114
+ "eval_democracy_f1": 0.9277380787973176,
115
+ "eval_development_accuracy": 0.8846796657381616,
116
+ "eval_development_f1": 0.8767801428288444,
117
+ "eval_economy_accuracy": 0.9177344475394614,
118
+ "eval_economy_f1": 0.913190124488594,
119
+ "eval_education_accuracy": 0.959702878365831,
120
+ "eval_education_f1": 0.9592018840910198,
121
+ "eval_environment_accuracy": 0.9766016713091922,
122
+ "eval_environment_f1": 0.9753052667455842,
123
+ "eval_instability_accuracy": 0.9255338904363974,
124
+ "eval_instability_f1": 0.9219441146038561,
125
+ "eval_leadership_accuracy": 0.8503249767873723,
126
+ "eval_leadership_f1": 0.8464660955301658,
127
+ "eval_loss": 0.22912514209747314,
128
+ "eval_overall_accuracy": 0.923785205818632,
129
+ "eval_overall_f1": 0.9198927823234619,
130
+ "eval_race_accuracy": 0.9468895078922934,
131
+ "eval_race_f1": 0.9441406664359246,
132
+ "eval_religion_accuracy": 0.9418755803156917,
133
+ "eval_religion_f1": 0.9406664102976392,
134
+ "eval_runtime": 6.3983,
135
+ "eval_safety_accuracy": 0.9114206128133705,
136
+ "eval_safety_f1": 0.9104142414922826,
137
+ "eval_samples_per_second": 841.634,
138
+ "eval_steps_per_second": 52.67,
139
+ "step": 2022
140
+ },
141
+ {
142
+ "epoch": 3.7097253155159615,
143
+ "grad_norm": 1.7257879972457886,
144
+ "learning_rate": 2.3034918276374445e-05,
145
+ "loss": 0.1309,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 4.0,
150
+ "eval_administration_accuracy": 0.875766016713092,
151
+ "eval_administration_f1": 0.8761692862915822,
152
+ "eval_corruption_accuracy": 0.9465181058495822,
153
+ "eval_corruption_f1": 0.9432932911595113,
154
+ "eval_democracy_accuracy": 0.9344475394614671,
155
+ "eval_democracy_f1": 0.9289539940724922,
156
+ "eval_development_accuracy": 0.8804085422469824,
157
+ "eval_development_f1": 0.8733152085361343,
158
+ "eval_economy_accuracy": 0.9175487465181058,
159
+ "eval_economy_f1": 0.9168308054942083,
160
+ "eval_education_accuracy": 0.959702878365831,
161
+ "eval_education_f1": 0.9577025286068839,
162
+ "eval_environment_accuracy": 0.9740018570102136,
163
+ "eval_environment_f1": 0.9740155750677182,
164
+ "eval_instability_accuracy": 0.92330547818013,
165
+ "eval_instability_f1": 0.9214961270553598,
166
+ "eval_leadership_accuracy": 0.8469823584029712,
167
+ "eval_leadership_f1": 0.8454208641557382,
168
+ "eval_loss": 0.23740428686141968,
169
+ "eval_overall_accuracy": 0.9211853915196534,
170
+ "eval_overall_f1": 0.9191929747362485,
171
+ "eval_race_accuracy": 0.9452181987000928,
172
+ "eval_race_f1": 0.9441252497587079,
173
+ "eval_religion_accuracy": 0.940761374187558,
174
+ "eval_religion_f1": 0.9389083250935241,
175
+ "eval_runtime": 6.3816,
176
+ "eval_safety_accuracy": 0.9095636025998143,
177
+ "eval_safety_f1": 0.9100844415431236,
178
+ "eval_samples_per_second": 843.83,
179
+ "eval_steps_per_second": 52.808,
180
+ "step": 2696
181
+ }
182
+ ],
183
+ "logging_steps": 500,
184
+ "max_steps": 10768,
185
+ "num_input_tokens_seen": 0,
186
+ "num_train_epochs": 16,
187
+ "save_steps": 500,
188
+ "stateful_callbacks": {
189
+ "EarlyStoppingCallback": {
190
+ "args": {
191
+ "early_stopping_patience": 2,
192
+ "early_stopping_threshold": 0.0
193
+ },
194
+ "attributes": {
195
+ "early_stopping_patience_counter": 1
196
+ }
197
+ },
198
+ "TrainerControl": {
199
+ "args": {
200
+ "should_epoch_stop": false,
201
+ "should_evaluate": false,
202
+ "should_log": false,
203
+ "should_save": true,
204
+ "should_training_stop": false
205
+ },
206
+ "attributes": {}
207
+ }
208
+ },
209
+ "total_flos": 2.2676905686859776e+16,
210
+ "train_batch_size": 16,
211
+ "trial_name": null,
212
+ "trial_params": null
213
+ }
checkpoint-2696/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-3370/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-3370/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc069e386a6deb789edc1a12057f57b654519f5f9fe6868369fcde2f08a30b74
3
+ size 438100144
checkpoint-3370/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e4b3f3f995bb1fae31c38a3b82ef5cd9569281b00de71c26e8c4f58b271614
3
+ size 876321402
checkpoint-3370/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a8de39f56cebb77b98b62c5b8379c529ea942c4cf0e172c9d640d6d9f81168
3
+ size 14244
checkpoint-3370/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5908ea72e3438172d9daa4995d49ed342e883ef405edcfc9f86191cf29863ecf
3
+ size 1064
checkpoint-3370/trainer_state.json ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9203949718676169,
3
+ "best_model_checkpoint": "./results/checkpoint-3370",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3370,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ },
93
+ {
94
+ "epoch": 2.2256867112100966,
95
+ "grad_norm": 0.9178161025047302,
96
+ "learning_rate": 2.582095096582467e-05,
97
+ "loss": 0.2085,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 2.9680772086117297,
102
+ "grad_norm": 1.1747676134109497,
103
+ "learning_rate": 2.4427934621099553e-05,
104
+ "loss": 0.1696,
105
+ "step": 2000
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_administration_accuracy": 0.8919220055710306,
110
+ "eval_administration_f1": 0.8805907698259323,
111
+ "eval_corruption_accuracy": 0.9454038997214484,
112
+ "eval_corruption_f1": 0.9422755927443824,
113
+ "eval_democracy_accuracy": 0.9333333333333333,
114
+ "eval_democracy_f1": 0.9277380787973176,
115
+ "eval_development_accuracy": 0.8846796657381616,
116
+ "eval_development_f1": 0.8767801428288444,
117
+ "eval_economy_accuracy": 0.9177344475394614,
118
+ "eval_economy_f1": 0.913190124488594,
119
+ "eval_education_accuracy": 0.959702878365831,
120
+ "eval_education_f1": 0.9592018840910198,
121
+ "eval_environment_accuracy": 0.9766016713091922,
122
+ "eval_environment_f1": 0.9753052667455842,
123
+ "eval_instability_accuracy": 0.9255338904363974,
124
+ "eval_instability_f1": 0.9219441146038561,
125
+ "eval_leadership_accuracy": 0.8503249767873723,
126
+ "eval_leadership_f1": 0.8464660955301658,
127
+ "eval_loss": 0.22912514209747314,
128
+ "eval_overall_accuracy": 0.923785205818632,
129
+ "eval_overall_f1": 0.9198927823234619,
130
+ "eval_race_accuracy": 0.9468895078922934,
131
+ "eval_race_f1": 0.9441406664359246,
132
+ "eval_religion_accuracy": 0.9418755803156917,
133
+ "eval_religion_f1": 0.9406664102976392,
134
+ "eval_runtime": 6.3983,
135
+ "eval_safety_accuracy": 0.9114206128133705,
136
+ "eval_safety_f1": 0.9104142414922826,
137
+ "eval_samples_per_second": 841.634,
138
+ "eval_steps_per_second": 52.67,
139
+ "step": 2022
140
+ },
141
+ {
142
+ "epoch": 3.7097253155159615,
143
+ "grad_norm": 1.7257879972457886,
144
+ "learning_rate": 2.3034918276374445e-05,
145
+ "loss": 0.1309,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 4.0,
150
+ "eval_administration_accuracy": 0.875766016713092,
151
+ "eval_administration_f1": 0.8761692862915822,
152
+ "eval_corruption_accuracy": 0.9465181058495822,
153
+ "eval_corruption_f1": 0.9432932911595113,
154
+ "eval_democracy_accuracy": 0.9344475394614671,
155
+ "eval_democracy_f1": 0.9289539940724922,
156
+ "eval_development_accuracy": 0.8804085422469824,
157
+ "eval_development_f1": 0.8733152085361343,
158
+ "eval_economy_accuracy": 0.9175487465181058,
159
+ "eval_economy_f1": 0.9168308054942083,
160
+ "eval_education_accuracy": 0.959702878365831,
161
+ "eval_education_f1": 0.9577025286068839,
162
+ "eval_environment_accuracy": 0.9740018570102136,
163
+ "eval_environment_f1": 0.9740155750677182,
164
+ "eval_instability_accuracy": 0.92330547818013,
165
+ "eval_instability_f1": 0.9214961270553598,
166
+ "eval_leadership_accuracy": 0.8469823584029712,
167
+ "eval_leadership_f1": 0.8454208641557382,
168
+ "eval_loss": 0.23740428686141968,
169
+ "eval_overall_accuracy": 0.9211853915196534,
170
+ "eval_overall_f1": 0.9191929747362485,
171
+ "eval_race_accuracy": 0.9452181987000928,
172
+ "eval_race_f1": 0.9441252497587079,
173
+ "eval_religion_accuracy": 0.940761374187558,
174
+ "eval_religion_f1": 0.9389083250935241,
175
+ "eval_runtime": 6.3816,
176
+ "eval_safety_accuracy": 0.9095636025998143,
177
+ "eval_safety_f1": 0.9100844415431236,
178
+ "eval_samples_per_second": 843.83,
179
+ "eval_steps_per_second": 52.808,
180
+ "step": 2696
181
+ },
182
+ {
183
+ "epoch": 4.451373422420193,
184
+ "grad_norm": 1.5328147411346436,
185
+ "learning_rate": 2.1641901931649333e-05,
186
+ "loss": 0.1085,
187
+ "step": 3000
188
+ },
189
+ {
190
+ "epoch": 5.0,
191
+ "eval_administration_accuracy": 0.8921077065923863,
192
+ "eval_administration_f1": 0.8861580465319954,
193
+ "eval_corruption_accuracy": 0.9463324048282266,
194
+ "eval_corruption_f1": 0.943458379396768,
195
+ "eval_democracy_accuracy": 0.9346332404828227,
196
+ "eval_democracy_f1": 0.9313693520601084,
197
+ "eval_development_accuracy": 0.8807799442896936,
198
+ "eval_development_f1": 0.8746742414193872,
199
+ "eval_economy_accuracy": 0.9175487465181058,
200
+ "eval_economy_f1": 0.9165794145446547,
201
+ "eval_education_accuracy": 0.9587743732590529,
202
+ "eval_education_f1": 0.9574397967431261,
203
+ "eval_environment_accuracy": 0.9736304549675023,
204
+ "eval_environment_f1": 0.9731615305259281,
205
+ "eval_instability_accuracy": 0.9257195914577531,
206
+ "eval_instability_f1": 0.9217833078621397,
207
+ "eval_leadership_accuracy": 0.8458681522748375,
208
+ "eval_leadership_f1": 0.849217017756863,
209
+ "eval_loss": 0.24142640829086304,
210
+ "eval_overall_accuracy": 0.9226245744351593,
211
+ "eval_overall_f1": 0.9203949718676169,
212
+ "eval_race_accuracy": 0.9452181987000928,
213
+ "eval_race_f1": 0.9418839421000289,
214
+ "eval_religion_accuracy": 0.9426183844011142,
215
+ "eval_religion_f1": 0.9420269885121756,
216
+ "eval_runtime": 6.3895,
217
+ "eval_safety_accuracy": 0.9082636954503249,
218
+ "eval_safety_f1": 0.9069876449582265,
219
+ "eval_samples_per_second": 842.785,
220
+ "eval_steps_per_second": 52.743,
221
+ "step": 3370
222
+ }
223
+ ],
224
+ "logging_steps": 500,
225
+ "max_steps": 10768,
226
+ "num_input_tokens_seen": 0,
227
+ "num_train_epochs": 16,
228
+ "save_steps": 500,
229
+ "stateful_callbacks": {
230
+ "EarlyStoppingCallback": {
231
+ "args": {
232
+ "early_stopping_patience": 2,
233
+ "early_stopping_threshold": 0.0
234
+ },
235
+ "attributes": {
236
+ "early_stopping_patience_counter": 0
237
+ }
238
+ },
239
+ "TrainerControl": {
240
+ "args": {
241
+ "should_epoch_stop": false,
242
+ "should_evaluate": false,
243
+ "should_log": false,
244
+ "should_save": true,
245
+ "should_training_stop": false
246
+ },
247
+ "attributes": {}
248
+ }
249
+ },
250
+ "total_flos": 2.834613210857472e+16,
251
+ "train_batch_size": 16,
252
+ "trial_name": null,
253
+ "trial_params": null
254
+ }
checkpoint-3370/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-4044/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-4044/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50ae65792f946dd206a56e7171e8841d42a6d25d5695f1e1d083a0e8ad3cee3a
3
+ size 438100144
checkpoint-4044/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4f90ccce91d0d7244cb66f2172c62962b5f639102025a0f65a9e9c5c9577a8
3
+ size 876321402
checkpoint-4044/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc14f83f1d8bcc8e0cf5de47b08170b2935f5ce887d598ce751ad81394d1f39d
3
+ size 14244
checkpoint-4044/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5bbe16a9975d3dcaf6e23f5f25fced7bda78de9f7385dedda983b7eb1e5c42
3
+ size 1064
checkpoint-4044/trainer_state.json ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9203949718676169,
3
+ "best_model_checkpoint": "./results/checkpoint-3370",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4044,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ },
93
+ {
94
+ "epoch": 2.2256867112100966,
95
+ "grad_norm": 0.9178161025047302,
96
+ "learning_rate": 2.582095096582467e-05,
97
+ "loss": 0.2085,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 2.9680772086117297,
102
+ "grad_norm": 1.1747676134109497,
103
+ "learning_rate": 2.4427934621099553e-05,
104
+ "loss": 0.1696,
105
+ "step": 2000
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_administration_accuracy": 0.8919220055710306,
110
+ "eval_administration_f1": 0.8805907698259323,
111
+ "eval_corruption_accuracy": 0.9454038997214484,
112
+ "eval_corruption_f1": 0.9422755927443824,
113
+ "eval_democracy_accuracy": 0.9333333333333333,
114
+ "eval_democracy_f1": 0.9277380787973176,
115
+ "eval_development_accuracy": 0.8846796657381616,
116
+ "eval_development_f1": 0.8767801428288444,
117
+ "eval_economy_accuracy": 0.9177344475394614,
118
+ "eval_economy_f1": 0.913190124488594,
119
+ "eval_education_accuracy": 0.959702878365831,
120
+ "eval_education_f1": 0.9592018840910198,
121
+ "eval_environment_accuracy": 0.9766016713091922,
122
+ "eval_environment_f1": 0.9753052667455842,
123
+ "eval_instability_accuracy": 0.9255338904363974,
124
+ "eval_instability_f1": 0.9219441146038561,
125
+ "eval_leadership_accuracy": 0.8503249767873723,
126
+ "eval_leadership_f1": 0.8464660955301658,
127
+ "eval_loss": 0.22912514209747314,
128
+ "eval_overall_accuracy": 0.923785205818632,
129
+ "eval_overall_f1": 0.9198927823234619,
130
+ "eval_race_accuracy": 0.9468895078922934,
131
+ "eval_race_f1": 0.9441406664359246,
132
+ "eval_religion_accuracy": 0.9418755803156917,
133
+ "eval_religion_f1": 0.9406664102976392,
134
+ "eval_runtime": 6.3983,
135
+ "eval_safety_accuracy": 0.9114206128133705,
136
+ "eval_safety_f1": 0.9104142414922826,
137
+ "eval_samples_per_second": 841.634,
138
+ "eval_steps_per_second": 52.67,
139
+ "step": 2022
140
+ },
141
+ {
142
+ "epoch": 3.7097253155159615,
143
+ "grad_norm": 1.7257879972457886,
144
+ "learning_rate": 2.3034918276374445e-05,
145
+ "loss": 0.1309,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 4.0,
150
+ "eval_administration_accuracy": 0.875766016713092,
151
+ "eval_administration_f1": 0.8761692862915822,
152
+ "eval_corruption_accuracy": 0.9465181058495822,
153
+ "eval_corruption_f1": 0.9432932911595113,
154
+ "eval_democracy_accuracy": 0.9344475394614671,
155
+ "eval_democracy_f1": 0.9289539940724922,
156
+ "eval_development_accuracy": 0.8804085422469824,
157
+ "eval_development_f1": 0.8733152085361343,
158
+ "eval_economy_accuracy": 0.9175487465181058,
159
+ "eval_economy_f1": 0.9168308054942083,
160
+ "eval_education_accuracy": 0.959702878365831,
161
+ "eval_education_f1": 0.9577025286068839,
162
+ "eval_environment_accuracy": 0.9740018570102136,
163
+ "eval_environment_f1": 0.9740155750677182,
164
+ "eval_instability_accuracy": 0.92330547818013,
165
+ "eval_instability_f1": 0.9214961270553598,
166
+ "eval_leadership_accuracy": 0.8469823584029712,
167
+ "eval_leadership_f1": 0.8454208641557382,
168
+ "eval_loss": 0.23740428686141968,
169
+ "eval_overall_accuracy": 0.9211853915196534,
170
+ "eval_overall_f1": 0.9191929747362485,
171
+ "eval_race_accuracy": 0.9452181987000928,
172
+ "eval_race_f1": 0.9441252497587079,
173
+ "eval_religion_accuracy": 0.940761374187558,
174
+ "eval_religion_f1": 0.9389083250935241,
175
+ "eval_runtime": 6.3816,
176
+ "eval_safety_accuracy": 0.9095636025998143,
177
+ "eval_safety_f1": 0.9100844415431236,
178
+ "eval_samples_per_second": 843.83,
179
+ "eval_steps_per_second": 52.808,
180
+ "step": 2696
181
+ },
182
+ {
183
+ "epoch": 4.451373422420193,
184
+ "grad_norm": 1.5328147411346436,
185
+ "learning_rate": 2.1641901931649333e-05,
186
+ "loss": 0.1085,
187
+ "step": 3000
188
+ },
189
+ {
190
+ "epoch": 5.0,
191
+ "eval_administration_accuracy": 0.8921077065923863,
192
+ "eval_administration_f1": 0.8861580465319954,
193
+ "eval_corruption_accuracy": 0.9463324048282266,
194
+ "eval_corruption_f1": 0.943458379396768,
195
+ "eval_democracy_accuracy": 0.9346332404828227,
196
+ "eval_democracy_f1": 0.9313693520601084,
197
+ "eval_development_accuracy": 0.8807799442896936,
198
+ "eval_development_f1": 0.8746742414193872,
199
+ "eval_economy_accuracy": 0.9175487465181058,
200
+ "eval_economy_f1": 0.9165794145446547,
201
+ "eval_education_accuracy": 0.9587743732590529,
202
+ "eval_education_f1": 0.9574397967431261,
203
+ "eval_environment_accuracy": 0.9736304549675023,
204
+ "eval_environment_f1": 0.9731615305259281,
205
+ "eval_instability_accuracy": 0.9257195914577531,
206
+ "eval_instability_f1": 0.9217833078621397,
207
+ "eval_leadership_accuracy": 0.8458681522748375,
208
+ "eval_leadership_f1": 0.849217017756863,
209
+ "eval_loss": 0.24142640829086304,
210
+ "eval_overall_accuracy": 0.9226245744351593,
211
+ "eval_overall_f1": 0.9203949718676169,
212
+ "eval_race_accuracy": 0.9452181987000928,
213
+ "eval_race_f1": 0.9418839421000289,
214
+ "eval_religion_accuracy": 0.9426183844011142,
215
+ "eval_religion_f1": 0.9420269885121756,
216
+ "eval_runtime": 6.3895,
217
+ "eval_safety_accuracy": 0.9082636954503249,
218
+ "eval_safety_f1": 0.9069876449582265,
219
+ "eval_samples_per_second": 842.785,
220
+ "eval_steps_per_second": 52.743,
221
+ "step": 3370
222
+ },
223
+ {
224
+ "epoch": 5.193021529324425,
225
+ "grad_norm": 1.2458630800247192,
226
+ "learning_rate": 2.024888558692422e-05,
227
+ "loss": 0.0925,
228
+ "step": 3500
229
+ },
230
+ {
231
+ "epoch": 5.935412026726058,
232
+ "grad_norm": 1.964983582496643,
233
+ "learning_rate": 1.885586924219911e-05,
234
+ "loss": 0.0759,
235
+ "step": 4000
236
+ },
237
+ {
238
+ "epoch": 6.0,
239
+ "eval_administration_accuracy": 0.8885793871866295,
240
+ "eval_administration_f1": 0.8796540295327503,
241
+ "eval_corruption_accuracy": 0.944661095636026,
242
+ "eval_corruption_f1": 0.9430977714288632,
243
+ "eval_democracy_accuracy": 0.9312906220984215,
244
+ "eval_democracy_f1": 0.9311108081348881,
245
+ "eval_development_accuracy": 0.8809656453110493,
246
+ "eval_development_f1": 0.8743050043114865,
247
+ "eval_economy_accuracy": 0.9162488393686166,
248
+ "eval_economy_f1": 0.9153006920861309,
249
+ "eval_education_accuracy": 0.9600742804085423,
250
+ "eval_education_f1": 0.9587642973646463,
251
+ "eval_environment_accuracy": 0.9756731662024141,
252
+ "eval_environment_f1": 0.9753440362005743,
253
+ "eval_instability_accuracy": 0.9205199628597958,
254
+ "eval_instability_f1": 0.9185046153727873,
255
+ "eval_leadership_accuracy": 0.8510677808727948,
256
+ "eval_leadership_f1": 0.8491756854928866,
257
+ "eval_loss": 0.25557437539100647,
258
+ "eval_overall_accuracy": 0.9218198700092851,
259
+ "eval_overall_f1": 0.9198843837682896,
260
+ "eval_race_accuracy": 0.9472609099350047,
261
+ "eval_race_f1": 0.9465463984676691,
262
+ "eval_religion_accuracy": 0.9420612813370474,
263
+ "eval_religion_f1": 0.941883774169552,
264
+ "eval_runtime": 6.406,
265
+ "eval_safety_accuracy": 0.903435468895079,
266
+ "eval_safety_f1": 0.9049254926572401,
267
+ "eval_samples_per_second": 840.618,
268
+ "eval_steps_per_second": 52.607,
269
+ "step": 4044
270
+ }
271
+ ],
272
+ "logging_steps": 500,
273
+ "max_steps": 10768,
274
+ "num_input_tokens_seen": 0,
275
+ "num_train_epochs": 16,
276
+ "save_steps": 500,
277
+ "stateful_callbacks": {
278
+ "EarlyStoppingCallback": {
279
+ "args": {
280
+ "early_stopping_patience": 2,
281
+ "early_stopping_threshold": 0.0
282
+ },
283
+ "attributes": {
284
+ "early_stopping_patience_counter": 1
285
+ }
286
+ },
287
+ "TrainerControl": {
288
+ "args": {
289
+ "should_epoch_stop": false,
290
+ "should_evaluate": false,
291
+ "should_log": false,
292
+ "should_save": true,
293
+ "should_training_stop": false
294
+ },
295
+ "attributes": {}
296
+ }
297
+ },
298
+ "total_flos": 3.4015358530289664e+16,
299
+ "train_batch_size": 16,
300
+ "trial_name": null,
301
+ "trial_params": null
302
+ }
checkpoint-4044/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-4718/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-4718/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:239459ee5e26bfb60ee18a2ddd2b19f03414d3a5ab527beb9d4cd2508e4f07d2
3
+ size 438100144
checkpoint-4718/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3ae74aa8ed8d1923695fb3378148bb3e12537f41d5e7b9873d73085e55d4aff
3
+ size 876321402
checkpoint-4718/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35450a7534388067569db6fc13d4f16c9ae0806647d56ff8ce7dc917bbea9205
3
+ size 14244
checkpoint-4718/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87bacdf67f2de74e66d886eb3658fb593c867d5deb67c6a9bb916b5d65f2116
3
+ size 1064
checkpoint-4718/trainer_state.json ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9203949718676169,
3
+ "best_model_checkpoint": "./results/checkpoint-3370",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4718,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ },
52
+ {
53
+ "epoch": 1.4840386043058649,
54
+ "grad_norm": 1.3921394348144531,
55
+ "learning_rate": 2.7213967310549777e-05,
56
+ "loss": 0.2646,
57
+ "step": 1000
58
+ },
59
+ {
60
+ "epoch": 2.0,
61
+ "eval_administration_accuracy": 0.886908077994429,
62
+ "eval_administration_f1": 0.8723732992505114,
63
+ "eval_corruption_accuracy": 0.9424326833797586,
64
+ "eval_corruption_f1": 0.9384898864061544,
65
+ "eval_democracy_accuracy": 0.933519034354689,
66
+ "eval_democracy_f1": 0.9231869597067329,
67
+ "eval_development_accuracy": 0.8739090064995357,
68
+ "eval_development_f1": 0.8668624356641937,
69
+ "eval_economy_accuracy": 0.9143918291550603,
70
+ "eval_economy_f1": 0.9111034488927022,
71
+ "eval_education_accuracy": 0.9580315691736304,
72
+ "eval_education_f1": 0.9543147194312244,
73
+ "eval_environment_accuracy": 0.9734447539461467,
74
+ "eval_environment_f1": 0.9731508098039792,
75
+ "eval_instability_accuracy": 0.9277623026926648,
76
+ "eval_instability_f1": 0.9222378035229967,
77
+ "eval_leadership_accuracy": 0.840297121634169,
78
+ "eval_leadership_f1": 0.8406461991345243,
79
+ "eval_loss": 0.23724055290222168,
80
+ "eval_overall_accuracy": 0.9205354379449089,
81
+ "eval_overall_f1": 0.9156647148374422,
82
+ "eval_race_accuracy": 0.9467038068709378,
83
+ "eval_race_f1": 0.943797657082642,
84
+ "eval_religion_accuracy": 0.9409470752089136,
85
+ "eval_religion_f1": 0.9380295054445577,
86
+ "eval_runtime": 6.3736,
87
+ "eval_safety_accuracy": 0.9080779944289693,
88
+ "eval_safety_f1": 0.9037838537090879,
89
+ "eval_samples_per_second": 844.886,
90
+ "eval_steps_per_second": 52.874,
91
+ "step": 1348
92
+ },
93
+ {
94
+ "epoch": 2.2256867112100966,
95
+ "grad_norm": 0.9178161025047302,
96
+ "learning_rate": 2.582095096582467e-05,
97
+ "loss": 0.2085,
98
+ "step": 1500
99
+ },
100
+ {
101
+ "epoch": 2.9680772086117297,
102
+ "grad_norm": 1.1747676134109497,
103
+ "learning_rate": 2.4427934621099553e-05,
104
+ "loss": 0.1696,
105
+ "step": 2000
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_administration_accuracy": 0.8919220055710306,
110
+ "eval_administration_f1": 0.8805907698259323,
111
+ "eval_corruption_accuracy": 0.9454038997214484,
112
+ "eval_corruption_f1": 0.9422755927443824,
113
+ "eval_democracy_accuracy": 0.9333333333333333,
114
+ "eval_democracy_f1": 0.9277380787973176,
115
+ "eval_development_accuracy": 0.8846796657381616,
116
+ "eval_development_f1": 0.8767801428288444,
117
+ "eval_economy_accuracy": 0.9177344475394614,
118
+ "eval_economy_f1": 0.913190124488594,
119
+ "eval_education_accuracy": 0.959702878365831,
120
+ "eval_education_f1": 0.9592018840910198,
121
+ "eval_environment_accuracy": 0.9766016713091922,
122
+ "eval_environment_f1": 0.9753052667455842,
123
+ "eval_instability_accuracy": 0.9255338904363974,
124
+ "eval_instability_f1": 0.9219441146038561,
125
+ "eval_leadership_accuracy": 0.8503249767873723,
126
+ "eval_leadership_f1": 0.8464660955301658,
127
+ "eval_loss": 0.22912514209747314,
128
+ "eval_overall_accuracy": 0.923785205818632,
129
+ "eval_overall_f1": 0.9198927823234619,
130
+ "eval_race_accuracy": 0.9468895078922934,
131
+ "eval_race_f1": 0.9441406664359246,
132
+ "eval_religion_accuracy": 0.9418755803156917,
133
+ "eval_religion_f1": 0.9406664102976392,
134
+ "eval_runtime": 6.3983,
135
+ "eval_safety_accuracy": 0.9114206128133705,
136
+ "eval_safety_f1": 0.9104142414922826,
137
+ "eval_samples_per_second": 841.634,
138
+ "eval_steps_per_second": 52.67,
139
+ "step": 2022
140
+ },
141
+ {
142
+ "epoch": 3.7097253155159615,
143
+ "grad_norm": 1.7257879972457886,
144
+ "learning_rate": 2.3034918276374445e-05,
145
+ "loss": 0.1309,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 4.0,
150
+ "eval_administration_accuracy": 0.875766016713092,
151
+ "eval_administration_f1": 0.8761692862915822,
152
+ "eval_corruption_accuracy": 0.9465181058495822,
153
+ "eval_corruption_f1": 0.9432932911595113,
154
+ "eval_democracy_accuracy": 0.9344475394614671,
155
+ "eval_democracy_f1": 0.9289539940724922,
156
+ "eval_development_accuracy": 0.8804085422469824,
157
+ "eval_development_f1": 0.8733152085361343,
158
+ "eval_economy_accuracy": 0.9175487465181058,
159
+ "eval_economy_f1": 0.9168308054942083,
160
+ "eval_education_accuracy": 0.959702878365831,
161
+ "eval_education_f1": 0.9577025286068839,
162
+ "eval_environment_accuracy": 0.9740018570102136,
163
+ "eval_environment_f1": 0.9740155750677182,
164
+ "eval_instability_accuracy": 0.92330547818013,
165
+ "eval_instability_f1": 0.9214961270553598,
166
+ "eval_leadership_accuracy": 0.8469823584029712,
167
+ "eval_leadership_f1": 0.8454208641557382,
168
+ "eval_loss": 0.23740428686141968,
169
+ "eval_overall_accuracy": 0.9211853915196534,
170
+ "eval_overall_f1": 0.9191929747362485,
171
+ "eval_race_accuracy": 0.9452181987000928,
172
+ "eval_race_f1": 0.9441252497587079,
173
+ "eval_religion_accuracy": 0.940761374187558,
174
+ "eval_religion_f1": 0.9389083250935241,
175
+ "eval_runtime": 6.3816,
176
+ "eval_safety_accuracy": 0.9095636025998143,
177
+ "eval_safety_f1": 0.9100844415431236,
178
+ "eval_samples_per_second": 843.83,
179
+ "eval_steps_per_second": 52.808,
180
+ "step": 2696
181
+ },
182
+ {
183
+ "epoch": 4.451373422420193,
184
+ "grad_norm": 1.5328147411346436,
185
+ "learning_rate": 2.1641901931649333e-05,
186
+ "loss": 0.1085,
187
+ "step": 3000
188
+ },
189
+ {
190
+ "epoch": 5.0,
191
+ "eval_administration_accuracy": 0.8921077065923863,
192
+ "eval_administration_f1": 0.8861580465319954,
193
+ "eval_corruption_accuracy": 0.9463324048282266,
194
+ "eval_corruption_f1": 0.943458379396768,
195
+ "eval_democracy_accuracy": 0.9346332404828227,
196
+ "eval_democracy_f1": 0.9313693520601084,
197
+ "eval_development_accuracy": 0.8807799442896936,
198
+ "eval_development_f1": 0.8746742414193872,
199
+ "eval_economy_accuracy": 0.9175487465181058,
200
+ "eval_economy_f1": 0.9165794145446547,
201
+ "eval_education_accuracy": 0.9587743732590529,
202
+ "eval_education_f1": 0.9574397967431261,
203
+ "eval_environment_accuracy": 0.9736304549675023,
204
+ "eval_environment_f1": 0.9731615305259281,
205
+ "eval_instability_accuracy": 0.9257195914577531,
206
+ "eval_instability_f1": 0.9217833078621397,
207
+ "eval_leadership_accuracy": 0.8458681522748375,
208
+ "eval_leadership_f1": 0.849217017756863,
209
+ "eval_loss": 0.24142640829086304,
210
+ "eval_overall_accuracy": 0.9226245744351593,
211
+ "eval_overall_f1": 0.9203949718676169,
212
+ "eval_race_accuracy": 0.9452181987000928,
213
+ "eval_race_f1": 0.9418839421000289,
214
+ "eval_religion_accuracy": 0.9426183844011142,
215
+ "eval_religion_f1": 0.9420269885121756,
216
+ "eval_runtime": 6.3895,
217
+ "eval_safety_accuracy": 0.9082636954503249,
218
+ "eval_safety_f1": 0.9069876449582265,
219
+ "eval_samples_per_second": 842.785,
220
+ "eval_steps_per_second": 52.743,
221
+ "step": 3370
222
+ },
223
+ {
224
+ "epoch": 5.193021529324425,
225
+ "grad_norm": 1.2458630800247192,
226
+ "learning_rate": 2.024888558692422e-05,
227
+ "loss": 0.0925,
228
+ "step": 3500
229
+ },
230
+ {
231
+ "epoch": 5.935412026726058,
232
+ "grad_norm": 1.964983582496643,
233
+ "learning_rate": 1.885586924219911e-05,
234
+ "loss": 0.0759,
235
+ "step": 4000
236
+ },
237
+ {
238
+ "epoch": 6.0,
239
+ "eval_administration_accuracy": 0.8885793871866295,
240
+ "eval_administration_f1": 0.8796540295327503,
241
+ "eval_corruption_accuracy": 0.944661095636026,
242
+ "eval_corruption_f1": 0.9430977714288632,
243
+ "eval_democracy_accuracy": 0.9312906220984215,
244
+ "eval_democracy_f1": 0.9311108081348881,
245
+ "eval_development_accuracy": 0.8809656453110493,
246
+ "eval_development_f1": 0.8743050043114865,
247
+ "eval_economy_accuracy": 0.9162488393686166,
248
+ "eval_economy_f1": 0.9153006920861309,
249
+ "eval_education_accuracy": 0.9600742804085423,
250
+ "eval_education_f1": 0.9587642973646463,
251
+ "eval_environment_accuracy": 0.9756731662024141,
252
+ "eval_environment_f1": 0.9753440362005743,
253
+ "eval_instability_accuracy": 0.9205199628597958,
254
+ "eval_instability_f1": 0.9185046153727873,
255
+ "eval_leadership_accuracy": 0.8510677808727948,
256
+ "eval_leadership_f1": 0.8491756854928866,
257
+ "eval_loss": 0.25557437539100647,
258
+ "eval_overall_accuracy": 0.9218198700092851,
259
+ "eval_overall_f1": 0.9198843837682896,
260
+ "eval_race_accuracy": 0.9472609099350047,
261
+ "eval_race_f1": 0.9465463984676691,
262
+ "eval_religion_accuracy": 0.9420612813370474,
263
+ "eval_religion_f1": 0.941883774169552,
264
+ "eval_runtime": 6.406,
265
+ "eval_safety_accuracy": 0.903435468895079,
266
+ "eval_safety_f1": 0.9049254926572401,
267
+ "eval_samples_per_second": 840.618,
268
+ "eval_steps_per_second": 52.607,
269
+ "step": 4044
270
+ },
271
+ {
272
+ "epoch": 6.67706013363029,
273
+ "grad_norm": 1.1733307838439941,
274
+ "learning_rate": 1.7462852897474e-05,
275
+ "loss": 0.0618,
276
+ "step": 4500
277
+ },
278
+ {
279
+ "epoch": 7.0,
280
+ "eval_administration_accuracy": 0.8891364902506964,
281
+ "eval_administration_f1": 0.8830637895995853,
282
+ "eval_corruption_accuracy": 0.9441039925719591,
283
+ "eval_corruption_f1": 0.9419800967289127,
284
+ "eval_democracy_accuracy": 0.9318477251624884,
285
+ "eval_democracy_f1": 0.9312436052041511,
286
+ "eval_development_accuracy": 0.8748375116063138,
287
+ "eval_development_f1": 0.8709535653934616,
288
+ "eval_economy_accuracy": 0.9151346332404828,
289
+ "eval_economy_f1": 0.9143035893385101,
290
+ "eval_education_accuracy": 0.9567316620241412,
291
+ "eval_education_f1": 0.956532871707756,
292
+ "eval_environment_accuracy": 0.9745589600742804,
293
+ "eval_environment_f1": 0.9745453763641572,
294
+ "eval_instability_accuracy": 0.9197771587743733,
295
+ "eval_instability_f1": 0.9164183053432433,
296
+ "eval_leadership_accuracy": 0.8493964716805943,
297
+ "eval_leadership_f1": 0.848764162637075,
298
+ "eval_loss": 0.2655349373817444,
299
+ "eval_overall_accuracy": 0.9205663881151346,
300
+ "eval_overall_f1": 0.9191255338624242,
301
+ "eval_race_accuracy": 0.9455896007428041,
302
+ "eval_race_f1": 0.9449102601982086,
303
+ "eval_religion_accuracy": 0.9424326833797586,
304
+ "eval_religion_f1": 0.9425971967753438,
305
+ "eval_runtime": 6.378,
306
+ "eval_safety_accuracy": 0.9032497678737234,
307
+ "eval_safety_f1": 0.9041935870586874,
308
+ "eval_samples_per_second": 844.304,
309
+ "eval_steps_per_second": 52.838,
310
+ "step": 4718
311
+ }
312
+ ],
313
+ "logging_steps": 500,
314
+ "max_steps": 10768,
315
+ "num_input_tokens_seen": 0,
316
+ "num_train_epochs": 16,
317
+ "save_steps": 500,
318
+ "stateful_callbacks": {
319
+ "EarlyStoppingCallback": {
320
+ "args": {
321
+ "early_stopping_patience": 2,
322
+ "early_stopping_threshold": 0.0
323
+ },
324
+ "attributes": {
325
+ "early_stopping_patience_counter": 2
326
+ }
327
+ },
328
+ "TrainerControl": {
329
+ "args": {
330
+ "should_epoch_stop": false,
331
+ "should_evaluate": false,
332
+ "should_log": false,
333
+ "should_save": true,
334
+ "should_training_stop": true
335
+ },
336
+ "attributes": {}
337
+ }
338
+ },
339
+ "total_flos": 3.968458495200461e+16,
340
+ "train_batch_size": 16,
341
+ "trial_name": null,
342
+ "trial_params": null
343
+ }
checkpoint-4718/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
checkpoint-674/config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11",
25
+ "12": "LABEL_12",
26
+ "13": "LABEL_13",
27
+ "14": "LABEL_14",
28
+ "15": "LABEL_15",
29
+ "16": "LABEL_16",
30
+ "17": "LABEL_17",
31
+ "18": "LABEL_18",
32
+ "19": "LABEL_19",
33
+ "20": "LABEL_20",
34
+ "21": "LABEL_21",
35
+ "22": "LABEL_22",
36
+ "23": "LABEL_23",
37
+ "24": "LABEL_24",
38
+ "25": "LABEL_25",
39
+ "26": "LABEL_26",
40
+ "27": "LABEL_27",
41
+ "28": "LABEL_28",
42
+ "29": "LABEL_29",
43
+ "30": "LABEL_30",
44
+ "31": "LABEL_31",
45
+ "32": "LABEL_32",
46
+ "33": "LABEL_33",
47
+ "34": "LABEL_34",
48
+ "35": "LABEL_35",
49
+ "36": "LABEL_36",
50
+ "37": "LABEL_37",
51
+ "38": "LABEL_38",
52
+ "39": "LABEL_39",
53
+ "40": "LABEL_40",
54
+ "41": "LABEL_41",
55
+ "42": "LABEL_42",
56
+ "43": "LABEL_43",
57
+ "44": "LABEL_44",
58
+ "45": "LABEL_45",
59
+ "46": "LABEL_46",
60
+ "47": "LABEL_47"
61
+ },
62
+ "initializer_range": 0.02,
63
+ "intermediate_size": 3072,
64
+ "label2id": {
65
+ "LABEL_0": 0,
66
+ "LABEL_1": 1,
67
+ "LABEL_10": 10,
68
+ "LABEL_11": 11,
69
+ "LABEL_12": 12,
70
+ "LABEL_13": 13,
71
+ "LABEL_14": 14,
72
+ "LABEL_15": 15,
73
+ "LABEL_16": 16,
74
+ "LABEL_17": 17,
75
+ "LABEL_18": 18,
76
+ "LABEL_19": 19,
77
+ "LABEL_2": 2,
78
+ "LABEL_20": 20,
79
+ "LABEL_21": 21,
80
+ "LABEL_22": 22,
81
+ "LABEL_23": 23,
82
+ "LABEL_24": 24,
83
+ "LABEL_25": 25,
84
+ "LABEL_26": 26,
85
+ "LABEL_27": 27,
86
+ "LABEL_28": 28,
87
+ "LABEL_29": 29,
88
+ "LABEL_3": 3,
89
+ "LABEL_30": 30,
90
+ "LABEL_31": 31,
91
+ "LABEL_32": 32,
92
+ "LABEL_33": 33,
93
+ "LABEL_34": 34,
94
+ "LABEL_35": 35,
95
+ "LABEL_36": 36,
96
+ "LABEL_37": 37,
97
+ "LABEL_38": 38,
98
+ "LABEL_39": 39,
99
+ "LABEL_4": 4,
100
+ "LABEL_40": 40,
101
+ "LABEL_41": 41,
102
+ "LABEL_42": 42,
103
+ "LABEL_43": 43,
104
+ "LABEL_44": 44,
105
+ "LABEL_45": 45,
106
+ "LABEL_46": 46,
107
+ "LABEL_47": 47,
108
+ "LABEL_5": 5,
109
+ "LABEL_6": 6,
110
+ "LABEL_7": 7,
111
+ "LABEL_8": 8,
112
+ "LABEL_9": 9
113
+ },
114
+ "layer_norm_eps": 1e-12,
115
+ "max_position_embeddings": 512,
116
+ "model_type": "bert",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
+ "pad_token_id": 0,
120
+ "position_embedding_type": "absolute",
121
+ "torch_dtype": "float32",
122
+ "transformers_version": "4.48.2",
123
+ "type_vocab_size": 2,
124
+ "use_cache": true,
125
+ "vocab_size": 30522
126
+ }
checkpoint-674/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d49ce2f1a0d0092e1c0c0cd7fab055a42d4b88fc148888c40876ab95f86abd
3
+ size 438100144
checkpoint-674/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f29edb5809f4669985e698b6315aa7d40836eb8562e5760932f7ed0d42168e
3
+ size 876321402
checkpoint-674/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d911a0bdc9ef9f737150dae5e889b348d434cf30a1a9f52362f02dc016393dd
3
+ size 14244
checkpoint-674/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1547eae16c074f20d1a13d5cb87c5bda11f93c49f4196988823c5e3b0a74c12e
3
+ size 1064
checkpoint-674/trainer_state.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8943279031006514,
3
+ "best_model_checkpoint": "./results/checkpoint-674",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 674,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7423904974016332,
13
+ "grad_norm": 1.0851198434829712,
14
+ "learning_rate": 2.8606983655274892e-05,
15
+ "loss": 0.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_administration_accuracy": 0.8700092850510678,
21
+ "eval_administration_f1": 0.830681542112301,
22
+ "eval_corruption_accuracy": 0.9301764159702879,
23
+ "eval_corruption_f1": 0.9210203978746075,
24
+ "eval_democracy_accuracy": 0.9201485608170845,
25
+ "eval_democracy_f1": 0.8972549864645475,
26
+ "eval_development_accuracy": 0.8672237697307336,
27
+ "eval_development_f1": 0.8462048329953127,
28
+ "eval_economy_accuracy": 0.9062209842154132,
29
+ "eval_economy_f1": 0.8952254345075943,
30
+ "eval_education_accuracy": 0.9467038068709378,
31
+ "eval_education_f1": 0.9344408657756663,
32
+ "eval_environment_accuracy": 0.9619312906220984,
33
+ "eval_environment_f1": 0.9564784032426319,
34
+ "eval_instability_accuracy": 0.9084493964716805,
35
+ "eval_instability_f1": 0.8872576883139862,
36
+ "eval_leadership_accuracy": 0.833983286908078,
37
+ "eval_leadership_f1": 0.8199437016930928,
38
+ "eval_loss": 0.2781185507774353,
39
+ "eval_overall_accuracy": 0.9090374497059734,
40
+ "eval_overall_f1": 0.8943279031006514,
41
+ "eval_race_accuracy": 0.9385329619312907,
42
+ "eval_race_f1": 0.9345823913548884,
43
+ "eval_religion_accuracy": 0.9303621169916435,
44
+ "eval_religion_f1": 0.9219388519537324,
45
+ "eval_runtime": 6.3896,
46
+ "eval_safety_accuracy": 0.8947075208913648,
47
+ "eval_safety_f1": 0.8869057409194557,
48
+ "eval_samples_per_second": 842.781,
49
+ "eval_steps_per_second": 52.742,
50
+ "step": 674
51
+ }
52
+ ],
53
+ "logging_steps": 500,
54
+ "max_steps": 10768,
55
+ "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 16,
57
+ "save_steps": 500,
58
+ "stateful_callbacks": {
59
+ "EarlyStoppingCallback": {
60
+ "args": {
61
+ "early_stopping_patience": 2,
62
+ "early_stopping_threshold": 0.0
63
+ },
64
+ "attributes": {
65
+ "early_stopping_patience_counter": 0
66
+ }
67
+ },
68
+ "TrainerControl": {
69
+ "args": {
70
+ "should_epoch_stop": false,
71
+ "should_evaluate": false,
72
+ "should_log": false,
73
+ "should_save": true,
74
+ "should_training_stop": false
75
+ },
76
+ "attributes": {}
77
+ }
78
+ },
79
+ "total_flos": 5669226421714944.0,
80
+ "train_batch_size": 16,
81
+ "trial_name": null,
82
+ "trial_params": null
83
+ }
checkpoint-674/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e04a520e0b54409a3029464cde7d12079be452340929d293e17e45309f482f8
3
+ size 5368
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }