rizwanulrudra committed on
Commit
f4b9f5f
·
verified ·
1 Parent(s): b0a6f05

Upload folder using huggingface_hub

Browse files
checkpoint-1140/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ElectraForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 768,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Personal",
13
+ "1": "Political",
14
+ "2": "Religious",
15
+ "3": "Geopolitical"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Personal": 0,
21
+ "Political": 1,
22
+ "Religious": 2,
23
+ "Geopolitical": 3
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "electra",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "summary_activation": "gelu",
34
+ "summary_last_dropout": 0.1,
35
+ "summary_type": "first",
36
+ "summary_use_proj": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.52.4",
39
+ "type_vocab_size": 2,
40
+ "use_cache": true,
41
+ "vocab_size": 32000
42
+ }
checkpoint-1140/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c9beb9d87d4f5718f68ba66439c0c038939098e05898cf03d296b35fea432ae
3
+ size 442505824
checkpoint-1140/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a4b08f8ca8f22a289ae4c8ebc2e9aff452273afe0011b30ea2d8d755cddccb6
3
+ size 885131514
checkpoint-1140/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff5b9afe2c551807c3590bab42bdaef11b8025fda7e72d48d393d1fcc4144903
3
+ size 14244
checkpoint-1140/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efe11eee4a6bb9a0bce4feef531b6f4b82c136a07c5b5c3ceaa90c5ea36b78ed
3
+ size 1064
checkpoint-1140/trainer_state.json ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1140,
3
+ "best_metric": 0.8596423207276622,
4
+ "best_model_checkpoint": "./banglabert-hate-speech/checkpoint-1140",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1140,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08771929824561403,
14
+ "grad_norm": 2.613939046859741,
15
+ "learning_rate": 2.9484210526315792e-05,
16
+ "loss": 1.2847,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.17543859649122806,
21
+ "grad_norm": 2.230876922607422,
22
+ "learning_rate": 2.8957894736842105e-05,
23
+ "loss": 1.1609,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.2631578947368421,
28
+ "grad_norm": 10.591499328613281,
29
+ "learning_rate": 2.8431578947368422e-05,
30
+ "loss": 1.033,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.3508771929824561,
35
+ "grad_norm": 4.717245101928711,
36
+ "learning_rate": 2.7905263157894738e-05,
37
+ "loss": 0.9866,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.43859649122807015,
42
+ "grad_norm": 7.096415042877197,
43
+ "learning_rate": 2.7378947368421055e-05,
44
+ "loss": 0.9235,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.5263157894736842,
49
+ "grad_norm": 5.946366786956787,
50
+ "learning_rate": 2.6852631578947368e-05,
51
+ "loss": 0.7276,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.6140350877192983,
56
+ "grad_norm": 2.292181968688965,
57
+ "learning_rate": 2.6326315789473687e-05,
58
+ "loss": 0.6928,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.7017543859649122,
63
+ "grad_norm": 7.117995262145996,
64
+ "learning_rate": 2.58e-05,
65
+ "loss": 0.7734,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.7894736842105263,
70
+ "grad_norm": 1.2756074666976929,
71
+ "learning_rate": 2.5273684210526317e-05,
72
+ "loss": 0.602,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.8771929824561403,
77
+ "grad_norm": 2.6861534118652344,
78
+ "learning_rate": 2.4747368421052633e-05,
79
+ "loss": 0.6134,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.9649122807017544,
84
+ "grad_norm": 11.638345718383789,
85
+ "learning_rate": 2.422105263157895e-05,
86
+ "loss": 0.517,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 1.0,
91
+ "eval_accuracy": 0.8491228070175438,
92
+ "eval_f1_macro": 0.8364544727451704,
93
+ "eval_f1_weighted": 0.8507649468201081,
94
+ "eval_loss": 0.501762330532074,
95
+ "eval_runtime": 9.2272,
96
+ "eval_samples_per_second": 61.774,
97
+ "eval_steps_per_second": 7.803,
98
+ "step": 570
99
+ },
100
+ {
101
+ "epoch": 1.0526315789473684,
102
+ "grad_norm": 7.222433090209961,
103
+ "learning_rate": 2.3694736842105262e-05,
104
+ "loss": 0.4792,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.1403508771929824,
109
+ "grad_norm": 14.4037504196167,
110
+ "learning_rate": 2.3168421052631582e-05,
111
+ "loss": 0.4212,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.2280701754385965,
116
+ "grad_norm": 1.6871181726455688,
117
+ "learning_rate": 2.2642105263157895e-05,
118
+ "loss": 0.5112,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.3157894736842106,
123
+ "grad_norm": 12.315402030944824,
124
+ "learning_rate": 2.211578947368421e-05,
125
+ "loss": 0.3467,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 1.4035087719298245,
130
+ "grad_norm": 26.74939727783203,
131
+ "learning_rate": 2.1589473684210528e-05,
132
+ "loss": 0.5507,
133
+ "step": 800
134
+ },
135
+ {
136
+ "epoch": 1.4912280701754386,
137
+ "grad_norm": 11.844022750854492,
138
+ "learning_rate": 2.1063157894736844e-05,
139
+ "loss": 0.4319,
140
+ "step": 850
141
+ },
142
+ {
143
+ "epoch": 1.5789473684210527,
144
+ "grad_norm": 0.30326738953590393,
145
+ "learning_rate": 2.0536842105263157e-05,
146
+ "loss": 0.4583,
147
+ "step": 900
148
+ },
149
+ {
150
+ "epoch": 1.6666666666666665,
151
+ "grad_norm": 6.0051140785217285,
152
+ "learning_rate": 2.0010526315789477e-05,
153
+ "loss": 0.3868,
154
+ "step": 950
155
+ },
156
+ {
157
+ "epoch": 1.7543859649122808,
158
+ "grad_norm": 1.0601614713668823,
159
+ "learning_rate": 1.948421052631579e-05,
160
+ "loss": 0.4607,
161
+ "step": 1000
162
+ },
163
+ {
164
+ "epoch": 1.8421052631578947,
165
+ "grad_norm": 0.5424970984458923,
166
+ "learning_rate": 1.8957894736842106e-05,
167
+ "loss": 0.4468,
168
+ "step": 1050
169
+ },
170
+ {
171
+ "epoch": 1.9298245614035088,
172
+ "grad_norm": 14.800076484680176,
173
+ "learning_rate": 1.8431578947368423e-05,
174
+ "loss": 0.4958,
175
+ "step": 1100
176
+ },
177
+ {
178
+ "epoch": 2.0,
179
+ "eval_accuracy": 0.8719298245614036,
180
+ "eval_f1_macro": 0.8596423207276622,
181
+ "eval_f1_weighted": 0.8710256158201644,
182
+ "eval_loss": 0.48058807849884033,
183
+ "eval_runtime": 9.1284,
184
+ "eval_samples_per_second": 62.442,
185
+ "eval_steps_per_second": 7.887,
186
+ "step": 1140
187
+ }
188
+ ],
189
+ "logging_steps": 50,
190
+ "max_steps": 2850,
191
+ "num_input_tokens_seen": 0,
192
+ "num_train_epochs": 5,
193
+ "save_steps": 500,
194
+ "stateful_callbacks": {
195
+ "TrainerControl": {
196
+ "args": {
197
+ "should_epoch_stop": false,
198
+ "should_evaluate": false,
199
+ "should_log": false,
200
+ "should_save": true,
201
+ "should_training_stop": false
202
+ },
203
+ "attributes": {}
204
+ }
205
+ },
206
+ "total_flos": 1199281725677568.0,
207
+ "train_batch_size": 8,
208
+ "trial_name": null,
209
+ "trial_params": null
210
+ }
checkpoint-1140/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e59a4485b50670550ee1f98c4f17155f387568fb8c6b921b878e2ea24de3f8
3
+ size 5304
checkpoint-1710/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ElectraForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 768,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Personal",
13
+ "1": "Political",
14
+ "2": "Religious",
15
+ "3": "Geopolitical"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Personal": 0,
21
+ "Political": 1,
22
+ "Religious": 2,
23
+ "Geopolitical": 3
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "electra",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "summary_activation": "gelu",
34
+ "summary_last_dropout": 0.1,
35
+ "summary_type": "first",
36
+ "summary_use_proj": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.52.4",
39
+ "type_vocab_size": 2,
40
+ "use_cache": true,
41
+ "vocab_size": 32000
42
+ }
checkpoint-1710/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2913bbcde2c37bf69585c41308a91aa354cba8447ae2f82909b05eb92bb87897
3
+ size 442505824
checkpoint-1710/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4eee0cdec76ad0d1b1a3927635d35cef35151c6625da91e250c8e1c9b6e3635
3
+ size 885131514
checkpoint-1710/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef2ed34cefa3c64aafe034252aabaf5bc4a526e5fb0d06ef6d466a1df11bb42
3
+ size 14244
checkpoint-1710/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d195afd31b7974adc795724099e3f38c5d8dcd84f04dc1ddd6cdf6e5ee28aee
3
+ size 1064
checkpoint-1710/trainer_state.json ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1710,
3
+ "best_metric": 0.876431567492838,
4
+ "best_model_checkpoint": "./banglabert-hate-speech/checkpoint-1710",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1710,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08771929824561403,
14
+ "grad_norm": 2.613939046859741,
15
+ "learning_rate": 2.9484210526315792e-05,
16
+ "loss": 1.2847,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.17543859649122806,
21
+ "grad_norm": 2.230876922607422,
22
+ "learning_rate": 2.8957894736842105e-05,
23
+ "loss": 1.1609,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.2631578947368421,
28
+ "grad_norm": 10.591499328613281,
29
+ "learning_rate": 2.8431578947368422e-05,
30
+ "loss": 1.033,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.3508771929824561,
35
+ "grad_norm": 4.717245101928711,
36
+ "learning_rate": 2.7905263157894738e-05,
37
+ "loss": 0.9866,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.43859649122807015,
42
+ "grad_norm": 7.096415042877197,
43
+ "learning_rate": 2.7378947368421055e-05,
44
+ "loss": 0.9235,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.5263157894736842,
49
+ "grad_norm": 5.946366786956787,
50
+ "learning_rate": 2.6852631578947368e-05,
51
+ "loss": 0.7276,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.6140350877192983,
56
+ "grad_norm": 2.292181968688965,
57
+ "learning_rate": 2.6326315789473687e-05,
58
+ "loss": 0.6928,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.7017543859649122,
63
+ "grad_norm": 7.117995262145996,
64
+ "learning_rate": 2.58e-05,
65
+ "loss": 0.7734,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.7894736842105263,
70
+ "grad_norm": 1.2756074666976929,
71
+ "learning_rate": 2.5273684210526317e-05,
72
+ "loss": 0.602,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.8771929824561403,
77
+ "grad_norm": 2.6861534118652344,
78
+ "learning_rate": 2.4747368421052633e-05,
79
+ "loss": 0.6134,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.9649122807017544,
84
+ "grad_norm": 11.638345718383789,
85
+ "learning_rate": 2.422105263157895e-05,
86
+ "loss": 0.517,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 1.0,
91
+ "eval_accuracy": 0.8491228070175438,
92
+ "eval_f1_macro": 0.8364544727451704,
93
+ "eval_f1_weighted": 0.8507649468201081,
94
+ "eval_loss": 0.501762330532074,
95
+ "eval_runtime": 9.2272,
96
+ "eval_samples_per_second": 61.774,
97
+ "eval_steps_per_second": 7.803,
98
+ "step": 570
99
+ },
100
+ {
101
+ "epoch": 1.0526315789473684,
102
+ "grad_norm": 7.222433090209961,
103
+ "learning_rate": 2.3694736842105262e-05,
104
+ "loss": 0.4792,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.1403508771929824,
109
+ "grad_norm": 14.4037504196167,
110
+ "learning_rate": 2.3168421052631582e-05,
111
+ "loss": 0.4212,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.2280701754385965,
116
+ "grad_norm": 1.6871181726455688,
117
+ "learning_rate": 2.2642105263157895e-05,
118
+ "loss": 0.5112,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.3157894736842106,
123
+ "grad_norm": 12.315402030944824,
124
+ "learning_rate": 2.211578947368421e-05,
125
+ "loss": 0.3467,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 1.4035087719298245,
130
+ "grad_norm": 26.74939727783203,
131
+ "learning_rate": 2.1589473684210528e-05,
132
+ "loss": 0.5507,
133
+ "step": 800
134
+ },
135
+ {
136
+ "epoch": 1.4912280701754386,
137
+ "grad_norm": 11.844022750854492,
138
+ "learning_rate": 2.1063157894736844e-05,
139
+ "loss": 0.4319,
140
+ "step": 850
141
+ },
142
+ {
143
+ "epoch": 1.5789473684210527,
144
+ "grad_norm": 0.30326738953590393,
145
+ "learning_rate": 2.0536842105263157e-05,
146
+ "loss": 0.4583,
147
+ "step": 900
148
+ },
149
+ {
150
+ "epoch": 1.6666666666666665,
151
+ "grad_norm": 6.0051140785217285,
152
+ "learning_rate": 2.0010526315789477e-05,
153
+ "loss": 0.3868,
154
+ "step": 950
155
+ },
156
+ {
157
+ "epoch": 1.7543859649122808,
158
+ "grad_norm": 1.0601614713668823,
159
+ "learning_rate": 1.948421052631579e-05,
160
+ "loss": 0.4607,
161
+ "step": 1000
162
+ },
163
+ {
164
+ "epoch": 1.8421052631578947,
165
+ "grad_norm": 0.5424970984458923,
166
+ "learning_rate": 1.8957894736842106e-05,
167
+ "loss": 0.4468,
168
+ "step": 1050
169
+ },
170
+ {
171
+ "epoch": 1.9298245614035088,
172
+ "grad_norm": 14.800076484680176,
173
+ "learning_rate": 1.8431578947368423e-05,
174
+ "loss": 0.4958,
175
+ "step": 1100
176
+ },
177
+ {
178
+ "epoch": 2.0,
179
+ "eval_accuracy": 0.8719298245614036,
180
+ "eval_f1_macro": 0.8596423207276622,
181
+ "eval_f1_weighted": 0.8710256158201644,
182
+ "eval_loss": 0.48058807849884033,
183
+ "eval_runtime": 9.1284,
184
+ "eval_samples_per_second": 62.442,
185
+ "eval_steps_per_second": 7.887,
186
+ "step": 1140
187
+ },
188
+ {
189
+ "epoch": 2.017543859649123,
190
+ "grad_norm": 20.266742706298828,
191
+ "learning_rate": 1.7905263157894736e-05,
192
+ "loss": 0.3943,
193
+ "step": 1150
194
+ },
195
+ {
196
+ "epoch": 2.1052631578947367,
197
+ "grad_norm": 0.22429589927196503,
198
+ "learning_rate": 1.7378947368421052e-05,
199
+ "loss": 0.2996,
200
+ "step": 1200
201
+ },
202
+ {
203
+ "epoch": 2.192982456140351,
204
+ "grad_norm": 0.8915501236915588,
205
+ "learning_rate": 1.685263157894737e-05,
206
+ "loss": 0.3662,
207
+ "step": 1250
208
+ },
209
+ {
210
+ "epoch": 2.280701754385965,
211
+ "grad_norm": 1.173509955406189,
212
+ "learning_rate": 1.6326315789473685e-05,
213
+ "loss": 0.2995,
214
+ "step": 1300
215
+ },
216
+ {
217
+ "epoch": 2.3684210526315788,
218
+ "grad_norm": 0.16873787343502045,
219
+ "learning_rate": 1.5799999999999998e-05,
220
+ "loss": 0.3123,
221
+ "step": 1350
222
+ },
223
+ {
224
+ "epoch": 2.456140350877193,
225
+ "grad_norm": 13.355467796325684,
226
+ "learning_rate": 1.5273684210526318e-05,
227
+ "loss": 0.3127,
228
+ "step": 1400
229
+ },
230
+ {
231
+ "epoch": 2.543859649122807,
232
+ "grad_norm": 13.341830253601074,
233
+ "learning_rate": 1.4747368421052632e-05,
234
+ "loss": 0.2525,
235
+ "step": 1450
236
+ },
237
+ {
238
+ "epoch": 2.6315789473684212,
239
+ "grad_norm": 0.2885662913322449,
240
+ "learning_rate": 1.4221052631578949e-05,
241
+ "loss": 0.2998,
242
+ "step": 1500
243
+ },
244
+ {
245
+ "epoch": 2.719298245614035,
246
+ "grad_norm": 2.877472400665283,
247
+ "learning_rate": 1.3694736842105263e-05,
248
+ "loss": 0.3174,
249
+ "step": 1550
250
+ },
251
+ {
252
+ "epoch": 2.807017543859649,
253
+ "grad_norm": 3.695666790008545,
254
+ "learning_rate": 1.316842105263158e-05,
255
+ "loss": 0.341,
256
+ "step": 1600
257
+ },
258
+ {
259
+ "epoch": 2.8947368421052633,
260
+ "grad_norm": 20.929218292236328,
261
+ "learning_rate": 1.2642105263157896e-05,
262
+ "loss": 0.3093,
263
+ "step": 1650
264
+ },
265
+ {
266
+ "epoch": 2.982456140350877,
267
+ "grad_norm": 0.13824953138828278,
268
+ "learning_rate": 1.211578947368421e-05,
269
+ "loss": 0.2969,
270
+ "step": 1700
271
+ },
272
+ {
273
+ "epoch": 3.0,
274
+ "eval_accuracy": 0.8859649122807017,
275
+ "eval_f1_macro": 0.876431567492838,
276
+ "eval_f1_weighted": 0.8855810649898733,
277
+ "eval_loss": 0.5191295742988586,
278
+ "eval_runtime": 9.2195,
279
+ "eval_samples_per_second": 61.826,
280
+ "eval_steps_per_second": 7.81,
281
+ "step": 1710
282
+ }
283
+ ],
284
+ "logging_steps": 50,
285
+ "max_steps": 2850,
286
+ "num_input_tokens_seen": 0,
287
+ "num_train_epochs": 5,
288
+ "save_steps": 500,
289
+ "stateful_callbacks": {
290
+ "TrainerControl": {
291
+ "args": {
292
+ "should_epoch_stop": false,
293
+ "should_evaluate": false,
294
+ "should_log": false,
295
+ "should_save": true,
296
+ "should_training_stop": false
297
+ },
298
+ "attributes": {}
299
+ }
300
+ },
301
+ "total_flos": 1798922588516352.0,
302
+ "train_batch_size": 8,
303
+ "trial_name": null,
304
+ "trial_params": null
305
+ }
checkpoint-1710/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e59a4485b50670550ee1f98c4f17155f387568fb8c6b921b878e2ea24de3f8
3
+ size 5304
checkpoint-2280/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ElectraForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 768,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Personal",
13
+ "1": "Political",
14
+ "2": "Religious",
15
+ "3": "Geopolitical"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Personal": 0,
21
+ "Political": 1,
22
+ "Religious": 2,
23
+ "Geopolitical": 3
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "electra",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "summary_activation": "gelu",
34
+ "summary_last_dropout": 0.1,
35
+ "summary_type": "first",
36
+ "summary_use_proj": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.52.4",
39
+ "type_vocab_size": 2,
40
+ "use_cache": true,
41
+ "vocab_size": 32000
42
+ }
checkpoint-2280/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53cf907ff8e5ecc13cb997f3873e9ff3925f31edb36ffd1fc40375e314ae61e
3
+ size 442505824
checkpoint-2280/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db2b21ecc5fa604e0ae728d50149e1841e8dad6e932b4f61664be88bbb14a789
3
+ size 885131514
checkpoint-2280/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb163261fd9ce3891ff2abe6b12d4585aab20f6b0e4e0ad8d8d83aeb48ac480b
3
+ size 14244
checkpoint-2280/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe981a7c7d2a16d26df56c8d97387cc5dad6bcddcbf70d958184abb073cbdfaf
3
+ size 1064
checkpoint-2280/trainer_state.json ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1710,
3
+ "best_metric": 0.876431567492838,
4
+ "best_model_checkpoint": "./banglabert-hate-speech/checkpoint-1710",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2280,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08771929824561403,
14
+ "grad_norm": 2.613939046859741,
15
+ "learning_rate": 2.9484210526315792e-05,
16
+ "loss": 1.2847,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.17543859649122806,
21
+ "grad_norm": 2.230876922607422,
22
+ "learning_rate": 2.8957894736842105e-05,
23
+ "loss": 1.1609,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.2631578947368421,
28
+ "grad_norm": 10.591499328613281,
29
+ "learning_rate": 2.8431578947368422e-05,
30
+ "loss": 1.033,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.3508771929824561,
35
+ "grad_norm": 4.717245101928711,
36
+ "learning_rate": 2.7905263157894738e-05,
37
+ "loss": 0.9866,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.43859649122807015,
42
+ "grad_norm": 7.096415042877197,
43
+ "learning_rate": 2.7378947368421055e-05,
44
+ "loss": 0.9235,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.5263157894736842,
49
+ "grad_norm": 5.946366786956787,
50
+ "learning_rate": 2.6852631578947368e-05,
51
+ "loss": 0.7276,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.6140350877192983,
56
+ "grad_norm": 2.292181968688965,
57
+ "learning_rate": 2.6326315789473687e-05,
58
+ "loss": 0.6928,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.7017543859649122,
63
+ "grad_norm": 7.117995262145996,
64
+ "learning_rate": 2.58e-05,
65
+ "loss": 0.7734,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.7894736842105263,
70
+ "grad_norm": 1.2756074666976929,
71
+ "learning_rate": 2.5273684210526317e-05,
72
+ "loss": 0.602,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.8771929824561403,
77
+ "grad_norm": 2.6861534118652344,
78
+ "learning_rate": 2.4747368421052633e-05,
79
+ "loss": 0.6134,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.9649122807017544,
84
+ "grad_norm": 11.638345718383789,
85
+ "learning_rate": 2.422105263157895e-05,
86
+ "loss": 0.517,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 1.0,
91
+ "eval_accuracy": 0.8491228070175438,
92
+ "eval_f1_macro": 0.8364544727451704,
93
+ "eval_f1_weighted": 0.8507649468201081,
94
+ "eval_loss": 0.501762330532074,
95
+ "eval_runtime": 9.2272,
96
+ "eval_samples_per_second": 61.774,
97
+ "eval_steps_per_second": 7.803,
98
+ "step": 570
99
+ },
100
+ {
101
+ "epoch": 1.0526315789473684,
102
+ "grad_norm": 7.222433090209961,
103
+ "learning_rate": 2.3694736842105262e-05,
104
+ "loss": 0.4792,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.1403508771929824,
109
+ "grad_norm": 14.4037504196167,
110
+ "learning_rate": 2.3168421052631582e-05,
111
+ "loss": 0.4212,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.2280701754385965,
116
+ "grad_norm": 1.6871181726455688,
117
+ "learning_rate": 2.2642105263157895e-05,
118
+ "loss": 0.5112,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.3157894736842106,
123
+ "grad_norm": 12.315402030944824,
124
+ "learning_rate": 2.211578947368421e-05,
125
+ "loss": 0.3467,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 1.4035087719298245,
130
+ "grad_norm": 26.74939727783203,
131
+ "learning_rate": 2.1589473684210528e-05,
132
+ "loss": 0.5507,
133
+ "step": 800
134
+ },
135
+ {
136
+ "epoch": 1.4912280701754386,
137
+ "grad_norm": 11.844022750854492,
138
+ "learning_rate": 2.1063157894736844e-05,
139
+ "loss": 0.4319,
140
+ "step": 850
141
+ },
142
+ {
143
+ "epoch": 1.5789473684210527,
144
+ "grad_norm": 0.30326738953590393,
145
+ "learning_rate": 2.0536842105263157e-05,
146
+ "loss": 0.4583,
147
+ "step": 900
148
+ },
149
+ {
150
+ "epoch": 1.6666666666666665,
151
+ "grad_norm": 6.0051140785217285,
152
+ "learning_rate": 2.0010526315789477e-05,
153
+ "loss": 0.3868,
154
+ "step": 950
155
+ },
156
+ {
157
+ "epoch": 1.7543859649122808,
158
+ "grad_norm": 1.0601614713668823,
159
+ "learning_rate": 1.948421052631579e-05,
160
+ "loss": 0.4607,
161
+ "step": 1000
162
+ },
163
+ {
164
+ "epoch": 1.8421052631578947,
165
+ "grad_norm": 0.5424970984458923,
166
+ "learning_rate": 1.8957894736842106e-05,
167
+ "loss": 0.4468,
168
+ "step": 1050
169
+ },
170
+ {
171
+ "epoch": 1.9298245614035088,
172
+ "grad_norm": 14.800076484680176,
173
+ "learning_rate": 1.8431578947368423e-05,
174
+ "loss": 0.4958,
175
+ "step": 1100
176
+ },
177
+ {
178
+ "epoch": 2.0,
179
+ "eval_accuracy": 0.8719298245614036,
180
+ "eval_f1_macro": 0.8596423207276622,
181
+ "eval_f1_weighted": 0.8710256158201644,
182
+ "eval_loss": 0.48058807849884033,
183
+ "eval_runtime": 9.1284,
184
+ "eval_samples_per_second": 62.442,
185
+ "eval_steps_per_second": 7.887,
186
+ "step": 1140
187
+ },
188
+ {
189
+ "epoch": 2.017543859649123,
190
+ "grad_norm": 20.266742706298828,
191
+ "learning_rate": 1.7905263157894736e-05,
192
+ "loss": 0.3943,
193
+ "step": 1150
194
+ },
195
+ {
196
+ "epoch": 2.1052631578947367,
197
+ "grad_norm": 0.22429589927196503,
198
+ "learning_rate": 1.7378947368421052e-05,
199
+ "loss": 0.2996,
200
+ "step": 1200
201
+ },
202
+ {
203
+ "epoch": 2.192982456140351,
204
+ "grad_norm": 0.8915501236915588,
205
+ "learning_rate": 1.685263157894737e-05,
206
+ "loss": 0.3662,
207
+ "step": 1250
208
+ },
209
+ {
210
+ "epoch": 2.280701754385965,
211
+ "grad_norm": 1.173509955406189,
212
+ "learning_rate": 1.6326315789473685e-05,
213
+ "loss": 0.2995,
214
+ "step": 1300
215
+ },
216
+ {
217
+ "epoch": 2.3684210526315788,
218
+ "grad_norm": 0.16873787343502045,
219
+ "learning_rate": 1.5799999999999998e-05,
220
+ "loss": 0.3123,
221
+ "step": 1350
222
+ },
223
+ {
224
+ "epoch": 2.456140350877193,
225
+ "grad_norm": 13.355467796325684,
226
+ "learning_rate": 1.5273684210526318e-05,
227
+ "loss": 0.3127,
228
+ "step": 1400
229
+ },
230
+ {
231
+ "epoch": 2.543859649122807,
232
+ "grad_norm": 13.341830253601074,
233
+ "learning_rate": 1.4747368421052632e-05,
234
+ "loss": 0.2525,
235
+ "step": 1450
236
+ },
237
+ {
238
+ "epoch": 2.6315789473684212,
239
+ "grad_norm": 0.2885662913322449,
240
+ "learning_rate": 1.4221052631578949e-05,
241
+ "loss": 0.2998,
242
+ "step": 1500
243
+ },
244
+ {
245
+ "epoch": 2.719298245614035,
246
+ "grad_norm": 2.877472400665283,
247
+ "learning_rate": 1.3694736842105263e-05,
248
+ "loss": 0.3174,
249
+ "step": 1550
250
+ },
251
+ {
252
+ "epoch": 2.807017543859649,
253
+ "grad_norm": 3.695666790008545,
254
+ "learning_rate": 1.316842105263158e-05,
255
+ "loss": 0.341,
256
+ "step": 1600
257
+ },
258
+ {
259
+ "epoch": 2.8947368421052633,
260
+ "grad_norm": 20.929218292236328,
261
+ "learning_rate": 1.2642105263157896e-05,
262
+ "loss": 0.3093,
263
+ "step": 1650
264
+ },
265
+ {
266
+ "epoch": 2.982456140350877,
267
+ "grad_norm": 0.13824953138828278,
268
+ "learning_rate": 1.211578947368421e-05,
269
+ "loss": 0.2969,
270
+ "step": 1700
271
+ },
272
+ {
273
+ "epoch": 3.0,
274
+ "eval_accuracy": 0.8859649122807017,
275
+ "eval_f1_macro": 0.876431567492838,
276
+ "eval_f1_weighted": 0.8855810649898733,
277
+ "eval_loss": 0.5191295742988586,
278
+ "eval_runtime": 9.2195,
279
+ "eval_samples_per_second": 61.826,
280
+ "eval_steps_per_second": 7.81,
281
+ "step": 1710
282
+ },
283
+ {
284
+ "epoch": 3.0701754385964914,
285
+ "grad_norm": 0.19890473783016205,
286
+ "learning_rate": 1.1589473684210527e-05,
287
+ "loss": 0.2249,
288
+ "step": 1750
289
+ },
290
+ {
291
+ "epoch": 3.1578947368421053,
292
+ "grad_norm": 0.09896814078092575,
293
+ "learning_rate": 1.1063157894736843e-05,
294
+ "loss": 0.1578,
295
+ "step": 1800
296
+ },
297
+ {
298
+ "epoch": 3.245614035087719,
299
+ "grad_norm": 0.18045368790626526,
300
+ "learning_rate": 1.0536842105263158e-05,
301
+ "loss": 0.1534,
302
+ "step": 1850
303
+ },
304
+ {
305
+ "epoch": 3.3333333333333335,
306
+ "grad_norm": 2.493330478668213,
307
+ "learning_rate": 1.0010526315789474e-05,
308
+ "loss": 0.181,
309
+ "step": 1900
310
+ },
311
+ {
312
+ "epoch": 3.4210526315789473,
313
+ "grad_norm": 0.25888559222221375,
314
+ "learning_rate": 9.484210526315791e-06,
315
+ "loss": 0.1758,
316
+ "step": 1950
317
+ },
318
+ {
319
+ "epoch": 3.5087719298245617,
320
+ "grad_norm": 59.44745635986328,
321
+ "learning_rate": 8.957894736842106e-06,
322
+ "loss": 0.2045,
323
+ "step": 2000
324
+ },
325
+ {
326
+ "epoch": 3.5964912280701755,
327
+ "grad_norm": 0.15124382078647614,
328
+ "learning_rate": 8.431578947368422e-06,
329
+ "loss": 0.3103,
330
+ "step": 2050
331
+ },
332
+ {
333
+ "epoch": 3.6842105263157894,
334
+ "grad_norm": 4.733994007110596,
335
+ "learning_rate": 7.905263157894738e-06,
336
+ "loss": 0.2384,
337
+ "step": 2100
338
+ },
339
+ {
340
+ "epoch": 3.7719298245614032,
341
+ "grad_norm": 0.46031907200813293,
342
+ "learning_rate": 7.378947368421053e-06,
343
+ "loss": 0.1849,
344
+ "step": 2150
345
+ },
346
+ {
347
+ "epoch": 3.8596491228070176,
348
+ "grad_norm": 5.210213661193848,
349
+ "learning_rate": 6.8526315789473685e-06,
350
+ "loss": 0.2268,
351
+ "step": 2200
352
+ },
353
+ {
354
+ "epoch": 3.9473684210526314,
355
+ "grad_norm": 0.19484597444534302,
356
+ "learning_rate": 6.326315789473684e-06,
357
+ "loss": 0.2313,
358
+ "step": 2250
359
+ },
360
+ {
361
+ "epoch": 4.0,
362
+ "eval_accuracy": 0.875438596491228,
363
+ "eval_f1_macro": 0.8628016579647876,
364
+ "eval_f1_weighted": 0.8751100908173923,
365
+ "eval_loss": 0.5712200999259949,
366
+ "eval_runtime": 9.187,
367
+ "eval_samples_per_second": 62.044,
368
+ "eval_steps_per_second": 7.837,
369
+ "step": 2280
370
+ }
371
+ ],
372
+ "logging_steps": 50,
373
+ "max_steps": 2850,
374
+ "num_input_tokens_seen": 0,
375
+ "num_train_epochs": 5,
376
+ "save_steps": 500,
377
+ "stateful_callbacks": {
378
+ "TrainerControl": {
379
+ "args": {
380
+ "should_epoch_stop": false,
381
+ "should_evaluate": false,
382
+ "should_log": false,
383
+ "should_save": true,
384
+ "should_training_stop": false
385
+ },
386
+ "attributes": {}
387
+ }
388
+ },
389
+ "total_flos": 2398563451355136.0,
390
+ "train_batch_size": 8,
391
+ "trial_name": null,
392
+ "trial_params": null
393
+ }
checkpoint-2280/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e59a4485b50670550ee1f98c4f17155f387568fb8c6b921b878e2ea24de3f8
3
+ size 5304
checkpoint-2850/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ElectraForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 768,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Personal",
13
+ "1": "Political",
14
+ "2": "Religious",
15
+ "3": "Geopolitical"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Personal": 0,
21
+ "Political": 1,
22
+ "Religious": 2,
23
+ "Geopolitical": 3
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "electra",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "summary_activation": "gelu",
34
+ "summary_last_dropout": 0.1,
35
+ "summary_type": "first",
36
+ "summary_use_proj": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.52.4",
39
+ "type_vocab_size": 2,
40
+ "use_cache": true,
41
+ "vocab_size": 32000
42
+ }
checkpoint-2850/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:103e82756deba030dabe6c6131e9c35ddd75617542c822bcb88d9d47ea3d692c
3
+ size 442505824
checkpoint-2850/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba9d38022474d3dbcb755e4c864d65ebba0ad98dad2735698de8a1d53a15834
3
+ size 885131514
checkpoint-2850/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42a34d67eebeddc815e3e11e61db97d051e4d58700b1bfcc8001f211636083fb
3
+ size 14244
checkpoint-2850/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:469267c99727d972deab65a94ded1577aa272e0ff46092f59ffd1d373c2c738d
3
+ size 1064
checkpoint-2850/trainer_state.json ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1710,
3
+ "best_metric": 0.876431567492838,
4
+ "best_model_checkpoint": "./banglabert-hate-speech/checkpoint-1710",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2850,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08771929824561403,
14
+ "grad_norm": 2.613939046859741,
15
+ "learning_rate": 2.9484210526315792e-05,
16
+ "loss": 1.2847,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.17543859649122806,
21
+ "grad_norm": 2.230876922607422,
22
+ "learning_rate": 2.8957894736842105e-05,
23
+ "loss": 1.1609,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.2631578947368421,
28
+ "grad_norm": 10.591499328613281,
29
+ "learning_rate": 2.8431578947368422e-05,
30
+ "loss": 1.033,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.3508771929824561,
35
+ "grad_norm": 4.717245101928711,
36
+ "learning_rate": 2.7905263157894738e-05,
37
+ "loss": 0.9866,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.43859649122807015,
42
+ "grad_norm": 7.096415042877197,
43
+ "learning_rate": 2.7378947368421055e-05,
44
+ "loss": 0.9235,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.5263157894736842,
49
+ "grad_norm": 5.946366786956787,
50
+ "learning_rate": 2.6852631578947368e-05,
51
+ "loss": 0.7276,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.6140350877192983,
56
+ "grad_norm": 2.292181968688965,
57
+ "learning_rate": 2.6326315789473687e-05,
58
+ "loss": 0.6928,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.7017543859649122,
63
+ "grad_norm": 7.117995262145996,
64
+ "learning_rate": 2.58e-05,
65
+ "loss": 0.7734,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.7894736842105263,
70
+ "grad_norm": 1.2756074666976929,
71
+ "learning_rate": 2.5273684210526317e-05,
72
+ "loss": 0.602,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.8771929824561403,
77
+ "grad_norm": 2.6861534118652344,
78
+ "learning_rate": 2.4747368421052633e-05,
79
+ "loss": 0.6134,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.9649122807017544,
84
+ "grad_norm": 11.638345718383789,
85
+ "learning_rate": 2.422105263157895e-05,
86
+ "loss": 0.517,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 1.0,
91
+ "eval_accuracy": 0.8491228070175438,
92
+ "eval_f1_macro": 0.8364544727451704,
93
+ "eval_f1_weighted": 0.8507649468201081,
94
+ "eval_loss": 0.501762330532074,
95
+ "eval_runtime": 9.2272,
96
+ "eval_samples_per_second": 61.774,
97
+ "eval_steps_per_second": 7.803,
98
+ "step": 570
99
+ },
100
+ {
101
+ "epoch": 1.0526315789473684,
102
+ "grad_norm": 7.222433090209961,
103
+ "learning_rate": 2.3694736842105262e-05,
104
+ "loss": 0.4792,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.1403508771929824,
109
+ "grad_norm": 14.4037504196167,
110
+ "learning_rate": 2.3168421052631582e-05,
111
+ "loss": 0.4212,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.2280701754385965,
116
+ "grad_norm": 1.6871181726455688,
117
+ "learning_rate": 2.2642105263157895e-05,
118
+ "loss": 0.5112,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.3157894736842106,
123
+ "grad_norm": 12.315402030944824,
124
+ "learning_rate": 2.211578947368421e-05,
125
+ "loss": 0.3467,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 1.4035087719298245,
130
+ "grad_norm": 26.74939727783203,
131
+ "learning_rate": 2.1589473684210528e-05,
132
+ "loss": 0.5507,
133
+ "step": 800
134
+ },
135
+ {
136
+ "epoch": 1.4912280701754386,
137
+ "grad_norm": 11.844022750854492,
138
+ "learning_rate": 2.1063157894736844e-05,
139
+ "loss": 0.4319,
140
+ "step": 850
141
+ },
142
+ {
143
+ "epoch": 1.5789473684210527,
144
+ "grad_norm": 0.30326738953590393,
145
+ "learning_rate": 2.0536842105263157e-05,
146
+ "loss": 0.4583,
147
+ "step": 900
148
+ },
149
+ {
150
+ "epoch": 1.6666666666666665,
151
+ "grad_norm": 6.0051140785217285,
152
+ "learning_rate": 2.0010526315789477e-05,
153
+ "loss": 0.3868,
154
+ "step": 950
155
+ },
156
+ {
157
+ "epoch": 1.7543859649122808,
158
+ "grad_norm": 1.0601614713668823,
159
+ "learning_rate": 1.948421052631579e-05,
160
+ "loss": 0.4607,
161
+ "step": 1000
162
+ },
163
+ {
164
+ "epoch": 1.8421052631578947,
165
+ "grad_norm": 0.5424970984458923,
166
+ "learning_rate": 1.8957894736842106e-05,
167
+ "loss": 0.4468,
168
+ "step": 1050
169
+ },
170
+ {
171
+ "epoch": 1.9298245614035088,
172
+ "grad_norm": 14.800076484680176,
173
+ "learning_rate": 1.8431578947368423e-05,
174
+ "loss": 0.4958,
175
+ "step": 1100
176
+ },
177
+ {
178
+ "epoch": 2.0,
179
+ "eval_accuracy": 0.8719298245614036,
180
+ "eval_f1_macro": 0.8596423207276622,
181
+ "eval_f1_weighted": 0.8710256158201644,
182
+ "eval_loss": 0.48058807849884033,
183
+ "eval_runtime": 9.1284,
184
+ "eval_samples_per_second": 62.442,
185
+ "eval_steps_per_second": 7.887,
186
+ "step": 1140
187
+ },
188
+ {
189
+ "epoch": 2.017543859649123,
190
+ "grad_norm": 20.266742706298828,
191
+ "learning_rate": 1.7905263157894736e-05,
192
+ "loss": 0.3943,
193
+ "step": 1150
194
+ },
195
+ {
196
+ "epoch": 2.1052631578947367,
197
+ "grad_norm": 0.22429589927196503,
198
+ "learning_rate": 1.7378947368421052e-05,
199
+ "loss": 0.2996,
200
+ "step": 1200
201
+ },
202
+ {
203
+ "epoch": 2.192982456140351,
204
+ "grad_norm": 0.8915501236915588,
205
+ "learning_rate": 1.685263157894737e-05,
206
+ "loss": 0.3662,
207
+ "step": 1250
208
+ },
209
+ {
210
+ "epoch": 2.280701754385965,
211
+ "grad_norm": 1.173509955406189,
212
+ "learning_rate": 1.6326315789473685e-05,
213
+ "loss": 0.2995,
214
+ "step": 1300
215
+ },
216
+ {
217
+ "epoch": 2.3684210526315788,
218
+ "grad_norm": 0.16873787343502045,
219
+ "learning_rate": 1.5799999999999998e-05,
220
+ "loss": 0.3123,
221
+ "step": 1350
222
+ },
223
+ {
224
+ "epoch": 2.456140350877193,
225
+ "grad_norm": 13.355467796325684,
226
+ "learning_rate": 1.5273684210526318e-05,
227
+ "loss": 0.3127,
228
+ "step": 1400
229
+ },
230
+ {
231
+ "epoch": 2.543859649122807,
232
+ "grad_norm": 13.341830253601074,
233
+ "learning_rate": 1.4747368421052632e-05,
234
+ "loss": 0.2525,
235
+ "step": 1450
236
+ },
237
+ {
238
+ "epoch": 2.6315789473684212,
239
+ "grad_norm": 0.2885662913322449,
240
+ "learning_rate": 1.4221052631578949e-05,
241
+ "loss": 0.2998,
242
+ "step": 1500
243
+ },
244
+ {
245
+ "epoch": 2.719298245614035,
246
+ "grad_norm": 2.877472400665283,
247
+ "learning_rate": 1.3694736842105263e-05,
248
+ "loss": 0.3174,
249
+ "step": 1550
250
+ },
251
+ {
252
+ "epoch": 2.807017543859649,
253
+ "grad_norm": 3.695666790008545,
254
+ "learning_rate": 1.316842105263158e-05,
255
+ "loss": 0.341,
256
+ "step": 1600
257
+ },
258
+ {
259
+ "epoch": 2.8947368421052633,
260
+ "grad_norm": 20.929218292236328,
261
+ "learning_rate": 1.2642105263157896e-05,
262
+ "loss": 0.3093,
263
+ "step": 1650
264
+ },
265
+ {
266
+ "epoch": 2.982456140350877,
267
+ "grad_norm": 0.13824953138828278,
268
+ "learning_rate": 1.211578947368421e-05,
269
+ "loss": 0.2969,
270
+ "step": 1700
271
+ },
272
+ {
273
+ "epoch": 3.0,
274
+ "eval_accuracy": 0.8859649122807017,
275
+ "eval_f1_macro": 0.876431567492838,
276
+ "eval_f1_weighted": 0.8855810649898733,
277
+ "eval_loss": 0.5191295742988586,
278
+ "eval_runtime": 9.2195,
279
+ "eval_samples_per_second": 61.826,
280
+ "eval_steps_per_second": 7.81,
281
+ "step": 1710
282
+ },
283
+ {
284
+ "epoch": 3.0701754385964914,
285
+ "grad_norm": 0.19890473783016205,
286
+ "learning_rate": 1.1589473684210527e-05,
287
+ "loss": 0.2249,
288
+ "step": 1750
289
+ },
290
+ {
291
+ "epoch": 3.1578947368421053,
292
+ "grad_norm": 0.09896814078092575,
293
+ "learning_rate": 1.1063157894736843e-05,
294
+ "loss": 0.1578,
295
+ "step": 1800
296
+ },
297
+ {
298
+ "epoch": 3.245614035087719,
299
+ "grad_norm": 0.18045368790626526,
300
+ "learning_rate": 1.0536842105263158e-05,
301
+ "loss": 0.1534,
302
+ "step": 1850
303
+ },
304
+ {
305
+ "epoch": 3.3333333333333335,
306
+ "grad_norm": 2.493330478668213,
307
+ "learning_rate": 1.0010526315789474e-05,
308
+ "loss": 0.181,
309
+ "step": 1900
310
+ },
311
+ {
312
+ "epoch": 3.4210526315789473,
313
+ "grad_norm": 0.25888559222221375,
314
+ "learning_rate": 9.484210526315791e-06,
315
+ "loss": 0.1758,
316
+ "step": 1950
317
+ },
318
+ {
319
+ "epoch": 3.5087719298245617,
320
+ "grad_norm": 59.44745635986328,
321
+ "learning_rate": 8.957894736842106e-06,
322
+ "loss": 0.2045,
323
+ "step": 2000
324
+ },
325
+ {
326
+ "epoch": 3.5964912280701755,
327
+ "grad_norm": 0.15124382078647614,
328
+ "learning_rate": 8.431578947368422e-06,
329
+ "loss": 0.3103,
330
+ "step": 2050
331
+ },
332
+ {
333
+ "epoch": 3.6842105263157894,
334
+ "grad_norm": 4.733994007110596,
335
+ "learning_rate": 7.905263157894738e-06,
336
+ "loss": 0.2384,
337
+ "step": 2100
338
+ },
339
+ {
340
+ "epoch": 3.7719298245614032,
341
+ "grad_norm": 0.46031907200813293,
342
+ "learning_rate": 7.378947368421053e-06,
343
+ "loss": 0.1849,
344
+ "step": 2150
345
+ },
346
+ {
347
+ "epoch": 3.8596491228070176,
348
+ "grad_norm": 5.210213661193848,
349
+ "learning_rate": 6.8526315789473685e-06,
350
+ "loss": 0.2268,
351
+ "step": 2200
352
+ },
353
+ {
354
+ "epoch": 3.9473684210526314,
355
+ "grad_norm": 0.19484597444534302,
356
+ "learning_rate": 6.326315789473684e-06,
357
+ "loss": 0.2313,
358
+ "step": 2250
359
+ },
360
+ {
361
+ "epoch": 4.0,
362
+ "eval_accuracy": 0.875438596491228,
363
+ "eval_f1_macro": 0.8628016579647876,
364
+ "eval_f1_weighted": 0.8751100908173923,
365
+ "eval_loss": 0.5712200999259949,
366
+ "eval_runtime": 9.187,
367
+ "eval_samples_per_second": 62.044,
368
+ "eval_steps_per_second": 7.837,
369
+ "step": 2280
370
+ },
371
+ {
372
+ "epoch": 4.035087719298246,
373
+ "grad_norm": 0.6245447397232056,
374
+ "learning_rate": 5.8e-06,
375
+ "loss": 0.1366,
376
+ "step": 2300
377
+ },
378
+ {
379
+ "epoch": 4.12280701754386,
380
+ "grad_norm": 13.598281860351562,
381
+ "learning_rate": 5.273684210526316e-06,
382
+ "loss": 0.1524,
383
+ "step": 2350
384
+ },
385
+ {
386
+ "epoch": 4.2105263157894735,
387
+ "grad_norm": 0.09752348810434341,
388
+ "learning_rate": 4.747368421052631e-06,
389
+ "loss": 0.1104,
390
+ "step": 2400
391
+ },
392
+ {
393
+ "epoch": 4.298245614035087,
394
+ "grad_norm": 0.12292918562889099,
395
+ "learning_rate": 4.221052631578948e-06,
396
+ "loss": 0.1157,
397
+ "step": 2450
398
+ },
399
+ {
400
+ "epoch": 4.385964912280702,
401
+ "grad_norm": 0.06472612172365189,
402
+ "learning_rate": 3.6947368421052633e-06,
403
+ "loss": 0.1327,
404
+ "step": 2500
405
+ },
406
+ {
407
+ "epoch": 4.473684210526316,
408
+ "grad_norm": 3.8604917526245117,
409
+ "learning_rate": 3.1684210526315793e-06,
410
+ "loss": 0.1394,
411
+ "step": 2550
412
+ },
413
+ {
414
+ "epoch": 4.56140350877193,
415
+ "grad_norm": 0.390594482421875,
416
+ "learning_rate": 2.6421052631578948e-06,
417
+ "loss": 0.1442,
418
+ "step": 2600
419
+ },
420
+ {
421
+ "epoch": 4.649122807017544,
422
+ "grad_norm": 1.7911845445632935,
423
+ "learning_rate": 2.1157894736842107e-06,
424
+ "loss": 0.1759,
425
+ "step": 2650
426
+ },
427
+ {
428
+ "epoch": 4.7368421052631575,
429
+ "grad_norm": 0.17604109644889832,
430
+ "learning_rate": 1.5894736842105263e-06,
431
+ "loss": 0.1394,
432
+ "step": 2700
433
+ },
434
+ {
435
+ "epoch": 4.824561403508772,
436
+ "grad_norm": 0.08516625314950943,
437
+ "learning_rate": 1.063157894736842e-06,
438
+ "loss": 0.1408,
439
+ "step": 2750
440
+ },
441
+ {
442
+ "epoch": 4.912280701754386,
443
+ "grad_norm": 0.03146979585289955,
444
+ "learning_rate": 5.368421052631578e-07,
445
+ "loss": 0.1251,
446
+ "step": 2800
447
+ },
448
+ {
449
+ "epoch": 5.0,
450
+ "grad_norm": 3.661275863647461,
451
+ "learning_rate": 1.0526315789473684e-08,
452
+ "loss": 0.0834,
453
+ "step": 2850
454
+ },
455
+ {
456
+ "epoch": 5.0,
457
+ "eval_accuracy": 0.8859649122807017,
458
+ "eval_f1_macro": 0.8727182063209552,
459
+ "eval_f1_weighted": 0.8854333132110179,
460
+ "eval_loss": 0.5799562931060791,
461
+ "eval_runtime": 9.1822,
462
+ "eval_samples_per_second": 62.077,
463
+ "eval_steps_per_second": 7.841,
464
+ "step": 2850
465
+ }
466
+ ],
467
+ "logging_steps": 50,
468
+ "max_steps": 2850,
469
+ "num_input_tokens_seen": 0,
470
+ "num_train_epochs": 5,
471
+ "save_steps": 500,
472
+ "stateful_callbacks": {
473
+ "TrainerControl": {
474
+ "args": {
475
+ "should_epoch_stop": false,
476
+ "should_evaluate": false,
477
+ "should_log": false,
478
+ "should_save": true,
479
+ "should_training_stop": true
480
+ },
481
+ "attributes": {}
482
+ }
483
+ },
484
+ "total_flos": 2998204314193920.0,
485
+ "train_batch_size": 8,
486
+ "trial_name": null,
487
+ "trial_params": null
488
+ }
checkpoint-2850/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e59a4485b50670550ee1f98c4f17155f387568fb8c6b921b878e2ea24de3f8
3
+ size 5304
checkpoint-570/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ElectraForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "embedding_size": 768,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Personal",
13
+ "1": "Political",
14
+ "2": "Religious",
15
+ "3": "Geopolitical"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "Personal": 0,
21
+ "Political": 1,
22
+ "Religious": 2,
23
+ "Geopolitical": 3
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "electra",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "summary_activation": "gelu",
34
+ "summary_last_dropout": 0.1,
35
+ "summary_type": "first",
36
+ "summary_use_proj": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.52.4",
39
+ "type_vocab_size": 2,
40
+ "use_cache": true,
41
+ "vocab_size": 32000
42
+ }
checkpoint-570/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac333d69f89fdc32bde19b14d4116b849c6111a5314f31613e821639d59be83
3
+ size 442505824
checkpoint-570/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:444c3b1195463a3224bb41abd49d002723211f122800308c6c074d11b2bb2f7b
3
+ size 885131514
checkpoint-570/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497ebba5762cf7b3a63a1bb7a679a7fc15de654767c2ef5d74fe834391b49a30
3
+ size 14244
checkpoint-570/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7534b6da25b19285c5c1070dba5090606637aa8907845f7a46fec774f90bfdb
3
+ size 1064
checkpoint-570/trainer_state.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 570,
3
+ "best_metric": 0.8364544727451704,
4
+ "best_model_checkpoint": "./banglabert-hate-speech/checkpoint-570",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 570,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08771929824561403,
14
+ "grad_norm": 2.613939046859741,
15
+ "learning_rate": 2.9484210526315792e-05,
16
+ "loss": 1.2847,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.17543859649122806,
21
+ "grad_norm": 2.230876922607422,
22
+ "learning_rate": 2.8957894736842105e-05,
23
+ "loss": 1.1609,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.2631578947368421,
28
+ "grad_norm": 10.591499328613281,
29
+ "learning_rate": 2.8431578947368422e-05,
30
+ "loss": 1.033,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.3508771929824561,
35
+ "grad_norm": 4.717245101928711,
36
+ "learning_rate": 2.7905263157894738e-05,
37
+ "loss": 0.9866,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.43859649122807015,
42
+ "grad_norm": 7.096415042877197,
43
+ "learning_rate": 2.7378947368421055e-05,
44
+ "loss": 0.9235,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.5263157894736842,
49
+ "grad_norm": 5.946366786956787,
50
+ "learning_rate": 2.6852631578947368e-05,
51
+ "loss": 0.7276,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.6140350877192983,
56
+ "grad_norm": 2.292181968688965,
57
+ "learning_rate": 2.6326315789473687e-05,
58
+ "loss": 0.6928,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.7017543859649122,
63
+ "grad_norm": 7.117995262145996,
64
+ "learning_rate": 2.58e-05,
65
+ "loss": 0.7734,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.7894736842105263,
70
+ "grad_norm": 1.2756074666976929,
71
+ "learning_rate": 2.5273684210526317e-05,
72
+ "loss": 0.602,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.8771929824561403,
77
+ "grad_norm": 2.6861534118652344,
78
+ "learning_rate": 2.4747368421052633e-05,
79
+ "loss": 0.6134,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.9649122807017544,
84
+ "grad_norm": 11.638345718383789,
85
+ "learning_rate": 2.422105263157895e-05,
86
+ "loss": 0.517,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 1.0,
91
+ "eval_accuracy": 0.8491228070175438,
92
+ "eval_f1_macro": 0.8364544727451704,
93
+ "eval_f1_weighted": 0.8507649468201081,
94
+ "eval_loss": 0.501762330532074,
95
+ "eval_runtime": 9.2272,
96
+ "eval_samples_per_second": 61.774,
97
+ "eval_steps_per_second": 7.803,
98
+ "step": 570
99
+ }
100
+ ],
101
+ "logging_steps": 50,
102
+ "max_steps": 2850,
103
+ "num_input_tokens_seen": 0,
104
+ "num_train_epochs": 5,
105
+ "save_steps": 500,
106
+ "stateful_callbacks": {
107
+ "TrainerControl": {
108
+ "args": {
109
+ "should_epoch_stop": false,
110
+ "should_evaluate": false,
111
+ "should_log": false,
112
+ "should_save": true,
113
+ "should_training_stop": false
114
+ },
115
+ "attributes": {}
116
+ }
117
+ },
118
+ "total_flos": 599640862838784.0,
119
+ "train_batch_size": 8,
120
+ "trial_name": null,
121
+ "trial_params": null
122
+ }
checkpoint-570/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e59a4485b50670550ee1f98c4f17155f387568fb8c6b921b878e2ea24de3f8
3
+ size 5304