Floressek commited on
Commit
4c819e2
·
verified ·
1 Parent(s): 933e304

Upload folder using huggingface_hub

Browse files
Files changed (31) hide show
  1. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json +24 -0
  2. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors +3 -0
  3. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt +3 -0
  4. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth +3 -0
  5. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt +3 -0
  6. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt +3 -0
  7. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json +7 -0
  8. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json +0 -0
  9. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json +56 -0
  10. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json +324 -0
  11. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin +3 -0
  12. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt +0 -0
  13. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json +24 -0
  14. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors +3 -0
  15. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt +3 -0
  16. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth +3 -0
  17. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt +3 -0
  18. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt +3 -0
  19. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json +7 -0
  20. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json +0 -0
  21. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json +56 -0
  22. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json +621 -0
  23. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin +3 -0
  24. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt +0 -0
  25. Floressek/sentiment_classification_from_distillbert/config.json +24 -0
  26. Floressek/sentiment_classification_from_distillbert/model.safetensors +3 -0
  27. Floressek/sentiment_classification_from_distillbert/special_tokens_map.json +7 -0
  28. Floressek/sentiment_classification_from_distillbert/tokenizer.json +0 -0
  29. Floressek/sentiment_classification_from_distillbert/tokenizer_config.json +56 -0
  30. Floressek/sentiment_classification_from_distillbert/training_args.bin +3 -0
  31. Floressek/sentiment_classification_from_distillbert/vocab.txt +0 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f53c9337700718dfe05b128baaf117bd0426c55a2997dcb5eba2a6ece8b78a
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60dd26e70b6b195af77ec24609240dc615fa06373e1041dbb7d954caac63ab9
3
+ size 535727755
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b3fc4dc6e72c19b95ccc6bf2103e72921fd75896fc4bef7c6d974ba74630e9
3
+ size 14645
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484e81aa45a06ff82acc987feb7e25bd65f6d761d4b270f18fa3fdb66af71f57
3
+ size 1383
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6350ab52d6a97e61162b0ff6ee33357b33b4a128de1a32409ac383b24e0b81
3
+ size 1465
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4092,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.024437927663734114,
14
+ "grad_norm": 4.433139801025391,
15
+ "learning_rate": 1.9760508308895406e-05,
16
+ "loss": 0.2466,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.04887585532746823,
21
+ "grad_norm": 2.9566471576690674,
22
+ "learning_rate": 1.9516129032258068e-05,
23
+ "loss": 0.0958,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.07331378299120235,
28
+ "grad_norm": 3.056684732437134,
29
+ "learning_rate": 1.9271749755620726e-05,
30
+ "loss": 0.0854,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.09775171065493646,
35
+ "grad_norm": 0.7034033536911011,
36
+ "learning_rate": 1.9027370478983384e-05,
37
+ "loss": 0.0854,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.12218963831867058,
42
+ "grad_norm": 2.2177963256835938,
43
+ "learning_rate": 1.8782991202346042e-05,
44
+ "loss": 0.0851,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.1466275659824047,
49
+ "grad_norm": 0.9197642803192139,
50
+ "learning_rate": 1.85386119257087e-05,
51
+ "loss": 0.0802,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.1710654936461388,
56
+ "grad_norm": 2.132237672805786,
57
+ "learning_rate": 1.829423264907136e-05,
58
+ "loss": 0.068,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.19550342130987292,
63
+ "grad_norm": 1.3075214624404907,
64
+ "learning_rate": 1.804985337243402e-05,
65
+ "loss": 0.0679,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.21994134897360704,
70
+ "grad_norm": 2.7458581924438477,
71
+ "learning_rate": 1.7805474095796678e-05,
72
+ "loss": 0.0698,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.24437927663734116,
77
+ "grad_norm": 1.7474775314331055,
78
+ "learning_rate": 1.7561094819159336e-05,
79
+ "loss": 0.0647,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.26881720430107525,
84
+ "grad_norm": 3.5915794372558594,
85
+ "learning_rate": 1.7316715542521995e-05,
86
+ "loss": 0.0598,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.2932551319648094,
91
+ "grad_norm": 0.5882957577705383,
92
+ "learning_rate": 1.7072336265884656e-05,
93
+ "loss": 0.0635,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.3176930596285435,
98
+ "grad_norm": 3.578653335571289,
99
+ "learning_rate": 1.6827956989247314e-05,
100
+ "loss": 0.0666,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.3421309872922776,
105
+ "grad_norm": 0.5485109090805054,
106
+ "learning_rate": 1.6583577712609973e-05,
107
+ "loss": 0.0583,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.36656891495601174,
112
+ "grad_norm": 0.6193661689758301,
113
+ "learning_rate": 1.633919843597263e-05,
114
+ "loss": 0.0637,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.39100684261974583,
119
+ "grad_norm": 0.11104666441679001,
120
+ "learning_rate": 1.609481915933529e-05,
121
+ "loss": 0.0505,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.41544477028348,
126
+ "grad_norm": 0.13834528625011444,
127
+ "learning_rate": 1.5850439882697947e-05,
128
+ "loss": 0.0535,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.4398826979472141,
133
+ "grad_norm": 0.19737549126148224,
134
+ "learning_rate": 1.5606060606060605e-05,
135
+ "loss": 0.0503,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.46432062561094817,
140
+ "grad_norm": 5.043758392333984,
141
+ "learning_rate": 1.5361681329423267e-05,
142
+ "loss": 0.0512,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.4887585532746823,
147
+ "grad_norm": 0.07497064024209976,
148
+ "learning_rate": 1.5117302052785925e-05,
149
+ "loss": 0.0526,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.5131964809384164,
154
+ "grad_norm": 1.9213156700134277,
155
+ "learning_rate": 1.4872922776148583e-05,
156
+ "loss": 0.051,
157
+ "step": 2100
158
+ },
159
+ {
160
+ "epoch": 0.5376344086021505,
161
+ "grad_norm": 1.2963168621063232,
162
+ "learning_rate": 1.4628543499511243e-05,
163
+ "loss": 0.0429,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 0.5620723362658846,
168
+ "grad_norm": 1.5551583766937256,
169
+ "learning_rate": 1.4384164222873903e-05,
170
+ "loss": 0.0602,
171
+ "step": 2300
172
+ },
173
+ {
174
+ "epoch": 0.5865102639296188,
175
+ "grad_norm": 2.956341028213501,
176
+ "learning_rate": 1.4139784946236561e-05,
177
+ "loss": 0.0564,
178
+ "step": 2400
179
+ },
180
+ {
181
+ "epoch": 0.6109481915933529,
182
+ "grad_norm": 0.2880280911922455,
183
+ "learning_rate": 1.3895405669599221e-05,
184
+ "loss": 0.0425,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 0.635386119257087,
189
+ "grad_norm": 0.6562920808792114,
190
+ "learning_rate": 1.3651026392961877e-05,
191
+ "loss": 0.0456,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 0.6598240469208211,
196
+ "grad_norm": 0.09532313793897629,
197
+ "learning_rate": 1.3406647116324536e-05,
198
+ "loss": 0.051,
199
+ "step": 2700
200
+ },
201
+ {
202
+ "epoch": 0.6842619745845552,
203
+ "grad_norm": 2.781747817993164,
204
+ "learning_rate": 1.3162267839687195e-05,
205
+ "loss": 0.0512,
206
+ "step": 2800
207
+ },
208
+ {
209
+ "epoch": 0.7086999022482894,
210
+ "grad_norm": 2.1303281784057617,
211
+ "learning_rate": 1.2917888563049854e-05,
212
+ "loss": 0.0519,
213
+ "step": 2900
214
+ },
215
+ {
216
+ "epoch": 0.7331378299120235,
217
+ "grad_norm": 0.10773918032646179,
218
+ "learning_rate": 1.2673509286412513e-05,
219
+ "loss": 0.042,
220
+ "step": 3000
221
+ },
222
+ {
223
+ "epoch": 0.7575757575757576,
224
+ "grad_norm": 0.533173143863678,
225
+ "learning_rate": 1.2429130009775172e-05,
226
+ "loss": 0.0569,
227
+ "step": 3100
228
+ },
229
+ {
230
+ "epoch": 0.7820136852394917,
231
+ "grad_norm": 2.882176637649536,
232
+ "learning_rate": 1.2184750733137831e-05,
233
+ "loss": 0.0421,
234
+ "step": 3200
235
+ },
236
+ {
237
+ "epoch": 0.8064516129032258,
238
+ "grad_norm": 5.4323530197143555,
239
+ "learning_rate": 1.194037145650049e-05,
240
+ "loss": 0.0458,
241
+ "step": 3300
242
+ },
243
+ {
244
+ "epoch": 0.83088954056696,
245
+ "grad_norm": 0.08420676738023758,
246
+ "learning_rate": 1.169599217986315e-05,
247
+ "loss": 0.0432,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 0.855327468230694,
252
+ "grad_norm": 3.6947717666625977,
253
+ "learning_rate": 1.1451612903225808e-05,
254
+ "loss": 0.0387,
255
+ "step": 3500
256
+ },
257
+ {
258
+ "epoch": 0.8797653958944281,
259
+ "grad_norm": 4.411167621612549,
260
+ "learning_rate": 1.1209677419354839e-05,
261
+ "loss": 0.0515,
262
+ "step": 3600
263
+ },
264
+ {
265
+ "epoch": 0.9042033235581622,
266
+ "grad_norm": 3.9330861568450928,
267
+ "learning_rate": 1.0965298142717497e-05,
268
+ "loss": 0.0368,
269
+ "step": 3700
270
+ },
271
+ {
272
+ "epoch": 0.9286412512218963,
273
+ "grad_norm": 0.9437419772148132,
274
+ "learning_rate": 1.0720918866080157e-05,
275
+ "loss": 0.0479,
276
+ "step": 3800
277
+ },
278
+ {
279
+ "epoch": 0.9530791788856305,
280
+ "grad_norm": 1.0870046615600586,
281
+ "learning_rate": 1.0476539589442815e-05,
282
+ "loss": 0.0426,
283
+ "step": 3900
284
+ },
285
+ {
286
+ "epoch": 0.9775171065493646,
287
+ "grad_norm": 3.1951255798339844,
288
+ "learning_rate": 1.0232160312805475e-05,
289
+ "loss": 0.0432,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 1.0,
294
+ "eval_accuracy": 0.9891518737672583,
295
+ "eval_f1": 0.9928387101834639,
296
+ "eval_loss": 0.04053397476673126,
297
+ "eval_runtime": 158.0778,
298
+ "eval_samples_per_second": 532.409,
299
+ "eval_steps_per_second": 11.096,
300
+ "step": 4092
301
+ }
302
+ ],
303
+ "logging_steps": 100,
304
+ "max_steps": 8184,
305
+ "num_input_tokens_seen": 0,
306
+ "num_train_epochs": 2,
307
+ "save_steps": 500,
308
+ "stateful_callbacks": {
309
+ "TrainerControl": {
310
+ "args": {
311
+ "should_epoch_stop": false,
312
+ "should_evaluate": false,
313
+ "should_log": false,
314
+ "should_save": true,
315
+ "should_training_stop": false
316
+ },
317
+ "attributes": {}
318
+ }
319
+ },
320
+ "total_flos": 2.6013285411072e+16,
321
+ "train_batch_size": 48,
322
+ "trial_name": null,
323
+ "trial_params": null
324
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223c1ad8c221cdb8480bd0fbf758bce16ab32234cf8f118612402b10534a701a
3
+ size 535727755
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1344599c19119661a23b30409dd058766623df54c92c90cfbddc059d4d9b8506
3
+ size 14645
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c83094e856654a01b4be2edab3b20eb796b993d8a6bc80224e3383365e815ba
3
+ size 1383
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6f8ed4f35d1035d3220bdfe3aa23843396e8ea037faf54ec8c9dc5fe556d41
3
+ size 1465
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 8184,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.024437927663734114,
14
+ "grad_norm": 4.433139801025391,
15
+ "learning_rate": 1.9760508308895406e-05,
16
+ "loss": 0.2466,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.04887585532746823,
21
+ "grad_norm": 2.9566471576690674,
22
+ "learning_rate": 1.9516129032258068e-05,
23
+ "loss": 0.0958,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.07331378299120235,
28
+ "grad_norm": 3.056684732437134,
29
+ "learning_rate": 1.9271749755620726e-05,
30
+ "loss": 0.0854,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.09775171065493646,
35
+ "grad_norm": 0.7034033536911011,
36
+ "learning_rate": 1.9027370478983384e-05,
37
+ "loss": 0.0854,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.12218963831867058,
42
+ "grad_norm": 2.2177963256835938,
43
+ "learning_rate": 1.8782991202346042e-05,
44
+ "loss": 0.0851,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.1466275659824047,
49
+ "grad_norm": 0.9197642803192139,
50
+ "learning_rate": 1.85386119257087e-05,
51
+ "loss": 0.0802,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.1710654936461388,
56
+ "grad_norm": 2.132237672805786,
57
+ "learning_rate": 1.829423264907136e-05,
58
+ "loss": 0.068,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.19550342130987292,
63
+ "grad_norm": 1.3075214624404907,
64
+ "learning_rate": 1.804985337243402e-05,
65
+ "loss": 0.0679,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.21994134897360704,
70
+ "grad_norm": 2.7458581924438477,
71
+ "learning_rate": 1.7805474095796678e-05,
72
+ "loss": 0.0698,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.24437927663734116,
77
+ "grad_norm": 1.7474775314331055,
78
+ "learning_rate": 1.7561094819159336e-05,
79
+ "loss": 0.0647,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.26881720430107525,
84
+ "grad_norm": 3.5915794372558594,
85
+ "learning_rate": 1.7316715542521995e-05,
86
+ "loss": 0.0598,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.2932551319648094,
91
+ "grad_norm": 0.5882957577705383,
92
+ "learning_rate": 1.7072336265884656e-05,
93
+ "loss": 0.0635,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.3176930596285435,
98
+ "grad_norm": 3.578653335571289,
99
+ "learning_rate": 1.6827956989247314e-05,
100
+ "loss": 0.0666,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.3421309872922776,
105
+ "grad_norm": 0.5485109090805054,
106
+ "learning_rate": 1.6583577712609973e-05,
107
+ "loss": 0.0583,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.36656891495601174,
112
+ "grad_norm": 0.6193661689758301,
113
+ "learning_rate": 1.633919843597263e-05,
114
+ "loss": 0.0637,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.39100684261974583,
119
+ "grad_norm": 0.11104666441679001,
120
+ "learning_rate": 1.609481915933529e-05,
121
+ "loss": 0.0505,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.41544477028348,
126
+ "grad_norm": 0.13834528625011444,
127
+ "learning_rate": 1.5850439882697947e-05,
128
+ "loss": 0.0535,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.4398826979472141,
133
+ "grad_norm": 0.19737549126148224,
134
+ "learning_rate": 1.5606060606060605e-05,
135
+ "loss": 0.0503,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.46432062561094817,
140
+ "grad_norm": 5.043758392333984,
141
+ "learning_rate": 1.5361681329423267e-05,
142
+ "loss": 0.0512,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.4887585532746823,
147
+ "grad_norm": 0.07497064024209976,
148
+ "learning_rate": 1.5117302052785925e-05,
149
+ "loss": 0.0526,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.5131964809384164,
154
+ "grad_norm": 1.9213156700134277,
155
+ "learning_rate": 1.4872922776148583e-05,
156
+ "loss": 0.051,
157
+ "step": 2100
158
+ },
159
+ {
160
+ "epoch": 0.5376344086021505,
161
+ "grad_norm": 1.2963168621063232,
162
+ "learning_rate": 1.4628543499511243e-05,
163
+ "loss": 0.0429,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 0.5620723362658846,
168
+ "grad_norm": 1.5551583766937256,
169
+ "learning_rate": 1.4384164222873903e-05,
170
+ "loss": 0.0602,
171
+ "step": 2300
172
+ },
173
+ {
174
+ "epoch": 0.5865102639296188,
175
+ "grad_norm": 2.956341028213501,
176
+ "learning_rate": 1.4139784946236561e-05,
177
+ "loss": 0.0564,
178
+ "step": 2400
179
+ },
180
+ {
181
+ "epoch": 0.6109481915933529,
182
+ "grad_norm": 0.2880280911922455,
183
+ "learning_rate": 1.3895405669599221e-05,
184
+ "loss": 0.0425,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 0.635386119257087,
189
+ "grad_norm": 0.6562920808792114,
190
+ "learning_rate": 1.3651026392961877e-05,
191
+ "loss": 0.0456,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 0.6598240469208211,
196
+ "grad_norm": 0.09532313793897629,
197
+ "learning_rate": 1.3406647116324536e-05,
198
+ "loss": 0.051,
199
+ "step": 2700
200
+ },
201
+ {
202
+ "epoch": 0.6842619745845552,
203
+ "grad_norm": 2.781747817993164,
204
+ "learning_rate": 1.3162267839687195e-05,
205
+ "loss": 0.0512,
206
+ "step": 2800
207
+ },
208
+ {
209
+ "epoch": 0.7086999022482894,
210
+ "grad_norm": 2.1303281784057617,
211
+ "learning_rate": 1.2917888563049854e-05,
212
+ "loss": 0.0519,
213
+ "step": 2900
214
+ },
215
+ {
216
+ "epoch": 0.7331378299120235,
217
+ "grad_norm": 0.10773918032646179,
218
+ "learning_rate": 1.2673509286412513e-05,
219
+ "loss": 0.042,
220
+ "step": 3000
221
+ },
222
+ {
223
+ "epoch": 0.7575757575757576,
224
+ "grad_norm": 0.533173143863678,
225
+ "learning_rate": 1.2429130009775172e-05,
226
+ "loss": 0.0569,
227
+ "step": 3100
228
+ },
229
+ {
230
+ "epoch": 0.7820136852394917,
231
+ "grad_norm": 2.882176637649536,
232
+ "learning_rate": 1.2184750733137831e-05,
233
+ "loss": 0.0421,
234
+ "step": 3200
235
+ },
236
+ {
237
+ "epoch": 0.8064516129032258,
238
+ "grad_norm": 5.4323530197143555,
239
+ "learning_rate": 1.194037145650049e-05,
240
+ "loss": 0.0458,
241
+ "step": 3300
242
+ },
243
+ {
244
+ "epoch": 0.83088954056696,
245
+ "grad_norm": 0.08420676738023758,
246
+ "learning_rate": 1.169599217986315e-05,
247
+ "loss": 0.0432,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 0.855327468230694,
252
+ "grad_norm": 3.6947717666625977,
253
+ "learning_rate": 1.1451612903225808e-05,
254
+ "loss": 0.0387,
255
+ "step": 3500
256
+ },
257
+ {
258
+ "epoch": 0.8797653958944281,
259
+ "grad_norm": 4.411167621612549,
260
+ "learning_rate": 1.1209677419354839e-05,
261
+ "loss": 0.0515,
262
+ "step": 3600
263
+ },
264
+ {
265
+ "epoch": 0.9042033235581622,
266
+ "grad_norm": 3.9330861568450928,
267
+ "learning_rate": 1.0965298142717497e-05,
268
+ "loss": 0.0368,
269
+ "step": 3700
270
+ },
271
+ {
272
+ "epoch": 0.9286412512218963,
273
+ "grad_norm": 0.9437419772148132,
274
+ "learning_rate": 1.0720918866080157e-05,
275
+ "loss": 0.0479,
276
+ "step": 3800
277
+ },
278
+ {
279
+ "epoch": 0.9530791788856305,
280
+ "grad_norm": 1.0870046615600586,
281
+ "learning_rate": 1.0476539589442815e-05,
282
+ "loss": 0.0426,
283
+ "step": 3900
284
+ },
285
+ {
286
+ "epoch": 0.9775171065493646,
287
+ "grad_norm": 3.1951255798339844,
288
+ "learning_rate": 1.0232160312805475e-05,
289
+ "loss": 0.0432,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 1.0,
294
+ "eval_accuracy": 0.9891518737672583,
295
+ "eval_f1": 0.9928387101834639,
296
+ "eval_loss": 0.04053397476673126,
297
+ "eval_runtime": 158.0778,
298
+ "eval_samples_per_second": 532.409,
299
+ "eval_steps_per_second": 11.096,
300
+ "step": 4092
301
+ },
302
+ {
303
+ "epoch": 1.0019550342130987,
304
+ "grad_norm": 0.5268033146858215,
305
+ "learning_rate": 9.987781036168133e-06,
306
+ "loss": 0.0386,
307
+ "step": 4100
308
+ },
309
+ {
310
+ "epoch": 1.0263929618768328,
311
+ "grad_norm": 0.3988477289676666,
312
+ "learning_rate": 9.743401759530793e-06,
313
+ "loss": 0.0224,
314
+ "step": 4200
315
+ },
316
+ {
317
+ "epoch": 1.050830889540567,
318
+ "grad_norm": 0.031836945563554764,
319
+ "learning_rate": 9.499022482893451e-06,
320
+ "loss": 0.0344,
321
+ "step": 4300
322
+ },
323
+ {
324
+ "epoch": 1.075268817204301,
325
+ "grad_norm": 1.4545081853866577,
326
+ "learning_rate": 9.25464320625611e-06,
327
+ "loss": 0.0285,
328
+ "step": 4400
329
+ },
330
+ {
331
+ "epoch": 1.099706744868035,
332
+ "grad_norm": 0.054912444204092026,
333
+ "learning_rate": 9.01026392961877e-06,
334
+ "loss": 0.0176,
335
+ "step": 4500
336
+ },
337
+ {
338
+ "epoch": 1.1241446725317692,
339
+ "grad_norm": 1.042144775390625,
340
+ "learning_rate": 8.765884652981427e-06,
341
+ "loss": 0.0235,
342
+ "step": 4600
343
+ },
344
+ {
345
+ "epoch": 1.1485826001955035,
346
+ "grad_norm": 0.12319644540548325,
347
+ "learning_rate": 8.521505376344087e-06,
348
+ "loss": 0.0274,
349
+ "step": 4700
350
+ },
351
+ {
352
+ "epoch": 1.1730205278592376,
353
+ "grad_norm": 0.08448143303394318,
354
+ "learning_rate": 8.277126099706745e-06,
355
+ "loss": 0.0261,
356
+ "step": 4800
357
+ },
358
+ {
359
+ "epoch": 1.1974584555229717,
360
+ "grad_norm": 0.06239945441484451,
361
+ "learning_rate": 8.032746823069404e-06,
362
+ "loss": 0.022,
363
+ "step": 4900
364
+ },
365
+ {
366
+ "epoch": 1.2218963831867058,
367
+ "grad_norm": 0.0412888340651989,
368
+ "learning_rate": 7.788367546432064e-06,
369
+ "loss": 0.0239,
370
+ "step": 5000
371
+ },
372
+ {
373
+ "epoch": 1.2463343108504399,
374
+ "grad_norm": 0.10470504313707352,
375
+ "learning_rate": 7.543988269794722e-06,
376
+ "loss": 0.0256,
377
+ "step": 5100
378
+ },
379
+ {
380
+ "epoch": 1.270772238514174,
381
+ "grad_norm": 0.3927100896835327,
382
+ "learning_rate": 7.299608993157381e-06,
383
+ "loss": 0.0299,
384
+ "step": 5200
385
+ },
386
+ {
387
+ "epoch": 1.295210166177908,
388
+ "grad_norm": 0.2635032534599304,
389
+ "learning_rate": 7.05522971652004e-06,
390
+ "loss": 0.0223,
391
+ "step": 5300
392
+ },
393
+ {
394
+ "epoch": 1.3196480938416422,
395
+ "grad_norm": 0.05280297249555588,
396
+ "learning_rate": 6.810850439882698e-06,
397
+ "loss": 0.0319,
398
+ "step": 5400
399
+ },
400
+ {
401
+ "epoch": 1.3440860215053765,
402
+ "grad_norm": 0.02359873428940773,
403
+ "learning_rate": 6.566471163245357e-06,
404
+ "loss": 0.02,
405
+ "step": 5500
406
+ },
407
+ {
408
+ "epoch": 1.3685239491691106,
409
+ "grad_norm": 0.42642369866371155,
410
+ "learning_rate": 6.324535679374389e-06,
411
+ "loss": 0.0241,
412
+ "step": 5600
413
+ },
414
+ {
415
+ "epoch": 1.3929618768328447,
416
+ "grad_norm": 0.7084789872169495,
417
+ "learning_rate": 6.080156402737049e-06,
418
+ "loss": 0.0195,
419
+ "step": 5700
420
+ },
421
+ {
422
+ "epoch": 1.4173998044965788,
423
+ "grad_norm": 0.14894872903823853,
424
+ "learning_rate": 5.835777126099708e-06,
425
+ "loss": 0.0233,
426
+ "step": 5800
427
+ },
428
+ {
429
+ "epoch": 1.4418377321603129,
430
+ "grad_norm": 6.313684463500977,
431
+ "learning_rate": 5.591397849462365e-06,
432
+ "loss": 0.0181,
433
+ "step": 5900
434
+ },
435
+ {
436
+ "epoch": 1.466275659824047,
437
+ "grad_norm": 0.9010400772094727,
438
+ "learning_rate": 5.347018572825024e-06,
439
+ "loss": 0.0243,
440
+ "step": 6000
441
+ },
442
+ {
443
+ "epoch": 1.490713587487781,
444
+ "grad_norm": 0.15028172731399536,
445
+ "learning_rate": 5.102639296187683e-06,
446
+ "loss": 0.0227,
447
+ "step": 6100
448
+ },
449
+ {
450
+ "epoch": 1.5151515151515151,
451
+ "grad_norm": 1.0572513341903687,
452
+ "learning_rate": 4.858260019550342e-06,
453
+ "loss": 0.019,
454
+ "step": 6200
455
+ },
456
+ {
457
+ "epoch": 1.5395894428152492,
458
+ "grad_norm": 0.056791041046381,
459
+ "learning_rate": 4.613880742913001e-06,
460
+ "loss": 0.0204,
461
+ "step": 6300
462
+ },
463
+ {
464
+ "epoch": 1.5640273704789833,
465
+ "grad_norm": 0.3077963590621948,
466
+ "learning_rate": 4.36950146627566e-06,
467
+ "loss": 0.0253,
468
+ "step": 6400
469
+ },
470
+ {
471
+ "epoch": 1.5884652981427174,
472
+ "grad_norm": 0.03899073228240013,
473
+ "learning_rate": 4.125122189638319e-06,
474
+ "loss": 0.0216,
475
+ "step": 6500
476
+ },
477
+ {
478
+ "epoch": 1.6129032258064515,
479
+ "grad_norm": 1.412708044052124,
480
+ "learning_rate": 3.8807429130009776e-06,
481
+ "loss": 0.0224,
482
+ "step": 6600
483
+ },
484
+ {
485
+ "epoch": 1.6373411534701856,
486
+ "grad_norm": 0.08187337219715118,
487
+ "learning_rate": 3.6363636363636366e-06,
488
+ "loss": 0.0178,
489
+ "step": 6700
490
+ },
491
+ {
492
+ "epoch": 1.6617790811339197,
493
+ "grad_norm": 0.04208606481552124,
494
+ "learning_rate": 3.391984359726295e-06,
495
+ "loss": 0.0247,
496
+ "step": 6800
497
+ },
498
+ {
499
+ "epoch": 1.6862170087976538,
500
+ "grad_norm": 0.0923227071762085,
501
+ "learning_rate": 3.147605083088954e-06,
502
+ "loss": 0.0207,
503
+ "step": 6900
504
+ },
505
+ {
506
+ "epoch": 1.710654936461388,
507
+ "grad_norm": 1.2065573930740356,
508
+ "learning_rate": 2.903225806451613e-06,
509
+ "loss": 0.0362,
510
+ "step": 7000
511
+ },
512
+ {
513
+ "epoch": 1.7350928641251222,
514
+ "grad_norm": 5.345006942749023,
515
+ "learning_rate": 2.658846529814272e-06,
516
+ "loss": 0.0274,
517
+ "step": 7100
518
+ },
519
+ {
520
+ "epoch": 1.7595307917888563,
521
+ "grad_norm": 0.1414783000946045,
522
+ "learning_rate": 2.414467253176931e-06,
523
+ "loss": 0.0204,
524
+ "step": 7200
525
+ },
526
+ {
527
+ "epoch": 1.7839687194525904,
528
+ "grad_norm": 1.9135044813156128,
529
+ "learning_rate": 2.17008797653959e-06,
530
+ "loss": 0.0279,
531
+ "step": 7300
532
+ },
533
+ {
534
+ "epoch": 1.8084066471163245,
535
+ "grad_norm": 1.5857988595962524,
536
+ "learning_rate": 1.9257086999022484e-06,
537
+ "loss": 0.0251,
538
+ "step": 7400
539
+ },
540
+ {
541
+ "epoch": 1.8328445747800588,
542
+ "grad_norm": 0.38852691650390625,
543
+ "learning_rate": 1.6813294232649072e-06,
544
+ "loss": 0.0199,
545
+ "step": 7500
546
+ },
547
+ {
548
+ "epoch": 1.857282502443793,
549
+ "grad_norm": 0.09283141791820526,
550
+ "learning_rate": 1.4369501466275662e-06,
551
+ "loss": 0.0178,
552
+ "step": 7600
553
+ },
554
+ {
555
+ "epoch": 1.881720430107527,
556
+ "grad_norm": 0.04146512970328331,
557
+ "learning_rate": 1.192570869990225e-06,
558
+ "loss": 0.017,
559
+ "step": 7700
560
+ },
561
+ {
562
+ "epoch": 1.906158357771261,
563
+ "grad_norm": 0.029652154073119164,
564
+ "learning_rate": 9.481915933528838e-07,
565
+ "loss": 0.0193,
566
+ "step": 7800
567
+ },
568
+ {
569
+ "epoch": 1.9305962854349952,
570
+ "grad_norm": 0.04845303297042847,
571
+ "learning_rate": 7.038123167155427e-07,
572
+ "loss": 0.0287,
573
+ "step": 7900
574
+ },
575
+ {
576
+ "epoch": 1.9550342130987293,
577
+ "grad_norm": 0.05037612095475197,
578
+ "learning_rate": 4.618768328445748e-07,
579
+ "loss": 0.0257,
580
+ "step": 8000
581
+ },
582
+ {
583
+ "epoch": 1.9794721407624634,
584
+ "grad_norm": 0.9069479703903198,
585
+ "learning_rate": 2.1749755620723366e-07,
586
+ "loss": 0.0299,
587
+ "step": 8100
588
+ },
589
+ {
590
+ "epoch": 2.0,
591
+ "eval_accuracy": 0.9917183527007438,
592
+ "eval_f1": 0.994545440316787,
593
+ "eval_loss": 0.03528362512588501,
594
+ "eval_runtime": 158.882,
595
+ "eval_samples_per_second": 529.714,
596
+ "eval_steps_per_second": 11.04,
597
+ "step": 8184
598
+ }
599
+ ],
600
+ "logging_steps": 100,
601
+ "max_steps": 8184,
602
+ "num_input_tokens_seen": 0,
603
+ "num_train_epochs": 2,
604
+ "save_steps": 500,
605
+ "stateful_callbacks": {
606
+ "TrainerControl": {
607
+ "args": {
608
+ "should_epoch_stop": false,
609
+ "should_evaluate": false,
610
+ "should_log": false,
611
+ "should_save": true,
612
+ "should_training_stop": true
613
+ },
614
+ "attributes": {}
615
+ }
616
+ },
617
+ "total_flos": 5.2026570822144e+16,
618
+ "train_batch_size": 48,
619
+ "trial_name": null,
620
+ "trial_params": null
621
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/vocab.txt ADDED
The diff for this file is too large to render. See raw diff