Floressek committed on
Commit
b2bc225
·
verified ·
1 Parent(s): 193c905

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. .gitattributes +1 -0
  2. .hfignore +13 -0
  3. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json +24 -0
  4. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors +3 -0
  5. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt +3 -0
  6. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth +3 -0
  7. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt +3 -0
  8. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt +3 -0
  9. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json +7 -0
  10. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json +0 -0
  11. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json +56 -0
  12. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json +324 -0
  13. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin +3 -0
  14. Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt +0 -0
  15. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json +24 -0
  16. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors +3 -0
  17. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt +3 -0
  18. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth +3 -0
  19. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt +3 -0
  20. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt +3 -0
  21. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json +7 -0
  22. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json +0 -0
  23. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json +56 -0
  24. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json +621 -0
  25. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin +3 -0
  26. Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt +0 -0
  27. Floressek/sentiment_classification_from_distillbert/config.json +24 -0
  28. Floressek/sentiment_classification_from_distillbert/model.safetensors +3 -0
  29. Floressek/sentiment_classification_from_distillbert/special_tokens_map.json +7 -0
  30. Floressek/sentiment_classification_from_distillbert/tokenizer.json +0 -0
  31. Floressek/sentiment_classification_from_distillbert/tokenizer_config.json +56 -0
  32. Floressek/sentiment_classification_from_distillbert/training_args.bin +3 -0
  33. Floressek/sentiment_classification_from_distillbert/vocab.txt +0 -0
  34. Szymon_Florek_Model_fine_tuning.ipynb +1145 -0
  35. data/Amazon_Unlocked_Mobile.csv +3 -0
  36. data/Amazon_Unlocked_Mobile.csv.zip +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/Amazon_Unlocked_Mobile.csv filter=lfs diff=lfs merge=lfs -text
.hfignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv/
2
+ .git/
3
+ **/__pycache__/
4
+ **/*.pyc
5
+ **/*.pyd
6
+ **/*.so
7
+ **/*.parquet
8
+ **/*.zip
9
+ **/*.whl
10
+ data/
11
+ datasets/
12
+ notebooks/
13
+ tests/
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f53c9337700718dfe05b128baaf117bd0426c55a2997dcb5eba2a6ece8b78a
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60dd26e70b6b195af77ec24609240dc615fa06373e1041dbb7d954caac63ab9
3
+ size 535727755
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b3fc4dc6e72c19b95ccc6bf2103e72921fd75896fc4bef7c6d974ba74630e9
3
+ size 14645
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484e81aa45a06ff82acc987feb7e25bd65f6d761d4b270f18fa3fdb66af71f57
3
+ size 1383
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6350ab52d6a97e61162b0ff6ee33357b33b4a128de1a32409ac383b24e0b81
3
+ size 1465
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4092,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.024437927663734114,
14
+ "grad_norm": 4.433139801025391,
15
+ "learning_rate": 1.9760508308895406e-05,
16
+ "loss": 0.2466,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.04887585532746823,
21
+ "grad_norm": 2.9566471576690674,
22
+ "learning_rate": 1.9516129032258068e-05,
23
+ "loss": 0.0958,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.07331378299120235,
28
+ "grad_norm": 3.056684732437134,
29
+ "learning_rate": 1.9271749755620726e-05,
30
+ "loss": 0.0854,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.09775171065493646,
35
+ "grad_norm": 0.7034033536911011,
36
+ "learning_rate": 1.9027370478983384e-05,
37
+ "loss": 0.0854,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.12218963831867058,
42
+ "grad_norm": 2.2177963256835938,
43
+ "learning_rate": 1.8782991202346042e-05,
44
+ "loss": 0.0851,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.1466275659824047,
49
+ "grad_norm": 0.9197642803192139,
50
+ "learning_rate": 1.85386119257087e-05,
51
+ "loss": 0.0802,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.1710654936461388,
56
+ "grad_norm": 2.132237672805786,
57
+ "learning_rate": 1.829423264907136e-05,
58
+ "loss": 0.068,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.19550342130987292,
63
+ "grad_norm": 1.3075214624404907,
64
+ "learning_rate": 1.804985337243402e-05,
65
+ "loss": 0.0679,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.21994134897360704,
70
+ "grad_norm": 2.7458581924438477,
71
+ "learning_rate": 1.7805474095796678e-05,
72
+ "loss": 0.0698,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.24437927663734116,
77
+ "grad_norm": 1.7474775314331055,
78
+ "learning_rate": 1.7561094819159336e-05,
79
+ "loss": 0.0647,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.26881720430107525,
84
+ "grad_norm": 3.5915794372558594,
85
+ "learning_rate": 1.7316715542521995e-05,
86
+ "loss": 0.0598,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.2932551319648094,
91
+ "grad_norm": 0.5882957577705383,
92
+ "learning_rate": 1.7072336265884656e-05,
93
+ "loss": 0.0635,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.3176930596285435,
98
+ "grad_norm": 3.578653335571289,
99
+ "learning_rate": 1.6827956989247314e-05,
100
+ "loss": 0.0666,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.3421309872922776,
105
+ "grad_norm": 0.5485109090805054,
106
+ "learning_rate": 1.6583577712609973e-05,
107
+ "loss": 0.0583,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.36656891495601174,
112
+ "grad_norm": 0.6193661689758301,
113
+ "learning_rate": 1.633919843597263e-05,
114
+ "loss": 0.0637,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.39100684261974583,
119
+ "grad_norm": 0.11104666441679001,
120
+ "learning_rate": 1.609481915933529e-05,
121
+ "loss": 0.0505,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.41544477028348,
126
+ "grad_norm": 0.13834528625011444,
127
+ "learning_rate": 1.5850439882697947e-05,
128
+ "loss": 0.0535,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.4398826979472141,
133
+ "grad_norm": 0.19737549126148224,
134
+ "learning_rate": 1.5606060606060605e-05,
135
+ "loss": 0.0503,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.46432062561094817,
140
+ "grad_norm": 5.043758392333984,
141
+ "learning_rate": 1.5361681329423267e-05,
142
+ "loss": 0.0512,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.4887585532746823,
147
+ "grad_norm": 0.07497064024209976,
148
+ "learning_rate": 1.5117302052785925e-05,
149
+ "loss": 0.0526,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.5131964809384164,
154
+ "grad_norm": 1.9213156700134277,
155
+ "learning_rate": 1.4872922776148583e-05,
156
+ "loss": 0.051,
157
+ "step": 2100
158
+ },
159
+ {
160
+ "epoch": 0.5376344086021505,
161
+ "grad_norm": 1.2963168621063232,
162
+ "learning_rate": 1.4628543499511243e-05,
163
+ "loss": 0.0429,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 0.5620723362658846,
168
+ "grad_norm": 1.5551583766937256,
169
+ "learning_rate": 1.4384164222873903e-05,
170
+ "loss": 0.0602,
171
+ "step": 2300
172
+ },
173
+ {
174
+ "epoch": 0.5865102639296188,
175
+ "grad_norm": 2.956341028213501,
176
+ "learning_rate": 1.4139784946236561e-05,
177
+ "loss": 0.0564,
178
+ "step": 2400
179
+ },
180
+ {
181
+ "epoch": 0.6109481915933529,
182
+ "grad_norm": 0.2880280911922455,
183
+ "learning_rate": 1.3895405669599221e-05,
184
+ "loss": 0.0425,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 0.635386119257087,
189
+ "grad_norm": 0.6562920808792114,
190
+ "learning_rate": 1.3651026392961877e-05,
191
+ "loss": 0.0456,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 0.6598240469208211,
196
+ "grad_norm": 0.09532313793897629,
197
+ "learning_rate": 1.3406647116324536e-05,
198
+ "loss": 0.051,
199
+ "step": 2700
200
+ },
201
+ {
202
+ "epoch": 0.6842619745845552,
203
+ "grad_norm": 2.781747817993164,
204
+ "learning_rate": 1.3162267839687195e-05,
205
+ "loss": 0.0512,
206
+ "step": 2800
207
+ },
208
+ {
209
+ "epoch": 0.7086999022482894,
210
+ "grad_norm": 2.1303281784057617,
211
+ "learning_rate": 1.2917888563049854e-05,
212
+ "loss": 0.0519,
213
+ "step": 2900
214
+ },
215
+ {
216
+ "epoch": 0.7331378299120235,
217
+ "grad_norm": 0.10773918032646179,
218
+ "learning_rate": 1.2673509286412513e-05,
219
+ "loss": 0.042,
220
+ "step": 3000
221
+ },
222
+ {
223
+ "epoch": 0.7575757575757576,
224
+ "grad_norm": 0.533173143863678,
225
+ "learning_rate": 1.2429130009775172e-05,
226
+ "loss": 0.0569,
227
+ "step": 3100
228
+ },
229
+ {
230
+ "epoch": 0.7820136852394917,
231
+ "grad_norm": 2.882176637649536,
232
+ "learning_rate": 1.2184750733137831e-05,
233
+ "loss": 0.0421,
234
+ "step": 3200
235
+ },
236
+ {
237
+ "epoch": 0.8064516129032258,
238
+ "grad_norm": 5.4323530197143555,
239
+ "learning_rate": 1.194037145650049e-05,
240
+ "loss": 0.0458,
241
+ "step": 3300
242
+ },
243
+ {
244
+ "epoch": 0.83088954056696,
245
+ "grad_norm": 0.08420676738023758,
246
+ "learning_rate": 1.169599217986315e-05,
247
+ "loss": 0.0432,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 0.855327468230694,
252
+ "grad_norm": 3.6947717666625977,
253
+ "learning_rate": 1.1451612903225808e-05,
254
+ "loss": 0.0387,
255
+ "step": 3500
256
+ },
257
+ {
258
+ "epoch": 0.8797653958944281,
259
+ "grad_norm": 4.411167621612549,
260
+ "learning_rate": 1.1209677419354839e-05,
261
+ "loss": 0.0515,
262
+ "step": 3600
263
+ },
264
+ {
265
+ "epoch": 0.9042033235581622,
266
+ "grad_norm": 3.9330861568450928,
267
+ "learning_rate": 1.0965298142717497e-05,
268
+ "loss": 0.0368,
269
+ "step": 3700
270
+ },
271
+ {
272
+ "epoch": 0.9286412512218963,
273
+ "grad_norm": 0.9437419772148132,
274
+ "learning_rate": 1.0720918866080157e-05,
275
+ "loss": 0.0479,
276
+ "step": 3800
277
+ },
278
+ {
279
+ "epoch": 0.9530791788856305,
280
+ "grad_norm": 1.0870046615600586,
281
+ "learning_rate": 1.0476539589442815e-05,
282
+ "loss": 0.0426,
283
+ "step": 3900
284
+ },
285
+ {
286
+ "epoch": 0.9775171065493646,
287
+ "grad_norm": 3.1951255798339844,
288
+ "learning_rate": 1.0232160312805475e-05,
289
+ "loss": 0.0432,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 1.0,
294
+ "eval_accuracy": 0.9891518737672583,
295
+ "eval_f1": 0.9928387101834639,
296
+ "eval_loss": 0.04053397476673126,
297
+ "eval_runtime": 158.0778,
298
+ "eval_samples_per_second": 532.409,
299
+ "eval_steps_per_second": 11.096,
300
+ "step": 4092
301
+ }
302
+ ],
303
+ "logging_steps": 100,
304
+ "max_steps": 8184,
305
+ "num_input_tokens_seen": 0,
306
+ "num_train_epochs": 2,
307
+ "save_steps": 500,
308
+ "stateful_callbacks": {
309
+ "TrainerControl": {
310
+ "args": {
311
+ "should_epoch_stop": false,
312
+ "should_evaluate": false,
313
+ "should_log": false,
314
+ "should_save": true,
315
+ "should_training_stop": false
316
+ },
317
+ "attributes": {}
318
+ }
319
+ },
320
+ "total_flos": 2.6013285411072e+16,
321
+ "train_batch_size": 48,
322
+ "trial_name": null,
323
+ "trial_params": null
324
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223c1ad8c221cdb8480bd0fbf758bce16ab32234cf8f118612402b10534a701a
3
+ size 535727755
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1344599c19119661a23b30409dd058766623df54c92c90cfbddc059d4d9b8506
3
+ size 14645
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c83094e856654a01b4be2edab3b20eb796b993d8a6bc80224e3383365e815ba
3
+ size 1383
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6f8ed4f35d1035d3220bdfe3aa23843396e8ea037faf54ec8c9dc5fe556d41
3
+ size 1465
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 8184,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.024437927663734114,
14
+ "grad_norm": 4.433139801025391,
15
+ "learning_rate": 1.9760508308895406e-05,
16
+ "loss": 0.2466,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.04887585532746823,
21
+ "grad_norm": 2.9566471576690674,
22
+ "learning_rate": 1.9516129032258068e-05,
23
+ "loss": 0.0958,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.07331378299120235,
28
+ "grad_norm": 3.056684732437134,
29
+ "learning_rate": 1.9271749755620726e-05,
30
+ "loss": 0.0854,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.09775171065493646,
35
+ "grad_norm": 0.7034033536911011,
36
+ "learning_rate": 1.9027370478983384e-05,
37
+ "loss": 0.0854,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.12218963831867058,
42
+ "grad_norm": 2.2177963256835938,
43
+ "learning_rate": 1.8782991202346042e-05,
44
+ "loss": 0.0851,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.1466275659824047,
49
+ "grad_norm": 0.9197642803192139,
50
+ "learning_rate": 1.85386119257087e-05,
51
+ "loss": 0.0802,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.1710654936461388,
56
+ "grad_norm": 2.132237672805786,
57
+ "learning_rate": 1.829423264907136e-05,
58
+ "loss": 0.068,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.19550342130987292,
63
+ "grad_norm": 1.3075214624404907,
64
+ "learning_rate": 1.804985337243402e-05,
65
+ "loss": 0.0679,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.21994134897360704,
70
+ "grad_norm": 2.7458581924438477,
71
+ "learning_rate": 1.7805474095796678e-05,
72
+ "loss": 0.0698,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.24437927663734116,
77
+ "grad_norm": 1.7474775314331055,
78
+ "learning_rate": 1.7561094819159336e-05,
79
+ "loss": 0.0647,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.26881720430107525,
84
+ "grad_norm": 3.5915794372558594,
85
+ "learning_rate": 1.7316715542521995e-05,
86
+ "loss": 0.0598,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.2932551319648094,
91
+ "grad_norm": 0.5882957577705383,
92
+ "learning_rate": 1.7072336265884656e-05,
93
+ "loss": 0.0635,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.3176930596285435,
98
+ "grad_norm": 3.578653335571289,
99
+ "learning_rate": 1.6827956989247314e-05,
100
+ "loss": 0.0666,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.3421309872922776,
105
+ "grad_norm": 0.5485109090805054,
106
+ "learning_rate": 1.6583577712609973e-05,
107
+ "loss": 0.0583,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.36656891495601174,
112
+ "grad_norm": 0.6193661689758301,
113
+ "learning_rate": 1.633919843597263e-05,
114
+ "loss": 0.0637,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.39100684261974583,
119
+ "grad_norm": 0.11104666441679001,
120
+ "learning_rate": 1.609481915933529e-05,
121
+ "loss": 0.0505,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.41544477028348,
126
+ "grad_norm": 0.13834528625011444,
127
+ "learning_rate": 1.5850439882697947e-05,
128
+ "loss": 0.0535,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.4398826979472141,
133
+ "grad_norm": 0.19737549126148224,
134
+ "learning_rate": 1.5606060606060605e-05,
135
+ "loss": 0.0503,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.46432062561094817,
140
+ "grad_norm": 5.043758392333984,
141
+ "learning_rate": 1.5361681329423267e-05,
142
+ "loss": 0.0512,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.4887585532746823,
147
+ "grad_norm": 0.07497064024209976,
148
+ "learning_rate": 1.5117302052785925e-05,
149
+ "loss": 0.0526,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.5131964809384164,
154
+ "grad_norm": 1.9213156700134277,
155
+ "learning_rate": 1.4872922776148583e-05,
156
+ "loss": 0.051,
157
+ "step": 2100
158
+ },
159
+ {
160
+ "epoch": 0.5376344086021505,
161
+ "grad_norm": 1.2963168621063232,
162
+ "learning_rate": 1.4628543499511243e-05,
163
+ "loss": 0.0429,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 0.5620723362658846,
168
+ "grad_norm": 1.5551583766937256,
169
+ "learning_rate": 1.4384164222873903e-05,
170
+ "loss": 0.0602,
171
+ "step": 2300
172
+ },
173
+ {
174
+ "epoch": 0.5865102639296188,
175
+ "grad_norm": 2.956341028213501,
176
+ "learning_rate": 1.4139784946236561e-05,
177
+ "loss": 0.0564,
178
+ "step": 2400
179
+ },
180
+ {
181
+ "epoch": 0.6109481915933529,
182
+ "grad_norm": 0.2880280911922455,
183
+ "learning_rate": 1.3895405669599221e-05,
184
+ "loss": 0.0425,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 0.635386119257087,
189
+ "grad_norm": 0.6562920808792114,
190
+ "learning_rate": 1.3651026392961877e-05,
191
+ "loss": 0.0456,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 0.6598240469208211,
196
+ "grad_norm": 0.09532313793897629,
197
+ "learning_rate": 1.3406647116324536e-05,
198
+ "loss": 0.051,
199
+ "step": 2700
200
+ },
201
+ {
202
+ "epoch": 0.6842619745845552,
203
+ "grad_norm": 2.781747817993164,
204
+ "learning_rate": 1.3162267839687195e-05,
205
+ "loss": 0.0512,
206
+ "step": 2800
207
+ },
208
+ {
209
+ "epoch": 0.7086999022482894,
210
+ "grad_norm": 2.1303281784057617,
211
+ "learning_rate": 1.2917888563049854e-05,
212
+ "loss": 0.0519,
213
+ "step": 2900
214
+ },
215
+ {
216
+ "epoch": 0.7331378299120235,
217
+ "grad_norm": 0.10773918032646179,
218
+ "learning_rate": 1.2673509286412513e-05,
219
+ "loss": 0.042,
220
+ "step": 3000
221
+ },
222
+ {
223
+ "epoch": 0.7575757575757576,
224
+ "grad_norm": 0.533173143863678,
225
+ "learning_rate": 1.2429130009775172e-05,
226
+ "loss": 0.0569,
227
+ "step": 3100
228
+ },
229
+ {
230
+ "epoch": 0.7820136852394917,
231
+ "grad_norm": 2.882176637649536,
232
+ "learning_rate": 1.2184750733137831e-05,
233
+ "loss": 0.0421,
234
+ "step": 3200
235
+ },
236
+ {
237
+ "epoch": 0.8064516129032258,
238
+ "grad_norm": 5.4323530197143555,
239
+ "learning_rate": 1.194037145650049e-05,
240
+ "loss": 0.0458,
241
+ "step": 3300
242
+ },
243
+ {
244
+ "epoch": 0.83088954056696,
245
+ "grad_norm": 0.08420676738023758,
246
+ "learning_rate": 1.169599217986315e-05,
247
+ "loss": 0.0432,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 0.855327468230694,
252
+ "grad_norm": 3.6947717666625977,
253
+ "learning_rate": 1.1451612903225808e-05,
254
+ "loss": 0.0387,
255
+ "step": 3500
256
+ },
257
+ {
258
+ "epoch": 0.8797653958944281,
259
+ "grad_norm": 4.411167621612549,
260
+ "learning_rate": 1.1209677419354839e-05,
261
+ "loss": 0.0515,
262
+ "step": 3600
263
+ },
264
+ {
265
+ "epoch": 0.9042033235581622,
266
+ "grad_norm": 3.9330861568450928,
267
+ "learning_rate": 1.0965298142717497e-05,
268
+ "loss": 0.0368,
269
+ "step": 3700
270
+ },
271
+ {
272
+ "epoch": 0.9286412512218963,
273
+ "grad_norm": 0.9437419772148132,
274
+ "learning_rate": 1.0720918866080157e-05,
275
+ "loss": 0.0479,
276
+ "step": 3800
277
+ },
278
+ {
279
+ "epoch": 0.9530791788856305,
280
+ "grad_norm": 1.0870046615600586,
281
+ "learning_rate": 1.0476539589442815e-05,
282
+ "loss": 0.0426,
283
+ "step": 3900
284
+ },
285
+ {
286
+ "epoch": 0.9775171065493646,
287
+ "grad_norm": 3.1951255798339844,
288
+ "learning_rate": 1.0232160312805475e-05,
289
+ "loss": 0.0432,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 1.0,
294
+ "eval_accuracy": 0.9891518737672583,
295
+ "eval_f1": 0.9928387101834639,
296
+ "eval_loss": 0.04053397476673126,
297
+ "eval_runtime": 158.0778,
298
+ "eval_samples_per_second": 532.409,
299
+ "eval_steps_per_second": 11.096,
300
+ "step": 4092
301
+ },
302
+ {
303
+ "epoch": 1.0019550342130987,
304
+ "grad_norm": 0.5268033146858215,
305
+ "learning_rate": 9.987781036168133e-06,
306
+ "loss": 0.0386,
307
+ "step": 4100
308
+ },
309
+ {
310
+ "epoch": 1.0263929618768328,
311
+ "grad_norm": 0.3988477289676666,
312
+ "learning_rate": 9.743401759530793e-06,
313
+ "loss": 0.0224,
314
+ "step": 4200
315
+ },
316
+ {
317
+ "epoch": 1.050830889540567,
318
+ "grad_norm": 0.031836945563554764,
319
+ "learning_rate": 9.499022482893451e-06,
320
+ "loss": 0.0344,
321
+ "step": 4300
322
+ },
323
+ {
324
+ "epoch": 1.075268817204301,
325
+ "grad_norm": 1.4545081853866577,
326
+ "learning_rate": 9.25464320625611e-06,
327
+ "loss": 0.0285,
328
+ "step": 4400
329
+ },
330
+ {
331
+ "epoch": 1.099706744868035,
332
+ "grad_norm": 0.054912444204092026,
333
+ "learning_rate": 9.01026392961877e-06,
334
+ "loss": 0.0176,
335
+ "step": 4500
336
+ },
337
+ {
338
+ "epoch": 1.1241446725317692,
339
+ "grad_norm": 1.042144775390625,
340
+ "learning_rate": 8.765884652981427e-06,
341
+ "loss": 0.0235,
342
+ "step": 4600
343
+ },
344
+ {
345
+ "epoch": 1.1485826001955035,
346
+ "grad_norm": 0.12319644540548325,
347
+ "learning_rate": 8.521505376344087e-06,
348
+ "loss": 0.0274,
349
+ "step": 4700
350
+ },
351
+ {
352
+ "epoch": 1.1730205278592376,
353
+ "grad_norm": 0.08448143303394318,
354
+ "learning_rate": 8.277126099706745e-06,
355
+ "loss": 0.0261,
356
+ "step": 4800
357
+ },
358
+ {
359
+ "epoch": 1.1974584555229717,
360
+ "grad_norm": 0.06239945441484451,
361
+ "learning_rate": 8.032746823069404e-06,
362
+ "loss": 0.022,
363
+ "step": 4900
364
+ },
365
+ {
366
+ "epoch": 1.2218963831867058,
367
+ "grad_norm": 0.0412888340651989,
368
+ "learning_rate": 7.788367546432064e-06,
369
+ "loss": 0.0239,
370
+ "step": 5000
371
+ },
372
+ {
373
+ "epoch": 1.2463343108504399,
374
+ "grad_norm": 0.10470504313707352,
375
+ "learning_rate": 7.543988269794722e-06,
376
+ "loss": 0.0256,
377
+ "step": 5100
378
+ },
379
+ {
380
+ "epoch": 1.270772238514174,
381
+ "grad_norm": 0.3927100896835327,
382
+ "learning_rate": 7.299608993157381e-06,
383
+ "loss": 0.0299,
384
+ "step": 5200
385
+ },
386
+ {
387
+ "epoch": 1.295210166177908,
388
+ "grad_norm": 0.2635032534599304,
389
+ "learning_rate": 7.05522971652004e-06,
390
+ "loss": 0.0223,
391
+ "step": 5300
392
+ },
393
+ {
394
+ "epoch": 1.3196480938416422,
395
+ "grad_norm": 0.05280297249555588,
396
+ "learning_rate": 6.810850439882698e-06,
397
+ "loss": 0.0319,
398
+ "step": 5400
399
+ },
400
+ {
401
+ "epoch": 1.3440860215053765,
402
+ "grad_norm": 0.02359873428940773,
403
+ "learning_rate": 6.566471163245357e-06,
404
+ "loss": 0.02,
405
+ "step": 5500
406
+ },
407
+ {
408
+ "epoch": 1.3685239491691106,
409
+ "grad_norm": 0.42642369866371155,
410
+ "learning_rate": 6.324535679374389e-06,
411
+ "loss": 0.0241,
412
+ "step": 5600
413
+ },
414
+ {
415
+ "epoch": 1.3929618768328447,
416
+ "grad_norm": 0.7084789872169495,
417
+ "learning_rate": 6.080156402737049e-06,
418
+ "loss": 0.0195,
419
+ "step": 5700
420
+ },
421
+ {
422
+ "epoch": 1.4173998044965788,
423
+ "grad_norm": 0.14894872903823853,
424
+ "learning_rate": 5.835777126099708e-06,
425
+ "loss": 0.0233,
426
+ "step": 5800
427
+ },
428
+ {
429
+ "epoch": 1.4418377321603129,
430
+ "grad_norm": 6.313684463500977,
431
+ "learning_rate": 5.591397849462365e-06,
432
+ "loss": 0.0181,
433
+ "step": 5900
434
+ },
435
+ {
436
+ "epoch": 1.466275659824047,
437
+ "grad_norm": 0.9010400772094727,
438
+ "learning_rate": 5.347018572825024e-06,
439
+ "loss": 0.0243,
440
+ "step": 6000
441
+ },
442
+ {
443
+ "epoch": 1.490713587487781,
444
+ "grad_norm": 0.15028172731399536,
445
+ "learning_rate": 5.102639296187683e-06,
446
+ "loss": 0.0227,
447
+ "step": 6100
448
+ },
449
+ {
450
+ "epoch": 1.5151515151515151,
451
+ "grad_norm": 1.0572513341903687,
452
+ "learning_rate": 4.858260019550342e-06,
453
+ "loss": 0.019,
454
+ "step": 6200
455
+ },
456
+ {
457
+ "epoch": 1.5395894428152492,
458
+ "grad_norm": 0.056791041046381,
459
+ "learning_rate": 4.613880742913001e-06,
460
+ "loss": 0.0204,
461
+ "step": 6300
462
+ },
463
+ {
464
+ "epoch": 1.5640273704789833,
465
+ "grad_norm": 0.3077963590621948,
466
+ "learning_rate": 4.36950146627566e-06,
467
+ "loss": 0.0253,
468
+ "step": 6400
469
+ },
470
+ {
471
+ "epoch": 1.5884652981427174,
472
+ "grad_norm": 0.03899073228240013,
473
+ "learning_rate": 4.125122189638319e-06,
474
+ "loss": 0.0216,
475
+ "step": 6500
476
+ },
477
+ {
478
+ "epoch": 1.6129032258064515,
479
+ "grad_norm": 1.412708044052124,
480
+ "learning_rate": 3.8807429130009776e-06,
481
+ "loss": 0.0224,
482
+ "step": 6600
483
+ },
484
+ {
485
+ "epoch": 1.6373411534701856,
486
+ "grad_norm": 0.08187337219715118,
487
+ "learning_rate": 3.6363636363636366e-06,
488
+ "loss": 0.0178,
489
+ "step": 6700
490
+ },
491
+ {
492
+ "epoch": 1.6617790811339197,
493
+ "grad_norm": 0.04208606481552124,
494
+ "learning_rate": 3.391984359726295e-06,
495
+ "loss": 0.0247,
496
+ "step": 6800
497
+ },
498
+ {
499
+ "epoch": 1.6862170087976538,
500
+ "grad_norm": 0.0923227071762085,
501
+ "learning_rate": 3.147605083088954e-06,
502
+ "loss": 0.0207,
503
+ "step": 6900
504
+ },
505
+ {
506
+ "epoch": 1.710654936461388,
507
+ "grad_norm": 1.2065573930740356,
508
+ "learning_rate": 2.903225806451613e-06,
509
+ "loss": 0.0362,
510
+ "step": 7000
511
+ },
512
+ {
513
+ "epoch": 1.7350928641251222,
514
+ "grad_norm": 5.345006942749023,
515
+ "learning_rate": 2.658846529814272e-06,
516
+ "loss": 0.0274,
517
+ "step": 7100
518
+ },
519
+ {
520
+ "epoch": 1.7595307917888563,
521
+ "grad_norm": 0.1414783000946045,
522
+ "learning_rate": 2.414467253176931e-06,
523
+ "loss": 0.0204,
524
+ "step": 7200
525
+ },
526
+ {
527
+ "epoch": 1.7839687194525904,
528
+ "grad_norm": 1.9135044813156128,
529
+ "learning_rate": 2.17008797653959e-06,
530
+ "loss": 0.0279,
531
+ "step": 7300
532
+ },
533
+ {
534
+ "epoch": 1.8084066471163245,
535
+ "grad_norm": 1.5857988595962524,
536
+ "learning_rate": 1.9257086999022484e-06,
537
+ "loss": 0.0251,
538
+ "step": 7400
539
+ },
540
+ {
541
+ "epoch": 1.8328445747800588,
542
+ "grad_norm": 0.38852691650390625,
543
+ "learning_rate": 1.6813294232649072e-06,
544
+ "loss": 0.0199,
545
+ "step": 7500
546
+ },
547
+ {
548
+ "epoch": 1.857282502443793,
549
+ "grad_norm": 0.09283141791820526,
550
+ "learning_rate": 1.4369501466275662e-06,
551
+ "loss": 0.0178,
552
+ "step": 7600
553
+ },
554
+ {
555
+ "epoch": 1.881720430107527,
556
+ "grad_norm": 0.04146512970328331,
557
+ "learning_rate": 1.192570869990225e-06,
558
+ "loss": 0.017,
559
+ "step": 7700
560
+ },
561
+ {
562
+ "epoch": 1.906158357771261,
563
+ "grad_norm": 0.029652154073119164,
564
+ "learning_rate": 9.481915933528838e-07,
565
+ "loss": 0.0193,
566
+ "step": 7800
567
+ },
568
+ {
569
+ "epoch": 1.9305962854349952,
570
+ "grad_norm": 0.04845303297042847,
571
+ "learning_rate": 7.038123167155427e-07,
572
+ "loss": 0.0287,
573
+ "step": 7900
574
+ },
575
+ {
576
+ "epoch": 1.9550342130987293,
577
+ "grad_norm": 0.05037612095475197,
578
+ "learning_rate": 4.618768328445748e-07,
579
+ "loss": 0.0257,
580
+ "step": 8000
581
+ },
582
+ {
583
+ "epoch": 1.9794721407624634,
584
+ "grad_norm": 0.9069479703903198,
585
+ "learning_rate": 2.1749755620723366e-07,
586
+ "loss": 0.0299,
587
+ "step": 8100
588
+ },
589
+ {
590
+ "epoch": 2.0,
591
+ "eval_accuracy": 0.9917183527007438,
592
+ "eval_f1": 0.994545440316787,
593
+ "eval_loss": 0.03528362512588501,
594
+ "eval_runtime": 158.882,
595
+ "eval_samples_per_second": 529.714,
596
+ "eval_steps_per_second": 11.04,
597
+ "step": 8184
598
+ }
599
+ ],
600
+ "logging_steps": 100,
601
+ "max_steps": 8184,
602
+ "num_input_tokens_seen": 0,
603
+ "num_train_epochs": 2,
604
+ "save_steps": 500,
605
+ "stateful_callbacks": {
606
+ "TrainerControl": {
607
+ "args": {
608
+ "should_epoch_stop": false,
609
+ "should_evaluate": false,
610
+ "should_log": false,
611
+ "should_save": true,
612
+ "should_training_stop": true
613
+ },
614
+ "attributes": {}
615
+ }
616
+ },
617
+ "total_flos": 5.2026570822144e+16,
618
+ "train_batch_size": 48,
619
+ "trial_name": null,
620
+ "trial_params": null
621
+ }
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.1",
23
+ "vocab_size": 30522
24
+ }
Floressek/sentiment_classification_from_distillbert/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
3
+ size 267832560
Floressek/sentiment_classification_from_distillbert/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Floressek/sentiment_classification_from_distillbert/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Floressek/sentiment_classification_from_distillbert/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
Floressek/sentiment_classification_from_distillbert/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
3
+ size 5841
Floressek/sentiment_classification_from_distillbert/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Szymon_Florek_Model_fine_tuning.ipynb ADDED
@@ -0,0 +1,1145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {
5
+ "collapsed": true
6
+ },
7
+ "cell_type": "markdown",
8
+ "source": "# Fine-tuning a Pretrained Model for sentiment analysis",
9
+ "id": "d26037aadad0840b"
10
+ },
11
+ {
12
+ "metadata": {},
13
+ "cell_type": "markdown",
14
+ "source": "## Importing necessary libraries and data\n",
15
+ "id": "15d09dfe44d655d4"
16
+ },
17
+ {
18
+ "metadata": {
19
+ "ExecuteTime": {
20
+ "end_time": "2025-11-14T17:17:28.523157Z",
21
+ "start_time": "2025-11-14T17:17:23.902525Z"
22
+ }
23
+ },
24
+ "cell_type": "code",
25
+ "source": [
26
+ "!pip install datasets evaluate transformers[sentencepiece]\n",
27
+ "!pip install ipywidgets\n",
28
+ "!pip install torch\n",
29
+ "!pip install transformers[torch]\n",
30
+ "!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
31
+ "!pip install scikit-learn"
32
+ ],
33
+ "id": "efdad8b2b5ab54d",
34
+ "outputs": [
35
+ {
36
+ "name": "stdout",
37
+ "output_type": "stream",
38
+ "text": [
39
+ "Requirement already satisfied: datasets in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (4.4.1)\n",
40
+ "Requirement already satisfied: evaluate in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (0.4.6)\n",
41
+ "Requirement already satisfied: transformers[sentencepiece] in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (4.57.1)\n",
42
+ "Requirement already satisfied: filelock in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (3.20.0)\n",
43
+ "Requirement already satisfied: numpy>=1.17 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (2.3.4)\n",
44
+ "Requirement already satisfied: pyarrow>=21.0.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (22.0.0)\n",
45
+ "Requirement already satisfied: dill<0.4.1,>=0.3.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (0.4.0)\n",
46
+ "Requirement already satisfied: pandas in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (2.3.3)\n",
47
+ "Requirement already satisfied: requests>=2.32.2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (2.32.5)\n",
48
+ "Requirement already satisfied: httpx<1.0.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (0.28.1)\n",
49
+ "Requirement already satisfied: tqdm>=4.66.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (4.67.1)\n",
50
+ "Requirement already satisfied: xxhash in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (3.6.0)\n",
51
+ "Requirement already satisfied: multiprocess<0.70.19 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (0.70.18)\n",
52
+ "Requirement already satisfied: fsspec<=2025.10.0,>=2023.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (2025.10.0)\n",
53
+ "Requirement already satisfied: huggingface-hub<2.0,>=0.25.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (0.36.0)\n",
54
+ "Requirement already satisfied: packaging in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (25.0)\n",
55
+ "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from datasets) (6.0.3)\n",
56
+ "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (3.13.2)\n",
57
+ "Requirement already satisfied: anyio in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from httpx<1.0.0->datasets) (4.11.0)\n",
58
+ "Requirement already satisfied: certifi in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from httpx<1.0.0->datasets) (2025.11.12)\n",
59
+ "Requirement already satisfied: httpcore==1.* in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from httpx<1.0.0->datasets) (1.0.9)\n",
60
+ "Requirement already satisfied: idna in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from httpx<1.0.0->datasets) (3.11)\n",
61
+ "Requirement already satisfied: h11>=0.16 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1.0.0->datasets) (0.16.0)\n",
62
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.25.0->datasets) (4.15.0)\n",
63
+ "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[sentencepiece]) (2025.11.3)\n",
64
+ "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[sentencepiece]) (0.22.1)\n",
65
+ "Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[sentencepiece]) (0.6.2)\n",
66
+ "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[sentencepiece]) (0.2.1)\n",
67
+ "Requirement already satisfied: protobuf in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[sentencepiece]) (6.33.1)\n",
68
+ "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (2.6.1)\n",
69
+ "Requirement already satisfied: aiosignal>=1.4.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (1.4.0)\n",
70
+ "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (25.4.0)\n",
71
+ "Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (1.8.0)\n",
72
+ "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (6.7.0)\n",
73
+ "Requirement already satisfied: propcache>=0.2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (0.4.1)\n",
74
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.10.0,>=2023.1.0->datasets) (1.22.0)\n",
75
+ "Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests>=2.32.2->datasets) (3.4.4)\n",
76
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests>=2.32.2->datasets) (2.5.0)\n",
77
+ "Requirement already satisfied: colorama in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from tqdm>=4.66.3->datasets) (0.4.6)\n",
78
+ "Requirement already satisfied: sniffio>=1.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from anyio->httpx<1.0.0->datasets) (1.3.1)\n",
79
+ "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from pandas->datasets) (2.9.0.post0)\n",
80
+ "Requirement already satisfied: pytz>=2020.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from pandas->datasets) (2025.2)\n",
81
+ "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from pandas->datasets) (2025.2)\n",
82
+ "Requirement already satisfied: six>=1.5 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n",
83
+ "Requirement already satisfied: ipywidgets in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (8.1.8)\n",
84
+ "Requirement already satisfied: comm>=0.1.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipywidgets) (0.2.3)\n",
85
+ "Requirement already satisfied: ipython>=6.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipywidgets) (9.7.0)\n",
86
+ "Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
87
+ "Requirement already satisfied: widgetsnbextension~=4.0.14 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipywidgets) (4.0.15)\n",
88
+ "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipywidgets) (3.0.16)\n",
89
+ "Requirement already satisfied: colorama>=0.4.4 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
90
+ "Requirement already satisfied: decorator>=4.3.2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n",
91
+ "Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n",
92
+ "Requirement already satisfied: jedi>=0.18.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n",
93
+ "Requirement already satisfied: matplotlib-inline>=0.1.5 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n",
94
+ "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n",
95
+ "Requirement already satisfied: pygments>=2.11.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n",
96
+ "Requirement already satisfied: stack_data>=0.6.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
97
+ "Requirement already satisfied: wcwidth in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n",
98
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n",
99
+ "Requirement already satisfied: executing>=1.2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n",
100
+ "Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.0)\n",
101
+ "Requirement already satisfied: pure-eval in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n",
102
+ "Requirement already satisfied: torch in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (2.7.1+cu118)\n",
103
+ "Requirement already satisfied: filelock in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.20.0)\n",
104
+ "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (4.15.0)\n",
105
+ "Requirement already satisfied: sympy>=1.13.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (1.14.0)\n",
106
+ "Requirement already satisfied: networkx in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.5)\n",
107
+ "Requirement already satisfied: jinja2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.1.6)\n",
108
+ "Requirement already satisfied: fsspec in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (2025.10.0)\n",
109
+ "Requirement already satisfied: setuptools in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (80.9.0)\n",
110
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
111
+ "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from jinja2->torch) (3.0.3)\n",
112
+ "Requirement already satisfied: transformers[torch] in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (4.57.1)\n",
113
+ "Requirement already satisfied: filelock in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (3.20.0)\n",
114
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (0.36.0)\n",
115
+ "Requirement already satisfied: numpy>=1.17 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (2.3.4)\n",
116
+ "Requirement already satisfied: packaging>=20.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (25.0)\n",
117
+ "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (6.0.3)\n",
118
+ "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (2025.11.3)\n",
119
+ "Requirement already satisfied: requests in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (2.32.5)\n",
120
+ "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (0.22.1)\n",
121
+ "Requirement already satisfied: safetensors>=0.4.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (0.6.2)\n",
122
+ "Requirement already satisfied: tqdm>=4.27 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (4.67.1)\n",
123
+ "Requirement already satisfied: torch>=2.2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (2.7.1+cu118)\n",
124
+ "Requirement already satisfied: accelerate>=0.26.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from transformers[torch]) (1.11.0)\n",
125
+ "Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers[torch]) (2025.10.0)\n",
126
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from huggingface-hub<1.0,>=0.34.0->transformers[torch]) (4.15.0)\n",
127
+ "Requirement already satisfied: psutil in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from accelerate>=0.26.0->transformers[torch]) (7.1.3)\n",
128
+ "Requirement already satisfied: sympy>=1.13.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch>=2.2->transformers[torch]) (1.14.0)\n",
129
+ "Requirement already satisfied: networkx in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch>=2.2->transformers[torch]) (3.5)\n",
130
+ "Requirement already satisfied: jinja2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch>=2.2->transformers[torch]) (3.1.6)\n",
131
+ "Requirement already satisfied: setuptools in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch>=2.2->transformers[torch]) (80.9.0)\n",
132
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from sympy>=1.13.3->torch>=2.2->transformers[torch]) (1.3.0)\n",
133
+ "Requirement already satisfied: colorama in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from tqdm>=4.27->transformers[torch]) (0.4.6)\n",
134
+ "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from jinja2->torch>=2.2->transformers[torch]) (3.0.3)\n",
135
+ "Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests->transformers[torch]) (3.4.4)\n",
136
+ "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests->transformers[torch]) (3.11)\n",
137
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests->transformers[torch]) (2.5.0)\n",
138
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from requests->transformers[torch]) (2025.11.12)\n",
139
+ "Looking in indexes: https://download.pytorch.org/whl/cu118\n",
140
+ "Requirement already satisfied: torch in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (2.7.1+cu118)\n",
141
+ "Requirement already satisfied: torchvision in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (0.22.1+cu118)\n",
142
+ "Requirement already satisfied: torchaudio in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (2.7.1+cu118)\n",
143
+ "Requirement already satisfied: filelock in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.20.0)\n",
144
+ "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (4.15.0)\n",
145
+ "Requirement already satisfied: sympy>=1.13.3 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (1.14.0)\n",
146
+ "Requirement already satisfied: networkx in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.5)\n",
147
+ "Requirement already satisfied: jinja2 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (3.1.6)\n",
148
+ "Requirement already satisfied: fsspec in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (2025.10.0)\n",
149
+ "Requirement already satisfied: setuptools in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torch) (80.9.0)\n",
150
+ "Requirement already satisfied: numpy in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torchvision) (2.3.4)\n",
151
+ "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from torchvision) (11.3.0)\n",
152
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
153
+ "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from jinja2->torch) (3.0.3)\n",
154
+ "Requirement already satisfied: scikit-learn in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (1.7.2)\n",
155
+ "Requirement already satisfied: numpy>=1.22.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from scikit-learn) (2.3.4)\n",
156
+ "Requirement already satisfied: scipy>=1.8.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from scikit-learn) (1.16.3)\n",
157
+ "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from scikit-learn) (1.5.2)\n",
158
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\szyme\\pycharmprojects\\languageprocessinglab\\.venv\\lib\\site-packages (from scikit-learn) (3.6.0)\n"
159
+ ]
160
+ }
161
+ ],
162
+ "execution_count": 4
163
+ },
164
+ {
165
+ "metadata": {
166
+ "ExecuteTime": {
167
+ "end_time": "2025-11-14T18:39:53.784036Z",
168
+ "start_time": "2025-11-14T18:39:53.693711Z"
169
+ }
170
+ },
171
+ "cell_type": "code",
172
+ "source": [
173
+ "!nvidia-smi\n",
174
+ "import torch\n",
175
+ "\n",
176
+ "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
177
+ "if torch.cuda.is_available():\n",
178
+ " print(f\"Device: {torch.cuda.get_device_name(0)}\")\n",
179
+ "else:\n",
180
+ " print(\"WARNING: Training on CPU will be very slow!\")"
181
+ ],
182
+ "id": "f6d0e86c409e9a9d",
183
+ "outputs": [
184
+ {
185
+ "name": "stdout",
186
+ "output_type": "stream",
187
+ "text": [
188
+ "Fri Nov 14 19:39:53 2025 \n",
189
+ "+-----------------------------------------------------------------------------------------+\n",
190
+ "| NVIDIA-SMI 581.42 Driver Version: 581.42 CUDA Version: 13.0 |\n",
191
+ "+-----------------------------------------+------------------------+----------------------+\n",
192
+ "| GPU Name Driver-Model | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
193
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
194
+ "| | | MIG M. |\n",
195
+ "|=========================================+========================+======================|\n",
196
+ "| 0 NVIDIA GeForce RTX 4070 WDDM | 00000000:01:00.0 On | N/A |\n",
197
+ "| 0% 43C P5 25W / 215W | 4740MiB / 12282MiB | 21% Default |\n",
198
+ "| | | N/A |\n",
199
+ "+-----------------------------------------+------------------------+----------------------+\n",
200
+ "\n",
201
+ "+-----------------------------------------------------------------------------------------+\n",
202
+ "| Processes: |\n",
203
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
204
+ "| ID ID Usage |\n",
205
+ "|=========================================================================================|\n",
206
+ "| 0 N/A N/A 1416 C+G C:\\Windows\\System32\\dwm.exe N/A |\n",
207
+ "| 0 N/A N/A 2332 C+G ....0.3595.53\\msedgewebview2.exe N/A |\n",
208
+ "| 0 N/A N/A 2736 C+G ...xyewy\\ShellExperienceHost.exe N/A |\n",
209
+ "| 0 N/A N/A 5108 C+G ...5n1h2txyewy\\TextInputHost.exe N/A |\n",
210
+ "| 0 N/A N/A 7252 C+G ...8bbwe\\PhoneExperienceHost.exe N/A |\n",
211
+ "| 0 N/A N/A 11540 C+G ...4__8wekyb3d8bbwe\\ms-teams.exe N/A |\n",
212
+ "| 0 N/A N/A 12376 C+G ...l\\Programs\\Opera GX\\opera.exe N/A |\n",
213
+ "| 0 N/A N/A 12668 C+G ...GABYTE\\Control Center\\GCC.exe N/A |\n",
214
+ "| 0 N/A N/A 14740 C+G ...lus\\logioptionsplus_agent.exe N/A |\n",
215
+ "| 0 N/A N/A 14780 C+G ...D\\CNext\\CNext\\AMDRSSrcExt.exe N/A |\n",
216
+ "| 0 N/A N/A 15060 C+G C:\\Windows\\explorer.exe N/A |\n",
217
+ "| 0 N/A N/A 15140 C+G ...indows\\System32\\ShellHost.exe N/A |\n",
218
+ "| 0 N/A N/A 16828 C+G ..._cw5n1h2txyewy\\SearchHost.exe N/A |\n",
219
+ "| 0 N/A N/A 16836 C+G ...y\\StartMenuExperienceHost.exe N/A |\n",
220
+ "| 0 N/A N/A 19100 C+G ....0.3595.53\\msedgewebview2.exe N/A |\n",
221
+ "| 0 N/A N/A 19252 C+G ...64__zpdnekdrzrea0\\Spotify.exe N/A |\n",
222
+ "| 0 N/A N/A 20520 C+G ...Next\\CNext\\RadeonSoftware.exe N/A |\n",
223
+ "| 0 N/A N/A 20576 C+G ...roadcast\\NVIDIA Broadcast.exe N/A |\n",
224
+ "| 0 N/A N/A 21188 C+G ...cord\\app-1.0.9214\\Discord.exe N/A |\n",
225
+ "| 0 N/A N/A 22276 C+G ...lpaper_engine\\wallpaper64.exe N/A |\n",
226
+ "| 0 N/A N/A 23288 C+G ...l\\Programs\\Opera GX\\opera.exe N/A |\n",
227
+ "| 0 N/A N/A 23576 C+G ...__8yrtsj140pw4g\\app\\Slack.exe N/A |\n",
228
+ "| 0 N/A N/A 23656 C+G ...zcv7bpp5a\\Raycast\\Raycast.exe N/A |\n",
229
+ "| 0 N/A N/A 24616 C+G ...__8yrtsj140pw4g\\app\\Slack.exe N/A |\n",
230
+ "| 0 N/A N/A 24772 C+G ....0.3595.53\\msedgewebview2.exe N/A |\n",
231
+ "| 0 N/A N/A 28520 C+G ...4__8wekyb3d8bbwe\\ms-teams.exe N/A |\n",
232
+ "| 0 N/A N/A 29960 C+G ....0.3595.53\\msedgewebview2.exe N/A |\n",
233
+ "| 0 N/A N/A 31076 C+G ...0_x64__8wekyb3d8bbwe\\Todo.exe N/A |\n",
234
+ "| 0 N/A N/A 31724 C ...gLab\\.venv\\Scripts\\python.exe N/A |\n",
235
+ "| 0 N/A N/A 33752 C+G ...harm 2\\jbr\\bin\\cef_server.exe N/A |\n",
236
+ "| 0 N/A N/A 35756 C+G ...Claude\\app-1.0.332\\claude.exe N/A |\n",
237
+ "| 0 N/A N/A 36000 C+G ...em32\\ApplicationFrameHost.exe N/A |\n",
238
+ "| 0 N/A N/A 36392 C+G C:\\Windows\\System32\\Taskmgr.exe N/A |\n",
239
+ "+-----------------------------------------------------------------------------------------+\n",
240
+ "CUDA available: True\n",
241
+ "Device: NVIDIA GeForce RTX 4070\n"
242
+ ]
243
+ }
244
+ ],
245
+ "execution_count": 28
246
+ },
247
+ {
248
+ "metadata": {
249
+ "ExecuteTime": {
250
+ "end_time": "2025-11-14T20:21:55.196741Z",
251
+ "start_time": "2025-11-14T20:21:52.456375Z"
252
+ }
253
+ },
254
+ "cell_type": "code",
255
+ "source": [
256
+ "from pathlib import Path\n",
257
+ "from urllib.request import urlretrieve\n",
258
+ "import zipfile\n",
259
+ "import pandas as pd\n",
260
+ "\n",
261
+ "base_dir = Path().resolve()\n",
262
+ "\n",
263
+ "amazon_mobile_reviews_url = \"https://eduds.blob.core.windows.net/nlp/Amazon_Unlocked_Mobile.csv.zip\"\n",
264
+ "filename = \"data/Amazon_Unlocked_Mobile.csv.zip\"\n",
265
+ "data_dir = base_dir / \"data\"\n",
266
+ "\n",
267
+ "zip_path = data_dir / \"Amazon_Unlocked_Mobile.csv.zip\"\n",
268
+ "csv_path = data_dir / \"Amazon_Unlocked_Mobile.csv\"\n",
269
+ "\n",
270
+ "urlretrieve(amazon_mobile_reviews_url, filename)\n",
271
+ "\n",
272
+ "with zipfile.ZipFile(zip_path) as zfile:\n",
273
+ " zfile.extractall(data_dir)\n",
274
+ "\n",
275
+ "df = pd.read_csv(csv_path)"
276
+ ],
277
+ "id": "af23e8e3d03e3bfb",
278
+ "outputs": [],
279
+ "execution_count": 63
280
+ },
281
+ {
282
+ "metadata": {
283
+ "ExecuteTime": {
284
+ "end_time": "2025-11-14T20:21:56.503693Z",
285
+ "start_time": "2025-11-14T20:21:56.440111Z"
286
+ }
287
+ },
288
+ "cell_type": "code",
289
+ "source": [
290
+ "df.info()\n",
291
+ "df.describe()\n",
292
+ "df.head()"
293
+ ],
294
+ "id": "27b0bab46de0d18d",
295
+ "outputs": [
296
+ {
297
+ "name": "stdout",
298
+ "output_type": "stream",
299
+ "text": [
300
+ "<class 'pandas.core.frame.DataFrame'>\n",
301
+ "RangeIndex: 413840 entries, 0 to 413839\n",
302
+ "Data columns (total 6 columns):\n",
303
+ " # Column Non-Null Count Dtype \n",
304
+ "--- ------ -------------- ----- \n",
305
+ " 0 Product Name 413840 non-null object \n",
306
+ " 1 Brand Name 348669 non-null object \n",
307
+ " 2 Price 407907 non-null float64\n",
308
+ " 3 Rating 413840 non-null int64 \n",
309
+ " 4 Reviews 413770 non-null object \n",
310
+ " 5 Review Votes 401544 non-null float64\n",
311
+ "dtypes: float64(2), int64(1), object(3)\n",
312
+ "memory usage: 18.9+ MB\n"
313
+ ]
314
+ },
315
+ {
316
+ "data": {
317
+ "text/plain": [
318
+ " Product Name Brand Name Price \\\n",
319
+ "0 \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7... Samsung 199.99 \n",
320
+ "1 \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7... Samsung 199.99 \n",
321
+ "2 \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7... Samsung 199.99 \n",
322
+ "3 \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7... Samsung 199.99 \n",
323
+ "4 \"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7... Samsung 199.99 \n",
324
+ "\n",
325
+ " Rating Reviews Review Votes \n",
326
+ "0 5 I feel so LUCKY to have found this used (phone... 1.0 \n",
327
+ "1 4 nice phone, nice up grade from my pantach revu... 0.0 \n",
328
+ "2 5 Very pleased 0.0 \n",
329
+ "3 4 It works good but it goes slow sometimes but i... 0.0 \n",
330
+ "4 4 Great phone to replace my lost phone. The only... 0.0 "
331
+ ],
332
+ "text/html": [
333
+ "<div>\n",
334
+ "<style scoped>\n",
335
+ " .dataframe tbody tr th:only-of-type {\n",
336
+ " vertical-align: middle;\n",
337
+ " }\n",
338
+ "\n",
339
+ " .dataframe tbody tr th {\n",
340
+ " vertical-align: top;\n",
341
+ " }\n",
342
+ "\n",
343
+ " .dataframe thead th {\n",
344
+ " text-align: right;\n",
345
+ " }\n",
346
+ "</style>\n",
347
+ "<table border=\"1\" class=\"dataframe\">\n",
348
+ " <thead>\n",
349
+ " <tr style=\"text-align: right;\">\n",
350
+ " <th></th>\n",
351
+ " <th>Product Name</th>\n",
352
+ " <th>Brand Name</th>\n",
353
+ " <th>Price</th>\n",
354
+ " <th>Rating</th>\n",
355
+ " <th>Reviews</th>\n",
356
+ " <th>Review Votes</th>\n",
357
+ " </tr>\n",
358
+ " </thead>\n",
359
+ " <tbody>\n",
360
+ " <tr>\n",
361
+ " <th>0</th>\n",
362
+ " <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
363
+ " <td>Samsung</td>\n",
364
+ " <td>199.99</td>\n",
365
+ " <td>5</td>\n",
366
+ " <td>I feel so LUCKY to have found this used (phone...</td>\n",
367
+ " <td>1.0</td>\n",
368
+ " </tr>\n",
369
+ " <tr>\n",
370
+ " <th>1</th>\n",
371
+ " <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
372
+ " <td>Samsung</td>\n",
373
+ " <td>199.99</td>\n",
374
+ " <td>4</td>\n",
375
+ " <td>nice phone, nice up grade from my pantach revu...</td>\n",
376
+ " <td>0.0</td>\n",
377
+ " </tr>\n",
378
+ " <tr>\n",
379
+ " <th>2</th>\n",
380
+ " <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
381
+ " <td>Samsung</td>\n",
382
+ " <td>199.99</td>\n",
383
+ " <td>5</td>\n",
384
+ " <td>Very pleased</td>\n",
385
+ " <td>0.0</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>3</th>\n",
389
+ " <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
390
+ " <td>Samsung</td>\n",
391
+ " <td>199.99</td>\n",
392
+ " <td>4</td>\n",
393
+ " <td>It works good but it goes slow sometimes but i...</td>\n",
394
+ " <td>0.0</td>\n",
395
+ " </tr>\n",
396
+ " <tr>\n",
397
+ " <th>4</th>\n",
398
+ " <td>\"CLEAR CLEAN ESN\" Sprint EPIC 4G Galaxy SPH-D7...</td>\n",
399
+ " <td>Samsung</td>\n",
400
+ " <td>199.99</td>\n",
401
+ " <td>4</td>\n",
402
+ " <td>Great phone to replace my lost phone. The only...</td>\n",
403
+ " <td>0.0</td>\n",
404
+ " </tr>\n",
405
+ " </tbody>\n",
406
+ "</table>\n",
407
+ "</div>"
408
+ ]
409
+ },
410
+ "execution_count": 64,
411
+ "metadata": {},
412
+ "output_type": "execute_result"
413
+ }
414
+ ],
415
+ "execution_count": 64
416
+ },
417
+ {
418
+ "metadata": {},
419
+ "cell_type": "markdown",
420
+ "source": "## Configuration\n",
421
+ "id": "a44e589066e2e256"
422
+ },
423
+ {
424
+ "metadata": {
425
+ "ExecuteTime": {
426
+ "end_time": "2025-11-14T18:46:29.881682Z",
427
+ "start_time": "2025-11-14T18:46:29.879491Z"
428
+ }
429
+ },
430
+ "cell_type": "code",
431
+ "source": [
432
+ "DATA_URL = \"https://eduds.blob.core.windows.net/nlp/Amazon_Unlocked_Mobile.csv.zip\"\n",
433
+ "DATA_FILE = \"data/Amazon_Unlocked_Mobile.csv\"\n",
434
+ "MODEL_CHECKPOINT = \"distilbert-base-uncased\"\n",
435
+ "REPO_NAME = \"Floressek/sentiment_classification_from_distillbert\"\n",
436
+ "HUGGING_FACE_TOKEN = \"here input your token\"\n",
437
+ "\n",
438
+ "MAX_REVIEW_LENGTH = 128\n",
439
+ "TEST_SIZE = 0.3\n",
440
+ "BATCH_SIZE = 48\n",
441
+ "BATCH_SIZE_TOKEN = 1000\n",
442
+ "LEARNING_RATE = 2e-5\n",
443
+ "NUM_EPOCHS = 2\n",
444
+ "WEIGHT_DECAY = 0.01\n",
445
+ "SEED = 100"
446
+ ],
447
+ "id": "2cf90b8ba07d0a63",
448
+ "outputs": [],
449
+ "execution_count": 45
450
+ },
451
+ {
452
+ "metadata": {},
453
+ "cell_type": "markdown",
454
+ "source": "## Data cleaning\n",
455
+ "id": "fbce9d6f95c91c27"
456
+ },
457
+ {
458
+ "metadata": {
459
+ "ExecuteTime": {
460
+ "end_time": "2025-11-14T18:46:31.582547Z",
461
+ "start_time": "2025-11-14T18:46:31.574356Z"
462
+ }
463
+ },
464
+ "cell_type": "code",
465
+ "source": [
466
+ "df = df.drop(columns=[\"Brand Name\", \"Price\", \"Review Votes\", \"Product Name\"])\n",
467
+ "df.head()"
468
+ ],
469
+ "id": "d6daa3969babb120",
470
+ "outputs": [
471
+ {
472
+ "data": {
473
+ "text/plain": [
474
+ " Rating Reviews\n",
475
+ "0 5 I feel so LUCKY to have found this used (phone...\n",
476
+ "1 4 nice phone, nice up grade from my pantach revu...\n",
477
+ "2 5 Very pleased\n",
478
+ "3 4 It works good but it goes slow sometimes but i...\n",
479
+ "4 4 Great phone to replace my lost phone. The only..."
480
+ ],
481
+ "text/html": [
482
+ "<div>\n",
483
+ "<style scoped>\n",
484
+ " .dataframe tbody tr th:only-of-type {\n",
485
+ " vertical-align: middle;\n",
486
+ " }\n",
487
+ "\n",
488
+ " .dataframe tbody tr th {\n",
489
+ " vertical-align: top;\n",
490
+ " }\n",
491
+ "\n",
492
+ " .dataframe thead th {\n",
493
+ " text-align: right;\n",
494
+ " }\n",
495
+ "</style>\n",
496
+ "<table border=\"1\" class=\"dataframe\">\n",
497
+ " <thead>\n",
498
+ " <tr style=\"text-align: right;\">\n",
499
+ " <th></th>\n",
500
+ " <th>Rating</th>\n",
501
+ " <th>Reviews</th>\n",
502
+ " </tr>\n",
503
+ " </thead>\n",
504
+ " <tbody>\n",
505
+ " <tr>\n",
506
+ " <th>0</th>\n",
507
+ " <td>5</td>\n",
508
+ " <td>I feel so LUCKY to have found this used (phone...</td>\n",
509
+ " </tr>\n",
510
+ " <tr>\n",
511
+ " <th>1</th>\n",
512
+ " <td>4</td>\n",
513
+ " <td>nice phone, nice up grade from my pantach revu...</td>\n",
514
+ " </tr>\n",
515
+ " <tr>\n",
516
+ " <th>2</th>\n",
517
+ " <td>5</td>\n",
518
+ " <td>Very pleased</td>\n",
519
+ " </tr>\n",
520
+ " <tr>\n",
521
+ " <th>3</th>\n",
522
+ " <td>4</td>\n",
523
+ " <td>It works good but it goes slow sometimes but i...</td>\n",
524
+ " </tr>\n",
525
+ " <tr>\n",
526
+ " <th>4</th>\n",
527
+ " <td>4</td>\n",
528
+ " <td>Great phone to replace my lost phone. The only...</td>\n",
529
+ " </tr>\n",
530
+ " </tbody>\n",
531
+ "</table>\n",
532
+ "</div>"
533
+ ]
534
+ },
535
+ "execution_count": 46,
536
+ "metadata": {},
537
+ "output_type": "execute_result"
538
+ }
539
+ ],
540
+ "execution_count": 46
541
+ },
542
+ {
543
+ "metadata": {
544
+ "ExecuteTime": {
545
+ "end_time": "2025-11-14T18:46:34.926211Z",
546
+ "start_time": "2025-11-14T18:46:33.614096Z"
547
+ }
548
+ },
549
+ "cell_type": "code",
550
+ "source": [
551
+ "from datasets import Dataset\n",
552
+ "\n",
553
+ "dataset = Dataset.from_pandas(df)\n",
554
+ "\n",
555
+ "dataset = dataset.filter(\n",
556
+ " lambda x: (\n",
557
+ " x[\"Reviews\"] is not None\n",
558
+ " and len(x[\"Reviews\"].split()) < MAX_REVIEW_LENGTH\n",
559
+ " and x[\"Rating\"] in [1, 5] # keep only the extreme ratings (1 and 5 stars) for binary classification\n",
560
+ " )\n",
561
+ ")\n",
562
+ "\n",
563
+ "dataset_split = dataset.train_test_split(test_size=TEST_SIZE, seed=SEED)"
564
+ ],
565
+ "id": "40a44a79b98aa996",
566
+ "outputs": [
567
+ {
568
+ "data": {
569
+ "text/plain": [
570
+ "Filter: 0%| | 0/413840 [00:00<?, ? examples/s]"
571
+ ],
572
+ "application/vnd.jupyter.widget-view+json": {
573
+ "version_major": 2,
574
+ "version_minor": 0,
575
+ "model_id": "5023c01162324b84afb19ec45a970c56"
576
+ }
577
+ },
578
+ "metadata": {},
579
+ "output_type": "display_data",
580
+ "jetTransient": {
581
+ "display_id": null
582
+ }
583
+ }
584
+ ],
585
+ "execution_count": 47
586
+ },
587
+ {
588
+ "metadata": {},
589
+ "cell_type": "markdown",
590
+ "source": "### Tokenization",
591
+ "id": "f1cb6bd3ccecc50d"
592
+ },
593
+ {
594
+ "metadata": {
595
+ "ExecuteTime": {
596
+ "end_time": "2025-11-14T18:46:37.851802Z",
597
+ "start_time": "2025-11-14T18:46:37.100084Z"
598
+ }
599
+ },
600
+ "cell_type": "code",
601
+ "source": [
602
+ "from datasets import DatasetDict\n",
603
+ "from typing import Any\n",
604
+ "from transformers import AutoTokenizer\n",
605
+ "\n",
606
+ "tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)\n",
607
+ "\n",
608
+ "\n",
609
+ "# keep the input_ids and attention mask (padding vs. content) produced by tokenization, plus the labels\n",
610
+ "def tokenize_and_label(dataset: DatasetDict, tokenizer: Any):\n",
611
+ " def tokenize_function(example):\n",
612
+ " return tokenizer(example[\"Reviews\"], padding=\"max_length\", truncation=True)\n",
613
+ "\n",
614
+ " def convert_to_binary_label(example):\n",
615
+ " return {'label': [0 if r == 1 else 1 for r in example['Rating']]}\n",
616
+ "\n",
617
+ " tokenized = dataset.map(tokenize_function, batched=True, batch_size=BATCH_SIZE_TOKEN)\n",
618
+ " tokenized = tokenized.map(convert_to_binary_label, batched=True, batch_size=BATCH_SIZE_TOKEN)\n",
619
+ " tokenized = tokenized.remove_columns([\"Reviews\", \"Rating\"])\n",
620
+ "\n",
621
+ " return tokenized"
622
+ ],
623
+ "id": "18c877bae841c93e",
624
+ "outputs": [],
625
+ "execution_count": 48
626
+ },
627
+ {
628
+ "metadata": {
629
+ "ExecuteTime": {
630
+ "end_time": "2025-11-14T18:46:41.756880Z",
631
+ "start_time": "2025-11-14T18:46:41.754675Z"
632
+ }
633
+ },
634
+ "cell_type": "code",
635
+ "source": [
636
+ "import numpy as np\n",
637
+ "from evaluate import load\n",
638
+ "\n",
639
+ "\n",
640
+ "def compute_metrics(eval_pred) -> dict:\n",
641
+ " accuracy_metric = load(\"accuracy\")\n",
642
+ " f1_metric = load(\"f1\")\n",
643
+ "\n",
644
+ " logits, labels = eval_pred\n",
645
+ " predictions = np.argmax(logits, axis=-1)\n",
646
+ "\n",
647
+ " accuracy = accuracy_metric.compute(predictions=predictions, references=labels)[\"accuracy\"]\n",
648
+ " f1 = f1_metric.compute(predictions=predictions, references=labels)[\"f1\"]\n",
649
+ "\n",
650
+ " return {\"accuracy\": accuracy, \"f1\": f1}"
651
+ ],
652
+ "id": "763efe8d9ac72643",
653
+ "outputs": [],
654
+ "execution_count": 50
655
+ },
656
+ {
657
+ "metadata": {},
658
+ "cell_type": "markdown",
659
+ "source": "## Hugging Face - login and model loading\n",
660
+ "id": "b73061c3bcedc4a2"
661
+ },
662
+ {
663
+ "metadata": {
664
+ "ExecuteTime": {
665
+ "end_time": "2025-11-14T18:46:44.096470Z",
666
+ "start_time": "2025-11-14T18:46:43.924416Z"
667
+ }
668
+ },
669
+ "cell_type": "code",
670
+ "source": [
671
+ "from huggingface_hub import login\n",
672
+ "\n",
673
+ "token = HUGGING_FACE_TOKEN\n",
674
+ "\n",
675
+ "if token:\n",
676
+ " login(token=HUGGING_FACE_TOKEN)\n",
677
+ "else:\n",
678
+ " login()"
679
+ ],
680
+ "id": "b439612b50fed578",
681
+ "outputs": [],
682
+ "execution_count": 51
683
+ },
684
+ {
685
+ "metadata": {},
686
+ "cell_type": "markdown",
687
+ "source": "## Training and fine-tuning the model\n",
688
+ "id": "9560b042d0525b99"
689
+ },
690
+ {
691
+ "metadata": {
692
+ "ExecuteTime": {
693
+ "end_time": "2025-11-14T19:29:10.301060Z",
694
+ "start_time": "2025-11-14T18:46:45.568724Z"
695
+ }
696
+ },
697
+ "cell_type": "code",
698
+ "source": [
699
+ "from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
700
+ "\n",
701
+ "\n",
702
+ "def train_model(tokenized_datasets: DatasetDict, tokenizer) -> Trainer:\n",
703
+ " model = AutoModelForSequenceClassification.from_pretrained(\n",
704
+ " MODEL_CHECKPOINT,\n",
705
+ " num_labels=2,\n",
706
+ " )\n",
707
+ "\n",
708
+ " training_args = TrainingArguments(\n",
709
+ " output_dir=REPO_NAME,\n",
710
+ " learning_rate=LEARNING_RATE,\n",
711
+ " per_device_train_batch_size=BATCH_SIZE,\n",
712
+ " per_device_eval_batch_size=BATCH_SIZE,\n",
713
+ " num_train_epochs=NUM_EPOCHS,\n",
714
+ " weight_decay=WEIGHT_DECAY,\n",
715
+ " save_strategy=\"epoch\",\n",
716
+ " eval_strategy=\"epoch\",\n",
717
+ " push_to_hub=True,\n",
718
+ " remove_unused_columns=False,\n",
719
+ " logging_steps=100,\n",
720
+ " fp16=True,\n",
721
+ " )\n",
722
+ "\n",
723
+ " trainer = Trainer(\n",
724
+ " model=model,\n",
725
+ " args=training_args,\n",
726
+ " train_dataset=tokenized_datasets['train'],\n",
727
+ " eval_dataset=tokenized_datasets['test'],\n",
728
+ " processing_class=tokenizer,\n",
729
+ " compute_metrics=compute_metrics\n",
730
+ " )\n",
731
+ "\n",
732
+ " trainer.train()\n",
733
+ "\n",
734
+ " return trainer\n",
735
+ "\n",
736
+ "\n",
737
+ "print(f\"Train size: {len(dataset_split['train'])}\")\n",
738
+ "print(f\"Test size: {len(dataset_split['test'])}\")\n",
739
+ "\n",
740
+ "tokenized_datasets = tokenize_and_label(dataset_split, tokenizer)\n",
741
+ "\n",
742
+ "trainer = train_model(tokenized_datasets, tokenizer)"
743
+ ],
744
+ "id": "f2baddc39582ea90",
745
+ "outputs": [
746
+ {
747
+ "name": "stdout",
748
+ "output_type": "stream",
749
+ "text": [
750
+ "Train size: 196375\n",
751
+ "Test size: 84162\n"
752
+ ]
753
+ },
754
+ {
755
+ "data": {
756
+ "text/plain": [
757
+ "Map: 0%| | 0/196375 [00:00<?, ? examples/s]"
758
+ ],
759
+ "application/vnd.jupyter.widget-view+json": {
760
+ "version_major": 2,
761
+ "version_minor": 0,
762
+ "model_id": "9eca0885f7e941e9ba0225ecab5226b5"
763
+ }
764
+ },
765
+ "metadata": {},
766
+ "output_type": "display_data",
767
+ "jetTransient": {
768
+ "display_id": null
769
+ }
770
+ },
771
+ {
772
+ "data": {
773
+ "text/plain": [
774
+ "Map: 0%| | 0/84162 [00:00<?, ? examples/s]"
775
+ ],
776
+ "application/vnd.jupyter.widget-view+json": {
777
+ "version_major": 2,
778
+ "version_minor": 0,
779
+ "model_id": "46e17d5252ff4edba4613665ce78610f"
780
+ }
781
+ },
782
+ "metadata": {},
783
+ "output_type": "display_data",
784
+ "jetTransient": {
785
+ "display_id": null
786
+ }
787
+ },
788
+ {
789
+ "data": {
790
+ "text/plain": [
791
+ "Map: 0%| | 0/196375 [00:00<?, ? examples/s]"
792
+ ],
793
+ "application/vnd.jupyter.widget-view+json": {
794
+ "version_major": 2,
795
+ "version_minor": 0,
796
+ "model_id": "b0899afc630842dd9c6de08b7d186a3b"
797
+ }
798
+ },
799
+ "metadata": {},
800
+ "output_type": "display_data",
801
+ "jetTransient": {
802
+ "display_id": null
803
+ }
804
+ },
805
+ {
806
+ "data": {
807
+ "text/plain": [
808
+ "Map: 0%| | 0/84162 [00:00<?, ? examples/s]"
809
+ ],
810
+ "application/vnd.jupyter.widget-view+json": {
811
+ "version_major": 2,
812
+ "version_minor": 0,
813
+ "model_id": "669f11f90f5d4ac19919c0ae3772bf7b"
814
+ }
815
+ },
816
+ "metadata": {},
817
+ "output_type": "display_data",
818
+ "jetTransient": {
819
+ "display_id": null
820
+ }
821
+ },
822
+ {
823
+ "name": "stderr",
824
+ "output_type": "stream",
825
+ "text": [
826
+ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n",
827
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
828
+ ]
829
+ },
830
+ {
831
+ "data": {
832
+ "text/plain": [
833
+ "<IPython.core.display.HTML object>"
834
+ ],
835
+ "text/html": [
836
+ "\n",
837
+ " <div>\n",
838
+ " \n",
839
+ " <progress value='8184' max='8184' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
840
+ " [8184/8184 41:33, Epoch 2/2]\n",
841
+ " </div>\n",
842
+ " <table border=\"1\" class=\"dataframe\">\n",
843
+ " <thead>\n",
844
+ " <tr style=\"text-align: left;\">\n",
845
+ " <th>Epoch</th>\n",
846
+ " <th>Training Loss</th>\n",
847
+ " <th>Validation Loss</th>\n",
848
+ " <th>Accuracy</th>\n",
849
+ " <th>F1</th>\n",
850
+ " </tr>\n",
851
+ " </thead>\n",
852
+ " <tbody>\n",
853
+ " <tr>\n",
854
+ " <td>1</td>\n",
855
+ " <td>0.043200</td>\n",
856
+ " <td>0.040534</td>\n",
857
+ " <td>0.989152</td>\n",
858
+ " <td>0.992839</td>\n",
859
+ " </tr>\n",
860
+ " <tr>\n",
861
+ " <td>2</td>\n",
862
+ " <td>0.029900</td>\n",
863
+ " <td>0.035284</td>\n",
864
+ " <td>0.991718</td>\n",
865
+ " <td>0.994545</td>\n",
866
+ " </tr>\n",
867
+ " </tbody>\n",
868
+ "</table><p>"
869
+ ]
870
+ },
871
+ "metadata": {},
872
+ "output_type": "display_data",
873
+ "jetTransient": {
874
+ "display_id": "5232456d49be4c01fb108dc77e31f262"
875
+ }
876
+ },
877
+ {
878
+ "data": {
879
+ "text/plain": [
880
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
881
+ ],
882
+ "application/vnd.jupyter.widget-view+json": {
883
+ "version_major": 2,
884
+ "version_minor": 0,
885
+ "model_id": "6ea920f26713434d9390bbe9cfa3a896"
886
+ }
887
+ },
888
+ "metadata": {},
889
+ "output_type": "display_data",
890
+ "jetTransient": {
891
+ "display_id": null
892
+ }
893
+ },
894
+ {
895
+ "data": {
896
+ "text/plain": [
897
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
898
+ ],
899
+ "application/vnd.jupyter.widget-view+json": {
900
+ "version_major": 2,
901
+ "version_minor": 0,
902
+ "model_id": "b661d468c9394cf7a2f3e161a223adf1"
903
+ }
904
+ },
905
+ "metadata": {},
906
+ "output_type": "display_data",
907
+ "jetTransient": {
908
+ "display_id": null
909
+ }
910
+ },
911
+ {
912
+ "data": {
913
+ "text/plain": [
914
+ "Downloading builder script: 0.00B [00:00, ?B/s]"
915
+ ],
916
+ "application/vnd.jupyter.widget-view+json": {
917
+ "version_major": 2,
918
+ "version_minor": 0,
919
+ "model_id": "e755ba88b1e4457b8bab9944d4b1412f"
920
+ }
921
+ },
922
+ "metadata": {},
923
+ "output_type": "display_data",
924
+ "jetTransient": {
925
+ "display_id": null
926
+ }
927
+ }
928
+ ],
929
+ "execution_count": 52
930
+ },
931
+ {
932
+ "metadata": {},
933
+ "cell_type": "markdown",
934
+ "source": "## Testing the newly fine-tuned model\n",
935
+ "id": "195c0e81bf298c1c"
936
+ },
937
+ {
938
+ "metadata": {
939
+ "ExecuteTime": {
940
+ "end_time": "2025-11-14T19:56:16.772843Z",
941
+ "start_time": "2025-11-14T19:56:16.769490Z"
942
+ }
943
+ },
944
+ "cell_type": "code",
945
+ "source": [
946
+ "from transformers import pipeline\n",
947
+ "\n",
948
+ "\n",
949
+ "def create_classifier(model_path=f\"./{REPO_NAME}\"):\n",
950
+ " tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
951
+ " model = AutoModelForSequenceClassification.from_pretrained(model_path)\n",
952
+ "\n",
953
+ " return pipeline(\"text-classification\", model=model, tokenizer=tokenizer)\n",
954
+ "\n",
955
+ "\n",
956
+ "def test_classifier(classifier) -> None:\n",
957
+ " examples = [\n",
958
+ " \"Shame. I wish I hadn't buy it.\",\n",
959
+ " \"Great handset!\",\n",
960
+ " \"Terrible product, waste of money\",\n",
961
+ " \"Best phone ever, highly recommend!\"\n",
962
+ " ]\n",
963
+ "\n",
964
+ " for text in examples:\n",
965
+ " result = classifier(text)\n",
966
+ " print(f\"Text: {text}\")\n",
967
+ " print(f\"Result: {result}\\n\")\n"
968
+ ],
969
+ "id": "c1ce352b5ebe328c",
970
+ "outputs": [],
971
+ "execution_count": 53
972
+ },
973
+ {
974
+ "metadata": {},
975
+ "cell_type": "markdown",
976
+ "source": "### Evaluation metrics",
977
+ "id": "d4c5787afb9e6ae7"
978
+ },
979
+ {
980
+ "metadata": {
981
+ "ExecuteTime": {
982
+ "end_time": "2025-11-14T20:06:29.136185Z",
983
+ "start_time": "2025-11-14T20:03:59.964777Z"
984
+ }
985
+ },
986
+ "cell_type": "code",
987
+ "source": [
988
+ "from transformers import AutoModelForSequenceClassification\n",
989
+ "from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, precision_recall_curve, auc\n",
990
+ "import numpy as np\n",
991
+ "import torch\n",
992
+ "\n",
993
+ "model_path = f\"./{REPO_NAME}\"\n",
994
+ "model = AutoModelForSequenceClassification.from_pretrained(model_path)\n",
995
+ "print(model)\n",
996
+ "\n",
997
+ "preds = trainer.predict(tokenized_datasets[\"test\"])\n",
998
+ "logits = preds.predictions\n",
999
+ "y_true = preds.label_ids\n",
1000
+ "y_pred = np.argmax(logits, axis=-1)\n",
1001
+ "probs = torch.softmax(torch.from_numpy(logits), dim=-1).numpy()[:, 1]\n",
1002
+ "\n",
1003
+ "cm = confusion_matrix(y_true, y_pred)\n",
1004
+ "tn, fp, fn, tp = cm.ravel()\n",
1005
+ "roc_auc = roc_auc_score(y_true, probs)\n",
1006
+ "prec_curve, rec_curve, _ = precision_recall_curve(y_true, probs)\n",
1007
+ "pr_auc = auc(rec_curve, prec_curve)\n",
1008
+ "report = classification_report(y_true, y_pred, digits=4)\n",
1009
+ "\n",
1010
+ "print(\"Confusion matrix:\")\n",
1011
+ "print(cm)\n",
1012
+ "print(f\"TP={tp} FP={fp} TN={tn} FN={fn}\")\n",
1013
+ "print(f\"ROC-AUC={roc_auc:.4f} PR-AUC={pr_auc:.4f}\")\n",
1014
+ "print(report)\n"
1015
+ ],
1016
+ "id": "e3308ae685c70928",
1017
+ "outputs": [
1018
+ {
1019
+ "name": "stdout",
1020
+ "output_type": "stream",
1021
+ "text": [
1022
+ "Architecture: DistilBertForSequenceClassification\n",
1023
+ "DistilBertConfig {\n",
1024
+ " \"activation\": \"gelu\",\n",
1025
+ " \"architectures\": [\n",
1026
+ " \"DistilBertForSequenceClassification\"\n",
1027
+ " ],\n",
1028
+ " \"attention_dropout\": 0.1,\n",
1029
+ " \"dim\": 768,\n",
1030
+ " \"dropout\": 0.1,\n",
1031
+ " \"dtype\": \"float32\",\n",
1032
+ " \"hidden_dim\": 3072,\n",
1033
+ " \"initializer_range\": 0.02,\n",
1034
+ " \"max_position_embeddings\": 512,\n",
1035
+ " \"model_type\": \"distilbert\",\n",
1036
+ " \"n_heads\": 12,\n",
1037
+ " \"n_layers\": 6,\n",
1038
+ " \"pad_token_id\": 0,\n",
1039
+ " \"problem_type\": \"single_label_classification\",\n",
1040
+ " \"qa_dropout\": 0.1,\n",
1041
+ " \"seq_classif_dropout\": 0.2,\n",
1042
+ " \"sinusoidal_pos_embds\": false,\n",
1043
+ " \"tie_weights_\": true,\n",
1044
+ " \"transformers_version\": \"4.57.1\",\n",
1045
+ " \"vocab_size\": 30522\n",
1046
+ "}\n",
1047
+ "\n",
1048
+ "Total params: 66,955,010\n",
1049
+ "Trainable params: 66,955,010\n",
1050
+ "Confusion matrix:\n",
1051
+ "[[19922 352]\n",
1052
+ " [ 345 63543]]\n",
1053
+ "TP=63543 FP=352 TN=19922 FN=345\n",
1054
+ "ROC-AUC=0.9983 PR-AUC=0.9994\n",
1055
+ " precision recall f1-score support\n",
1056
+ "\n",
1057
+ " 0 0.9830 0.9826 0.9828 20274\n",
1058
+ " 1 0.9945 0.9946 0.9945 63888\n",
1059
+ "\n",
1060
+ " accuracy 0.9917 84162\n",
1061
+ " macro avg 0.9887 0.9886 0.9887 84162\n",
1062
+ "weighted avg 0.9917 0.9917 0.9917 84162\n",
1063
+ "\n"
1064
+ ]
1065
+ }
1066
+ ],
1067
+ "execution_count": 62
1068
+ },
1069
+ {
1070
+ "metadata": {},
1071
+ "cell_type": "markdown",
1072
+ "source": "### Testing the model with text samples",
1073
+ "id": "6fcdad5dc49bc568"
1074
+ },
1075
+ {
1076
+ "metadata": {
1077
+ "ExecuteTime": {
1078
+ "end_time": "2025-11-14T19:59:26.564660Z",
1079
+ "start_time": "2025-11-14T19:59:26.258924Z"
1080
+ }
1081
+ },
1082
+ "cell_type": "code",
1083
+ "source": [
1084
+ "# Skipped because evaluation was already enabled during training\n",
1085
+ "# eval_results = trainer.evaluate()\n",
1086
+ "# print(f\"Evaluation results: {eval_results}\")\n",
1087
+ "\n",
1088
+ "print(\"Testing the fine-tuned model:\")\n",
1089
+ "classifier = create_classifier()\n",
1090
+ "test_classifier(classifier)"
1091
+ ],
1092
+ "id": "993ea8f31d059b8d",
1093
+ "outputs": [
1094
+ {
1095
+ "name": "stderr",
1096
+ "output_type": "stream",
1097
+ "text": [
1098
+ "Device set to use cuda:0\n"
1099
+ ]
1100
+ },
1101
+ {
1102
+ "name": "stdout",
1103
+ "output_type": "stream",
1104
+ "text": [
1105
+ "Testing the fine-tuned model:\n",
1106
+ "Text: Shame. I wish I hadn't buy it.\n",
1107
+ "Result: [{'label': 'LABEL_0', 'score': 0.9975292086601257}]\n",
1108
+ "\n",
1109
+ "Text: Great handset!\n",
1110
+ "Result: [{'label': 'LABEL_1', 'score': 0.9996094107627869}]\n",
1111
+ "\n",
1112
+ "Text: Terrible product, waste of money\n",
1113
+ "Result: [{'label': 'LABEL_0', 'score': 0.998723566532135}]\n",
1114
+ "\n",
1115
+ "Text: Best phone ever, highly recommend!\n",
1116
+ "Result: [{'label': 'LABEL_1', 'score': 0.9996873140335083}]\n",
1117
+ "\n"
1118
+ ]
1119
+ }
1120
+ ],
1121
+ "execution_count": 58
1122
+ }
1123
+ ],
1124
+ "metadata": {
1125
+ "kernelspec": {
1126
+ "display_name": "Python 3",
1127
+ "language": "python",
1128
+ "name": "python3"
1129
+ },
1130
+ "language_info": {
1131
+ "codemirror_mode": {
1132
+ "name": "ipython",
1133
+ "version": 2
1134
+ },
1135
+ "file_extension": ".py",
1136
+ "mimetype": "text/x-python",
1137
+ "name": "python",
1138
+ "nbconvert_exporter": "python",
1139
+ "pygments_lexer": "ipython2",
1140
+ "version": "2.7.6"
1141
+ }
1142
+ },
1143
+ "nbformat": 4,
1144
+ "nbformat_minor": 5
1145
+ }
data/Amazon_Unlocked_Mobile.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097abeefe303816e0f5e9c9ff380adb25910cf46307d2c104911eae8d0304e76
3
+ size 131879567
data/Amazon_Unlocked_Mobile.csv.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801af84059ea0c660aa082b0f07363fcefe224827ba7231248ecc69d95a739f3
3
+ size 34182461