robo-noct commited on
Commit
aa31af5
·
verified ·
1 Parent(s): e56d0dd

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - text-classification
7
+ base_model: google/mobilebert-uncased
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 1.7558108568191528
18
+
19
+ f1_macro: 0.3432320638995863
20
+
21
+ f1_micro: 0.35714285714285715
22
+
23
+ f1_weighted: 0.3432320638995863
24
+
25
+ precision_macro: 0.6085343228200372
26
+
27
+ precision_micro: 0.35714285714285715
28
+
29
+ precision_weighted: 0.6085343228200372
30
+
31
+ recall_macro: 0.35714285714285715
32
+
33
+ recall_micro: 0.35714285714285715
34
+
35
+ recall_weighted: 0.35714285714285715
36
+
37
+ accuracy: 0.35714285714285715
checkpoint-84/config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "_num_labels": 7,
4
+ "architectures": [
5
+ "MobileBertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_activation": false,
9
+ "classifier_dropout": null,
10
+ "embedding_size": 128,
11
+ "hidden_act": "relu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 512,
14
+ "id2label": {
15
+ "0": "anniversary",
16
+ "1": "baby",
17
+ "2": "birthday",
18
+ "3": "get_well",
19
+ "4": "holiday",
20
+ "5": "promotion",
21
+ "6": "wedding"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 512,
25
+ "intra_bottleneck_size": 128,
26
+ "key_query_shared_bottleneck": true,
27
+ "label2id": {
28
+ "anniversary": 0,
29
+ "baby": 1,
30
+ "birthday": 2,
31
+ "get_well": 3,
32
+ "holiday": 4,
33
+ "promotion": 5,
34
+ "wedding": 6
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "mobilebert",
39
+ "normalization_type": "no_norm",
40
+ "num_attention_heads": 4,
41
+ "num_feedforward_networks": 4,
42
+ "num_hidden_layers": 24,
43
+ "pad_token_id": 0,
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.48.0",
47
+ "trigram_input": true,
48
+ "true_hidden_size": 128,
49
+ "type_vocab_size": 2,
50
+ "use_bottleneck": true,
51
+ "use_bottleneck_attention": false,
52
+ "vocab_size": 30522
53
+ }
checkpoint-84/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3afca87ac473bfc430ab58b3fe58e985d3ac71f86c6748fd1e7e3432e743ae
3
+ size 98480380
checkpoint-84/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e71e0bba8d7dc74a70ef2f96b0223c0e214fc6d5766120c470c3e8c6d881ac4
3
+ size 197583069
checkpoint-84/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34f589ceff3622f09550f9b30eae034559a07be8bcd0c079a574e80fdf0f2936
3
+ size 13990
checkpoint-84/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b176eedd19a752a5759156c376fae5bff5a174b7b100c0698e65f5e76ac4547e
3
+ size 1064
checkpoint-84/trainer_state.json ADDED
@@ -0,0 +1,684 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.7558108568191528,
3
+ "best_model_checkpoint": "greetings/checkpoint-84",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 84,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03571428571428571,
13
+ "grad_norm": 363697984.0,
14
+ "learning_rate": 5.555555555555556e-06,
15
+ "loss": 2329191.0,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.07142857142857142,
20
+ "grad_norm": 332609952.0,
21
+ "learning_rate": 1.1111111111111112e-05,
22
+ "loss": 4540264.0,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.10714285714285714,
27
+ "grad_norm": 366500000.0,
28
+ "learning_rate": 1.6666666666666667e-05,
29
+ "loss": 1966578.75,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.14285714285714285,
34
+ "grad_norm": 367947328.0,
35
+ "learning_rate": 2.2222222222222223e-05,
36
+ "loss": 4571710.5,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.17857142857142858,
41
+ "grad_norm": 174013440.0,
42
+ "learning_rate": 2.777777777777778e-05,
43
+ "loss": 2946220.5,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.21428571428571427,
48
+ "grad_norm": 310462848.0,
49
+ "learning_rate": 3.3333333333333335e-05,
50
+ "loss": 4198167.5,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.25,
55
+ "grad_norm": 295043232.0,
56
+ "learning_rate": 3.888888888888889e-05,
57
+ "loss": 2041447.375,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.2857142857142857,
62
+ "grad_norm": 275529440.0,
63
+ "learning_rate": 4.4444444444444447e-05,
64
+ "loss": 3907286.5,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.32142857142857145,
69
+ "grad_norm": 261926944.0,
70
+ "learning_rate": 5e-05,
71
+ "loss": 2561375.0,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.35714285714285715,
76
+ "grad_norm": 198724384.0,
77
+ "learning_rate": 4.933333333333334e-05,
78
+ "loss": 684095.8125,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.39285714285714285,
83
+ "grad_norm": 74599368.0,
84
+ "learning_rate": 4.866666666666667e-05,
85
+ "loss": 730185.5625,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 0.42857142857142855,
90
+ "grad_norm": 141572384.0,
91
+ "learning_rate": 4.8e-05,
92
+ "loss": 1238386.5,
93
+ "step": 12
94
+ },
95
+ {
96
+ "epoch": 0.4642857142857143,
97
+ "grad_norm": 105069400.0,
98
+ "learning_rate": 4.7333333333333336e-05,
99
+ "loss": 329932.7188,
100
+ "step": 13
101
+ },
102
+ {
103
+ "epoch": 0.5,
104
+ "grad_norm": 78809056.0,
105
+ "learning_rate": 4.666666666666667e-05,
106
+ "loss": 502494.0312,
107
+ "step": 14
108
+ },
109
+ {
110
+ "epoch": 0.5357142857142857,
111
+ "grad_norm": 45120072.0,
112
+ "learning_rate": 4.600000000000001e-05,
113
+ "loss": 132410.2188,
114
+ "step": 15
115
+ },
116
+ {
117
+ "epoch": 0.5714285714285714,
118
+ "grad_norm": 14674872.0,
119
+ "learning_rate": 4.5333333333333335e-05,
120
+ "loss": 117778.0703,
121
+ "step": 16
122
+ },
123
+ {
124
+ "epoch": 0.6071428571428571,
125
+ "grad_norm": 12449395.0,
126
+ "learning_rate": 4.466666666666667e-05,
127
+ "loss": 36973.5859,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 0.6428571428571429,
132
+ "grad_norm": 7055619.0,
133
+ "learning_rate": 4.4000000000000006e-05,
134
+ "loss": 17534.8203,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 0.6785714285714286,
139
+ "grad_norm": 2371520.25,
140
+ "learning_rate": 4.3333333333333334e-05,
141
+ "loss": 8574.9141,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 0.7142857142857143,
146
+ "grad_norm": 689186.875,
147
+ "learning_rate": 4.266666666666667e-05,
148
+ "loss": 960.9683,
149
+ "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.75,
153
+ "grad_norm": 5261831.0,
154
+ "learning_rate": 4.2e-05,
155
+ "loss": 3616.8489,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 0.7857142857142857,
160
+ "grad_norm": 391023.5625,
161
+ "learning_rate": 4.133333333333333e-05,
162
+ "loss": 1925.2124,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 0.8214285714285714,
167
+ "grad_norm": 348153.03125,
168
+ "learning_rate": 4.066666666666667e-05,
169
+ "loss": 846.3534,
170
+ "step": 23
171
+ },
172
+ {
173
+ "epoch": 0.8571428571428571,
174
+ "grad_norm": 195729.609375,
175
+ "learning_rate": 4e-05,
176
+ "loss": 1115.4226,
177
+ "step": 24
178
+ },
179
+ {
180
+ "epoch": 0.8928571428571429,
181
+ "grad_norm": 213168.09375,
182
+ "learning_rate": 3.933333333333333e-05,
183
+ "loss": 549.3003,
184
+ "step": 25
185
+ },
186
+ {
187
+ "epoch": 0.9285714285714286,
188
+ "grad_norm": 174626.953125,
189
+ "learning_rate": 3.866666666666667e-05,
190
+ "loss": 1134.6808,
191
+ "step": 26
192
+ },
193
+ {
194
+ "epoch": 0.9642857142857143,
195
+ "grad_norm": 140094.015625,
196
+ "learning_rate": 3.8e-05,
197
+ "loss": 317.9678,
198
+ "step": 27
199
+ },
200
+ {
201
+ "epoch": 1.0,
202
+ "grad_norm": 18230.22265625,
203
+ "learning_rate": 3.733333333333334e-05,
204
+ "loss": 26.3715,
205
+ "step": 28
206
+ },
207
+ {
208
+ "epoch": 1.0,
209
+ "eval_accuracy": 0.14285714285714285,
210
+ "eval_f1_macro": 0.036281179138321996,
211
+ "eval_f1_micro": 0.14285714285714285,
212
+ "eval_f1_weighted": 0.036281179138321996,
213
+ "eval_loss": 3.2450358867645264,
214
+ "eval_precision_macro": 0.02077922077922078,
215
+ "eval_precision_micro": 0.14285714285714285,
216
+ "eval_precision_weighted": 0.02077922077922078,
217
+ "eval_recall_macro": 0.14285714285714285,
218
+ "eval_recall_micro": 0.14285714285714285,
219
+ "eval_recall_weighted": 0.14285714285714285,
220
+ "eval_runtime": 4.5357,
221
+ "eval_samples_per_second": 12.346,
222
+ "eval_steps_per_second": 0.882,
223
+ "step": 28
224
+ },
225
+ {
226
+ "epoch": 1.0357142857142858,
227
+ "grad_norm": 44302.54296875,
228
+ "learning_rate": 3.6666666666666666e-05,
229
+ "loss": 57.2804,
230
+ "step": 29
231
+ },
232
+ {
233
+ "epoch": 1.0714285714285714,
234
+ "grad_norm": 59.95901870727539,
235
+ "learning_rate": 3.6e-05,
236
+ "loss": 3.1393,
237
+ "step": 30
238
+ },
239
+ {
240
+ "epoch": 1.1071428571428572,
241
+ "grad_norm": 16230.6044921875,
242
+ "learning_rate": 3.5333333333333336e-05,
243
+ "loss": 18.976,
244
+ "step": 31
245
+ },
246
+ {
247
+ "epoch": 1.1428571428571428,
248
+ "grad_norm": 10362.8798828125,
249
+ "learning_rate": 3.466666666666667e-05,
250
+ "loss": 5.5453,
251
+ "step": 32
252
+ },
253
+ {
254
+ "epoch": 1.1785714285714286,
255
+ "grad_norm": 55.348052978515625,
256
+ "learning_rate": 3.4000000000000007e-05,
257
+ "loss": 3.1038,
258
+ "step": 33
259
+ },
260
+ {
261
+ "epoch": 1.2142857142857142,
262
+ "grad_norm": 36.999446868896484,
263
+ "learning_rate": 3.3333333333333335e-05,
264
+ "loss": 2.6861,
265
+ "step": 34
266
+ },
267
+ {
268
+ "epoch": 1.25,
269
+ "grad_norm": 33.05454635620117,
270
+ "learning_rate": 3.266666666666667e-05,
271
+ "loss": 1.6557,
272
+ "step": 35
273
+ },
274
+ {
275
+ "epoch": 1.2857142857142856,
276
+ "grad_norm": 60.194175720214844,
277
+ "learning_rate": 3.2000000000000005e-05,
278
+ "loss": 2.6653,
279
+ "step": 36
280
+ },
281
+ {
282
+ "epoch": 1.3214285714285714,
283
+ "grad_norm": 24.14499282836914,
284
+ "learning_rate": 3.1333333333333334e-05,
285
+ "loss": 2.1868,
286
+ "step": 37
287
+ },
288
+ {
289
+ "epoch": 1.3571428571428572,
290
+ "grad_norm": 42.75698471069336,
291
+ "learning_rate": 3.066666666666667e-05,
292
+ "loss": 2.4374,
293
+ "step": 38
294
+ },
295
+ {
296
+ "epoch": 1.3928571428571428,
297
+ "grad_norm": 34.075408935546875,
298
+ "learning_rate": 3e-05,
299
+ "loss": 1.9931,
300
+ "step": 39
301
+ },
302
+ {
303
+ "epoch": 1.4285714285714286,
304
+ "grad_norm": 53.1582145690918,
305
+ "learning_rate": 2.9333333333333336e-05,
306
+ "loss": 2.0789,
307
+ "step": 40
308
+ },
309
+ {
310
+ "epoch": 1.4642857142857144,
311
+ "grad_norm": 35.603214263916016,
312
+ "learning_rate": 2.8666666666666668e-05,
313
+ "loss": 2.1585,
314
+ "step": 41
315
+ },
316
+ {
317
+ "epoch": 1.5,
318
+ "grad_norm": 25.19101333618164,
319
+ "learning_rate": 2.8000000000000003e-05,
320
+ "loss": 2.1399,
321
+ "step": 42
322
+ },
323
+ {
324
+ "epoch": 1.5357142857142856,
325
+ "grad_norm": 28.07346534729004,
326
+ "learning_rate": 2.733333333333333e-05,
327
+ "loss": 2.0334,
328
+ "step": 43
329
+ },
330
+ {
331
+ "epoch": 1.5714285714285714,
332
+ "grad_norm": 28.849031448364258,
333
+ "learning_rate": 2.6666666666666667e-05,
334
+ "loss": 2.0505,
335
+ "step": 44
336
+ },
337
+ {
338
+ "epoch": 1.6071428571428572,
339
+ "grad_norm": 27.37757682800293,
340
+ "learning_rate": 2.6000000000000002e-05,
341
+ "loss": 1.9464,
342
+ "step": 45
343
+ },
344
+ {
345
+ "epoch": 1.6428571428571428,
346
+ "grad_norm": 33.769561767578125,
347
+ "learning_rate": 2.5333333333333337e-05,
348
+ "loss": 1.9979,
349
+ "step": 46
350
+ },
351
+ {
352
+ "epoch": 1.6785714285714286,
353
+ "grad_norm": 28.812028884887695,
354
+ "learning_rate": 2.466666666666667e-05,
355
+ "loss": 2.0561,
356
+ "step": 47
357
+ },
358
+ {
359
+ "epoch": 1.7142857142857144,
360
+ "grad_norm": 30.845348358154297,
361
+ "learning_rate": 2.4e-05,
362
+ "loss": 2.1788,
363
+ "step": 48
364
+ },
365
+ {
366
+ "epoch": 1.75,
367
+ "grad_norm": 20.779951095581055,
368
+ "learning_rate": 2.3333333333333336e-05,
369
+ "loss": 2.0341,
370
+ "step": 49
371
+ },
372
+ {
373
+ "epoch": 1.7857142857142856,
374
+ "grad_norm": 33.18374252319336,
375
+ "learning_rate": 2.2666666666666668e-05,
376
+ "loss": 1.9194,
377
+ "step": 50
378
+ },
379
+ {
380
+ "epoch": 1.8214285714285714,
381
+ "grad_norm": 28.13947105407715,
382
+ "learning_rate": 2.2000000000000003e-05,
383
+ "loss": 2.0167,
384
+ "step": 51
385
+ },
386
+ {
387
+ "epoch": 1.8571428571428572,
388
+ "grad_norm": 18.270055770874023,
389
+ "learning_rate": 2.1333333333333335e-05,
390
+ "loss": 2.0225,
391
+ "step": 52
392
+ },
393
+ {
394
+ "epoch": 1.8928571428571428,
395
+ "grad_norm": 30.988462448120117,
396
+ "learning_rate": 2.0666666666666666e-05,
397
+ "loss": 1.9634,
398
+ "step": 53
399
+ },
400
+ {
401
+ "epoch": 1.9285714285714286,
402
+ "grad_norm": 30.876007080078125,
403
+ "learning_rate": 2e-05,
404
+ "loss": 2.1378,
405
+ "step": 54
406
+ },
407
+ {
408
+ "epoch": 1.9642857142857144,
409
+ "grad_norm": 19.88353729248047,
410
+ "learning_rate": 1.9333333333333333e-05,
411
+ "loss": 1.971,
412
+ "step": 55
413
+ },
414
+ {
415
+ "epoch": 2.0,
416
+ "grad_norm": 28.21643829345703,
417
+ "learning_rate": 1.866666666666667e-05,
418
+ "loss": 1.8975,
419
+ "step": 56
420
+ },
421
+ {
422
+ "epoch": 2.0,
423
+ "eval_accuracy": 0.19642857142857142,
424
+ "eval_f1_macro": 0.12244897959183673,
425
+ "eval_f1_micro": 0.19642857142857142,
426
+ "eval_f1_weighted": 0.12244897959183673,
427
+ "eval_loss": 1.9160802364349365,
428
+ "eval_precision_macro": 0.20477664750110572,
429
+ "eval_precision_micro": 0.19642857142857142,
430
+ "eval_precision_weighted": 0.20477664750110572,
431
+ "eval_recall_macro": 0.19642857142857142,
432
+ "eval_recall_micro": 0.19642857142857142,
433
+ "eval_recall_weighted": 0.19642857142857142,
434
+ "eval_runtime": 4.7496,
435
+ "eval_samples_per_second": 11.79,
436
+ "eval_steps_per_second": 0.842,
437
+ "step": 56
438
+ },
439
+ {
440
+ "epoch": 2.0357142857142856,
441
+ "grad_norm": 45.93918228149414,
442
+ "learning_rate": 1.8e-05,
443
+ "loss": 1.637,
444
+ "step": 57
445
+ },
446
+ {
447
+ "epoch": 2.0714285714285716,
448
+ "grad_norm": 29.07505226135254,
449
+ "learning_rate": 1.7333333333333336e-05,
450
+ "loss": 2.0992,
451
+ "step": 58
452
+ },
453
+ {
454
+ "epoch": 2.107142857142857,
455
+ "grad_norm": 19.287498474121094,
456
+ "learning_rate": 1.6666666666666667e-05,
457
+ "loss": 1.7994,
458
+ "step": 59
459
+ },
460
+ {
461
+ "epoch": 2.142857142857143,
462
+ "grad_norm": 25.761016845703125,
463
+ "learning_rate": 1.6000000000000003e-05,
464
+ "loss": 1.8948,
465
+ "step": 60
466
+ },
467
+ {
468
+ "epoch": 2.1785714285714284,
469
+ "grad_norm": 30.0518798828125,
470
+ "learning_rate": 1.5333333333333334e-05,
471
+ "loss": 1.9301,
472
+ "step": 61
473
+ },
474
+ {
475
+ "epoch": 2.2142857142857144,
476
+ "grad_norm": 27.495458602905273,
477
+ "learning_rate": 1.4666666666666668e-05,
478
+ "loss": 1.9712,
479
+ "step": 62
480
+ },
481
+ {
482
+ "epoch": 2.25,
483
+ "grad_norm": 27.65778350830078,
484
+ "learning_rate": 1.4000000000000001e-05,
485
+ "loss": 1.9837,
486
+ "step": 63
487
+ },
488
+ {
489
+ "epoch": 2.2857142857142856,
490
+ "grad_norm": 26.54891014099121,
491
+ "learning_rate": 1.3333333333333333e-05,
492
+ "loss": 1.7548,
493
+ "step": 64
494
+ },
495
+ {
496
+ "epoch": 2.3214285714285716,
497
+ "grad_norm": 31.930673599243164,
498
+ "learning_rate": 1.2666666666666668e-05,
499
+ "loss": 1.9568,
500
+ "step": 65
501
+ },
502
+ {
503
+ "epoch": 2.357142857142857,
504
+ "grad_norm": 27.43727684020996,
505
+ "learning_rate": 1.2e-05,
506
+ "loss": 1.9759,
507
+ "step": 66
508
+ },
509
+ {
510
+ "epoch": 2.392857142857143,
511
+ "grad_norm": 21.30677032470703,
512
+ "learning_rate": 1.1333333333333334e-05,
513
+ "loss": 2.0283,
514
+ "step": 67
515
+ },
516
+ {
517
+ "epoch": 2.4285714285714284,
518
+ "grad_norm": 28.749021530151367,
519
+ "learning_rate": 1.0666666666666667e-05,
520
+ "loss": 1.8936,
521
+ "step": 68
522
+ },
523
+ {
524
+ "epoch": 2.4642857142857144,
525
+ "grad_norm": 26.132905960083008,
526
+ "learning_rate": 1e-05,
527
+ "loss": 1.8316,
528
+ "step": 69
529
+ },
530
+ {
531
+ "epoch": 2.5,
532
+ "grad_norm": 17.647850036621094,
533
+ "learning_rate": 9.333333333333334e-06,
534
+ "loss": 1.6821,
535
+ "step": 70
536
+ },
537
+ {
538
+ "epoch": 2.5357142857142856,
539
+ "grad_norm": 19.563146591186523,
540
+ "learning_rate": 8.666666666666668e-06,
541
+ "loss": 1.7006,
542
+ "step": 71
543
+ },
544
+ {
545
+ "epoch": 2.571428571428571,
546
+ "grad_norm": 25.576669692993164,
547
+ "learning_rate": 8.000000000000001e-06,
548
+ "loss": 1.9317,
549
+ "step": 72
550
+ },
551
+ {
552
+ "epoch": 2.607142857142857,
553
+ "grad_norm": 21.140615463256836,
554
+ "learning_rate": 7.333333333333334e-06,
555
+ "loss": 1.7071,
556
+ "step": 73
557
+ },
558
+ {
559
+ "epoch": 2.642857142857143,
560
+ "grad_norm": 18.916963577270508,
561
+ "learning_rate": 6.666666666666667e-06,
562
+ "loss": 1.6799,
563
+ "step": 74
564
+ },
565
+ {
566
+ "epoch": 2.678571428571429,
567
+ "grad_norm": 21.43787956237793,
568
+ "learning_rate": 6e-06,
569
+ "loss": 1.7079,
570
+ "step": 75
571
+ },
572
+ {
573
+ "epoch": 2.7142857142857144,
574
+ "grad_norm": 32.862083435058594,
575
+ "learning_rate": 5.333333333333334e-06,
576
+ "loss": 1.7776,
577
+ "step": 76
578
+ },
579
+ {
580
+ "epoch": 2.75,
581
+ "grad_norm": 19.999584197998047,
582
+ "learning_rate": 4.666666666666667e-06,
583
+ "loss": 1.422,
584
+ "step": 77
585
+ },
586
+ {
587
+ "epoch": 2.7857142857142856,
588
+ "grad_norm": 31.127351760864258,
589
+ "learning_rate": 4.000000000000001e-06,
590
+ "loss": 2.0594,
591
+ "step": 78
592
+ },
593
+ {
594
+ "epoch": 2.821428571428571,
595
+ "grad_norm": 29.890186309814453,
596
+ "learning_rate": 3.3333333333333333e-06,
597
+ "loss": 1.5418,
598
+ "step": 79
599
+ },
600
+ {
601
+ "epoch": 2.857142857142857,
602
+ "grad_norm": 26.948698043823242,
603
+ "learning_rate": 2.666666666666667e-06,
604
+ "loss": 1.8609,
605
+ "step": 80
606
+ },
607
+ {
608
+ "epoch": 2.892857142857143,
609
+ "grad_norm": 29.302663803100586,
610
+ "learning_rate": 2.0000000000000003e-06,
611
+ "loss": 1.8429,
612
+ "step": 81
613
+ },
614
+ {
615
+ "epoch": 2.928571428571429,
616
+ "grad_norm": 28.169702529907227,
617
+ "learning_rate": 1.3333333333333334e-06,
618
+ "loss": 1.7572,
619
+ "step": 82
620
+ },
621
+ {
622
+ "epoch": 2.9642857142857144,
623
+ "grad_norm": 32.628578186035156,
624
+ "learning_rate": 6.666666666666667e-07,
625
+ "loss": 1.693,
626
+ "step": 83
627
+ },
628
+ {
629
+ "epoch": 3.0,
630
+ "grad_norm": 25.8957462310791,
631
+ "learning_rate": 0.0,
632
+ "loss": 1.422,
633
+ "step": 84
634
+ },
635
+ {
636
+ "epoch": 3.0,
637
+ "eval_accuracy": 0.35714285714285715,
638
+ "eval_f1_macro": 0.3432320638995863,
639
+ "eval_f1_micro": 0.35714285714285715,
640
+ "eval_f1_weighted": 0.3432320638995863,
641
+ "eval_loss": 1.7558108568191528,
642
+ "eval_precision_macro": 0.6085343228200372,
643
+ "eval_precision_micro": 0.35714285714285715,
644
+ "eval_precision_weighted": 0.6085343228200372,
645
+ "eval_recall_macro": 0.35714285714285715,
646
+ "eval_recall_micro": 0.35714285714285715,
647
+ "eval_recall_weighted": 0.35714285714285715,
648
+ "eval_runtime": 4.7032,
649
+ "eval_samples_per_second": 11.907,
650
+ "eval_steps_per_second": 0.85,
651
+ "step": 84
652
+ }
653
+ ],
654
+ "logging_steps": 1,
655
+ "max_steps": 84,
656
+ "num_input_tokens_seen": 0,
657
+ "num_train_epochs": 3,
658
+ "save_steps": 500,
659
+ "stateful_callbacks": {
660
+ "EarlyStoppingCallback": {
661
+ "args": {
662
+ "early_stopping_patience": 5,
663
+ "early_stopping_threshold": 0.01
664
+ },
665
+ "attributes": {
666
+ "early_stopping_patience_counter": 0
667
+ }
668
+ },
669
+ "TrainerControl": {
670
+ "args": {
671
+ "should_epoch_stop": false,
672
+ "should_evaluate": false,
673
+ "should_log": false,
674
+ "should_save": true,
675
+ "should_training_stop": true
676
+ },
677
+ "attributes": {}
678
+ }
679
+ },
680
+ "total_flos": 10536355307520.0,
681
+ "train_batch_size": 8,
682
+ "trial_name": null,
683
+ "trial_params": null
684
+ }
checkpoint-84/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91985e71ce5cb37df961d9103170c821b998b7345c2051c8e5b5548a218e1985
3
+ size 5368
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilebert-uncased",
3
+ "_num_labels": 7,
4
+ "architectures": [
5
+ "MobileBertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_activation": false,
9
+ "classifier_dropout": null,
10
+ "embedding_size": 128,
11
+ "hidden_act": "relu",
12
+ "hidden_dropout_prob": 0.0,
13
+ "hidden_size": 512,
14
+ "id2label": {
15
+ "0": "anniversary",
16
+ "1": "baby",
17
+ "2": "birthday",
18
+ "3": "get_well",
19
+ "4": "holiday",
20
+ "5": "promotion",
21
+ "6": "wedding"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 512,
25
+ "intra_bottleneck_size": 128,
26
+ "key_query_shared_bottleneck": true,
27
+ "label2id": {
28
+ "anniversary": 0,
29
+ "baby": 1,
30
+ "birthday": 2,
31
+ "get_well": 3,
32
+ "holiday": 4,
33
+ "promotion": 5,
34
+ "wedding": 6
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "max_position_embeddings": 512,
38
+ "model_type": "mobilebert",
39
+ "normalization_type": "no_norm",
40
+ "num_attention_heads": 4,
41
+ "num_feedforward_networks": 4,
42
+ "num_hidden_layers": 24,
43
+ "pad_token_id": 0,
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.48.0",
47
+ "trigram_input": true,
48
+ "true_hidden_size": 128,
49
+ "type_vocab_size": 2,
50
+ "use_bottleneck": true,
51
+ "use_bottleneck_attention": false,
52
+ "vocab_size": 30522
53
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3afca87ac473bfc430ab58b3fe58e985d3ac71f86c6748fd1e7e3432e743ae
3
+ size 98480380
runs/Jun30_23-40-29_r-robo-noct-autotrain-advanced-kegy3hzm-59701-y966k/events.out.tfevents.1751326832.r-robo-noct-autotrain-advanced-kegy3hzm-59701-y966k.114.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4be9a5d099a3aa2d226fdf5428f8a3382440693ec98836cc61b1a71ebda352d3
3
- size 5620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502aff7fdf42b72e4b74549c20e58c35d46838e38c75b550afc2c14f14ce8710
3
+ size 25810
runs/Jun30_23-40-29_r-robo-noct-autotrain-advanced-kegy3hzm-59701-y966k/events.out.tfevents.1751327028.r-robo-noct-autotrain-advanced-kegy3hzm-59701-y966k.114.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bceee9820d7a77eaafb097e463b608cabc2ec5e1a03f32d7e19a25256c4d81
3
+ size 906
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "MobileBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91985e71ce5cb37df961d9103170c821b998b7345c2051c8e5b5548a218e1985
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "greetings/autotrain-data",
3
+ "model": "google/mobilebert-uncased",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "greetings",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "robo-noct",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff