Akhil-9640 committed on
Commit
c91e60d
·
verified ·
1 Parent(s): d2a0aba

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - ACTSA-AI4Bharath/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 1.0651801824569702
18
+
19
+ f1_macro: 0.2095479509928179
20
+
21
+ f1_micro: 0.4584103512014787
22
+
23
+ f1_weighted: 0.2881768494245037
24
+
25
+ precision_macro: 0.1528034504004929
26
+
27
+ precision_micro: 0.4584103512014787
28
+
29
+ precision_weighted: 0.21014005008866307
30
+
31
+ recall_macro: 0.3333333333333333
32
+
33
+ recall_micro: 0.4584103512014787
34
+
35
+ recall_weighted: 0.4584103512014787
36
+
37
+ accuracy: 0.4584103512014787
checkpoint-408/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ai4bharat/IndicBERTv2-MLM-only",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 768,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "-1",
15
+ "1": "0",
16
+ "2": "1"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "-1": 0,
22
+ "0": 1,
23
+ "1": 2
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 250000
38
+ }
checkpoint-408/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af417d52f19f9696a532da06413d3b212cc75fe7d1f9c5075a7b9aa4bd935e0c
3
+ size 1112198276
checkpoint-408/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:129b5764104bfabca371bcebf02d1f1b41bfbf90db57f8c149064edad332f24d
3
+ size 2224511738
checkpoint-408/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1849766005ba4492afa1b6132578d519bb994cc308b4a2167e0b19bb1d85ebc2
3
+ size 13990
checkpoint-408/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b7aa265af4b760edf137189c9df8fd167f96e5a05b37a842c26901e3a1c6f9
3
+ size 1064
checkpoint-408/trainer_state.json ADDED
@@ -0,0 +1,551 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0651801824569702,
3
+ "best_model_checkpoint": "ACTSA-AI4Bharath/checkpoint-408",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 408,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04411764705882353,
13
+ "grad_norm": 13.643086433410645,
14
+ "learning_rate": 0.0007317073170731707,
15
+ "loss": 1.0853,
16
+ "step": 6
17
+ },
18
+ {
19
+ "epoch": 0.08823529411764706,
20
+ "grad_norm": 4.308501720428467,
21
+ "learning_rate": 0.0014634146341463415,
22
+ "loss": 1.137,
23
+ "step": 12
24
+ },
25
+ {
26
+ "epoch": 0.1323529411764706,
27
+ "grad_norm": 3.478633165359497,
28
+ "learning_rate": 0.0021951219512195124,
29
+ "loss": 1.2175,
30
+ "step": 18
31
+ },
32
+ {
33
+ "epoch": 0.17647058823529413,
34
+ "grad_norm": 0.7425355911254883,
35
+ "learning_rate": 0.002926829268292683,
36
+ "loss": 1.1266,
37
+ "step": 24
38
+ },
39
+ {
40
+ "epoch": 0.22058823529411764,
41
+ "grad_norm": 1.5016902685165405,
42
+ "learning_rate": 0.0036585365853658534,
43
+ "loss": 1.2296,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.2647058823529412,
48
+ "grad_norm": 3.432800054550171,
49
+ "learning_rate": 0.004390243902439025,
50
+ "loss": 1.0316,
51
+ "step": 36
52
+ },
53
+ {
54
+ "epoch": 0.3088235294117647,
55
+ "grad_norm": 1.3766824007034302,
56
+ "learning_rate": 0.004986376021798365,
57
+ "loss": 1.1216,
58
+ "step": 42
59
+ },
60
+ {
61
+ "epoch": 0.35294117647058826,
62
+ "grad_norm": 1.1002386808395386,
63
+ "learning_rate": 0.004904632152588556,
64
+ "loss": 1.1628,
65
+ "step": 48
66
+ },
67
+ {
68
+ "epoch": 0.39705882352941174,
69
+ "grad_norm": 0.42980217933654785,
70
+ "learning_rate": 0.004822888283378747,
71
+ "loss": 1.1251,
72
+ "step": 54
73
+ },
74
+ {
75
+ "epoch": 0.4411764705882353,
76
+ "grad_norm": 2.333111524581909,
77
+ "learning_rate": 0.004741144414168937,
78
+ "loss": 1.1751,
79
+ "step": 60
80
+ },
81
+ {
82
+ "epoch": 0.4852941176470588,
83
+ "grad_norm": 1.2478774785995483,
84
+ "learning_rate": 0.004659400544959128,
85
+ "loss": 1.1556,
86
+ "step": 66
87
+ },
88
+ {
89
+ "epoch": 0.5294117647058824,
90
+ "grad_norm": 2.177210807800293,
91
+ "learning_rate": 0.004577656675749319,
92
+ "loss": 1.1362,
93
+ "step": 72
94
+ },
95
+ {
96
+ "epoch": 0.5735294117647058,
97
+ "grad_norm": 1.873475432395935,
98
+ "learning_rate": 0.004495912806539509,
99
+ "loss": 1.1633,
100
+ "step": 78
101
+ },
102
+ {
103
+ "epoch": 0.6176470588235294,
104
+ "grad_norm": 2.743321180343628,
105
+ "learning_rate": 0.0044141689373297,
106
+ "loss": 1.1512,
107
+ "step": 84
108
+ },
109
+ {
110
+ "epoch": 0.6617647058823529,
111
+ "grad_norm": 2.1436805725097656,
112
+ "learning_rate": 0.004332425068119892,
113
+ "loss": 1.252,
114
+ "step": 90
115
+ },
116
+ {
117
+ "epoch": 0.7058823529411765,
118
+ "grad_norm": 1.0206413269042969,
119
+ "learning_rate": 0.004250681198910082,
120
+ "loss": 1.0283,
121
+ "step": 96
122
+ },
123
+ {
124
+ "epoch": 0.75,
125
+ "grad_norm": 2.015864610671997,
126
+ "learning_rate": 0.0041689373297002725,
127
+ "loss": 1.1657,
128
+ "step": 102
129
+ },
130
+ {
131
+ "epoch": 0.7941176470588235,
132
+ "grad_norm": 1.419548749923706,
133
+ "learning_rate": 0.004087193460490463,
134
+ "loss": 1.1078,
135
+ "step": 108
136
+ },
137
+ {
138
+ "epoch": 0.8382352941176471,
139
+ "grad_norm": 3.879511594772339,
140
+ "learning_rate": 0.004005449591280654,
141
+ "loss": 1.1994,
142
+ "step": 114
143
+ },
144
+ {
145
+ "epoch": 0.8823529411764706,
146
+ "grad_norm": 2.264059543609619,
147
+ "learning_rate": 0.003923705722070845,
148
+ "loss": 1.2139,
149
+ "step": 120
150
+ },
151
+ {
152
+ "epoch": 0.9264705882352942,
153
+ "grad_norm": 1.9268299341201782,
154
+ "learning_rate": 0.0038419618528610358,
155
+ "loss": 1.1377,
156
+ "step": 126
157
+ },
158
+ {
159
+ "epoch": 0.9705882352941176,
160
+ "grad_norm": 2.074373722076416,
161
+ "learning_rate": 0.003760217983651226,
162
+ "loss": 1.1601,
163
+ "step": 132
164
+ },
165
+ {
166
+ "epoch": 1.0,
167
+ "eval_accuracy": 0.4584103512014787,
168
+ "eval_f1_macro": 0.2095479509928179,
169
+ "eval_f1_micro": 0.4584103512014787,
170
+ "eval_f1_weighted": 0.2881768494245037,
171
+ "eval_loss": 1.0745776891708374,
172
+ "eval_precision_macro": 0.1528034504004929,
173
+ "eval_precision_micro": 0.4584103512014787,
174
+ "eval_precision_weighted": 0.21014005008866307,
175
+ "eval_recall_macro": 0.3333333333333333,
176
+ "eval_recall_micro": 0.4584103512014787,
177
+ "eval_recall_weighted": 0.4584103512014787,
178
+ "eval_runtime": 180.1257,
179
+ "eval_samples_per_second": 6.007,
180
+ "eval_steps_per_second": 0.094,
181
+ "step": 136
182
+ },
183
+ {
184
+ "epoch": 1.0147058823529411,
185
+ "grad_norm": 0.47819703817367554,
186
+ "learning_rate": 0.003678474114441417,
187
+ "loss": 1.131,
188
+ "step": 138
189
+ },
190
+ {
191
+ "epoch": 1.0588235294117647,
192
+ "grad_norm": 1.602620244026184,
193
+ "learning_rate": 0.0035967302452316074,
194
+ "loss": 1.1478,
195
+ "step": 144
196
+ },
197
+ {
198
+ "epoch": 1.1029411764705883,
199
+ "grad_norm": 1.7910455465316772,
200
+ "learning_rate": 0.0035149863760217987,
201
+ "loss": 1.0949,
202
+ "step": 150
203
+ },
204
+ {
205
+ "epoch": 1.1470588235294117,
206
+ "grad_norm": 1.08793044090271,
207
+ "learning_rate": 0.0034332425068119895,
208
+ "loss": 1.1943,
209
+ "step": 156
210
+ },
211
+ {
212
+ "epoch": 1.1911764705882353,
213
+ "grad_norm": 2.3373301029205322,
214
+ "learning_rate": 0.00335149863760218,
215
+ "loss": 1.0772,
216
+ "step": 162
217
+ },
218
+ {
219
+ "epoch": 1.2352941176470589,
220
+ "grad_norm": 1.8598955869674683,
221
+ "learning_rate": 0.0032697547683923707,
222
+ "loss": 1.1313,
223
+ "step": 168
224
+ },
225
+ {
226
+ "epoch": 1.2794117647058822,
227
+ "grad_norm": 2.8973326683044434,
228
+ "learning_rate": 0.003188010899182561,
229
+ "loss": 1.1225,
230
+ "step": 174
231
+ },
232
+ {
233
+ "epoch": 1.3235294117647058,
234
+ "grad_norm": 1.990200400352478,
235
+ "learning_rate": 0.003106267029972752,
236
+ "loss": 1.188,
237
+ "step": 180
238
+ },
239
+ {
240
+ "epoch": 1.3676470588235294,
241
+ "grad_norm": 1.9855934381484985,
242
+ "learning_rate": 0.003024523160762943,
243
+ "loss": 1.1344,
244
+ "step": 186
245
+ },
246
+ {
247
+ "epoch": 1.4117647058823528,
248
+ "grad_norm": 0.6666595339775085,
249
+ "learning_rate": 0.0029427792915531336,
250
+ "loss": 1.0998,
251
+ "step": 192
252
+ },
253
+ {
254
+ "epoch": 1.4558823529411764,
255
+ "grad_norm": 0.45097193121910095,
256
+ "learning_rate": 0.0028610354223433244,
257
+ "loss": 1.1805,
258
+ "step": 198
259
+ },
260
+ {
261
+ "epoch": 1.5,
262
+ "grad_norm": 0.4356406629085541,
263
+ "learning_rate": 0.002779291553133515,
264
+ "loss": 1.0879,
265
+ "step": 204
266
+ },
267
+ {
268
+ "epoch": 1.5441176470588234,
269
+ "grad_norm": 2.1680757999420166,
270
+ "learning_rate": 0.0026975476839237057,
271
+ "loss": 1.0708,
272
+ "step": 210
273
+ },
274
+ {
275
+ "epoch": 1.5882352941176472,
276
+ "grad_norm": 1.1253345012664795,
277
+ "learning_rate": 0.0026158038147138965,
278
+ "loss": 1.147,
279
+ "step": 216
280
+ },
281
+ {
282
+ "epoch": 1.6323529411764706,
283
+ "grad_norm": 1.1781907081604004,
284
+ "learning_rate": 0.0025340599455040873,
285
+ "loss": 1.1487,
286
+ "step": 222
287
+ },
288
+ {
289
+ "epoch": 1.6764705882352942,
290
+ "grad_norm": 1.1827212572097778,
291
+ "learning_rate": 0.002452316076294278,
292
+ "loss": 1.1075,
293
+ "step": 228
294
+ },
295
+ {
296
+ "epoch": 1.7205882352941178,
297
+ "grad_norm": 0.4935368299484253,
298
+ "learning_rate": 0.0023705722070844685,
299
+ "loss": 1.1091,
300
+ "step": 234
301
+ },
302
+ {
303
+ "epoch": 1.7647058823529411,
304
+ "grad_norm": 1.7564665079116821,
305
+ "learning_rate": 0.0022888283378746594,
306
+ "loss": 1.0616,
307
+ "step": 240
308
+ },
309
+ {
310
+ "epoch": 1.8088235294117647,
311
+ "grad_norm": 0.3409131169319153,
312
+ "learning_rate": 0.00220708446866485,
313
+ "loss": 1.0253,
314
+ "step": 246
315
+ },
316
+ {
317
+ "epoch": 1.8529411764705883,
318
+ "grad_norm": 1.9489390850067139,
319
+ "learning_rate": 0.002125340599455041,
320
+ "loss": 1.1115,
321
+ "step": 252
322
+ },
323
+ {
324
+ "epoch": 1.8970588235294117,
325
+ "grad_norm": 1.4360109567642212,
326
+ "learning_rate": 0.0020435967302452314,
327
+ "loss": 1.0629,
328
+ "step": 258
329
+ },
330
+ {
331
+ "epoch": 1.9411764705882353,
332
+ "grad_norm": 0.4626617729663849,
333
+ "learning_rate": 0.0019618528610354227,
334
+ "loss": 1.0992,
335
+ "step": 264
336
+ },
337
+ {
338
+ "epoch": 1.9852941176470589,
339
+ "grad_norm": 0.7563297152519226,
340
+ "learning_rate": 0.001880108991825613,
341
+ "loss": 1.0473,
342
+ "step": 270
343
+ },
344
+ {
345
+ "epoch": 2.0,
346
+ "eval_accuracy": 0.4584103512014787,
347
+ "eval_f1_macro": 0.2095479509928179,
348
+ "eval_f1_micro": 0.4584103512014787,
349
+ "eval_f1_weighted": 0.2881768494245037,
350
+ "eval_loss": 1.088401198387146,
351
+ "eval_precision_macro": 0.1528034504004929,
352
+ "eval_precision_micro": 0.4584103512014787,
353
+ "eval_precision_weighted": 0.21014005008866307,
354
+ "eval_recall_macro": 0.3333333333333333,
355
+ "eval_recall_micro": 0.4584103512014787,
356
+ "eval_recall_weighted": 0.4584103512014787,
357
+ "eval_runtime": 181.0686,
358
+ "eval_samples_per_second": 5.976,
359
+ "eval_steps_per_second": 0.094,
360
+ "step": 272
361
+ },
362
+ {
363
+ "epoch": 2.0294117647058822,
364
+ "grad_norm": 1.0731829404830933,
365
+ "learning_rate": 0.0017983651226158037,
366
+ "loss": 1.1064,
367
+ "step": 276
368
+ },
369
+ {
370
+ "epoch": 2.073529411764706,
371
+ "grad_norm": 0.7920964360237122,
372
+ "learning_rate": 0.0017166212534059947,
373
+ "loss": 1.0805,
374
+ "step": 282
375
+ },
376
+ {
377
+ "epoch": 2.1176470588235294,
378
+ "grad_norm": 1.288224697113037,
379
+ "learning_rate": 0.0016348773841961854,
380
+ "loss": 1.0924,
381
+ "step": 288
382
+ },
383
+ {
384
+ "epoch": 2.161764705882353,
385
+ "grad_norm": 0.7358605265617371,
386
+ "learning_rate": 0.001553133514986376,
387
+ "loss": 1.0822,
388
+ "step": 294
389
+ },
390
+ {
391
+ "epoch": 2.2058823529411766,
392
+ "grad_norm": 1.8211092948913574,
393
+ "learning_rate": 0.0014713896457765668,
394
+ "loss": 1.0734,
395
+ "step": 300
396
+ },
397
+ {
398
+ "epoch": 2.25,
399
+ "grad_norm": 0.9776346683502197,
400
+ "learning_rate": 0.0013896457765667574,
401
+ "loss": 1.0823,
402
+ "step": 306
403
+ },
404
+ {
405
+ "epoch": 2.2941176470588234,
406
+ "grad_norm": 0.7615415453910828,
407
+ "learning_rate": 0.0013079019073569482,
408
+ "loss": 1.0671,
409
+ "step": 312
410
+ },
411
+ {
412
+ "epoch": 2.338235294117647,
413
+ "grad_norm": 0.6685084104537964,
414
+ "learning_rate": 0.001226158038147139,
415
+ "loss": 1.0458,
416
+ "step": 318
417
+ },
418
+ {
419
+ "epoch": 2.3823529411764706,
420
+ "grad_norm": 0.5736514329910278,
421
+ "learning_rate": 0.0011444141689373297,
422
+ "loss": 1.1256,
423
+ "step": 324
424
+ },
425
+ {
426
+ "epoch": 2.426470588235294,
427
+ "grad_norm": 0.8168231248855591,
428
+ "learning_rate": 0.0010626702997275205,
429
+ "loss": 1.087,
430
+ "step": 330
431
+ },
432
+ {
433
+ "epoch": 2.4705882352941178,
434
+ "grad_norm": 1.3093127012252808,
435
+ "learning_rate": 0.0009809264305177113,
436
+ "loss": 1.0606,
437
+ "step": 336
438
+ },
439
+ {
440
+ "epoch": 2.514705882352941,
441
+ "grad_norm": 0.9428394436836243,
442
+ "learning_rate": 0.0008991825613079018,
443
+ "loss": 1.0716,
444
+ "step": 342
445
+ },
446
+ {
447
+ "epoch": 2.5588235294117645,
448
+ "grad_norm": 2.0453405380249023,
449
+ "learning_rate": 0.0008174386920980927,
450
+ "loss": 1.0795,
451
+ "step": 348
452
+ },
453
+ {
454
+ "epoch": 2.6029411764705883,
455
+ "grad_norm": 1.005281925201416,
456
+ "learning_rate": 0.0007356948228882834,
457
+ "loss": 1.0466,
458
+ "step": 354
459
+ },
460
+ {
461
+ "epoch": 2.6470588235294117,
462
+ "grad_norm": 0.947652280330658,
463
+ "learning_rate": 0.0006539509536784741,
464
+ "loss": 1.1121,
465
+ "step": 360
466
+ },
467
+ {
468
+ "epoch": 2.6911764705882355,
469
+ "grad_norm": 0.6671378016471863,
470
+ "learning_rate": 0.0005722070844686648,
471
+ "loss": 1.054,
472
+ "step": 366
473
+ },
474
+ {
475
+ "epoch": 2.735294117647059,
476
+ "grad_norm": 0.47493502497673035,
477
+ "learning_rate": 0.0004904632152588557,
478
+ "loss": 1.0529,
479
+ "step": 372
480
+ },
481
+ {
482
+ "epoch": 2.7794117647058822,
483
+ "grad_norm": 1.0291929244995117,
484
+ "learning_rate": 0.00040871934604904634,
485
+ "loss": 1.0508,
486
+ "step": 378
487
+ },
488
+ {
489
+ "epoch": 2.8235294117647056,
490
+ "grad_norm": 0.7595148682594299,
491
+ "learning_rate": 0.00032697547683923706,
492
+ "loss": 1.0667,
493
+ "step": 384
494
+ },
495
+ {
496
+ "epoch": 2.8676470588235294,
497
+ "grad_norm": 1.3260746002197266,
498
+ "learning_rate": 0.00024523160762942784,
499
+ "loss": 1.1066,
500
+ "step": 390
501
+ },
502
+ {
503
+ "epoch": 2.911764705882353,
504
+ "grad_norm": 0.4144936203956604,
505
+ "learning_rate": 0.00016348773841961853,
506
+ "loss": 1.1075,
507
+ "step": 396
508
+ },
509
+ {
510
+ "epoch": 2.9558823529411766,
511
+ "grad_norm": 1.2374686002731323,
512
+ "learning_rate": 8.174386920980927e-05,
513
+ "loss": 1.0618,
514
+ "step": 402
515
+ },
516
+ {
517
+ "epoch": 3.0,
518
+ "grad_norm": 2.612288236618042,
519
+ "learning_rate": 0.0,
520
+ "loss": 1.0772,
521
+ "step": 408
522
+ },
523
+ {
524
+ "epoch": 3.0,
525
+ "eval_accuracy": 0.4584103512014787,
526
+ "eval_f1_macro": 0.2095479509928179,
527
+ "eval_f1_micro": 0.4584103512014787,
528
+ "eval_f1_weighted": 0.2881768494245037,
529
+ "eval_loss": 1.0651801824569702,
530
+ "eval_precision_macro": 0.1528034504004929,
531
+ "eval_precision_micro": 0.4584103512014787,
532
+ "eval_precision_weighted": 0.21014005008866307,
533
+ "eval_recall_macro": 0.3333333333333333,
534
+ "eval_recall_micro": 0.4584103512014787,
535
+ "eval_recall_weighted": 0.4584103512014787,
536
+ "eval_runtime": 200.3097,
537
+ "eval_samples_per_second": 5.402,
538
+ "eval_steps_per_second": 0.085,
539
+ "step": 408
540
+ }
541
+ ],
542
+ "logging_steps": 6,
543
+ "max_steps": 408,
544
+ "num_input_tokens_seen": 0,
545
+ "num_train_epochs": 3,
546
+ "save_steps": 500,
547
+ "total_flos": 854066153945088.0,
548
+ "train_batch_size": 32,
549
+ "trial_name": null,
550
+ "trial_params": null
551
+ }
checkpoint-408/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5535ad3d727506b5f567fcbbd90842af5b01602d837611fffb384fb6dcd53359
3
+ size 5048
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ai4bharat/IndicBERTv2-MLM-only",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "embedding_size": 768,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "-1",
15
+ "1": "0",
16
+ "2": "1"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "-1": 0,
22
+ "0": 1,
23
+ "1": 2
24
+ },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_position_embeddings": 512,
27
+ "model_type": "bert",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 250000
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af417d52f19f9696a532da06413d3b212cc75fe7d1f9c5075a7b9aa4bd935e0c
3
+ size 1112198276
runs/Apr29_17-28-14_r-akhil-9640-actsa-train-lgmwpzpt-451cc-8quec/events.out.tfevents.1714411694.r-akhil-9640-actsa-train-lgmwpzpt-451cc-8quec.86.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c15185e83c947fb81ce1a45cc461a41078f7dba95cba136a62ea4e3d8560e010
3
- size 19741
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41a95d7173f43eca65d1f10573b97c75df2eb2cc6c9851e3d7cc817560767b4d
3
+ size 21983
runs/Apr29_17-28-14_r-akhil-9640-actsa-train-lgmwpzpt-451cc-8quec/events.out.tfevents.1714421090.r-akhil-9640-actsa-train-lgmwpzpt-451cc-8quec.86.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810555a654415e0be3e77386af1cd1cc39313f1c93244319f1674886dae61396
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<as>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<bd>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<bn>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<dg>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "<en>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "<gom>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "<gu>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "<hi>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "<kha>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "<kn>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "<ks>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "<mai>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "<ml>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "<mni>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "<mr>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "<ne>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "<or>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "22": {
180
+ "content": "<pa>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "23": {
188
+ "content": "<sa>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "24": {
196
+ "content": "<sd>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "25": {
204
+ "content": "<sat>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "26": {
212
+ "content": "<ta>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "27": {
220
+ "content": "<te>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "28": {
228
+ "content": "<ur>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ }
235
+ },
236
+ "clean_up_tokenization_spaces": true,
237
+ "cls_token": "[CLS]",
238
+ "mask_token": "[MASK]",
239
+ "model_max_length": 1000000000000000019884624838656,
240
+ "pad_token": "[PAD]",
241
+ "sep_token": "[SEP]",
242
+ "tokenizer_class": "PreTrainedTokenizerFast",
243
+ "unk_token": "[UNK]"
244
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5535ad3d727506b5f567fcbbd90842af5b01602d837611fffb384fb6dcd53359
3
+ size 5048
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "ACTSA-AI4Bharath/autotrain-data",
3
+ "model": "ai4bharat/IndicBERTv2-MLM-only",
4
+ "lr": 0.005,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 32,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "ACTSA-AI4Bharath",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": null,
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "Akhil-9640",
27
+ "log": "tensorboard"
28
+ }