sankalps committed on
Commit
6330029
·
verified ·
1 Parent(s): 83c6d3f

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-classification
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - NonCompete-Test/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 0.20267261564731598
18
+
19
+ f1: 0.9090909090909091
20
+
21
+ precision: 0.8333333333333334
22
+
23
+ recall: 1.0
24
+
25
+ auc: 0.9666666666666667
26
+
27
+ accuracy: 0.9411764705882353
checkpoint-52/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlpaueb/legal-bert-base-uncased",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_ids": 0,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "contractclause",
16
+ "1": "notcontract"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "contractclause": 0,
22
+ "notcontract": 1
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "output_past": true,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 30522
38
+ }
checkpoint-52/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf51fbeba99482bc01e07d455f1517f2956fe1a007ee4ad19a1cafe2cd21343
3
+ size 437958648
checkpoint-52/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b6e69f4ab193c5d2bbf9509595aa36b77d8f0f33fcdf60f6ce40bfa030814e1
3
+ size 876038394
checkpoint-52/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3fa917606e159b71131abbdce6ca6180a61b7b8575143394fcc4a6a41810fab
3
+ size 14244
checkpoint-52/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32d58479625700bd99c87449ebf87c7b8dd999298eb328b2faf136d2cc159d3
3
+ size 1064
checkpoint-52/trainer_state.json ADDED
@@ -0,0 +1,554 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.20267261564731598,
3
+ "best_model_checkpoint": "NonCompete-Test/checkpoint-52",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 52,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.25,
13
+ "grad_norm": Infinity,
14
+ "learning_rate": 0.0,
15
+ "loss": 0.5877,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.5,
20
+ "grad_norm": 6.38834810256958,
21
+ "learning_rate": 1.25e-06,
22
+ "loss": 0.7,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.75,
27
+ "grad_norm": 5.209357738494873,
28
+ "learning_rate": 2.5e-06,
29
+ "loss": 0.5835,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "grad_norm": 11.068732261657715,
35
+ "learning_rate": 3.75e-06,
36
+ "loss": 0.65,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_accuracy": 0.7058823529411765,
42
+ "eval_auc": 0.7000000000000001,
43
+ "eval_f1": 0.0,
44
+ "eval_loss": 0.6488395929336548,
45
+ "eval_precision": 0.0,
46
+ "eval_recall": 0.0,
47
+ "eval_runtime": 0.06,
48
+ "eval_samples_per_second": 283.11,
49
+ "eval_steps_per_second": 16.654,
50
+ "step": 4
51
+ },
52
+ {
53
+ "epoch": 1.25,
54
+ "grad_norm": 4.674075603485107,
55
+ "learning_rate": 5e-06,
56
+ "loss": 0.5979,
57
+ "step": 5
58
+ },
59
+ {
60
+ "epoch": 1.5,
61
+ "grad_norm": 3.6536738872528076,
62
+ "learning_rate": 6.25e-06,
63
+ "loss": 0.5355,
64
+ "step": 6
65
+ },
66
+ {
67
+ "epoch": 1.75,
68
+ "grad_norm": 5.9179840087890625,
69
+ "learning_rate": 7.5e-06,
70
+ "loss": 0.6727,
71
+ "step": 7
72
+ },
73
+ {
74
+ "epoch": 2.0,
75
+ "grad_norm": 8.177053451538086,
76
+ "learning_rate": 8.75e-06,
77
+ "loss": 0.6564,
78
+ "step": 8
79
+ },
80
+ {
81
+ "epoch": 2.0,
82
+ "eval_accuracy": 0.7058823529411765,
83
+ "eval_auc": 0.7000000000000001,
84
+ "eval_f1": 0.0,
85
+ "eval_loss": 0.6269674897193909,
86
+ "eval_precision": 0.0,
87
+ "eval_recall": 0.0,
88
+ "eval_runtime": 0.0578,
89
+ "eval_samples_per_second": 294.137,
90
+ "eval_steps_per_second": 17.302,
91
+ "step": 8
92
+ },
93
+ {
94
+ "epoch": 2.25,
95
+ "grad_norm": Infinity,
96
+ "learning_rate": 8.75e-06,
97
+ "loss": 0.5595,
98
+ "step": 9
99
+ },
100
+ {
101
+ "epoch": 2.5,
102
+ "grad_norm": 4.788901329040527,
103
+ "learning_rate": 1e-05,
104
+ "loss": 0.6515,
105
+ "step": 10
106
+ },
107
+ {
108
+ "epoch": 2.75,
109
+ "grad_norm": 4.311627388000488,
110
+ "learning_rate": 1.125e-05,
111
+ "loss": 0.5628,
112
+ "step": 11
113
+ },
114
+ {
115
+ "epoch": 3.0,
116
+ "grad_norm": 6.801633358001709,
117
+ "learning_rate": 1.25e-05,
118
+ "loss": 0.3583,
119
+ "step": 12
120
+ },
121
+ {
122
+ "epoch": 3.0,
123
+ "eval_accuracy": 0.7058823529411765,
124
+ "eval_auc": 0.7666666666666667,
125
+ "eval_f1": 0.0,
126
+ "eval_loss": 0.5773638486862183,
127
+ "eval_precision": 0.0,
128
+ "eval_recall": 0.0,
129
+ "eval_runtime": 0.0574,
130
+ "eval_samples_per_second": 296.101,
131
+ "eval_steps_per_second": 17.418,
132
+ "step": 12
133
+ },
134
+ {
135
+ "epoch": 3.25,
136
+ "grad_norm": 3.5935935974121094,
137
+ "learning_rate": 1.3750000000000002e-05,
138
+ "loss": 0.5944,
139
+ "step": 13
140
+ },
141
+ {
142
+ "epoch": 3.5,
143
+ "grad_norm": Infinity,
144
+ "learning_rate": 1.3750000000000002e-05,
145
+ "loss": 0.5499,
146
+ "step": 14
147
+ },
148
+ {
149
+ "epoch": 3.75,
150
+ "grad_norm": 5.153175354003906,
151
+ "learning_rate": 1.5e-05,
152
+ "loss": 0.5126,
153
+ "step": 15
154
+ },
155
+ {
156
+ "epoch": 4.0,
157
+ "grad_norm": 8.957610130310059,
158
+ "learning_rate": 1.6250000000000002e-05,
159
+ "loss": 0.508,
160
+ "step": 16
161
+ },
162
+ {
163
+ "epoch": 4.0,
164
+ "eval_accuracy": 0.7058823529411765,
165
+ "eval_auc": 0.7166666666666667,
166
+ "eval_f1": 0.0,
167
+ "eval_loss": 0.5451947450637817,
168
+ "eval_precision": 0.0,
169
+ "eval_recall": 0.0,
170
+ "eval_runtime": 0.0667,
171
+ "eval_samples_per_second": 254.97,
172
+ "eval_steps_per_second": 14.998,
173
+ "step": 16
174
+ },
175
+ {
176
+ "epoch": 4.25,
177
+ "grad_norm": 5.213046073913574,
178
+ "learning_rate": 1.75e-05,
179
+ "loss": 0.6059,
180
+ "step": 17
181
+ },
182
+ {
183
+ "epoch": 4.5,
184
+ "grad_norm": Infinity,
185
+ "learning_rate": 1.75e-05,
186
+ "loss": 0.596,
187
+ "step": 18
188
+ },
189
+ {
190
+ "epoch": 4.75,
191
+ "grad_norm": 5.471120834350586,
192
+ "learning_rate": 1.8750000000000002e-05,
193
+ "loss": 0.4574,
194
+ "step": 19
195
+ },
196
+ {
197
+ "epoch": 5.0,
198
+ "grad_norm": 6.0362372398376465,
199
+ "learning_rate": 2e-05,
200
+ "loss": 0.3978,
201
+ "step": 20
202
+ },
203
+ {
204
+ "epoch": 5.0,
205
+ "eval_accuracy": 0.7058823529411765,
206
+ "eval_auc": 0.65,
207
+ "eval_f1": 0.0,
208
+ "eval_loss": 0.606438159942627,
209
+ "eval_precision": 0.0,
210
+ "eval_recall": 0.0,
211
+ "eval_runtime": 0.0567,
212
+ "eval_samples_per_second": 300.085,
213
+ "eval_steps_per_second": 17.652,
214
+ "step": 20
215
+ },
216
+ {
217
+ "epoch": 5.25,
218
+ "grad_norm": 15.422409057617188,
219
+ "learning_rate": 2.125e-05,
220
+ "loss": 0.5901,
221
+ "step": 21
222
+ },
223
+ {
224
+ "epoch": 5.5,
225
+ "grad_norm": 3.408106565475464,
226
+ "learning_rate": 2.25e-05,
227
+ "loss": 0.4629,
228
+ "step": 22
229
+ },
230
+ {
231
+ "epoch": 5.75,
232
+ "grad_norm": 4.22088098526001,
233
+ "learning_rate": 2.375e-05,
234
+ "loss": 0.5596,
235
+ "step": 23
236
+ },
237
+ {
238
+ "epoch": 6.0,
239
+ "grad_norm": Infinity,
240
+ "learning_rate": 2.375e-05,
241
+ "loss": 0.4767,
242
+ "step": 24
243
+ },
244
+ {
245
+ "epoch": 6.0,
246
+ "eval_accuracy": 0.7058823529411765,
247
+ "eval_auc": 0.6,
248
+ "eval_f1": 0.0,
249
+ "eval_loss": 0.5987117886543274,
250
+ "eval_precision": 0.0,
251
+ "eval_recall": 0.0,
252
+ "eval_runtime": 0.0568,
253
+ "eval_samples_per_second": 299.558,
254
+ "eval_steps_per_second": 17.621,
255
+ "step": 24
256
+ },
257
+ {
258
+ "epoch": 6.25,
259
+ "grad_norm": 2.9963974952697754,
260
+ "learning_rate": 2.5e-05,
261
+ "loss": 0.4606,
262
+ "step": 25
263
+ },
264
+ {
265
+ "epoch": 6.5,
266
+ "grad_norm": 4.663226127624512,
267
+ "learning_rate": 2.625e-05,
268
+ "loss": 0.4368,
269
+ "step": 26
270
+ },
271
+ {
272
+ "epoch": 6.75,
273
+ "grad_norm": 3.2067272663116455,
274
+ "learning_rate": 2.7500000000000004e-05,
275
+ "loss": 0.4495,
276
+ "step": 27
277
+ },
278
+ {
279
+ "epoch": 7.0,
280
+ "grad_norm": 7.059210300445557,
281
+ "learning_rate": 2.8749999999999997e-05,
282
+ "loss": 0.5914,
283
+ "step": 28
284
+ },
285
+ {
286
+ "epoch": 7.0,
287
+ "eval_accuracy": 0.7647058823529411,
288
+ "eval_auc": 0.75,
289
+ "eval_f1": 0.3333333333333333,
290
+ "eval_loss": 0.5011345148086548,
291
+ "eval_precision": 1.0,
292
+ "eval_recall": 0.2,
293
+ "eval_runtime": 0.057,
294
+ "eval_samples_per_second": 298.185,
295
+ "eval_steps_per_second": 17.54,
296
+ "step": 28
297
+ },
298
+ {
299
+ "epoch": 7.25,
300
+ "grad_norm": 3.552440643310547,
301
+ "learning_rate": 3e-05,
302
+ "loss": 0.2947,
303
+ "step": 29
304
+ },
305
+ {
306
+ "epoch": 7.5,
307
+ "grad_norm": 5.064967155456543,
308
+ "learning_rate": 3.125e-05,
309
+ "loss": 0.4558,
310
+ "step": 30
311
+ },
312
+ {
313
+ "epoch": 7.75,
314
+ "grad_norm": 6.062498569488525,
315
+ "learning_rate": 3.2500000000000004e-05,
316
+ "loss": 0.4291,
317
+ "step": 31
318
+ },
319
+ {
320
+ "epoch": 8.0,
321
+ "grad_norm": 12.07767105102539,
322
+ "learning_rate": 3.375000000000001e-05,
323
+ "loss": 0.4277,
324
+ "step": 32
325
+ },
326
+ {
327
+ "epoch": 8.0,
328
+ "eval_accuracy": 0.7647058823529411,
329
+ "eval_auc": 0.9166666666666666,
330
+ "eval_f1": 0.5,
331
+ "eval_loss": 0.40540269017219543,
332
+ "eval_precision": 0.6666666666666666,
333
+ "eval_recall": 0.4,
334
+ "eval_runtime": 0.0578,
335
+ "eval_samples_per_second": 293.961,
336
+ "eval_steps_per_second": 17.292,
337
+ "step": 32
338
+ },
339
+ {
340
+ "epoch": 8.25,
341
+ "grad_norm": 4.816434860229492,
342
+ "learning_rate": 3.5e-05,
343
+ "loss": 0.2649,
344
+ "step": 33
345
+ },
346
+ {
347
+ "epoch": 8.5,
348
+ "grad_norm": 5.842213153839111,
349
+ "learning_rate": 3.625e-05,
350
+ "loss": 0.2945,
351
+ "step": 34
352
+ },
353
+ {
354
+ "epoch": 8.75,
355
+ "grad_norm": 4.9558024406433105,
356
+ "learning_rate": 3.7500000000000003e-05,
357
+ "loss": 0.2484,
358
+ "step": 35
359
+ },
360
+ {
361
+ "epoch": 9.0,
362
+ "grad_norm": 5.846200466156006,
363
+ "learning_rate": 3.875e-05,
364
+ "loss": 0.2677,
365
+ "step": 36
366
+ },
367
+ {
368
+ "epoch": 9.0,
369
+ "eval_accuracy": 0.7647058823529411,
370
+ "eval_auc": 0.6833333333333333,
371
+ "eval_f1": 0.3333333333333333,
372
+ "eval_loss": 0.4802425503730774,
373
+ "eval_precision": 1.0,
374
+ "eval_recall": 0.2,
375
+ "eval_runtime": 0.0572,
376
+ "eval_samples_per_second": 297.166,
377
+ "eval_steps_per_second": 17.48,
378
+ "step": 36
379
+ },
380
+ {
381
+ "epoch": 9.25,
382
+ "grad_norm": 3.983147621154785,
383
+ "learning_rate": 4e-05,
384
+ "loss": 0.1888,
385
+ "step": 37
386
+ },
387
+ {
388
+ "epoch": 9.5,
389
+ "grad_norm": 3.358937978744507,
390
+ "learning_rate": 4.125e-05,
391
+ "loss": 0.2366,
392
+ "step": 38
393
+ },
394
+ {
395
+ "epoch": 9.75,
396
+ "grad_norm": 3.220496654510498,
397
+ "learning_rate": 4.25e-05,
398
+ "loss": 0.1501,
399
+ "step": 39
400
+ },
401
+ {
402
+ "epoch": 10.0,
403
+ "grad_norm": 6.872381210327148,
404
+ "learning_rate": 4.375e-05,
405
+ "loss": 0.2008,
406
+ "step": 40
407
+ },
408
+ {
409
+ "epoch": 10.0,
410
+ "eval_accuracy": 0.8823529411764706,
411
+ "eval_auc": 0.9666666666666667,
412
+ "eval_f1": 0.8,
413
+ "eval_loss": 0.23314711451530457,
414
+ "eval_precision": 0.8,
415
+ "eval_recall": 0.8,
416
+ "eval_runtime": 0.057,
417
+ "eval_samples_per_second": 298.263,
418
+ "eval_steps_per_second": 17.545,
419
+ "step": 40
420
+ },
421
+ {
422
+ "epoch": 10.25,
423
+ "grad_norm": 3.6026062965393066,
424
+ "learning_rate": 4.5e-05,
425
+ "loss": 0.1339,
426
+ "step": 41
427
+ },
428
+ {
429
+ "epoch": 10.5,
430
+ "grad_norm": 4.244753360748291,
431
+ "learning_rate": 4.6250000000000006e-05,
432
+ "loss": 0.1435,
433
+ "step": 42
434
+ },
435
+ {
436
+ "epoch": 10.75,
437
+ "grad_norm": 2.4807896614074707,
438
+ "learning_rate": 4.75e-05,
439
+ "loss": 0.0886,
440
+ "step": 43
441
+ },
442
+ {
443
+ "epoch": 11.0,
444
+ "grad_norm": 1.8359788656234741,
445
+ "learning_rate": 4.875e-05,
446
+ "loss": 0.063,
447
+ "step": 44
448
+ },
449
+ {
450
+ "epoch": 11.0,
451
+ "eval_accuracy": 0.8823529411764706,
452
+ "eval_auc": 0.9833333333333333,
453
+ "eval_f1": 0.8,
454
+ "eval_loss": 0.20963242650032043,
455
+ "eval_precision": 0.8,
456
+ "eval_recall": 0.8,
457
+ "eval_runtime": 0.057,
458
+ "eval_samples_per_second": 298.411,
459
+ "eval_steps_per_second": 17.554,
460
+ "step": 44
461
+ },
462
+ {
463
+ "epoch": 11.25,
464
+ "grad_norm": 1.7810684442520142,
465
+ "learning_rate": 5e-05,
466
+ "loss": 0.0659,
467
+ "step": 45
468
+ },
469
+ {
470
+ "epoch": 11.5,
471
+ "grad_norm": 1.6344645023345947,
472
+ "learning_rate": 4.986111111111111e-05,
473
+ "loss": 0.05,
474
+ "step": 46
475
+ },
476
+ {
477
+ "epoch": 11.75,
478
+ "grad_norm": 1.3671772480010986,
479
+ "learning_rate": 4.972222222222223e-05,
480
+ "loss": 0.0491,
481
+ "step": 47
482
+ },
483
+ {
484
+ "epoch": 12.0,
485
+ "grad_norm": 0.8882291316986084,
486
+ "learning_rate": 4.958333333333334e-05,
487
+ "loss": 0.0283,
488
+ "step": 48
489
+ },
490
+ {
491
+ "epoch": 12.0,
492
+ "eval_accuracy": 0.8823529411764706,
493
+ "eval_auc": 0.9666666666666667,
494
+ "eval_f1": 0.8,
495
+ "eval_loss": 0.26048457622528076,
496
+ "eval_precision": 0.8,
497
+ "eval_recall": 0.8,
498
+ "eval_runtime": 0.057,
499
+ "eval_samples_per_second": 298.469,
500
+ "eval_steps_per_second": 17.557,
501
+ "step": 48
502
+ },
503
+ {
504
+ "epoch": 12.25,
505
+ "grad_norm": 1.625268816947937,
506
+ "learning_rate": 4.9444444444444446e-05,
507
+ "loss": 0.0379,
508
+ "step": 49
509
+ },
510
+ {
511
+ "epoch": 12.5,
512
+ "grad_norm": 0.7491352558135986,
513
+ "learning_rate": 4.930555555555556e-05,
514
+ "loss": 0.0253,
515
+ "step": 50
516
+ },
517
+ {
518
+ "epoch": 12.75,
519
+ "grad_norm": 0.6248525977134705,
520
+ "learning_rate": 4.9166666666666665e-05,
521
+ "loss": 0.0211,
522
+ "step": 51
523
+ },
524
+ {
525
+ "epoch": 13.0,
526
+ "grad_norm": 0.5918123722076416,
527
+ "learning_rate": 4.902777777777778e-05,
528
+ "loss": 0.0206,
529
+ "step": 52
530
+ },
531
+ {
532
+ "epoch": 13.0,
533
+ "eval_accuracy": 0.9411764705882353,
534
+ "eval_auc": 0.9666666666666667,
535
+ "eval_f1": 0.9090909090909091,
536
+ "eval_loss": 0.20267261564731598,
537
+ "eval_precision": 0.8333333333333334,
538
+ "eval_recall": 1.0,
539
+ "eval_runtime": 0.0573,
540
+ "eval_samples_per_second": 296.92,
541
+ "eval_steps_per_second": 17.466,
542
+ "step": 52
543
+ }
544
+ ],
545
+ "logging_steps": 1,
546
+ "max_steps": 400,
547
+ "num_input_tokens_seen": 0,
548
+ "num_train_epochs": 100,
549
+ "save_steps": 500,
550
+ "total_flos": 57292432304640.0,
551
+ "train_batch_size": 20,
552
+ "trial_name": null,
553
+ "trial_params": null
554
+ }
checkpoint-52/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f75b4e2f937fb466fc92311c76d79dcb3e2eec0d5b5bff27967f956d12a647a
3
+ size 5048
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlpaueb/legal-bert-base-uncased",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_ids": 0,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "contractclause",
16
+ "1": "notcontract"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "contractclause": 0,
22
+ "notcontract": 1
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "output_past": true,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.40.1",
35
+ "type_vocab_size": 2,
36
+ "use_cache": true,
37
+ "vocab_size": 30522
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf51fbeba99482bc01e07d455f1517f2956fe1a007ee4ad19a1cafe2cd21343
3
+ size 437958648
runs/May09_00-36-25_r-sankalps-autotrain-noncompete-secondattempt-23ul0t7-1a597-3qz/events.out.tfevents.1715214985.r-sankalps-autotrain-noncompete-secondattempt-23ul0t7-1a597-3qz.57.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:205b054b9bd97ef50ec27bbe75145bbe73e59f37de53565cc02ff644564ef197
3
- size 4916
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b8f326b39585a4828f47b12dc58c71841d0d98474de6768c211e48a9f08010e
3
+ size 23982
runs/May09_00-36-25_r-sankalps-autotrain-noncompete-secondattempt-23ul0t7-1a597-3qz/events.out.tfevents.1715215047.r-sankalps-autotrain-noncompete-secondattempt-23ul0t7-1a597-3qz.57.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65a6255a90fd4b2bd65f2c8e9157654e73557d791b0261e73c714e941e3323f9
3
+ size 597
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f75b4e2f937fb466fc92311c76d79dcb3e2eec0d5b5bff27967f956d12a647a
3
+ size 5048
training_params.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "NonCompete-Test/autotrain-data",
3
+ "model": "nlpaueb/legal-bert-base-uncased",
4
+ "lr": 5e-05,
5
+ "epochs": 100,
6
+ "max_seq_length": 128,
7
+ "batch_size": 20,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "NonCompete-Test",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "evaluation_strategy": "epoch",
26
+ "username": "sankalps",
27
+ "log": "tensorboard"
28
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff