mattlev committed on
Commit
8fa7893
·
verified ·
1 Parent(s): 83043cd

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. optimizer.pt +2 -2
  4. results.txt +6 -0
  5. rng_state.pth +2 -2
  6. scheduler.pt +2 -2
  7. trainer_state.json +60 -165
  8. training_args.bin +2 -2
config.json CHANGED
@@ -34,7 +34,7 @@
34
  "pooler_type": "first_token_transform",
35
  "position_embedding_type": "absolute",
36
  "torch_dtype": "float32",
37
- "transformers_version": "4.41.1",
38
  "type_vocab_size": 2,
39
  "use_cache": true,
40
  "vocab_size": 119547
 
34
  "pooler_type": "first_token_transform",
35
  "position_embedding_type": "absolute",
36
  "torch_dtype": "float32",
37
+ "transformers_version": "4.48.0",
38
  "type_vocab_size": 2,
39
  "use_cache": true,
40
  "vocab_size": 119547
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba7caab1fc24d54e4945648fb0607ca10634c0084ba0080accc1619b6d58055
3
  size 709083980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55bbf3ceae134f1cc2ad0c57efca348da2823746a6d5b6e97e596193c04d363e
3
  size 709083980
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64fa743c60be9c4c25d0d06018797708fa5c5d8c13c40aa1803ee095622030cd
3
- size 1418287674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b836f30893427e221487e575727921a7478c6f139bdfdec378b1cd1a539df28
3
+ size 1418288139
results.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Best model on test data
2
+ | | Segment Content | Segment Boundary |
3
+ |-----------+-------------------+--------------------|
4
+ | Precision | 0.99073 | 0.884803 |
5
+ | Recall | 0.986411 | 0.91858 |
6
+ | F1 | 0.988566 | 0.901375 |
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d92f8588ca4576196e1ff5806fd549535f1e969c714eae025e4c9f25d11bd5bb
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102015cca1b18727a0a043ded5c1363cac2464e4ec9c72e2502f8fdf86cfa502
3
+ size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005e6bda6cd88475018063d38f5944852738698cb0b5363e8ec1c9bf609c6c68
3
- size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e17d618a317c719cd27895bd4f2783df2b891d58b45dc85487fa1b973ffebbf7
3
+ size 1465
trainer_state.json CHANGED
@@ -1,214 +1,109 @@
1
  {
2
- "best_metric": 0.009304543025791645,
3
- "best_model_checkpoint": "results_multilingual_model/epoch50_bs100/checkpoint-240",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.0904545858502388,
14
  "learning_rate": 4.9e-05,
15
- "loss": 0.0833,
16
- "step": 60
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accurracy": {
21
- "accuracy": 0.9942178762266288
22
  },
23
  "eval_f1": [
24
- 0.9811026821999458,
25
- 0.812751677852349,
26
- 1.0
27
  ],
28
- "eval_loss": 0.015753956511616707,
29
  "eval_precision": [
30
- 0.9769120093520974,
31
- 0.8488317757009346,
32
- 1.0
33
  ],
34
  "eval_recall": [
35
- 0.9853294635163938,
36
- 0.7796137339055794,
37
- 1.0
38
  ],
39
- "eval_runtime": 9.1266,
40
- "eval_samples_per_second": 76.809,
41
- "eval_steps_per_second": 0.877,
42
- "step": 60
43
  },
44
  {
45
  "epoch": 2.0,
46
- "grad_norm": 0.056562066078186035,
47
  "learning_rate": 4.8e-05,
48
- "loss": 0.0113,
49
- "step": 120
50
  },
51
  {
52
  "epoch": 2.0,
53
- "eval_accurracy": {
54
- "accuracy": 0.9961314345124399
55
  },
56
  "eval_f1": [
57
- 0.9873171173619604,
58
- 0.8784722222222222,
59
- 1.0
60
  ],
61
- "eval_loss": 0.010700768791139126,
62
  "eval_precision": [
63
- 0.9861557254671312,
64
- 0.8884986830553117,
65
- 1.0
66
  ],
67
  "eval_recall": [
68
- 0.988481248015963,
69
- 0.8686695278969957,
70
- 1.0
71
  ],
72
- "eval_runtime": 9.2093,
73
- "eval_samples_per_second": 76.118,
74
- "eval_steps_per_second": 0.869,
75
- "step": 120
76
  },
77
  {
78
  "epoch": 3.0,
79
- "grad_norm": 0.046516694128513336,
80
  "learning_rate": 4.7e-05,
81
- "loss": 0.0082,
82
- "step": 180
83
  },
84
  {
85
  "epoch": 3.0,
86
- "eval_accurracy": {
87
- "accuracy": 0.9964975666032267
88
  },
89
  "eval_f1": [
90
- 0.9885187617473222,
91
- 0.8898544427547251,
92
- 1.0
93
  ],
94
- "eval_loss": 0.009478472173213959,
95
  "eval_precision": [
96
- 0.987244436403112,
97
- 0.9010118785745711,
98
- 1.0
99
  ],
100
  "eval_recall": [
101
- 0.9897963811165027,
102
- 0.878969957081545,
103
- 1.0
104
- ],
105
- "eval_runtime": 9.3442,
106
- "eval_samples_per_second": 75.02,
107
- "eval_steps_per_second": 0.856,
108
- "step": 180
109
- },
110
- {
111
- "epoch": 4.0,
112
- "grad_norm": 0.06247434392571449,
113
- "learning_rate": 4.600000000000001e-05,
114
- "loss": 0.0066,
115
- "step": 240
116
- },
117
- {
118
- "epoch": 4.0,
119
- "eval_accurracy": {
120
- "accuracy": 0.9965321073665085
121
- },
122
- "eval_f1": [
123
- 0.9886149728981924,
124
- 0.8924823302634397,
125
- 1.0
126
- ],
127
- "eval_loss": 0.009304543025791645,
128
- "eval_precision": [
129
- 0.988816804282733,
130
- 0.890765284309534,
131
- 1.0
132
- ],
133
- "eval_recall": [
134
- 0.988413223890073,
135
- 0.894206008583691,
136
- 1.0
137
- ],
138
- "eval_runtime": 9.051,
139
- "eval_samples_per_second": 77.45,
140
- "eval_steps_per_second": 0.884,
141
- "step": 240
142
- },
143
- {
144
- "epoch": 5.0,
145
- "grad_norm": 0.06775504350662231,
146
- "learning_rate": 4.5e-05,
147
- "loss": 0.0055,
148
- "step": 300
149
- },
150
- {
151
- "epoch": 5.0,
152
- "eval_accurracy": {
153
- "accuracy": 0.9966460918853385
154
- },
155
- "eval_f1": [
156
- 0.9890169552873577,
157
- 0.8934722984092156,
158
- 1.0
159
- ],
160
- "eval_loss": 0.010034682229161263,
161
- "eval_precision": [
162
- 0.9867289593066558,
163
- 0.9140291806958474,
164
- 1.0
165
- ],
166
- "eval_recall": [
167
- 0.9913155865947123,
168
- 0.8738197424892704,
169
- 1.0
170
- ],
171
- "eval_runtime": 9.0462,
172
- "eval_samples_per_second": 77.491,
173
- "eval_steps_per_second": 0.884,
174
- "step": 300
175
- },
176
- {
177
- "epoch": 6.0,
178
- "grad_norm": 0.07699564099311829,
179
- "learning_rate": 4.4000000000000006e-05,
180
- "loss": 0.0046,
181
- "step": 360
182
- },
183
- {
184
- "epoch": 6.0,
185
- "eval_accurracy": {
186
- "accuracy": 0.9964112146950224
187
- },
188
- "eval_f1": [
189
- 0.9881879469310263,
190
- 0.8913520861654293,
191
- 1.0
192
- ],
193
- "eval_loss": 0.010619055479764938,
194
- "eval_precision": [
195
- 0.9909254656968923,
196
- 0.8692637160921884,
197
- 1.0
198
- ],
199
- "eval_recall": [
200
- 0.9854655117681738,
201
- 0.9145922746781115,
202
- 1.0
203
- ],
204
- "eval_runtime": 9.0321,
205
- "eval_samples_per_second": 77.612,
206
- "eval_steps_per_second": 0.886,
207
- "step": 360
208
  }
209
  ],
210
  "logging_steps": 500,
211
- "max_steps": 3000,
212
  "num_input_tokens_seen": 0,
213
  "num_train_epochs": 50,
214
  "save_steps": 500,
@@ -219,7 +114,7 @@
219
  "early_stopping_threshold": 0.0
220
  },
221
  "attributes": {
222
- "early_stopping_patience_counter": 0
223
  }
224
  },
225
  "TrainerControl": {
@@ -233,8 +128,8 @@
233
  "attributes": {}
234
  }
235
  },
236
- "total_flos": 8431673465625120.0,
237
- "train_batch_size": 100,
238
  "trial_name": null,
239
  "trial_params": null
240
  }
 
1
  {
2
+ "best_metric": 0.012019365094602108,
3
+ "best_model_checkpoint": "/projects/users/mgillele/trash/test_segmenter/2025-10-13T18:02:46.689601_BERT_model_paper/models/checkpoint-498",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 1494,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.08617644011974335,
14
  "learning_rate": 4.9e-05,
15
+ "loss": 0.0243,
16
+ "step": 498
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": {
21
+ "accuracy": 0.9797698824124416
22
  },
23
  "eval_f1": [
24
+ 0.9887650969010392,
25
+ 0.8985200845665962
 
26
  ],
27
+ "eval_loss": 0.012019365094602108,
28
  "eval_precision": [
29
+ 0.9864561927890902,
30
+ 0.91792656587473
 
31
  ],
32
  "eval_recall": [
33
+ 0.9910848348348348,
34
+ 0.8799171842650103
 
35
  ],
36
+ "eval_runtime": 4.235,
37
+ "eval_samples_per_second": 234.946,
38
+ "eval_steps_per_second": 7.556,
39
+ "step": 498
40
  },
41
  {
42
  "epoch": 2.0,
43
+ "grad_norm": 0.09689918160438538,
44
  "learning_rate": 4.8e-05,
45
+ "loss": 0.0094,
46
+ "step": 996
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_accuracy": {
51
+ "accuracy": 0.9800368075750552
52
  },
53
  "eval_f1": [
54
+ 0.9889099607439146,
55
+ 0.9001335301145548
 
56
  ],
57
+ "eval_loss": 0.012484115548431873,
58
  "eval_precision": [
59
+ 0.9868996993628986,
60
+ 0.9169530355097365
 
61
  ],
62
  "eval_recall": [
63
+ 0.9909284284284284,
64
+ 0.88391994478951
 
65
  ],
66
+ "eval_runtime": 4.179,
67
+ "eval_samples_per_second": 238.098,
68
+ "eval_steps_per_second": 7.657,
69
+ "step": 996
70
  },
71
  {
72
  "epoch": 3.0,
73
+ "grad_norm": 0.2906811535358429,
74
  "learning_rate": 4.7e-05,
75
+ "loss": 0.0064,
76
+ "step": 1494
77
  },
78
  {
79
  "epoch": 3.0,
80
+ "eval_accuracy": {
81
+ "accuracy": 0.979011253002908
82
  },
83
  "eval_f1": [
84
+ 0.9882948384468332,
85
+ 0.8986078098471987,
86
+ 0.0
87
  ],
88
+ "eval_loss": 0.014125695452094078,
89
  "eval_precision": [
90
+ 0.9901255886970173,
91
+ 0.8844919786096257,
92
+ 0.0
93
  ],
94
  "eval_recall": [
95
+ 0.9864708458458459,
96
+ 0.9131815044858523,
97
+ 0.0
98
+ ],
99
+ "eval_runtime": 4.1316,
100
+ "eval_samples_per_second": 240.828,
101
+ "eval_steps_per_second": 7.745,
102
+ "step": 1494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
  ],
105
  "logging_steps": 500,
106
+ "max_steps": 24900,
107
  "num_input_tokens_seen": 0,
108
  "num_train_epochs": 50,
109
  "save_steps": 500,
 
114
  "early_stopping_threshold": 0.0
115
  },
116
  "attributes": {
117
+ "early_stopping_patience_counter": 2
118
  }
119
  },
120
  "TrainerControl": {
 
128
  "attributes": {}
129
  }
130
  },
131
+ "total_flos": 8436689183872296.0,
132
+ "train_batch_size": 32,
133
  "trial_name": null,
134
  "trial_params": null
135
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56bcb5fa40f9f9fdb2343dd4f41005e8be349f6799361b66ccb28613cc60f2d4
3
- size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b3153fba4ac08e8d3b7829313fe860f51174713dfad9dcb0d9c2eab5c19d7f8
3
+ size 5841