ciebson committed on
Commit
9e9cff4
·
verified ·
1 Parent(s): ce6a01a

End of training

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 0.0943
17
 
18
  ## Model description
19
 
@@ -44,46 +44,46 @@ The following hyperparameters were used during training:
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
- | 3.0793 | 1.0 | 12 | 2.2430 |
48
- | 1.9376 | 2.0 | 24 | 1.5349 |
49
- | 1.3941 | 3.0 | 36 | 1.2003 |
50
- | 1.1127 | 4.0 | 48 | 1.0107 |
51
- | 0.9531 | 5.0 | 60 | 0.8533 |
52
- | 0.84 | 6.0 | 72 | 0.7840 |
53
- | 0.7753 | 7.0 | 84 | 0.7185 |
54
- | 0.7678 | 8.0 | 96 | 0.7171 |
55
- | 0.724 | 9.0 | 108 | 0.6569 |
56
- | 0.6601 | 10.0 | 120 | 0.6202 |
57
- | 0.6235 | 11.0 | 132 | 0.5901 |
58
- | 0.5875 | 12.0 | 144 | 0.5537 |
59
- | 0.5707 | 13.0 | 156 | 0.5714 |
60
- | 0.5528 | 14.0 | 168 | 0.5239 |
61
- | 0.5327 | 15.0 | 180 | 0.5050 |
62
- | 0.5042 | 16.0 | 192 | 0.4515 |
63
- | 0.4587 | 17.0 | 204 | 0.4170 |
64
- | 0.4317 | 18.0 | 216 | 0.3874 |
65
- | 0.4012 | 19.0 | 228 | 0.3570 |
66
- | 0.3729 | 20.0 | 240 | 0.3287 |
67
- | 0.3486 | 21.0 | 252 | 0.3186 |
68
- | 0.3391 | 22.0 | 264 | 0.2798 |
69
- | 0.2975 | 23.0 | 276 | 0.2586 |
70
- | 0.2832 | 24.0 | 288 | 0.2426 |
71
- | 0.2641 | 25.0 | 300 | 0.2202 |
72
- | 0.2427 | 26.0 | 312 | 0.2055 |
73
- | 0.2315 | 27.0 | 324 | 0.1902 |
74
- | 0.2149 | 28.0 | 336 | 0.1758 |
75
- | 0.1983 | 29.0 | 348 | 0.1705 |
76
- | 0.1865 | 30.0 | 360 | 0.1524 |
77
- | 0.1732 | 31.0 | 372 | 0.1406 |
78
- | 0.1611 | 32.0 | 384 | 0.1336 |
79
- | 0.1514 | 33.0 | 396 | 0.1309 |
80
- | 0.1443 | 34.0 | 408 | 0.1202 |
81
- | 0.1374 | 35.0 | 420 | 0.1127 |
82
- | 0.1302 | 36.0 | 432 | 0.1050 |
83
- | 0.1247 | 37.0 | 444 | 0.1050 |
84
- | 0.12 | 38.0 | 456 | 0.0985 |
85
- | 0.1159 | 39.0 | 468 | 0.0960 |
86
- | 0.1144 | 40.0 | 480 | 0.0943 |
87
 
88
 
89
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 0.0581
17
 
18
  ## Model description
19
 
 
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
+ | 3.0573 | 1.0 | 12 | 2.2239 |
48
+ | 1.8987 | 2.0 | 24 | 1.4952 |
49
+ | 1.3343 | 3.0 | 36 | 1.1356 |
50
+ | 1.0684 | 4.0 | 48 | 0.9575 |
51
+ | 0.9186 | 5.0 | 60 | 0.8412 |
52
+ | 0.8762 | 6.0 | 72 | 0.7883 |
53
+ | 0.7913 | 7.0 | 84 | 0.7373 |
54
+ | 0.7432 | 8.0 | 96 | 0.6780 |
55
+ | 0.6807 | 9.0 | 108 | 0.6366 |
56
+ | 0.628 | 10.0 | 120 | 0.5657 |
57
+ | 0.5816 | 11.0 | 132 | 0.5325 |
58
+ | 0.5332 | 12.0 | 144 | 0.4900 |
59
+ | 0.5041 | 13.0 | 156 | 0.4710 |
60
+ | 0.444 | 14.0 | 168 | 0.3915 |
61
+ | 0.3973 | 15.0 | 180 | 0.3357 |
62
+ | 0.3477 | 16.0 | 192 | 0.2913 |
63
+ | 0.3143 | 17.0 | 204 | 0.2802 |
64
+ | 0.3018 | 18.0 | 216 | 0.2547 |
65
+ | 0.2715 | 19.0 | 228 | 0.2355 |
66
+ | 0.2514 | 20.0 | 240 | 0.2183 |
67
+ | 0.2396 | 21.0 | 252 | 0.2038 |
68
+ | 0.2225 | 22.0 | 264 | 0.1802 |
69
+ | 0.2133 | 23.0 | 276 | 0.1770 |
70
+ | 0.1939 | 24.0 | 288 | 0.1547 |
71
+ | 0.1782 | 25.0 | 300 | 0.1297 |
72
+ | 0.1669 | 26.0 | 312 | 0.1413 |
73
+ | 0.1613 | 27.0 | 324 | 0.1153 |
74
+ | 0.153 | 28.0 | 336 | 0.1109 |
75
+ | 0.1422 | 29.0 | 348 | 0.1019 |
76
+ | 0.1302 | 30.0 | 360 | 0.0928 |
77
+ | 0.1213 | 31.0 | 372 | 0.0837 |
78
+ | 0.1159 | 32.0 | 384 | 0.0807 |
79
+ | 0.1116 | 33.0 | 396 | 0.0757 |
80
+ | 0.1064 | 34.0 | 408 | 0.0722 |
81
+ | 0.1023 | 35.0 | 420 | 0.0667 |
82
+ | 0.0982 | 36.0 | 432 | 0.0656 |
83
+ | 0.0942 | 37.0 | 444 | 0.0631 |
84
+ | 0.0912 | 38.0 | 456 | 0.0600 |
85
+ | 0.0893 | 39.0 | 468 | 0.0593 |
86
+ | 0.0884 | 40.0 | 480 | 0.0581 |
87
 
88
 
89
  ### Framework versions
config.json CHANGED
@@ -78,7 +78,7 @@
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
- "vocab_size": 99
82
  },
83
  "decoder_start_token_id": 2,
84
  "encoder": {
@@ -157,7 +157,7 @@
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
- "vocab_size": 99
161
  },
162
  "eos_token_id": 0,
163
  "is_encoder_decoder": true,
 
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
+ "vocab_size": 100
82
  },
83
  "decoder_start_token_id": 2,
84
  "encoder": {
 
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
+ "vocab_size": 100
161
  },
162
  "eos_token_id": 0,
163
  "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef070266d38568680a80d34762c0f65f401fce8618045f620bd8527c7fe04619
3
- size 31301996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f26789173ceaca3ccf6b1d58248b4890416476ef12e7057c07234a721ac85f
3
+ size 31304048
runs/Mar04_14-04-35_d8c8da50ec4e/events.out.tfevents.1709561076.d8c8da50ec4e.409.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421d31caa8dc4d22fab6be6aebd73196c1ab9567c3a8bc2efa9f22ae6d9de603
3
+ size 12467
runs/Mar04_14-05-00_d8c8da50ec4e/events.out.tfevents.1709561101.d8c8da50ec4e.409.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f55d8ac27b3733233bf8746763055a5e6bfaef0d7fbcc545b4027c44ca955ce
3
+ size 24019
runs/Mar04_14-06-10_d8c8da50ec4e/events.out.tfevents.1709561170.d8c8da50ec4e.409.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad592f955d342d0043ca9e4fab9b9904ce8115adf6f7c340597c49901910bc00
3
+ size 18234
runs/Mar04_14-07-18_d8c8da50ec4e/events.out.tfevents.1709561238.d8c8da50ec4e.409.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d0283f681788c07e6c865ce8a07a7e1225aa577cd8ed625c2e1ca58346c561d
3
+ size 13018
runs/Mar04_14-07-41_d8c8da50ec4e/events.out.tfevents.1709561261.d8c8da50ec4e.409.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644baaba442e6b92e85de2c5a3519497cdde5e4e386dd3628b03e09712efcd97
3
+ size 10920
runs/Mar04_14-07-59_d8c8da50ec4e/events.out.tfevents.1709561279.d8c8da50ec4e.409.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e8caaa85dfaff064563c720e5013fb89a2a765574649f04657aba9064525a2a
3
+ size 28099
runs/Mar04_14-09-07_d8c8da50ec4e/events.out.tfevents.1709561348.d8c8da50ec4e.409.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db66a615b419ebc29ad2e145eedfe214b63c6dd10eb4557c028c939fe68f4336
3
+ size 28098
tokenizer.json CHANGED
@@ -113,172 +113,174 @@
113
  "8": 14,
114
  "9": 15,
115
  "=": 16,
116
- "10": 17,
117
- "11": 18,
118
- "13": 19,
119
- "19": 20,
120
- "12": 21,
121
- "15": 22,
122
- "14": 23,
123
- "16": 24,
124
- "17": 25,
125
  "18": 26,
126
  "20": 27,
127
- "97": 28,
128
- "99": 29,
129
- "98": 30,
130
- "21": 31,
131
- "24": 32,
132
- "22": 33,
133
- "23": 34,
134
- "95": 35,
135
- "96": 36,
136
- "25": 37,
137
- "26": 38,
138
- "94": 39,
139
- "93": 40,
140
- "27": 41,
141
- "92": 42,
142
- "91": 43,
143
- "28": 44,
144
- "90": 45,
145
  "29": 46,
146
- "89": 47,
147
- "88": 48,
148
- "32": 49,
149
  "31": 50,
150
- "30": 51,
151
- "87": 52,
152
- "33": 53,
153
- "34": 54,
154
- "86": 55,
155
  "35": 56,
156
- "85": 57,
157
- "36": 58,
158
  "83": 59,
159
- "84": 60,
160
  "37": 61,
161
  "82": 62,
162
- "81": 63,
163
- "38": 64,
164
- "39": 65,
165
- "40": 66,
166
- "80": 67,
167
- "41": 68,
168
  "79": 69,
169
- "78": 70,
170
- "43": 71,
171
- "45": 72,
172
- "76": 73,
173
- "42": 74,
174
- "77": 75,
175
  "44": 76,
176
- "75": 77,
177
- "46": 78,
178
  "47": 79,
179
  "73": 80,
180
- "71": 81,
181
- "72": 82,
182
- "74": 83,
183
- "50": 84,
184
- "49": 85,
185
- "70": 86,
186
- "48": 87,
187
  "68": 88,
188
- "51": 89,
189
- "52": 90,
190
- "69": 91,
191
- "67": 92,
192
- "65": 93,
193
- "53": 94,
194
- "54": 95,
195
  "55": 96,
196
- "66": 97,
197
- "56": 98
 
198
  },
199
  "merges": [
200
- "1 0",
201
- "1 1",
202
- "1 3",
203
- "1 9",
204
  "1 2",
205
  "1 5",
206
- "1 4",
207
- "1 6",
208
  "1 7",
 
 
 
 
 
209
  "1 8",
210
  "2 0",
211
- "9 7",
212
- "9 9",
213
- "9 8",
214
  "2 1",
215
- "2 4",
216
  "2 2",
217
  "2 3",
218
- "9 5",
 
219
  "9 6",
 
 
 
 
220
  "2 5",
221
  "2 6",
222
- "9 4",
223
- "9 3",
224
- "2 7",
225
  "9 2",
226
- "9 1",
227
  "2 8",
 
228
  "9 0",
 
229
  "2 9",
 
230
  "8 9",
231
  "8 8",
232
- "3 2",
233
  "3 1",
234
- "3 0",
235
  "8 7",
236
  "3 3",
237
- "3 4",
238
  "8 6",
239
- "3 5",
240
  "8 5",
241
- "3 6",
242
- "8 3",
243
  "8 4",
 
 
244
  "3 7",
245
  "8 2",
246
- "8 1",
247
  "3 8",
248
- "3 9",
249
- "4 0",
250
  "8 0",
251
  "4 1",
 
 
252
  "7 9",
 
 
253
  "7 8",
 
254
  "4 3",
255
- "4 5",
256
  "7 6",
257
- "4 2",
258
- "7 7",
259
  "4 4",
260
- "7 5",
261
- "4 6",
262
  "4 7",
263
  "7 3",
264
- "7 1",
265
- "7 2",
266
- "7 4",
267
- "5 0",
268
  "4 9",
269
  "7 0",
 
270
  "4 8",
 
271
  "6 8",
 
272
  "5 1",
273
  "5 2",
274
- "6 9",
275
- "6 7",
276
- "6 5",
277
  "5 3",
278
  "5 4",
279
- "5 5",
280
  "6 6",
281
- "5 6"
 
 
 
282
  ]
283
  }
284
  }
 
113
  "8": 14,
114
  "9": 15,
115
  "=": 16,
116
+ "14": 17,
117
+ "12": 18,
118
+ "15": 19,
119
+ "17": 20,
120
+ "16": 21,
121
+ "19": 22,
122
+ "11": 23,
123
+ "10": 24,
124
+ "13": 25,
125
  "18": 26,
126
  "20": 27,
127
+ "21": 28,
128
+ "98": 29,
129
+ "22": 30,
130
+ "23": 31,
131
+ "99": 32,
132
+ "97": 33,
133
+ "96": 34,
134
+ "24": 35,
135
+ "95": 36,
136
+ "93": 37,
137
+ "94": 38,
138
+ "25": 39,
139
+ "26": 40,
140
+ "92": 41,
141
+ "28": 42,
142
+ "27": 43,
143
+ "90": 44,
144
+ "30": 45,
145
  "29": 46,
146
+ "91": 47,
147
+ "89": 48,
148
+ "88": 49,
149
  "31": 50,
150
+ "87": 51,
151
+ "33": 52,
152
+ "32": 53,
153
+ "86": 54,
154
+ "85": 55,
155
  "35": 56,
156
+ "34": 57,
157
+ "84": 58,
158
  "83": 59,
159
+ "36": 60,
160
  "37": 61,
161
  "82": 62,
162
+ "38": 63,
163
+ "81": 64,
164
+ "80": 65,
165
+ "41": 66,
166
+ "40": 67,
167
+ "39": 68,
168
  "79": 69,
169
+ "77": 70,
170
+ "42": 71,
171
+ "78": 72,
172
+ "75": 73,
173
+ "43": 74,
174
+ "76": 75,
175
  "44": 76,
176
+ "74": 77,
177
+ "72": 78,
178
  "47": 79,
179
  "73": 80,
180
+ "46": 81,
181
+ "45": 82,
182
+ "49": 83,
183
+ "70": 84,
184
+ "71": 85,
185
+ "48": 86,
186
+ "69": 87,
187
  "68": 88,
188
+ "50": 89,
189
+ "51": 90,
190
+ "52": 91,
191
+ "53": 92,
192
+ "54": 93,
193
+ "67": 94,
194
+ "66": 95,
195
  "55": 96,
196
+ "64": 97,
197
+ "65": 98,
198
+ "63": 99
199
  },
200
  "merges": [
201
+ "1 4",
 
 
 
202
  "1 2",
203
  "1 5",
 
 
204
  "1 7",
205
+ "1 6",
206
+ "1 9",
207
+ "1 1",
208
+ "1 0",
209
+ "1 3",
210
  "1 8",
211
  "2 0",
 
 
 
212
  "2 1",
213
+ "9 8",
214
  "2 2",
215
  "2 3",
216
+ "9 9",
217
+ "9 7",
218
  "9 6",
219
+ "2 4",
220
+ "9 5",
221
+ "9 3",
222
+ "9 4",
223
  "2 5",
224
  "2 6",
 
 
 
225
  "9 2",
 
226
  "2 8",
227
+ "2 7",
228
  "9 0",
229
+ "3 0",
230
  "2 9",
231
+ "9 1",
232
  "8 9",
233
  "8 8",
 
234
  "3 1",
 
235
  "8 7",
236
  "3 3",
237
+ "3 2",
238
  "8 6",
 
239
  "8 5",
240
+ "3 5",
241
+ "3 4",
242
  "8 4",
243
+ "8 3",
244
+ "3 6",
245
  "3 7",
246
  "8 2",
 
247
  "3 8",
248
+ "8 1",
 
249
  "8 0",
250
  "4 1",
251
+ "4 0",
252
+ "3 9",
253
  "7 9",
254
+ "7 7",
255
+ "4 2",
256
  "7 8",
257
+ "7 5",
258
  "4 3",
 
259
  "7 6",
 
 
260
  "4 4",
261
+ "7 4",
262
+ "7 2",
263
  "4 7",
264
  "7 3",
265
+ "4 6",
266
+ "4 5",
 
 
267
  "4 9",
268
  "7 0",
269
+ "7 1",
270
  "4 8",
271
+ "6 9",
272
  "6 8",
273
+ "5 0",
274
  "5 1",
275
  "5 2",
 
 
 
276
  "5 3",
277
  "5 4",
278
+ "6 7",
279
  "6 6",
280
+ "5 5",
281
+ "6 4",
282
+ "6 5",
283
+ "6 3"
284
  ]
285
  }
286
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f60299a6ccf7de1e094eb15269b535f3bb4b8176d7eb4c126608ea557871b5d3
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4791ceb8a8ace3a61b3381b0bdbcce405ba9afaa35906974a6b6830820af5acf
3
  size 5112