quantumLeopard commited on
Commit
29e06fe
·
verified ·
1 Parent(s): 426c0e0

Training in progress, step 500

Browse files
Files changed (3) hide show
  1. config.json +2 -2
  2. model.safetensors +2 -2
  3. tokenizer.json +86 -285
config.json CHANGED
@@ -40,7 +40,7 @@
40
  "tie_word_embeddings": true,
41
  "type_vocab_size": 2,
42
  "use_cache": true,
43
- "vocab_size": 104
44
  },
45
  "decoder_start_token_id": 2,
46
  "dtype": "float32",
@@ -82,7 +82,7 @@
82
  "tie_word_embeddings": true,
83
  "type_vocab_size": 2,
84
  "use_cache": true,
85
- "vocab_size": 104
86
  },
87
  "eos_token_id": 0,
88
  "is_encoder_decoder": true,
 
40
  "tie_word_embeddings": true,
41
  "type_vocab_size": 2,
42
  "use_cache": true,
43
+ "vocab_size": 65
44
  },
45
  "decoder_start_token_id": 2,
46
  "dtype": "float32",
 
82
  "tie_word_embeddings": true,
83
  "type_vocab_size": 2,
84
  "use_cache": true,
85
+ "vocab_size": 65
86
  },
87
  "eos_token_id": 0,
88
  "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e7604f258153687cf85d37d3914c01916ca0c6b2b9c223217d06bf586ab170b
3
- size 31312256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb5cc70c17b1c596bb74b939824f8c798ff711a5140eb8514f89da13e2926c4
3
+ size 31232228
tokenizer.json CHANGED
@@ -113,143 +113,100 @@
113
  "7": 13,
114
  "8": 14,
115
  "9": 15,
116
- "98": 16,
117
- "10": 17,
118
- "11": 18,
119
- "99": 19,
120
- "13": 20,
121
- "97": 21,
122
- "15": 22,
123
- "12": 23,
124
- "95": 24,
125
- "93": 25,
126
- "96": 26,
127
- "92": 27,
128
  "94": 28,
129
- "14": 29,
130
- "16": 30,
131
- "19": 31,
132
- "91": 32,
133
- "18": 33,
134
- "90": 34,
135
- "89": 35,
136
- "21": 36,
137
- "20": 37,
138
- "88": 38,
139
- "17": 39,
140
- "23": 40,
141
- "87": 41,
142
- "25": 42,
143
  "86": 43,
144
- "84": 44,
145
  "85": 45,
146
  "24": 46,
147
- "83": 47,
148
- "22": 48,
149
  "26": 49,
150
- "82": 50,
151
- "80": 51,
152
- "28": 52,
153
- "81": 53,
154
- "27": 54,
155
- "31": 55,
156
  "30": 56,
157
- "78": 57,
158
- "29": 58,
159
  "79": 59,
160
- "36": 60,
161
- "75": 61,
162
- "74": 62,
163
- "32": 63,
164
- "77": 64,
165
- "33": 65,
166
- "76": 66,
167
- "34": 67,
168
- "38": 68,
169
- "37": 69,
170
- "73": 70,
171
- "72": 71,
172
- "39": 72,
173
- "35": 73,
174
- "70": 74,
175
- "40": 75,
176
- "69": 76,
177
- "71": 77,
178
- "67": 78,
179
- "42": 79,
180
- "68": 80,
181
- "41": 81,
182
- "43": 82,
183
- "66": 83,
184
- "44": 84,
185
- "65": 85,
186
- "45": 86,
187
- "61": 87,
188
- "64": 88,
189
- "46": 89,
190
- "57": 90,
191
- "47": 91,
192
- "54": 92,
193
- "59": 93,
194
- "48": 94,
195
- "49": 95,
196
- "50": 96,
197
- "62": 97,
198
- "55": 98,
199
- "58": 99,
200
- "60": 100,
201
- "63": 101,
202
- "53": 102,
203
- "51": 103
204
  },
205
  "merges": [
206
  [
207
  "9",
208
- "8"
209
  ],
210
  [
211
  "1",
212
  "0"
213
  ],
214
  [
215
- "1",
216
- "1"
217
  ],
218
  [
219
- "9",
220
- "9"
221
  ],
222
  [
223
  "1",
224
- "3"
225
  ],
226
  [
227
  "9",
228
  "7"
229
  ],
230
  [
231
- "1",
232
- "5"
233
  ],
234
  [
235
  "1",
236
- "2"
237
  ],
238
  [
239
  "9",
240
  "5"
241
  ],
242
  [
243
- "9",
244
- "3"
245
- ],
246
- [
247
- "9",
248
- "6"
249
  ],
250
  [
251
- "9",
252
- "2"
253
  ],
254
  [
255
  "9",
@@ -257,15 +214,19 @@
257
  ],
258
  [
259
  "1",
260
- "4"
261
  ],
262
  [
263
- "1",
264
- "6"
265
  ],
266
  [
267
  "1",
268
- "9"
 
 
 
 
269
  ],
270
  [
271
  "9",
@@ -280,12 +241,12 @@
280
  "0"
281
  ],
282
  [
283
- "8",
284
  "9"
285
  ],
286
  [
287
- "2",
288
- "1"
289
  ],
290
  [
291
  "2",
@@ -296,28 +257,24 @@
296
  "8"
297
  ],
298
  [
299
- "1",
300
- "7"
301
  ],
302
  [
303
  "2",
304
- "3"
305
  ],
306
  [
307
  "8",
308
  "7"
309
  ],
310
- [
311
- "2",
312
- "5"
313
- ],
314
  [
315
  "8",
316
  "6"
317
  ],
318
  [
319
- "8",
320
- "4"
321
  ],
322
  [
323
  "8",
@@ -329,11 +286,11 @@
329
  ],
330
  [
331
  "8",
332
- "3"
333
  ],
334
  [
335
  "2",
336
- "2"
337
  ],
338
  [
339
  "2",
@@ -341,67 +298,47 @@
341
  ],
342
  [
343
  "8",
344
- "2"
345
- ],
346
- [
347
- "8",
348
- "0"
349
  ],
350
  [
351
  "2",
352
- "8"
353
  ],
354
  [
355
  "8",
356
  "1"
357
  ],
358
  [
359
- "2",
360
- "7"
361
  ],
362
  [
363
- "3",
364
- "1"
365
  ],
366
  [
367
- "3",
368
  "0"
369
  ],
370
  [
371
- "7",
372
- "8"
373
  ],
374
  [
375
  "2",
376
  "9"
377
  ],
378
- [
379
- "7",
380
- "9"
381
- ],
382
  [
383
  "3",
384
- "6"
385
- ],
386
- [
387
- "7",
388
- "5"
389
  ],
390
  [
391
  "7",
392
- "4"
393
- ],
394
- [
395
- "3",
396
- "2"
397
  ],
398
  [
399
  "7",
400
- "7"
401
- ],
402
- [
403
- "3",
404
- "3"
405
  ],
406
  [
407
  "7",
@@ -411,149 +348,13 @@
411
  "3",
412
  "4"
413
  ],
414
- [
415
- "3",
416
- "8"
417
- ],
418
- [
419
- "3",
420
- "7"
421
- ],
422
- [
423
- "7",
424
- "3"
425
- ],
426
- [
427
- "7",
428
- "2"
429
- ],
430
- [
431
- "3",
432
- "9"
433
- ],
434
- [
435
- "3",
436
- "5"
437
- ],
438
  [
439
  "7",
440
- "0"
441
- ],
442
- [
443
- "4",
444
- "0"
445
- ],
446
- [
447
- "6",
448
- "9"
449
- ],
450
- [
451
- "7",
452
- "1"
453
- ],
454
- [
455
- "6",
456
- "7"
457
- ],
458
- [
459
- "4",
460
- "2"
461
- ],
462
- [
463
- "6",
464
- "8"
465
- ],
466
- [
467
- "4",
468
- "1"
469
- ],
470
- [
471
- "4",
472
- "3"
473
- ],
474
- [
475
- "6",
476
- "6"
477
- ],
478
- [
479
- "4",
480
- "4"
481
- ],
482
- [
483
- "6",
484
- "5"
485
- ],
486
- [
487
- "4",
488
- "5"
489
- ],
490
- [
491
- "6",
492
- "1"
493
- ],
494
- [
495
- "6",
496
- "4"
497
- ],
498
- [
499
- "4",
500
- "6"
501
- ],
502
- [
503
- "5",
504
- "7"
505
- ],
506
- [
507
- "4",
508
  "7"
509
  ],
510
  [
511
- "5",
512
- "4"
513
- ],
514
- [
515
- "5",
516
- "9"
517
- ],
518
- [
519
- "4",
520
- "8"
521
- ],
522
- [
523
- "4",
524
- "9"
525
- ],
526
- [
527
- "5",
528
- "0"
529
- ],
530
- [
531
- "6",
532
- "2"
533
- ],
534
- [
535
- "5",
536
- "5"
537
- ],
538
- [
539
- "5",
540
- "8"
541
- ],
542
- [
543
- "6",
544
- "0"
545
- ],
546
- [
547
- "6",
548
- "3"
549
- ],
550
- [
551
- "5",
552
  "3"
553
- ],
554
- [
555
- "5",
556
- "1"
557
  ]
558
  ]
559
  }
 
113
  "7": 13,
114
  "8": 14,
115
  "9": 15,
116
+ "=": 16,
117
+ "99": 17,
118
+ "10": 18,
119
+ "98": 19,
120
+ "11": 20,
121
+ "12": 21,
122
+ "97": 22,
123
+ "96": 23,
124
+ "13": 24,
125
+ "95": 25,
126
+ "14": 26,
127
+ "15": 27,
128
  "94": 28,
129
+ "16": 29,
130
+ "93": 30,
131
+ "17": 31,
132
+ "92": 32,
133
+ "91": 33,
134
+ "18": 34,
135
+ "90": 35,
136
+ "19": 36,
137
+ "89": 37,
138
+ "20": 38,
139
+ "88": 39,
140
+ "21": 40,
141
+ "22": 41,
142
+ "87": 42,
143
  "86": 43,
144
+ "23": 44,
145
  "85": 45,
146
  "24": 46,
147
+ "84": 47,
148
+ "25": 48,
149
  "26": 49,
150
+ "83": 50,
151
+ "27": 51,
152
+ "81": 52,
153
+ "82": 53,
154
+ "28": 54,
155
+ "80": 55,
156
  "30": 56,
157
+ "29": 57,
158
+ "31": 58,
159
  "79": 59,
160
+ "78": 60,
161
+ "76": 61,
162
+ "34": 62,
163
+ "77": 63,
164
+ "33": 64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  },
166
  "merges": [
167
  [
168
  "9",
169
+ "9"
170
  ],
171
  [
172
  "1",
173
  "0"
174
  ],
175
  [
176
+ "9",
177
+ "8"
178
  ],
179
  [
180
+ "1",
181
+ "1"
182
  ],
183
  [
184
  "1",
185
+ "2"
186
  ],
187
  [
188
  "9",
189
  "7"
190
  ],
191
  [
192
+ "9",
193
+ "6"
194
  ],
195
  [
196
  "1",
197
+ "3"
198
  ],
199
  [
200
  "9",
201
  "5"
202
  ],
203
  [
204
+ "1",
205
+ "4"
 
 
 
 
206
  ],
207
  [
208
+ "1",
209
+ "5"
210
  ],
211
  [
212
  "9",
 
214
  ],
215
  [
216
  "1",
217
+ "6"
218
  ],
219
  [
220
+ "9",
221
+ "3"
222
  ],
223
  [
224
  "1",
225
+ "7"
226
+ ],
227
+ [
228
+ "9",
229
+ "2"
230
  ],
231
  [
232
  "9",
 
241
  "0"
242
  ],
243
  [
244
+ "1",
245
  "9"
246
  ],
247
  [
248
+ "8",
249
+ "9"
250
  ],
251
  [
252
  "2",
 
257
  "8"
258
  ],
259
  [
260
+ "2",
261
+ "1"
262
  ],
263
  [
264
  "2",
265
+ "2"
266
  ],
267
  [
268
  "8",
269
  "7"
270
  ],
 
 
 
 
271
  [
272
  "8",
273
  "6"
274
  ],
275
  [
276
+ "2",
277
+ "3"
278
  ],
279
  [
280
  "8",
 
286
  ],
287
  [
288
  "8",
289
+ "4"
290
  ],
291
  [
292
  "2",
293
+ "5"
294
  ],
295
  [
296
  "2",
 
298
  ],
299
  [
300
  "8",
301
+ "3"
 
 
 
 
302
  ],
303
  [
304
  "2",
305
+ "7"
306
  ],
307
  [
308
  "8",
309
  "1"
310
  ],
311
  [
312
+ "8",
313
+ "2"
314
  ],
315
  [
316
+ "2",
317
+ "8"
318
  ],
319
  [
320
+ "8",
321
  "0"
322
  ],
323
  [
324
+ "3",
325
+ "0"
326
  ],
327
  [
328
  "2",
329
  "9"
330
  ],
 
 
 
 
331
  [
332
  "3",
333
+ "1"
 
 
 
 
334
  ],
335
  [
336
  "7",
337
+ "9"
 
 
 
 
338
  ],
339
  [
340
  "7",
341
+ "8"
 
 
 
 
342
  ],
343
  [
344
  "7",
 
348
  "3",
349
  "4"
350
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  [
352
  "7",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  "7"
354
  ],
355
  [
356
+ "3",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  "3"
 
 
 
 
358
  ]
359
  ]
360
  }