rxmha125 committed on
Commit
5d1d1d8
·
verified ·
1 Parent(s): bf5df70

Initial commit of Rx Codex v1 (approx 25M params with small vocab) from scratch - Phase 1 Complete

Browse files
Files changed (4) hide show
  1. config.json +9 -0
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +3 -0
  4. rx_codex_v1_tokenizer.json +740 -0
config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 163,
3
+ "d_model": 512,
4
+ "n_layer": 8,
5
+ "n_head": 8,
6
+ "d_ff": 2048,
7
+ "max_seq_len": 512,
8
+ "_class_name": "RxCodexV1"
9
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c35fbae15f1922665a1dd80ee2ce90d1b6f973f0d63975929354d893254d68d
3
+ size 1384
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76a186284371ccbe7b49c742a5cccebdd7092ac485bacb75a0d5fb341411674
3
+ size 104412282
rx_codex_v1_tokenizer.json ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[BOS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[EOS]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": {
44
+ "type": "Sequence",
45
+ "normalizers": [
46
+ {
47
+ "type": "NFC"
48
+ },
49
+ {
50
+ "type": "Lowercase"
51
+ }
52
+ ]
53
+ },
54
+ "pre_tokenizer": {
55
+ "type": "ByteLevel",
56
+ "add_prefix_space": true,
57
+ "trim_offsets": true,
58
+ "use_regex": true
59
+ },
60
+ "post_processor": null,
61
+ "decoder": {
62
+ "type": "ByteLevel",
63
+ "add_prefix_space": true,
64
+ "trim_offsets": true,
65
+ "use_regex": true
66
+ },
67
+ "model": {
68
+ "type": "BPE",
69
+ "dropout": null,
70
+ "unk_token": "[UNK]",
71
+ "continuing_subword_prefix": null,
72
+ "end_of_word_suffix": null,
73
+ "fuse_unk": false,
74
+ "byte_fallback": false,
75
+ "ignore_merges": false,
76
+ "vocab": {
77
+ "[UNK]": 0,
78
+ "[PAD]": 1,
79
+ "[BOS]": 2,
80
+ "[EOS]": 3,
81
+ ".": 4,
82
+ "0": 5,
83
+ "1": 6,
84
+ "2": 7,
85
+ "3": 8,
86
+ "4": 9,
87
+ "5": 10,
88
+ "6": 11,
89
+ "7": 12,
90
+ "8": 13,
91
+ "9": 14,
92
+ "a": 15,
93
+ "c": 16,
94
+ "d": 17,
95
+ "e": 18,
96
+ "f": 19,
97
+ "g": 20,
98
+ "h": 21,
99
+ "i": 22,
100
+ "j": 23,
101
+ "k": 24,
102
+ "l": 25,
103
+ "m": 26,
104
+ "n": 27,
105
+ "o": 28,
106
+ "p": 29,
107
+ "r": 30,
108
+ "s": 31,
109
+ "t": 32,
110
+ "u": 33,
111
+ "v": 34,
112
+ "x": 35,
113
+ "y": 36,
114
+ "z": 37,
115
+ "Ġ": 38,
116
+ "Ġt": 39,
117
+ "Ġf": 40,
118
+ "in": 41,
119
+ "en": 42,
120
+ "is": 43,
121
+ "er": 44,
122
+ "Ġto": 45,
123
+ "Ġa": 46,
124
+ "ain": 47,
125
+ "iz": 48,
126
+ "ken": 49,
127
+ "le": 50,
128
+ "rain": 51,
129
+ "Ġth": 52,
130
+ "Ġtrain": 53,
131
+ "Ġtoken": 54,
132
+ "izer": 55,
133
+ "Ġtokenizer": 56,
134
+ "or": 57,
135
+ "du": 58,
136
+ "ile": 59,
137
+ "mm": 60,
138
+ "om": 61,
139
+ "rom": 62,
140
+ "Ġis": 63,
141
+ "Ġdu": 64,
142
+ "Ġfor": 65,
143
+ "Ġfile": 66,
144
+ "Ġfrom": 67,
145
+ "ing": 68,
146
+ "Ġthis": 69,
147
+ "Ġtraining": 70,
148
+ "mmy": 71,
149
+ "Ġdummy": 72,
150
+ "on": 73,
151
+ "Ġs": 74,
152
+ "ce": 75,
153
+ "ten": 76,
154
+ "enten": 77,
155
+ "Ġsenten": 78,
156
+ "Ġsentence": 79,
157
+ "cs": 80,
158
+ "js": 81,
159
+ "lin": 82,
160
+ "Ġcs": 83,
161
+ "Ġjs": 84,
162
+ "Ġlin": 85,
163
+ "onl": 86,
164
+ "Ġcsv": 87,
165
+ "Ġjsonl": 88,
166
+ "Ġline": 89,
167
+ "Ġ1": 90,
168
+ "Ġ2": 91,
169
+ "Ġ3": 92,
170
+ "Ġ4": 93,
171
+ "her": 94,
172
+ "no": 95,
173
+ "ther": 96,
174
+ "Ġon": 97,
175
+ "nother": 98,
176
+ "Ġone": 99,
177
+ "another": 100,
178
+ "ex": 101,
179
+ "Ġanother": 102,
180
+ "am": 103,
181
+ "et": 104,
182
+ "ple": 105,
183
+ "yet": 106,
184
+ "Ġex": 107,
185
+ "Ġthe": 108,
186
+ "ample": 109,
187
+ "Ġexample": 110,
188
+ "Ġ0": 111,
189
+ "Ġ5": 112,
190
+ "Ġ6": 113,
191
+ "Ġ7": 114,
192
+ "Ġ8": 115,
193
+ "Ġ9": 116,
194
+ "Ġ10": 117,
195
+ "Ġ11": 118,
196
+ "Ġ12": 119,
197
+ "Ġ13": 120,
198
+ "Ġ14": 121,
199
+ "Ġ15": 122,
200
+ "Ġ16": 123,
201
+ "Ġ17": 124,
202
+ "Ġ18": 125,
203
+ "Ġ19": 126,
204
+ "Ġ20": 127,
205
+ "Ġ21": 128,
206
+ "Ġ22": 129,
207
+ "Ġ23": 130,
208
+ "Ġ24": 131,
209
+ "Ġ25": 132,
210
+ "Ġ26": 133,
211
+ "Ġ27": 134,
212
+ "Ġ28": 135,
213
+ "Ġ29": 136,
214
+ "Ġ30": 137,
215
+ "Ġ31": 138,
216
+ "Ġ32": 139,
217
+ "Ġ33": 140,
218
+ "Ġ34": 141,
219
+ "Ġ35": 142,
220
+ "Ġ36": 143,
221
+ "Ġ37": 144,
222
+ "Ġ38": 145,
223
+ "Ġ39": 146,
224
+ "Ġ40": 147,
225
+ "Ġ41": 148,
226
+ "Ġ42": 149,
227
+ "Ġ43": 150,
228
+ "Ġ44": 151,
229
+ "Ġ45": 152,
230
+ "Ġ46": 153,
231
+ "Ġ47": 154,
232
+ "Ġ48": 155,
233
+ "Ġ49": 156,
234
+ "hor": 157,
235
+ "Ġyet": 158,
236
+ "Ġtex": 159,
237
+ "Ġshor": 160,
238
+ "Ġtext": 161,
239
+ "Ġshort": 162
240
+ },
241
+ "merges": [
242
+ [
243
+ "Ġ",
244
+ "t"
245
+ ],
246
+ [
247
+ "Ġ",
248
+ "f"
249
+ ],
250
+ [
251
+ "i",
252
+ "n"
253
+ ],
254
+ [
255
+ "e",
256
+ "n"
257
+ ],
258
+ [
259
+ "i",
260
+ "s"
261
+ ],
262
+ [
263
+ "e",
264
+ "r"
265
+ ],
266
+ [
267
+ "Ġt",
268
+ "o"
269
+ ],
270
+ [
271
+ "Ġ",
272
+ "a"
273
+ ],
274
+ [
275
+ "a",
276
+ "in"
277
+ ],
278
+ [
279
+ "i",
280
+ "z"
281
+ ],
282
+ [
283
+ "k",
284
+ "en"
285
+ ],
286
+ [
287
+ "l",
288
+ "e"
289
+ ],
290
+ [
291
+ "r",
292
+ "ain"
293
+ ],
294
+ [
295
+ "Ġt",
296
+ "h"
297
+ ],
298
+ [
299
+ "Ġt",
300
+ "rain"
301
+ ],
302
+ [
303
+ "Ġto",
304
+ "ken"
305
+ ],
306
+ [
307
+ "iz",
308
+ "er"
309
+ ],
310
+ [
311
+ "Ġtoken",
312
+ "izer"
313
+ ],
314
+ [
315
+ "o",
316
+ "r"
317
+ ],
318
+ [
319
+ "d",
320
+ "u"
321
+ ],
322
+ [
323
+ "i",
324
+ "le"
325
+ ],
326
+ [
327
+ "m",
328
+ "m"
329
+ ],
330
+ [
331
+ "o",
332
+ "m"
333
+ ],
334
+ [
335
+ "r",
336
+ "om"
337
+ ],
338
+ [
339
+ "Ġ",
340
+ "is"
341
+ ],
342
+ [
343
+ "Ġ",
344
+ "du"
345
+ ],
346
+ [
347
+ "Ġf",
348
+ "or"
349
+ ],
350
+ [
351
+ "Ġf",
352
+ "ile"
353
+ ],
354
+ [
355
+ "Ġf",
356
+ "rom"
357
+ ],
358
+ [
359
+ "in",
360
+ "g"
361
+ ],
362
+ [
363
+ "Ġth",
364
+ "is"
365
+ ],
366
+ [
367
+ "Ġtrain",
368
+ "ing"
369
+ ],
370
+ [
371
+ "mm",
372
+ "y"
373
+ ],
374
+ [
375
+ "Ġdu",
376
+ "mmy"
377
+ ],
378
+ [
379
+ "o",
380
+ "n"
381
+ ],
382
+ [
383
+ "Ġ",
384
+ "s"
385
+ ],
386
+ [
387
+ "c",
388
+ "e"
389
+ ],
390
+ [
391
+ "t",
392
+ "en"
393
+ ],
394
+ [
395
+ "en",
396
+ "ten"
397
+ ],
398
+ [
399
+ "Ġs",
400
+ "enten"
401
+ ],
402
+ [
403
+ "Ġsenten",
404
+ "ce"
405
+ ],
406
+ [
407
+ "c",
408
+ "s"
409
+ ],
410
+ [
411
+ "j",
412
+ "s"
413
+ ],
414
+ [
415
+ "l",
416
+ "in"
417
+ ],
418
+ [
419
+ "Ġ",
420
+ "cs"
421
+ ],
422
+ [
423
+ "Ġ",
424
+ "js"
425
+ ],
426
+ [
427
+ "Ġ",
428
+ "lin"
429
+ ],
430
+ [
431
+ "on",
432
+ "l"
433
+ ],
434
+ [
435
+ "Ġcs",
436
+ "v"
437
+ ],
438
+ [
439
+ "Ġjs",
440
+ "onl"
441
+ ],
442
+ [
443
+ "Ġlin",
444
+ "e"
445
+ ],
446
+ [
447
+ "Ġ",
448
+ "1"
449
+ ],
450
+ [
451
+ "Ġ",
452
+ "2"
453
+ ],
454
+ [
455
+ "Ġ",
456
+ "3"
457
+ ],
458
+ [
459
+ "Ġ",
460
+ "4"
461
+ ],
462
+ [
463
+ "h",
464
+ "er"
465
+ ],
466
+ [
467
+ "n",
468
+ "o"
469
+ ],
470
+ [
471
+ "t",
472
+ "her"
473
+ ],
474
+ [
475
+ "Ġ",
476
+ "on"
477
+ ],
478
+ [
479
+ "no",
480
+ "ther"
481
+ ],
482
+ [
483
+ "Ġon",
484
+ "e"
485
+ ],
486
+ [
487
+ "a",
488
+ "nother"
489
+ ],
490
+ [
491
+ "e",
492
+ "x"
493
+ ],
494
+ [
495
+ "Ġa",
496
+ "nother"
497
+ ],
498
+ [
499
+ "a",
500
+ "m"
501
+ ],
502
+ [
503
+ "e",
504
+ "t"
505
+ ],
506
+ [
507
+ "p",
508
+ "le"
509
+ ],
510
+ [
511
+ "y",
512
+ "et"
513
+ ],
514
+ [
515
+ "Ġ",
516
+ "ex"
517
+ ],
518
+ [
519
+ "Ġth",
520
+ "e"
521
+ ],
522
+ [
523
+ "am",
524
+ "ple"
525
+ ],
526
+ [
527
+ "Ġex",
528
+ "ample"
529
+ ],
530
+ [
531
+ "Ġ",
532
+ "0"
533
+ ],
534
+ [
535
+ "Ġ",
536
+ "5"
537
+ ],
538
+ [
539
+ "Ġ",
540
+ "6"
541
+ ],
542
+ [
543
+ "Ġ",
544
+ "7"
545
+ ],
546
+ [
547
+ "Ġ",
548
+ "8"
549
+ ],
550
+ [
551
+ "Ġ",
552
+ "9"
553
+ ],
554
+ [
555
+ "Ġ1",
556
+ "0"
557
+ ],
558
+ [
559
+ "Ġ1",
560
+ "1"
561
+ ],
562
+ [
563
+ "Ġ1",
564
+ "2"
565
+ ],
566
+ [
567
+ "Ġ1",
568
+ "3"
569
+ ],
570
+ [
571
+ "Ġ1",
572
+ "4"
573
+ ],
574
+ [
575
+ "Ġ1",
576
+ "5"
577
+ ],
578
+ [
579
+ "Ġ1",
580
+ "6"
581
+ ],
582
+ [
583
+ "Ġ1",
584
+ "7"
585
+ ],
586
+ [
587
+ "Ġ1",
588
+ "8"
589
+ ],
590
+ [
591
+ "Ġ1",
592
+ "9"
593
+ ],
594
+ [
595
+ "Ġ2",
596
+ "0"
597
+ ],
598
+ [
599
+ "Ġ2",
600
+ "1"
601
+ ],
602
+ [
603
+ "Ġ2",
604
+ "2"
605
+ ],
606
+ [
607
+ "Ġ2",
608
+ "3"
609
+ ],
610
+ [
611
+ "Ġ2",
612
+ "4"
613
+ ],
614
+ [
615
+ "Ġ2",
616
+ "5"
617
+ ],
618
+ [
619
+ "Ġ2",
620
+ "6"
621
+ ],
622
+ [
623
+ "Ġ2",
624
+ "7"
625
+ ],
626
+ [
627
+ "Ġ2",
628
+ "8"
629
+ ],
630
+ [
631
+ "Ġ2",
632
+ "9"
633
+ ],
634
+ [
635
+ "Ġ3",
636
+ "0"
637
+ ],
638
+ [
639
+ "Ġ3",
640
+ "1"
641
+ ],
642
+ [
643
+ "Ġ3",
644
+ "2"
645
+ ],
646
+ [
647
+ "Ġ3",
648
+ "3"
649
+ ],
650
+ [
651
+ "Ġ3",
652
+ "4"
653
+ ],
654
+ [
655
+ "Ġ3",
656
+ "5"
657
+ ],
658
+ [
659
+ "Ġ3",
660
+ "6"
661
+ ],
662
+ [
663
+ "Ġ3",
664
+ "7"
665
+ ],
666
+ [
667
+ "Ġ3",
668
+ "8"
669
+ ],
670
+ [
671
+ "Ġ3",
672
+ "9"
673
+ ],
674
+ [
675
+ "Ġ4",
676
+ "0"
677
+ ],
678
+ [
679
+ "Ġ4",
680
+ "1"
681
+ ],
682
+ [
683
+ "Ġ4",
684
+ "2"
685
+ ],
686
+ [
687
+ "Ġ4",
688
+ "3"
689
+ ],
690
+ [
691
+ "Ġ4",
692
+ "4"
693
+ ],
694
+ [
695
+ "Ġ4",
696
+ "5"
697
+ ],
698
+ [
699
+ "Ġ4",
700
+ "6"
701
+ ],
702
+ [
703
+ "Ġ4",
704
+ "7"
705
+ ],
706
+ [
707
+ "Ġ4",
708
+ "8"
709
+ ],
710
+ [
711
+ "Ġ4",
712
+ "9"
713
+ ],
714
+ [
715
+ "h",
716
+ "or"
717
+ ],
718
+ [
719
+ "Ġ",
720
+ "yet"
721
+ ],
722
+ [
723
+ "Ġt",
724
+ "ex"
725
+ ],
726
+ [
727
+ "Ġs",
728
+ "hor"
729
+ ],
730
+ [
731
+ "Ġtex",
732
+ "t"
733
+ ],
734
+ [
735
+ "Ġshor",
736
+ "t"
737
+ ]
738
+ ]
739
+ }
740
+ }