zluvolyote committed on
Commit
892de74
·
1 Parent(s): d775864

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +13 -10
  2. vocab.txt +6 -3
tokenizer.json CHANGED
@@ -84,10 +84,10 @@
84
  "C": 6,
85
  "G": 7,
86
  "T": 8,
87
- "##A": 9,
88
- "##C": 10,
89
  "##T": 11,
90
- "##G": 12,
91
  "AA": 13,
92
  "GA": 14,
93
  "TT": 15,
@@ -118,12 +118,12 @@
118
  "TG": 40,
119
  "GTT": 41,
120
  "AGA": 42,
121
- "GCT": 43,
122
- "GAC": 44,
123
- "ACT": 45,
124
- "GAG": 46,
125
- "TCA": 47,
126
- "ATG": 48,
127
  "TAT": 49,
128
  "TTC": 50,
129
  "ATA": 51,
@@ -164,7 +164,10 @@
164
  "TGC": 86,
165
  "CGA": 87,
166
  "CGC": 88,
167
- "CGG": 89
 
 
 
168
  }
169
  }
170
  }
 
84
  "C": 6,
85
  "G": 7,
86
  "T": 8,
87
+ "##C": 9,
88
+ "##G": 10,
89
  "##T": 11,
90
+ "##A": 12,
91
  "AA": 13,
92
  "GA": 14,
93
  "TT": 15,
 
118
  "TG": 40,
119
  "GTT": 41,
120
  "AGA": 42,
121
+ "ATG": 43,
122
+ "GCT": 44,
123
+ "GAC": 45,
124
+ "ACT": 46,
125
+ "GAG": 47,
126
+ "TCA": 48,
127
  "TAT": 49,
128
  "TTC": 50,
129
  "ATA": 51,
 
164
  "TGC": 86,
165
  "CGA": 87,
166
  "CGC": 88,
167
+ "CGG": 89,
168
+ "TAA": 90,
169
+ "TGA": 91,
170
+ "TAG": 92
171
  }
172
  }
173
  }
vocab.txt CHANGED
@@ -7,10 +7,10 @@ A
7
  C
8
  G
9
  T
10
- ##A
11
  ##C
12
- ##T
13
  ##G
 
 
14
  AA
15
  GA
16
  TT
@@ -41,12 +41,12 @@ GGT
41
  TG
42
  GTT
43
  AGA
 
44
  GCT
45
  GAC
46
  ACT
47
  GAG
48
  TCA
49
- ATG
50
  TAT
51
  TTC
52
  ATA
@@ -88,3 +88,6 @@ TGC
88
  CGA
89
  CGC
90
  CGG
 
 
 
 
7
  C
8
  G
9
  T
 
10
  ##C
 
11
  ##G
12
+ ##T
13
+ ##A
14
  AA
15
  GA
16
  TT
 
41
  TG
42
  GTT
43
  AGA
44
+ ATG
45
  GCT
46
  GAC
47
  ACT
48
  GAG
49
  TCA
 
50
  TAT
51
  TTC
52
  ATA
 
88
  CGA
89
  CGC
90
  CGG
91
+ TAA
92
+ TGA
93
+ TAG