Update file tokenizer.h
Browse files
- c_tokenizer/tokenizer.h +3 -3
c_tokenizer/tokenizer.h
CHANGED
|
@@ -40,7 +40,7 @@ static uint16_t TokenizerGetVocabSize() { return (_binary_tokenizer_bin_end - _b
|
|
| 40 |
static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
|
| 41 |
|
| 42 |
for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
|
| 43 |
-
if (tokenizer->vocab
|
| 44 |
return i;
|
| 45 |
|
| 46 |
return 0;
|
|
@@ -68,8 +68,8 @@ static uint8_t *TokenizerDecode(Tokenizer *tokenizer, uint16_t token) {
|
|
| 68 |
uint16_t prev = token;
|
| 69 |
uint16_t i = MAX_WORD_LEN - 1;
|
| 70 |
|
| 71 |
-
for (; prev && i > 0; prev = tokenizer->vocab
|
| 72 |
-
dest[i] = tokenizer->vocab
|
| 73 |
|
| 74 |
return dest + i + 1;
|
| 75 |
}
|
|
|
|
| 40 |
static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
|
| 41 |
|
| 42 |
for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
|
| 43 |
+
if (tokenizer->vocab[i].byte == byte && tokenizer->vocab[i].prev == prev)
|
| 44 |
return i;
|
| 45 |
|
| 46 |
return 0;
|
|
|
|
| 68 |
uint16_t prev = token;
|
| 69 |
uint16_t i = MAX_WORD_LEN - 1;
|
| 70 |
|
| 71 |
+
for (; prev && i > 0; prev = tokenizer->vocab[prev].prev, --i)
|
| 72 |
+
dest[i] = tokenizer->vocab[prev].byte;
|
| 73 |
|
| 74 |
return dest + i + 1;
|
| 75 |
}
|