flpelerin committed on
Commit
172f6d4
·
1 Parent(s): 125c158

Update file tokenizer.h

Browse files
Files changed (1) hide show
  1. c_tokenizer/tokenizer.h +9 -1
c_tokenizer/tokenizer.h CHANGED
@@ -35,10 +35,18 @@ struct Tokenizer {
35
 
36
  static uint16_t TokenizerGetVocabSize() { return (_binary_tokenizer_bin_end - _binary_tokenizer_bin_start) / 3; }
37
 
38
-
 
 
 
39
 
40
  static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
41
 
 
 
 
 
 
42
  for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
43
  if (tokenizer->vocab[i].byte == byte && tokenizer->vocab[i].prev == prev)
44
  return i;
 
35
 
36
  static uint16_t TokenizerGetVocabSize() { return (_binary_tokenizer_bin_end - _binary_tokenizer_bin_start) / 3; }
37
 
38
+ #include <stdlib.h>
39
+ #include <unistd.h>
40
+ #include <fcntl.h>
41
+ #include <stdio.h>
42
 
43
  static uint16_t TokenizerFind(Tokenizer *tokenizer, uint8_t byte, uint16_t prev) {
44
 
45
+ for (int i = 0; i < tokenizer->get_vocab_size(); ++i)
46
+ print("token %d: (%c, %d)\n", i, tokenizer->vocab[i].byte, tokenizer->vocab[i].prev);
47
+
48
+ exit(0);
49
+
50
  for (uint16_t i = prev; i < tokenizer->get_vocab_size(); ++i)
51
  if (tokenizer->vocab[i].byte == byte && tokenizer->vocab[i].prev == prev)
52
  return i;