salmankhanpm committed on
Commit
4c72ea7
·
verified ·
1 Parent(s): 937bfb7

use tiktoken for gpt-4o

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -50,7 +50,7 @@ test_phrase_set_long_3 = load_test_phrases('multilingualphrases03.txt')
50
  hf_tokenizers = {}
51
  for model in models:
52
  print("Loading tokenizer for", model)
53
- if model == 'gpt-4o':
54
  hf_tokenizers[model] = AutoTokenizer.from_pretrained(model)
55
  else:
56
  hf_tokenizers[model] = tiktoken.encoding_for_model(model)
@@ -60,7 +60,7 @@ def generate_tokens_as_table(text):
60
  table = []
61
  for model in models:
62
  tokenizer = hf_tokenizers[model]
63
- if model == 'gpt-4o':
64
  tokens = tokenizer.encode(text, add_special_tokens=False)
65
  else:
66
  tokens = tokenizer.encode(text)
@@ -87,7 +87,7 @@ def generate_tokenizer_table(text):
87
 
88
  for model in models:
89
  tokenizer = hf_tokenizers[model]
90
- if model == 'gpt-4o':
91
  vocab_size[model] = tokenizer.vocab_size
92
  token_counts[model] = len(tokenizer.encode(text, add_special_tokens=True))
93
  else:
 
50
  hf_tokenizers = {}
51
  for model in models:
52
  print("Loading tokenizer for", model)
53
+ if model != 'gpt-4o':
54
  hf_tokenizers[model] = AutoTokenizer.from_pretrained(model)
55
  else:
56
  hf_tokenizers[model] = tiktoken.encoding_for_model(model)
 
60
  table = []
61
  for model in models:
62
  tokenizer = hf_tokenizers[model]
63
+ if model != 'gpt-4o':
64
  tokens = tokenizer.encode(text, add_special_tokens=False)
65
  else:
66
  tokens = tokenizer.encode(text)
 
87
 
88
  for model in models:
89
  tokenizer = hf_tokenizers[model]
90
+ if model != 'gpt-4o':
91
  vocab_size[model] = tokenizer.vocab_size
92
  token_counts[model] = len(tokenizer.encode(text, add_special_tokens=True))
93
  else: