Spaces: Sleeping
import gradio as gr
from tokenizers import Tokenizer

# Load both tokenizers once at module import so each request only pays for
# encoding, not for re-reading the JSON files.
# NOTE(review): assumes the tokenizer JSON files sit next to this script — confirm
# against the Space's file layout.
llama3_tokenizer = Tokenizer.from_file("tokenizer-llama3.json")
deepseek_coder_tokenizer = Tokenizer.from_file("tokenizer-deepseek-coder.json")
def get_tokenizer(model):
    """Return the pre-loaded tokenizer for *model*.

    Args:
        model: Hugging Face model identifier selected in the UI.

    Returns:
        The ``tokenizers.Tokenizer`` instance loaded at startup.

    Raises:
        ValueError: if *model* has no registered tokenizer.
    """
    tokenizer_mapping = {
        "meta-llama/Meta-Llama-3-8B-Instruct": llama3_tokenizer,
        "deepseek-ai/deepseek-coder-7b-instruct-v1.5": deepseek_coder_tokenizer,
    }
    if model not in tokenizer_mapping:
        # ValueError instead of bare Exception: more precise for an invalid
        # argument, and still caught by any caller catching Exception.
        raise ValueError(f"Model {model} not supported.")
    return tokenizer_mapping[model]
def count_tokens(
    model,
    target_text,
):
    """Yield a human-readable token count of *target_text* under *model*.

    Implemented as a generator so Gradio streams the result to the UI.
    """
    encoding = get_tokenizer(model).encode(target_text)
    token_total = len(encoding.ids)
    yield f"Token count: {token_total}"
# Gradio UI: choose a model, paste text, read back its token count.
model_dropdown = gr.Dropdown(
    choices=[
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
    ],
    value="meta-llama/Meta-Llama-3-8B-Instruct",
    label="Model",
)
text_input = gr.Textbox(
    label="Input",
    info="Text to count tokens for",
    lines=10,
)

demo = gr.Interface(
    fn=count_tokens,
    inputs=[model_dropdown, text_input],
    outputs=["text"],
)

if __name__ == "__main__":
    demo.launch()