levanell committed on
Commit
d6ea53f
·
verified ·
1 Parent(s): d664fb0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +58 -0
  2. requirements +4 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+
4
def inspect_tokenizer(hf_token, model_name):
    """Load a tokenizer and describe its special tokens and chat template.

    The report lists the pad/eos/bos tokens with their ids, followed by the
    ``repr`` of a fixed two-turn sample conversation rendered through the
    tokenizer's chat template.

    Args:
        hf_token: Optional access token forwarded to
            ``AutoTokenizer.from_pretrained``; empty or ``None`` sends no
            token.
        model_name: Identifier handed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The report text. Failures are never raised to the caller; they are
        returned as a string starting with ``"Error: "`` so the UI can
        display them in the output box.
    """
    # Values pasted into a text box frequently carry stray spaces or a
    # trailing newline; normalise before using them.
    model_name = (model_name or "").strip()
    hf_token = (hf_token or "").strip()

    # Fail early with a readable message instead of an opaque loader error.
    if not model_name:
        return "Error: model name is required"

    try:
        tok = AutoTokenizer.from_pretrained(
            model_name,
            token=hf_token or None,
        )

        info = [
            f"pad: {repr(tok.pad_token)} {tok.pad_token_id}",
            f"eos: {repr(tok.eos_token)} {tok.eos_token_id}",
            f"bos: {repr(tok.bos_token)} {tok.bos_token_id}",
        ]

        test = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi there"},
        ]

        # Rendering the template can fail on its own (e.g. the tokenizer
        # defines no chat template). Keep the special-token report in that
        # case rather than discarding everything gathered so far.
        try:
            template = repr(
                tok.apply_chat_template(
                    test,
                    tokenize=False,
                    add_generation_prompt=False,
                )
            )
        except Exception as e:
            template = f"unavailable ({e})"

        return "\n".join(info) + "\n\nChat template:\n" + template

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # event handler.
        return f"Error: {str(e)}"
31
+
32
+
33
# Build the single-page UI: two inputs, a trigger button, and a text area
# that shows whatever inspect_tokenizer returns.
with gr.Blocks() as demo:
    gr.Markdown("## Tokenizer Inspector")

    # Masked input so the access token is not displayed while typing.
    hf_token = gr.Textbox(
        type="password",
        label="HF Token (optional)",
        placeholder="Enter your Hugging Face token if needed",
    )
    model_name = gr.Textbox(
        value="Qwen/Qwen3-1.7B",
        label="Model Name",
        placeholder="e.g. Qwen/Qwen3-1.7B",
    )
    run_btn = gr.Button("Inspect")
    output = gr.Textbox(label="Output", lines=15)

    # Clicking the button passes both textbox values to inspect_tokenizer
    # and writes its return value into the output box.
    run_btn.click(inspect_tokenizer, [hf_token, model_name], output)

demo.launch()
requirements ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ huggingface_hub
3
+ sentencepiece
4
+ gradio