Spaces:
Running on L4
Running on L4
Separate GPT-2 tokenizer for demo; add Qwen2.5-7B fp16
Browse files
- Tokenizer tab now uses GPT-2's tokenizer (more interesting subword splits)
- Generation still uses admin-selected model (Qwen default)
- Added Qwen2.5-7B fp16 option for L4's 24GB VRAM
- Removed fictional GPT-OSS-20B models
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -14,7 +14,7 @@ from datetime import datetime, timezone, timedelta
|
|
| 14 |
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
-
from models import AVAILABLE_MODELS, manager
|
| 18 |
|
| 19 |
# ---------------------------------------------------------------------------
|
| 20 |
# Admin password -- set via env var on HF Spaces, or fall back to default
|
|
@@ -498,11 +498,12 @@ def on_show_steps_change(show_steps):
|
|
| 498 |
# ---------------------------------------------------------------------------
|
| 499 |
|
| 500 |
def tokenize_text(text):
|
| 501 |
-
"""Tokenize input and return formatted HTML.
|
| 502 |
-
if not manager.is_ready():
|
| 503 |
-
return f"<p style='color:red;'>{manager.status_message()}</p>"
|
| 504 |
|
| 505 |
-
|
|
|
|
|
|
|
|
|
|
| 506 |
return _render_tokens_html(tokens)
|
| 507 |
|
| 508 |
|
|
@@ -680,13 +681,14 @@ def create_app():
|
|
| 680 |
with gr.Tab("Tokenizer"):
|
| 681 |
gr.Markdown("### Token Visualization")
|
| 682 |
gr.Markdown(
|
| 683 |
-
"
|
| 684 |
-
"Hover over each token to see its numeric ID."
|
|
|
|
| 685 |
)
|
| 686 |
|
| 687 |
t3_input = gr.Textbox(
|
| 688 |
label="Text",
|
| 689 |
-
value="Huston-Tillotson University is an HBCU in Austin, Texas",
|
| 690 |
lines=3,
|
| 691 |
)
|
| 692 |
t3_btn = gr.Button("Tokenize", variant="primary")
|
|
|
|
| 14 |
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
+
from models import AVAILABLE_MODELS, manager, demo_tokenizer
|
| 18 |
|
| 19 |
# ---------------------------------------------------------------------------
|
| 20 |
# Admin password -- set via env var on HF Spaces, or fall back to default
|
|
|
|
| 498 |
# ---------------------------------------------------------------------------
|
| 499 |
|
| 500 |
def tokenize_text(text):
|
| 501 |
+
"""Tokenize input and return formatted HTML.
|
|
|
|
|
|
|
| 502 |
|
| 503 |
+
Uses GPT-2's tokenizer (not the generation model's tokenizer) because
|
| 504 |
+
GPT-2's smaller vocabulary produces more interesting subword splits.
|
| 505 |
+
"""
|
| 506 |
+
tokens = demo_tokenizer.tokenize(text)
|
| 507 |
return _render_tokens_html(tokens)
|
| 508 |
|
| 509 |
|
|
|
|
| 681 |
with gr.Tab("Tokenizer"):
|
| 682 |
gr.Markdown("### Token Visualization")
|
| 683 |
gr.Markdown(
|
| 684 |
+
"See how text is split into tokens before the model processes it. "
|
| 685 |
+
"Hover over each token to see its numeric ID. "
|
| 686 |
+
"Uses GPT-2's tokenizer, which splits words into interesting subword pieces."
|
| 687 |
)
|
| 688 |
|
| 689 |
t3_input = gr.Textbox(
|
| 690 |
label="Text",
|
| 691 |
+
value="Huston-Tillotson University is an HBCU in Austin, Texas.",
|
| 692 |
lines=3,
|
| 693 |
)
|
| 694 |
t3_btn = gr.Button("Tokenize", variant="primary")
|
models.py
CHANGED
|
@@ -23,6 +23,11 @@ AVAILABLE_MODELS = {
|
|
| 23 |
"dtype": "float16",
|
| 24 |
"description": "Fast, good quality (default)",
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"Qwen2.5-7B (4-bit)": {
|
| 27 |
"id": "Qwen/Qwen2.5-7B",
|
| 28 |
"quantize": "4bit",
|
|
@@ -38,16 +43,6 @@ AVAILABLE_MODELS = {
|
|
| 38 |
"quantize": "4bit",
|
| 39 |
"description": "Best quality, quantized",
|
| 40 |
},
|
| 41 |
-
"GPT-OSS-20B": {
|
| 42 |
-
"id": "openai/gpt-oss-20b",
|
| 43 |
-
"dtype": "auto",
|
| 44 |
-
"description": "OpenAI 20B, full precision (local/large GPU only)",
|
| 45 |
-
},
|
| 46 |
-
"GPT-OSS-20B (4-bit)": {
|
| 47 |
-
"id": "openai/gpt-oss-20b",
|
| 48 |
-
"quantize": "4bit",
|
| 49 |
-
"description": "OpenAI 20B, quantized to fit T4 (~10-12GB)",
|
| 50 |
-
},
|
| 51 |
}
|
| 52 |
|
| 53 |
DEFAULT_MODEL = "Qwen2.5-3B"
|
|
@@ -318,6 +313,38 @@ class ModelManager:
|
|
| 318 |
ids = self.tokenizer.encode(text)
|
| 319 |
return [(self.tokenizer.decode([tid]), tid) for tid in ids]
|
| 320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
# ------------------------------------------------------------------
|
| 322 |
# Config helpers
|
| 323 |
# ------------------------------------------------------------------
|
|
|
|
| 23 |
"dtype": "float16",
|
| 24 |
"description": "Fast, good quality (default)",
|
| 25 |
},
|
| 26 |
+
"Qwen2.5-7B": {
|
| 27 |
+
"id": "Qwen/Qwen2.5-7B",
|
| 28 |
+
"dtype": "float16",
|
| 29 |
+
"description": "Higher quality, needs 24GB+ VRAM (L4/A10)",
|
| 30 |
+
},
|
| 31 |
"Qwen2.5-7B (4-bit)": {
|
| 32 |
"id": "Qwen/Qwen2.5-7B",
|
| 33 |
"quantize": "4bit",
|
|
|
|
| 43 |
"quantize": "4bit",
|
| 44 |
"description": "Best quality, quantized",
|
| 45 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
DEFAULT_MODEL = "Qwen2.5-3B"
|
|
|
|
| 313 |
ids = self.tokenizer.encode(text)
|
| 314 |
return [(self.tokenizer.decode([tid]), tid) for tid in ids]
|
| 315 |
|
| 316 |
+
|
| 317 |
+
# ---------------------------------------------------------------------------
|
| 318 |
+
# Separate tokenizer for demo purposes (GPT-2 shows more interesting splits)
|
| 319 |
+
# ---------------------------------------------------------------------------
|
| 320 |
+
|
| 321 |
+
class DemoTokenizer:
|
| 322 |
+
"""Lightweight tokenizer for the Tokenizer tab.
|
| 323 |
+
|
| 324 |
+
Uses GPT-2's BPE tokenizer which has a smaller vocabulary and produces
|
| 325 |
+
more interesting subword splits than modern tokenizers like Qwen's.
|
| 326 |
+
"""
|
| 327 |
+
|
| 328 |
+
def __init__(self):
|
| 329 |
+
self.tokenizer = None
|
| 330 |
+
self._loaded = False
|
| 331 |
+
|
| 332 |
+
def ensure_loaded(self):
|
| 333 |
+
"""Load tokenizer on first use (lazy loading)."""
|
| 334 |
+
if not self._loaded:
|
| 335 |
+
self.tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
|
| 336 |
+
self._loaded = True
|
| 337 |
+
|
| 338 |
+
def tokenize(self, text: str) -> list[tuple[str, int]]:
|
| 339 |
+
"""Tokenize text and return list of (token_str, token_id)."""
|
| 340 |
+
self.ensure_loaded()
|
| 341 |
+
ids = self.tokenizer.encode(text)
|
| 342 |
+
return [(self.tokenizer.decode([tid]), tid) for tid in ids]
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
# Module-level singleton for demo tokenizer
|
| 346 |
+
demo_tokenizer = DemoTokenizer()
|
| 347 |
+
|
| 348 |
# ------------------------------------------------------------------
|
| 349 |
# Config helpers
|
| 350 |
# ------------------------------------------------------------------
|