Spaces:

chyams
/

llm-explorer

Running on L4

chyams Claude Opus 4.5 committed on Feb 7

Commit

01852d9

1 Parent(s): 522ab9a

Separate GPT-2 tokenizer for demo; add Qwen2.5-7B fp16

- Tokenizer tab now uses GPT-2's tokenizer (more interesting subword splits)
- Generation still uses admin-selected model (Qwen default)
- Added Qwen2.5-7B fp16 option for L4's 24GB VRAM
- Removed fictional GPT-OSS-20B models

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show

app.py +10 -8
models.py +37 -10

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ from datetime import datetime, timezone, timedelta
 import gradio as gr
-from models import AVAILABLE_MODELS, manager
 # ---------------------------------------------------------------------------
 # Admin password -- set via env var on HF Spaces, or fall back to default
@@ -498,11 +498,12 @@ def on_show_steps_change(show_steps):
 # ---------------------------------------------------------------------------
 def tokenize_text(text):
-    """Tokenize input and return formatted HTML."""
-    if not manager.is_ready():
-        return f"<p style='color:red;'>{manager.status_message()}</p>"
-    tokens = manager.tokenize(text)
     return _render_tokens_html(tokens)
@@ -680,13 +681,14 @@ def create_app():
         with gr.Tab("Tokenizer"):
             gr.Markdown("### Token Visualization")
             gr.Markdown(
-                "Enter any text to see how the model's tokenizer splits it into tokens. "
-                "Hover over each token to see its numeric ID."
             )
             t3_input = gr.Textbox(
                 label="Text",
-                value="Huston-Tillotson University is an HBCU in Austin, Texas",
                 lines=3,
             )
             t3_btn = gr.Button("Tokenize", variant="primary")

 import gradio as gr
+from models import AVAILABLE_MODELS, manager, demo_tokenizer
 # ---------------------------------------------------------------------------
 # Admin password -- set via env var on HF Spaces, or fall back to default
 # ---------------------------------------------------------------------------
 def tokenize_text(text):
+    """Tokenize input and return formatted HTML.
+    Uses GPT-2's tokenizer (not the generation model's tokenizer) because
+    GPT-2's smaller vocabulary produces more interesting subword splits.
+    """
+    tokens = demo_tokenizer.tokenize(text)
     return _render_tokens_html(tokens)
         with gr.Tab("Tokenizer"):
             gr.Markdown("### Token Visualization")
             gr.Markdown(
+                "See how text is split into tokens before the model processes it. "
+                "Hover over each token to see its numeric ID. "
+                "Uses GPT-2's tokenizer, which splits words into interesting subword pieces."
             )
             t3_input = gr.Textbox(
                 label="Text",
+                value="Huston-Tillotson University is an HBCU in Austin, Texas.",
                 lines=3,
             )
             t3_btn = gr.Button("Tokenize", variant="primary")

models.py CHANGED Viewed

@@ -23,6 +23,11 @@ AVAILABLE_MODELS = {
         "dtype": "float16",
         "description": "Fast, good quality (default)",
     },
     "Qwen2.5-7B (4-bit)": {
         "id": "Qwen/Qwen2.5-7B",
         "quantize": "4bit",
@@ -38,16 +43,6 @@ AVAILABLE_MODELS = {
         "quantize": "4bit",
         "description": "Best quality, quantized",
     },
-    "GPT-OSS-20B": {
-        "id": "openai/gpt-oss-20b",
-        "dtype": "auto",
-        "description": "OpenAI 20B, full precision (local/large GPU only)",
-    },
-    "GPT-OSS-20B (4-bit)": {
-        "id": "openai/gpt-oss-20b",
-        "quantize": "4bit",
-        "description": "OpenAI 20B, quantized to fit T4 (~10-12GB)",
-    },
 }
 DEFAULT_MODEL = "Qwen2.5-3B"
@@ -318,6 +313,38 @@ class ModelManager:
         ids = self.tokenizer.encode(text)
         return [(self.tokenizer.decode([tid]), tid) for tid in ids]
     # ------------------------------------------------------------------
     # Config helpers
     # ------------------------------------------------------------------

         "dtype": "float16",
         "description": "Fast, good quality (default)",
     },
+    "Qwen2.5-7B": {
+        "id": "Qwen/Qwen2.5-7B",
+        "dtype": "float16",
+        "description": "Higher quality, needs 24GB+ VRAM (L4/A10)",
+    },
     "Qwen2.5-7B (4-bit)": {
         "id": "Qwen/Qwen2.5-7B",
         "quantize": "4bit",
         "quantize": "4bit",
         "description": "Best quality, quantized",
     },
 }
 DEFAULT_MODEL = "Qwen2.5-3B"
         ids = self.tokenizer.encode(text)
         return [(self.tokenizer.decode([tid]), tid) for tid in ids]
+# ---------------------------------------------------------------------------
+# Separate tokenizer for demo purposes (GPT-2 shows more interesting splits)
+# ---------------------------------------------------------------------------
+class DemoTokenizer:
+    """Lightweight tokenizer for the Tokenizer tab.
+    Uses GPT-2's BPE tokenizer which has a smaller vocabulary and produces
+    more interesting subword splits than modern tokenizers like Qwen's.
+    """
+    def __init__(self):
+        self.tokenizer = None
+        self._loaded = False
+    def ensure_loaded(self):
+        """Load tokenizer on first use (lazy loading)."""
+        if not self._loaded:
+            self.tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
+            self._loaded = True
+    def tokenize(self, text: str) -> list[tuple[str, int]]:
+        """Tokenize text and return list of (token_str, token_id)."""
+        self.ensure_loaded()
+        ids = self.tokenizer.encode(text)
+        return [(self.tokenizer.decode([tid]), tid) for tid in ids]
+# Module-level singleton for demo tokenizer
+demo_tokenizer = DemoTokenizer()
     # ------------------------------------------------------------------
     # Config helpers
     # ------------------------------------------------------------------