Spaces:

trl-lib
/

chat-template-inspector

Sleeping

Quentin Gallouédec Claude Opus 4.6 (1M context) committed on Apr 15

Commit

cb002d0

1 Parent(s): 8c6a29f

Add "Sign in with Hugging Face" OAuth so visitors access their own gated repos

- README frontmatter enables hf_oauth with read-repos scope.
- Add gr.LoginButton() in the top-right of the page.
- Thread gr.OAuthToken through load_from_hub / run_inspection /
render_playground; pass the visitor's token into from_pretrained.
- Key the tokenizer cache by (model_id, token) so users with different
Hub access don't share cached objects.

Now a signed-in visitor can inspect gated templates
(meta-llama/Llama-3.1-8B-Instruct, deepseek-ai/DeepSeek-R1, …)
transparently using their own permissions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

README.md +3 -0
app.py +29 -19

README.md CHANGED Viewed

@@ -7,6 +7,9 @@ sdk: gradio
 sdk_version: 6.12.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 6.12.0
 app_file: app.py
 pinned: false
+hf_oauth: true
+hf_oauth_scopes:
+ - read-repos
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -15,35 +15,38 @@ from inspector.format import format_template
 # ─── Tokenizer cache ───────────────────────────────────────────────────────────
 #
-# `from_pretrained` is slow (download + parse). We load each model once and
-# reuse the in-memory object on subsequent calls. We also stash the originals
-# of the four special tokens so an empty override field truly resets them.
-_CACHE: dict[str, tuple[Any, bool, dict]] = {}
-def _load(model_id: str) -> tuple[Any, bool, dict]:
     model_id = model_id.strip()
     if not model_id:
         raise gr.Error("Please enter a model id.")
-    if model_id in _CACHE:
-        return _CACHE[model_id]
     try:
-        proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=False)
         is_processor = hasattr(proc, "image_processor") or hasattr(proc, "feature_extractor")
-        obj = proc if is_processor else AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
     except Exception:
-        obj = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
         is_processor = False
     tok = obj.tokenizer if is_processor else obj
     originals = {attr: getattr(tok, attr, None) for attr in ("bos_token", "eos_token", "pad_token", "unk_token")}
-    _CACHE[model_id] = (obj, is_processor, originals)
-    return _CACHE[model_id]
-def load_from_hub(model_id: str):
     try:
-        obj, is_processor, originals = _load(model_id)
     except Exception as e:
         raise gr.Error(f"Failed to load `{model_id}`: {type(e).__name__}: {e}")
     template = (
@@ -66,9 +69,10 @@ def load_from_hub(model_id: str):
 # ─── Checks runner ─────────────────────────────────────────────────────────────
 def _build_context(
-    model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str
 ) -> Context:
-    obj, is_processor, originals = _load(model_id)
     tok = obj.tokenizer if is_processor else obj
     # Reset to original tokens, then apply overrides where the user provided one.
     for attr, original in originals.items():
@@ -126,6 +130,7 @@ _PLAYGROUND_DEFAULT = '''\
 def render_playground(
     model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
     messages_src: str, add_gen_prompt: bool,
 ) -> str:
     """Parse the user's `messages` literal and render it with apply_chat_template.
@@ -140,8 +145,9 @@ def render_playground(
         return f"# parse error: {type(e).__name__}: {e}"
     if not isinstance(messages, list):
         return "# expected a list of messages at the top level"
     try:
-        ctx = _build_context(model_id, template_source, bos, eos, pad, unk)
         return ctx.obj.apply_chat_template(
             messages, add_generation_prompt=add_gen_prompt, tokenize=False,
         )
@@ -151,11 +157,13 @@ def render_playground(
 def run_inspection(
     model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
 ):
     if not model_id.strip() or not template_source.strip():
         return "_Enter a model id and a template to inspect._"
     try:
-        ctx = _build_context(model_id, template_source, bos, eos, pad, unk)
     except Exception as e:
         return f"**Failed to build context:** `{type(e).__name__}: {e}`\n\n```\n{traceback.format_exc()}\n```"
@@ -182,7 +190,9 @@ except Exception:
 with gr.Blocks(title="Chat Template Inspector", fill_width=True) as demo:
-    gr.Markdown("## Chat Template Inspector")
     with gr.Row():
         with gr.Column():
             with gr.Row():

 # ─── Tokenizer cache ───────────────────────────────────────────────────────────
 #
+# `from_pretrained` is slow (download + parse). We load each model once per
+# (model_id, token) pair so visitors with different Hub access don't share
+# cached objects. We also stash the originals of the four special tokens so an
+# empty override field truly resets them.
+_CACHE: dict[tuple[str, str], tuple[Any, bool, dict]] = {}
+def _load(model_id: str, token: str | None = None) -> tuple[Any, bool, dict]:
     model_id = model_id.strip()
     if not model_id:
         raise gr.Error("Please enter a model id.")
+    key = (model_id, token or "")
+    if key in _CACHE:
+        return _CACHE[key]
     try:
+        proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=False, token=token)
         is_processor = hasattr(proc, "image_processor") or hasattr(proc, "feature_extractor")
+        obj = proc if is_processor else AutoTokenizer.from_pretrained(model_id, trust_remote_code=False, token=token)
     except Exception:
+        obj = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False, token=token)
         is_processor = False
     tok = obj.tokenizer if is_processor else obj
     originals = {attr: getattr(tok, attr, None) for attr in ("bos_token", "eos_token", "pad_token", "unk_token")}
+    _CACHE[key] = (obj, is_processor, originals)
+    return _CACHE[key]
+def load_from_hub(model_id: str, oauth_token: gr.OAuthToken | None = None):
+    token = oauth_token.token if oauth_token is not None else None
     try:
+        obj, is_processor, originals = _load(model_id, token)
     except Exception as e:
         raise gr.Error(f"Failed to load `{model_id}`: {type(e).__name__}: {e}")
     template = (
 # ─── Checks runner ─────────────────────────────────────────────────────────────
 def _build_context(
+    model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
+    token: str | None = None,
 ) -> Context:
+    obj, is_processor, originals = _load(model_id, token)
     tok = obj.tokenizer if is_processor else obj
     # Reset to original tokens, then apply overrides where the user provided one.
     for attr, original in originals.items():
 def render_playground(
     model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
     messages_src: str, add_gen_prompt: bool,
+    oauth_token: gr.OAuthToken | None = None,
 ) -> str:
     """Parse the user's `messages` literal and render it with apply_chat_template.
         return f"# parse error: {type(e).__name__}: {e}"
     if not isinstance(messages, list):
         return "# expected a list of messages at the top level"
+    token = oauth_token.token if oauth_token is not None else None
     try:
+        ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
         return ctx.obj.apply_chat_template(
             messages, add_generation_prompt=add_gen_prompt, tokenize=False,
         )
 def run_inspection(
     model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
+    oauth_token: gr.OAuthToken | None = None,
 ):
     if not model_id.strip() or not template_source.strip():
         return "_Enter a model id and a template to inspect._"
+    token = oauth_token.token if oauth_token is not None else None
     try:
+        ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
     except Exception as e:
         return f"**Failed to build context:** `{type(e).__name__}: {e}`\n\n```\n{traceback.format_exc()}\n```"
 with gr.Blocks(title="Chat Template Inspector", fill_width=True) as demo:
+    with gr.Row():
+        gr.Markdown("## Chat Template Inspector", elem_id="title")
+        login_btn = gr.LoginButton(size="sm")
     with gr.Row():
         with gr.Column():
             with gr.Row():