Quentin Gallouédec Claude Opus 4.6 (1M context) committed on
Commit
cb002d0
·
1 Parent(s): 8c6a29f

Add "Sign in with Hugging Face" OAuth so visitors access their own gated repos

Browse files

- README frontmatter enables hf_oauth with read-repos scope.
- Add gr.LoginButton() in the top-right of the page.
- Thread gr.OAuthToken through load_from_hub / run_inspection /
render_playground; pass the visitor's token into from_pretrained.
- Key the tokenizer cache by (model_id, token) so users with different
Hub access don't share cached objects.

Now a signed-in visitor can inspect gated templates
(meta-llama/Llama-3.1-8B-Instruct, deepseek-ai/DeepSeek-R1, …)
transparently using their own permissions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. README.md +3 -0
  2. app.py +29 -19
README.md CHANGED
@@ -7,6 +7,9 @@ sdk: gradio
7
  sdk_version: 6.12.0
8
  app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  sdk_version: 6.12.0
8
  app_file: app.py
9
  pinned: false
10
+ hf_oauth: true
11
+ hf_oauth_scopes:
12
+ - read-repos
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -15,35 +15,38 @@ from inspector.format import format_template
15
 
16
  # ─── Tokenizer cache ───────────────────────────────────────────────────────────
17
  #
18
- # `from_pretrained` is slow (download + parse). We load each model once and
19
- # reuse the in-memory object on subsequent calls. We also stash the originals
20
- # of the four special tokens so an empty override field truly resets them.
 
21
 
22
- _CACHE: dict[str, tuple[Any, bool, dict]] = {}
23
 
24
 
25
- def _load(model_id: str) -> tuple[Any, bool, dict]:
26
  model_id = model_id.strip()
27
  if not model_id:
28
  raise gr.Error("Please enter a model id.")
29
- if model_id in _CACHE:
30
- return _CACHE[model_id]
 
31
  try:
32
- proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=False)
33
  is_processor = hasattr(proc, "image_processor") or hasattr(proc, "feature_extractor")
34
- obj = proc if is_processor else AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
35
  except Exception:
36
- obj = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
37
  is_processor = False
38
  tok = obj.tokenizer if is_processor else obj
39
  originals = {attr: getattr(tok, attr, None) for attr in ("bos_token", "eos_token", "pad_token", "unk_token")}
40
- _CACHE[model_id] = (obj, is_processor, originals)
41
- return _CACHE[model_id]
42
 
43
 
44
- def load_from_hub(model_id: str):
 
45
  try:
46
- obj, is_processor, originals = _load(model_id)
47
  except Exception as e:
48
  raise gr.Error(f"Failed to load `{model_id}`: {type(e).__name__}: {e}")
49
  template = (
@@ -66,9 +69,10 @@ def load_from_hub(model_id: str):
66
  # ─── Checks runner ─────────────────────────────────────────────────────────────
67
 
68
  def _build_context(
69
- model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str
 
70
  ) -> Context:
71
- obj, is_processor, originals = _load(model_id)
72
  tok = obj.tokenizer if is_processor else obj
73
  # Reset to original tokens, then apply overrides where the user provided one.
74
  for attr, original in originals.items():
@@ -126,6 +130,7 @@ _PLAYGROUND_DEFAULT = '''\
126
  def render_playground(
127
  model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
128
  messages_src: str, add_gen_prompt: bool,
 
129
  ) -> str:
130
  """Parse the user's `messages` literal and render it with apply_chat_template.
131
 
@@ -140,8 +145,9 @@ def render_playground(
140
  return f"# parse error: {type(e).__name__}: {e}"
141
  if not isinstance(messages, list):
142
  return "# expected a list of messages at the top level"
 
143
  try:
144
- ctx = _build_context(model_id, template_source, bos, eos, pad, unk)
145
  return ctx.obj.apply_chat_template(
146
  messages, add_generation_prompt=add_gen_prompt, tokenize=False,
147
  )
@@ -151,11 +157,13 @@ def render_playground(
151
 
152
  def run_inspection(
153
  model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
 
154
  ):
155
  if not model_id.strip() or not template_source.strip():
156
  return "_Enter a model id and a template to inspect._"
 
157
  try:
158
- ctx = _build_context(model_id, template_source, bos, eos, pad, unk)
159
  except Exception as e:
160
  return f"**Failed to build context:** `{type(e).__name__}: {e}`\n\n```\n{traceback.format_exc()}\n```"
161
 
@@ -182,7 +190,9 @@ except Exception:
182
 
183
 
184
  with gr.Blocks(title="Chat Template Inspector", fill_width=True) as demo:
185
- gr.Markdown("## Chat Template Inspector")
 
 
186
  with gr.Row():
187
  with gr.Column():
188
  with gr.Row():
 
15
 
16
  # ─── Tokenizer cache ───────────────────────────────────────────────────────────
17
  #
18
+ # `from_pretrained` is slow (download + parse). We load each model once per
19
+ # (model_id, token) pair so visitors with different Hub access don't share
20
+ # cached objects. We also stash the originals of the four special tokens so an
21
+ # empty override field truly resets them.
22
 
23
+ _CACHE: dict[tuple[str, str], tuple[Any, bool, dict]] = {}
24
 
25
 
26
+ def _load(model_id: str, token: str | None = None) -> tuple[Any, bool, dict]:
27
  model_id = model_id.strip()
28
  if not model_id:
29
  raise gr.Error("Please enter a model id.")
30
+ key = (model_id, token or "")
31
+ if key in _CACHE:
32
+ return _CACHE[key]
33
  try:
34
+ proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=False, token=token)
35
  is_processor = hasattr(proc, "image_processor") or hasattr(proc, "feature_extractor")
36
+ obj = proc if is_processor else AutoTokenizer.from_pretrained(model_id, trust_remote_code=False, token=token)
37
  except Exception:
38
+ obj = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False, token=token)
39
  is_processor = False
40
  tok = obj.tokenizer if is_processor else obj
41
  originals = {attr: getattr(tok, attr, None) for attr in ("bos_token", "eos_token", "pad_token", "unk_token")}
42
+ _CACHE[key] = (obj, is_processor, originals)
43
+ return _CACHE[key]
44
 
45
 
46
+ def load_from_hub(model_id: str, oauth_token: gr.OAuthToken | None = None):
47
+ token = oauth_token.token if oauth_token is not None else None
48
  try:
49
+ obj, is_processor, originals = _load(model_id, token)
50
  except Exception as e:
51
  raise gr.Error(f"Failed to load `{model_id}`: {type(e).__name__}: {e}")
52
  template = (
 
69
  # ─── Checks runner ─────────────────────────────────────────────────────────────
70
 
71
  def _build_context(
72
+ model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
73
+ token: str | None = None,
74
  ) -> Context:
75
+ obj, is_processor, originals = _load(model_id, token)
76
  tok = obj.tokenizer if is_processor else obj
77
  # Reset to original tokens, then apply overrides where the user provided one.
78
  for attr, original in originals.items():
 
130
  def render_playground(
131
  model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
132
  messages_src: str, add_gen_prompt: bool,
133
+ oauth_token: gr.OAuthToken | None = None,
134
  ) -> str:
135
  """Parse the user's `messages` literal and render it with apply_chat_template.
136
 
 
145
  return f"# parse error: {type(e).__name__}: {e}"
146
  if not isinstance(messages, list):
147
  return "# expected a list of messages at the top level"
148
+ token = oauth_token.token if oauth_token is not None else None
149
  try:
150
+ ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
151
  return ctx.obj.apply_chat_template(
152
  messages, add_generation_prompt=add_gen_prompt, tokenize=False,
153
  )
 
157
 
158
  def run_inspection(
159
  model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
160
+ oauth_token: gr.OAuthToken | None = None,
161
  ):
162
  if not model_id.strip() or not template_source.strip():
163
  return "_Enter a model id and a template to inspect._"
164
+ token = oauth_token.token if oauth_token is not None else None
165
  try:
166
+ ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
167
  except Exception as e:
168
  return f"**Failed to build context:** `{type(e).__name__}: {e}`\n\n```\n{traceback.format_exc()}\n```"
169
 
 
190
 
191
 
192
  with gr.Blocks(title="Chat Template Inspector", fill_width=True) as demo:
193
+ with gr.Row():
194
+ gr.Markdown("## Chat Template Inspector", elem_id="title")
195
+ login_btn = gr.LoginButton(size="sm")
196
  with gr.Row():
197
  with gr.Column():
198
  with gr.Row():