PioTio committed · commit 8876dbe (verified) · 1 parent: d5edac7

Set DEFAULT_MODEL -> nanbeige-4.1-aiman-merged (by user token)

Files changed (1)
  1. app.py +211 -20
app.py CHANGED
@@ -27,9 +27,9 @@ except Exception:
 # ---------------------------------------------------------------------------
 # Config / defaults
 # ---------------------------------------------------------------------------
-DEFAULT_MODEL = "PioTio/nanbeige2.5-nsfw-merged"
+DEFAULT_MODEL = "PioTio/nanbeige-4.1-aiman-merged"
 CPU_DEMO_MODEL = "distilgpt2" # fast, small CPU-friendly fallback for demos
-DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest inteligent AI chatbot assistant. Answer succinctly unless asked otherwise."
+DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest assistant. Answer succinctly unless asked otherwise."
 
 # globals populated by load_model()
 MODEL = None
@@ -95,11 +95,154 @@ def _diagnose_and_fix_tokenizer_model(tok: AutoTokenizer, mdl: AutoModelForCausalLM
         pass
 
 
+# Helper: detect Git-LFS pointer files and fetch real tokenizer.model from the Hub
+def _is_lfs_pointer_file(path: str) -> bool:
+    try:
+        with open(path, "rb") as f:
+            start = f.read(128)
+        return b"git-lfs.github.com/spec/v1" in start
+    except Exception:
+        return False
+
+
+def _download_tokenizer_model_from_hub(hf_repo: str, dest_path: str, hf_token: Optional[str] = None) -> bool:
+    """Download tokenizer.model from HF Hub into dest_path. Returns True on success."""
+    try:
+        import urllib.request
+
+        url = f"https://huggingface.co/{hf_repo}/resolve/main/tokenizer.model"
+        req = urllib.request.Request(url, headers={"User-Agent": "spaces-nanbeige-chat/1.0"})
+        if hf_token:
+            req.add_header("Authorization", f"Bearer {hf_token}")
+        with urllib.request.urlopen(req, timeout=30) as r, open(dest_path + ".tmp", "wb") as out:
+            out.write(r.read())
+        os.replace(dest_path + ".tmp", dest_path)
+        return True
+    except Exception as e:
+        print("_download_tokenizer_model_from_hub failed:", e)
+        try:
+            if os.path.exists(dest_path + ".tmp"):
+                os.remove(dest_path + ".tmp")
+        except Exception:
+            pass
+        return False
+
+
+def _ensure_local_tokenizer_model(repo_path: str, hf_token: Optional[str] = None) -> bool:
+    """If tokenizer.model in repo_path is a Git-LFS pointer, try to download the real file from the Hub.
+    Tries to infer a Hub repo id from the local git remote; falls back to `PioTio/<dirname>` for Nanbeige folders.
+    """
+    tm = os.path.join(repo_path, "tokenizer.model")
+    if not os.path.exists(tm):
+        return False
+    if not _is_lfs_pointer_file(tm):
+        return True
+
+    # try to get repo id from git remote origin
+    repo_id = None
+    try:
+        import subprocess
+
+        out = subprocess.check_output(["git", "-C", repo_path, "config", "--get", "remote.origin.url"], text=True).strip()
+        if out and "huggingface.co" in out:
+            # parse https://huggingface.co/owner/repo(.git); strip the suffix explicitly
+            # (str.rstrip(".git") strips characters, not a suffix, and can eat repo-name chars)
+            if out.endswith(".git"):
+                out = out[: -len(".git")]
+            parts = out.split("/")
+            repo_id = f"{parts[-2]}/{parts[-1]}"
+    except Exception:
+        repo_id = None
+
+    # fallback: guess owner for common Nanbeige folder names
+    if repo_id is None:
+        guessed = os.path.basename(repo_path)
+        if "nanbeige" in guessed.lower():
+            repo_id = f"PioTio/{guessed}"
+
+    if repo_id:
+        return _download_tokenizer_model_from_hub(repo_id, tm, hf_token=hf_token)
+    return False
+
+
+# Helper: upload tokenizer files (from a local tokenizer dir) back to a Hub repo
+def _upload_tokenizer_files_to_hub(repo_id: str, local_tokenizer_dir: str, hf_token: Optional[str] = None) -> bool:
+    """Upload tokenizer files (tokenizer.model, tokenizer_config.json, tokenizer.json, special_tokens_map.json)
+    from local_tokenizer_dir to the Hub repo. Returns True if at least one file was uploaded successfully.
+    """
+    try:
+        from huggingface_hub import HfApi
+        api = HfApi()
+        candidates = [
+            "tokenizer.model",
+            "tokenizer_config.json",
+            "tokenizer.json",
+            "special_tokens_map.json",
+            "chat_template.jinja",
+        ]
+        uploaded = 0
+        for fn in candidates:
+            p = os.path.join(local_tokenizer_dir, fn)
+            if not os.path.exists(p):
+                continue
+            try:
+                api.upload_file(
+                    path_or_fileobj=p,
+                    path_in_repo=fn,
+                    repo_id=repo_id,
+                    token=hf_token,
+                    commit_message=f"Auto-fix tokenizer: {fn}",
+                )
+                print(f"_upload_tokenizer_files_to_hub: uploaded {fn} to {repo_id}")
+                uploaded += 1
+            except Exception as e:
+                print(f"_upload_tokenizer_files_to_hub: failed to upload {fn}: {e}")
+        return uploaded > 0
+    except Exception as e:
+        print("_upload_tokenizer_files_to_hub failed:", e)
+        return False
+
+
+def _repair_and_upload_tokenizer(repo_id: str, hf_token: Optional[str] = None) -> bool:
+    """Fetch the correct base tokenizer (Nanbeige4.1 if detected, otherwise DEFAULT_MODEL),
+    then upload tokenizer files to the target repo. Returns True on success.
+    """
+    try:
+        base = "Nanbeige/Nanbeige4.1-3B" if "4.1" in repo_id.lower() else DEFAULT_MODEL
+        from transformers import AutoTokenizer
+        import tempfile, shutil
+        tmp = tempfile.mkdtemp(prefix="tokenizer_fix_")
+        tok = AutoTokenizer.from_pretrained(base, use_fast=False, trust_remote_code=True)
+        tok.save_pretrained(tmp)
+        ok = _upload_tokenizer_files_to_hub(repo_id, tmp, hf_token=hf_token)
+        shutil.rmtree(tmp)
+        return ok
+    except Exception as e:
+        print("_repair_and_upload_tokenizer failed:", e)
+        return False
+
+
+def repair_tokenizer_on_hub(repo_id: str) -> str:
+    """Public helper callable from the UI: attempts to upload a working base tokenizer to `repo_id`.
+    Requires HF_TOKEN in the environment with write access to the target repo.
+    """
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        return "HF_TOKEN not set — cannot upload tokenizer to Hub. Add HF_TOKEN and retry."
+    try:
+        ok = _repair_and_upload_tokenizer(repo_id, hf_token=hf_token)
+        return "Uploaded tokenizer files to repo" if ok else "Repair attempt failed (see logs)"
+    except Exception as e:
+        return f"Repair failed: {e}"
+
+
 # ----------------------------- Model loading -------------------------------
 
 def load_model(repo_id: str = DEFAULT_MODEL, force_reload: bool = False) -> str:
-    """Load model + tokenizer from the Hub. Graceful fallbacks for CPU/GPU/4-bit.
-    Returns a short status string for the UI.
+    """Load model + tokenizer from the Hub. Graceful fallbacks and HF-token support.
+
+    Changes made:
+    - prefer slow tokenizer (use_fast=False)
+    - accept HF token via env HF_TOKEN for private repos / higher rate limits
+    - fall back to the base tokenizer (DEFAULT_MODEL) when tokenizer files are missing
+    - pass auth token into from_pretrained calls where supported
     """
     global MODEL, TOKENIZER, MODEL_NAME, DEVICE
 
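For reference, the stub that `_is_lfs_pointer_file` matches is the standard Git-LFS pointer: a tiny text file whose first line carries the spec URL, so it always falls inside the 128 bytes the helper reads. A minimal self-test sketch (the oid digest and size below are placeholders, not real values):

import os, tempfile

# Shape of a Git-LFS pointer file; digest and byte count are illustrative only.
pointer = (
    b"version https://git-lfs.github.com/spec/v1\n"
    b"oid sha256:deadbeef...\n"
    b"size 1131649\n"
)
with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(pointer)
    path = f.name
assert _is_lfs_pointer_file(path)  # spec URL sits within the first 128 bytes
os.unlink(path)

A library-backed alternative to the urllib fetch in `_download_tokenizer_model_from_hub` is sketched after the next hunk.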
@@ -112,14 +255,46 @@ def load_model(repo_id: str = DEFAULT_MODEL, force_reload: bool = False) -> str:
     MODEL_NAME = repo_id
 
     DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+    hf_token = os.environ.get("HF_TOKEN")
 
+    # 1) Try to load tokenizer (slow tokenizer is required for Nanbeige family)
     try:
-        # prefer safe (non-fast) tokenizer to avoid SentencePiece piece-id bugs
-        TOKENIZER = AutoTokenizer.from_pretrained(repo_id, use_fast=False, trust_remote_code=True)
-    except Exception:
-        TOKENIZER = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
-
-    # attempt bnb 4-bit if GPU available and BitsAndBytes present
+        TOKENIZER = AutoTokenizer.from_pretrained(
+            repo_id,
+            use_fast=False,
+            trust_remote_code=True,
+            use_auth_token=hf_token,
+        )
+    except Exception as e_tok:
+        # If a local repo was cloned without git-lfs, tokenizer.model may be a pointer file — try auto-fetch
+        try:
+            if os.path.isdir(repo_id) and _ensure_local_tokenizer_model(repo_id, hf_token=hf_token):
+                print(f"Found LFS pointer at {repo_id}/tokenizer.model — fetched real tokenizer.model; retrying tokenizer load...")
+                TOKENIZER = AutoTokenizer.from_pretrained(
+                    repo_id,
+                    use_fast=False,
+                    trust_remote_code=True,
+                    use_auth_token=hf_token,
+                )
+                # success — continue to model load
+            else:
+                # fallback: try base model tokenizer (common fix when adapter upload missed tokenizer.model)
+                print(f"Tokenizer load from {repo_id} failed: {e_tok}. Falling back to base tokenizer {DEFAULT_MODEL}...")
+                TOKENIZER = AutoTokenizer.from_pretrained(
+                    DEFAULT_MODEL,
+                    use_fast=False,
+                    trust_remote_code=True,
+                    use_auth_token=hf_token,
+                )
+        except Exception as e_base:
+            # last-resort: try fast tokenizer (may still fail or produce garbled output)
+            try:
+                print(f"Base tokenizer fallback failed: {e_base}. Trying generic AutoTokenizer...")
+                TOKENIZER = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True, use_auth_token=hf_token)
+            except Exception as e_final:
+                return f"Tokenizer load failed: {e_final}"
+
+    # 2) Load model (prefer 4-bit on GPU if available)
     if DEVICE == "cuda" and HAS_BNB:
         try:
             bnb_config = BitsAndBytesConfig(load_in_4bit=True)
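The pointer-recovery branch above ends up in `_download_tokenizer_model_from_hub`; if depending on huggingface_hub is acceptable, the same fetch can use the client library, which handles auth headers, redirects and caching. A minimal sketch under that assumption (`fetch_tokenizer_model` is a hypothetical helper, not part of this commit):

import shutil
from typing import Optional
from huggingface_hub import hf_hub_download

def fetch_tokenizer_model(repo_id: str, dest_path: str, token: Optional[str] = None) -> None:
    # Downloads into the local HF cache, then copies to the requested path.
    cached = hf_hub_download(repo_id=repo_id, filename="tokenizer.model", token=token)
    shutil.copyfile(cached, dest_path)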
@@ -128,31 +303,42 @@ def load_model(repo_id: str = DEFAULT_MODEL, force_reload: bool = False) -> str:
                 device_map="auto",
                 quantization_config=bnb_config,
                 trust_remote_code=True,
+                use_auth_token=hf_token,
             )
             MODEL.eval()
             _diagnose_and_fix_tokenizer_model(TOKENIZER, MODEL)
             return f"Loaded {repo_id} (4-bit, device_map=auto)"
         except Exception as e:
-            # fall through to fp16 load
             print("bnb/4bit load failed - falling back:", e)
 
-    # try fp16 auto device_map if GPU present
+    # 3) FP16 / CPU fallback
     try:
         if DEVICE == "cuda":
-            MODEL = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
+            MODEL = AutoModelForCausalLM.from_pretrained(
+                repo_id,
+                device_map="auto",
+                torch_dtype=torch.float16,
+                trust_remote_code=True,
+                use_auth_token=hf_token,
+            )
         else:
-            MODEL = AutoModelForCausalLM.from_pretrained(repo_id, low_cpu_mem_usage=True, torch_dtype=torch.float32, trust_remote_code=True)
+            MODEL = AutoModelForCausalLM.from_pretrained(
+                repo_id,
+                low_cpu_mem_usage=True,
+                torch_dtype=torch.float32,
+                trust_remote_code=True,
+                use_auth_token=hf_token,
+            )
             MODEL.to("cpu")
-        MODEL.eval()
 
-        # run the tokenizer/model alignment fix (important for Nanbeige family)
+        MODEL.eval()
         _diagnose_and_fix_tokenizer_model(TOKENIZER, MODEL)
-
         return f"Loaded {repo_id} (@{DEVICE})"
     except Exception as e:
         MODEL = None
         TOKENIZER = None
-        return f"Model load failed: {e}"
+        # provide a helpful diagnostic message
+        return f"Model load failed: {e} (hint: check HF_TOKEN, repo contents and ensure tokenizer.model is present)"
 
 
 # ----------------------------- Prompt building -----------------------------
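The bare `BitsAndBytesConfig(load_in_4bit=True)` in the 4-bit branch relies on library defaults. If quantization behaviour ever needs tuning, the config exposes the usual NF4 knobs; a sketch with commonly used values (illustrative settings, not something this commit prescribes):

import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # NF4 weight quantization
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
)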
@@ -361,11 +547,14 @@ def apply_lora_adapter(adapter_repo: str):
     global MODEL
     if MODEL is None:
         return "Load base model first."
+
+    hf_token = os.environ.get("HF_TOKEN")
     try:
-        MODEL = PeftModel.from_pretrained(MODEL, adapter_repo)
+        # allow huggingface auth token for private adapters
+        MODEL = PeftModel.from_pretrained(MODEL, adapter_repo, use_auth_token=hf_token)
         return f"Applied LoRA adapter from {adapter_repo}"
     except Exception as e:
-        return f"Failed to apply adapter: {e}"
+        return f"Failed to apply adapter: {e} (hint: check adapter name and HF_TOKEN)"
 
 
 # ----------------------------- Build UI -----------------------------------
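A related note on the adapter path: when an adapter is applied for inference only, peft can fold the LoRA weights into the base model so generation runs without the PEFT wrapper. A short sketch (the isinstance guard is an assumption about how MODEL is managed here, not part of the commit):

from peft import PeftModel

# Bake the LoRA weights into the base model; MODEL stops being a PeftModel.
if isinstance(MODEL, PeftModel):
    MODEL = MODEL.merge_and_unload()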
@@ -376,6 +565,7 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
     with gr.Row():
         model_input = gr.Textbox(value=DEFAULT_MODEL, label="Model repo (HF)", interactive=True)
         load_btn = gr.Button("Load model")
+        repair_btn = gr.Button("Repair tokenizer on Hub")
         model_demo_btn = gr.Button(f"Load fast CPU demo ({CPU_DEMO_MODEL})")
         model_status = gr.Textbox(value="Model not loaded", label="Status", interactive=False)
 
@@ -420,6 +610,7 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
 
     # Events
    load_btn.click(fn=lambda repo: load_model_ui(repo), inputs=model_input, outputs=model_status)
+    repair_btn.click(fn=repair_tokenizer_on_hub, inputs=model_input, outputs=model_status)
 
     send.click(
         fn=submit_message,
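Since the new button simply routes the model-repo textbox into `repair_tokenizer_on_hub`, the same repair can be triggered without the UI, e.g. from a console in the Space. A sketch using the new default repo as an example target:

# Equivalent of clicking "Repair tokenizer on Hub"; HF_TOKEN must already be
# set in the environment with write access to the target repo.
print(repair_tokenizer_on_hub("PioTio/nanbeige-4.1-aiman-merged"))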