NS-Y committed on
Commit
74419dd
·
verified ·
1 Parent(s): 1a26838

Upload 3 files

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +2 -4
README.md CHANGED
@@ -22,6 +22,6 @@ A Gradio Space that applies the Appendix-style prompt: the model must prioritize
22
  - `HF_TOKEN` — required if the model is gated.
23
 
24
  **Files**
25
- - `app.py` — Gradio app (cache disabled to avoid DynamicCache issues)
26
  - `requirements.txt` — dependencies (pins transformers 4.43.3, accelerate 0.32.1)
27
  - `examples/` — (optional) assets/presets
 
22
  - `HF_TOKEN` — required if the model is gated.
23
 
24
  **Files**
25
+ - `app.py` — Gradio app (slow tokenizer forced to avoid tokenizer.json schema mismatches)
26
  - `requirements.txt` — dependencies (pins transformers 4.43.3, accelerate 0.32.1)
27
  - `examples/` — (optional) assets/presets
app.py CHANGED
@@ -60,7 +60,7 @@ def load_model(model_id: str = DEFAULT_MODEL):
60
  return _tokenizer, _model
61
 
62
  auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and len(USE_AUTH_TOKEN.strip()) > 0) else None
63
- _tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=auth, trust_remote_code=TRUST_REMOTE_CODE)
64
  _model = AutoModelForCausalLM.from_pretrained(
65
  model_id,
66
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
@@ -69,11 +69,9 @@ def load_model(model_id: str = DEFAULT_MODEL):
69
  trust_remote_code=TRUST_REMOTE_CODE,
70
  )
71
 
72
- # Safety: ensure pad_token_id is set
73
  if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
74
  _tokenizer.pad_token_id = _tokenizer.eos_token_id
75
 
76
- # Prefer static cache if available to avoid DynamicCache issues in some remote code
77
  try:
78
  _model.generation_config.cache_implementation = "static"
79
  except Exception:
@@ -93,7 +91,7 @@ def generate_text(question: str, context: str, temperature: float, top_p: float,
93
  top_p=top_p,
94
  max_new_tokens=max_new_tokens,
95
  pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
96
- use_cache=False, # <-- avoid DynamicCache path in custom modeling code
97
  )
98
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
99
 
 
60
  return _tokenizer, _model
61
 
62
  auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and len(USE_AUTH_TOKEN.strip()) > 0) else None
63
+ _tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=auth, trust_remote_code=TRUST_REMOTE_CODE, use_fast=False)
64
  _model = AutoModelForCausalLM.from_pretrained(
65
  model_id,
66
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 
69
  trust_remote_code=TRUST_REMOTE_CODE,
70
  )
71
 
 
72
  if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
73
  _tokenizer.pad_token_id = _tokenizer.eos_token_id
74
 
 
75
  try:
76
  _model.generation_config.cache_implementation = "static"
77
  except Exception:
 
91
  top_p=top_p,
92
  max_new_tokens=max_new_tokens,
93
  pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
94
+ use_cache=False,
95
  )
96
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
97