Sixparticle committed on
Commit
a983386
·
1 Parent(s): c1d9331

Fix tokenizer init fallback for HF Spaces startup

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -4,7 +4,11 @@ import torch
4
 
5
  # 加载 CodeT5+ 模型
6
  model_name = "Salesforce/codet5p-220m"
7
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
 
 
 
 
8
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
9
 
10
  def generate_code(prompt: str, max_length: int = 128) -> str:
 
4
 
5
  # 加载 CodeT5+ 模型
6
  model_name = "Salesforce/codet5p-220m"
7
+ try:
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
9
+ except TypeError:
10
+ # Some tokenizer repos expose added_tokens metadata that breaks fast tokenizer init.
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)
12
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
13
 
14
  def generate_code(prompt: str, max_length: int = 128) -> str: