bdstar committed on
Commit
e28dfb5
·
verified ·
1 Parent(s): c812b33

Update app to pass the HF_TOKEN when loading the tokenizer and model

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -15,8 +15,10 @@ if not USE_REMOTE_OLLAMA:
15
  # Transformers fallback for Spaces (CPU-friendly small instruct model)
16
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
17
  HF_CHAT_MODEL = os.getenv("HF_CHAT_MODEL", "google/gemma-2-2b-it") # small instruct model that runs on CPU
18
- _tok = AutoTokenizer.from_pretrained(HF_CHAT_MODEL)
19
- _mdl = AutoModelForCausalLM.from_pretrained(HF_CHAT_MODEL, torch_dtype="auto", device_map="auto")
 
 
20
  gen = pipeline("text-generation", model=_mdl, tokenizer=_tok, max_new_tokens=256)
21
 
22
 
 
15
  # Transformers fallback for Spaces (CPU-friendly small instruct model)
16
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
17
  HF_CHAT_MODEL = os.getenv("HF_CHAT_MODEL", "google/gemma-2-2b-it") # small instruct model that runs on CPU
18
+ HF_TOKEN = os.getenv("HF_TOKEN")
19
+
20
+ _tok = AutoTokenizer.from_pretrained(HF_CHAT_MODEL, token=HF_TOKEN)
21
+ _mdl = AutoModelForCausalLM.from_pretrained(HF_CHAT_MODEL, token=HF_TOKEN, torch_dtype="auto", device_map="auto")
22
  gen = pipeline("text-generation", model=_mdl, tokenizer=_tok, max_new_tokens=256)
23
 
24