update app by using token
app.py CHANGED

@@ -15,8 +15,10 @@ if not USE_REMOTE_OLLAMA:
     # Transformers fallback for Spaces (CPU-friendly small instruct model)
     from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
     HF_CHAT_MODEL = os.getenv("HF_CHAT_MODEL", "google/gemma-2-2b-it")  # small instruct model that runs on CPU
-
-
+    HF_TOKEN = os.getenv("HF_TOKEN")
+
+    _tok = AutoTokenizer.from_pretrained(HF_CHAT_MODEL, token=HF_TOKEN)
+    _mdl = AutoModelForCausalLM.from_pretrained(HF_CHAT_MODEL, token=HF_TOKEN, torch_dtype="auto", device_map="auto")
     gen = pipeline("text-generation", model=_mdl, tokenizer=_tok, max_new_tokens=256)


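For reference, a minimal, self-contained sketch of the fallback path this commit enables, assuming HF_TOKEN is made available to the Space (for example as a repository secret) since google/gemma-2-2b-it is a gated model and cannot be downloaded without authentication. The chat-template prompt construction and the example message below are illustrative assumptions, not code taken from app.py.

# Sketch only: assumes HF_TOKEN is set in the environment (e.g. as a Space secret)
# and that the surrounding app builds prompts roughly like this.
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

HF_CHAT_MODEL = os.getenv("HF_CHAT_MODEL", "google/gemma-2-2b-it")
HF_TOKEN = os.getenv("HF_TOKEN")  # gated repo: download fails without a valid token

_tok = AutoTokenizer.from_pretrained(HF_CHAT_MODEL, token=HF_TOKEN)
_mdl = AutoModelForCausalLM.from_pretrained(
    HF_CHAT_MODEL, token=HF_TOKEN, torch_dtype="auto", device_map="auto"
)
gen = pipeline("text-generation", model=_mdl, tokenizer=_tok, max_new_tokens=256)

# Assumed usage: render the model's chat template, generate, and return only the
# newly generated text (return_full_text=False drops the echoed prompt).
messages = [{"role": "user", "content": "In one sentence, what does this app do?"}]
prompt = _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
out = gen(prompt, return_full_text=False)
print(out[0]["generated_text"])

With this in place, the same code runs on a CPU Space (device_map="auto" simply keeps the 2B model on CPU when no GPU is present), and swapping HF_CHAT_MODEL via the environment variable changes the fallback model without touching the code.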