Update app.py
app.py CHANGED
@@ -6,6 +6,14 @@ import torch
 from accelerate import Accelerator
 import accelerate
 import time
+from huggingface_hub import login
+
+
+
+# 🟢 (Option 2 - recommended for Render/Colab)
+# Save your token as an environment variable called HUGGINGFACE_HUB_TOKEN
+# Then this will automatically pick it up:
+hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
 
 model = None
 tokenizer = None
@@ -23,7 +31,7 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
     gpu_count = torch.cuda.device_count()
     print('gpu_count', gpu_count)
 
-    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
+    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
     model = transformers.LlamaForCausalLM.from_pretrained(
         model_name,
         #device_map=device_map,
@@ -33,7 +41,8 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
         #load_in_8bit=eight_bit,
         low_cpu_mem_usage=True,
         load_in_8bit=False,
-        cache_dir="cache"
+        cache_dir="cache",
+        use_auth_token=hf_token
     ).cuda()
 
     generator = model.generate
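The change reads a Hugging Face access token from the HUGGINGFACE_HUB_TOKEN environment variable and threads it through both from_pretrained calls, which is what lets gated LLaMA weights download. A minimal standalone sketch of the same pattern follows; the model name is a placeholder (not from the diff), and it assumes import os appears earlier in app.py, since the hunk context does not show it.

import os
import transformers

# Set the token before launching the app, e.g.:
#   export HUGGINGFACE_HUB_TOKEN=hf_...
hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

MODEL_NAME = "your-org/your-llama-checkpoint"  # placeholder, not from the diff

# Pass the token to both calls so a gated repo resolves for
# the tokenizer and the model alike.
tokenizer = transformers.LlamaTokenizer.from_pretrained(
    MODEL_NAME, use_auth_token=hf_token
)
model = transformers.LlamaForCausalLM.from_pretrained(
    MODEL_NAME,
    low_cpu_mem_usage=True,
    cache_dir="cache",
    use_auth_token=hf_token,
)

Two side notes: login is imported from huggingface_hub but never called; invoking huggingface_hub.login(token=hf_token) once at startup would be the explicit alternative (presumably the "Option 1" the comment alludes to). Also, recent transformers releases accept token= and deprecate use_auth_token=, so the keyword may need updating on newer versions.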