Update app.py
app.py CHANGED

@@ -31,7 +31,7 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
     gpu_count = torch.cuda.device_count()
     print('gpu_count', gpu_count)
 
-    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name,
+    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name, token=hf_token)
     model = transformers.LlamaForCausalLM.from_pretrained(
         model_name,
         #device_map=device_map,
@@ -42,7 +42,7 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
         low_cpu_mem_usage=True,
         load_in_8bit=False,
         cache_dir="cache",
-
+        token=hf_token
     ).cuda()
 
     generator = model.generate
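For context, the sketch below reconstructs what the updated load_model plausibly looks like after this commit. It is an assumption-laden reconstruction, not the full file: lines 38-41 fall between the two hunks and are elided, hf_token is assumed to come from the HF_TOKEN environment variable (its definition is not shown in the diff), and the trailing return is added purely for illustration.

import os

import torch
import transformers

# Assumption: the token is read from the environment; the diff does not
# show where app.py actually defines hf_token.
hf_token = os.environ.get("HF_TOKEN")

def load_model(model_name, eight_bit=0, device_map="auto"):
    gpu_count = torch.cuda.device_count()
    print('gpu_count', gpu_count)

    # Passing token= authenticates the download against the Hugging Face
    # Hub, which gated checkpoints such as the Llama weights require.
    tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name, token=hf_token)
    model = transformers.LlamaForCausalLM.from_pretrained(
        model_name,
        #device_map=device_map,
        # ... lines 38-41 of app.py fall outside the diff context ...
        low_cpu_mem_usage=True,
        load_in_8bit=False,
        cache_dir="cache",
        token=hf_token
    ).cuda()

    generator = model.generate
    return tokenizer, model, generator  # return added for illustration

Note that token= is the current name of this parameter; older transformers releases used use_auth_token= instead, so the pinned transformers version matters. Alternatively, setting the HF_TOKEN environment variable (or running huggingface-cli login) lets transformers pick up the credential automatically without passing it explicitly.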