Muhammadidrees committed on
Commit
99c1f78
·
verified ·
1 Parent(s): a1ad673

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -6,6 +6,14 @@ import torch
6
  from accelerate import Accelerator
7
  import accelerate
8
  import time
 
 
 
 
 
 
 
 
9
 
10
  model = None
11
  tokenizer = None
@@ -23,7 +31,7 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
23
  gpu_count = torch.cuda.device_count()
24
  print('gpu_count', gpu_count)
25
 
26
- tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name)
27
  model = transformers.LlamaForCausalLM.from_pretrained(
28
  model_name,
29
  #device_map=device_map,
@@ -33,7 +41,8 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
33
  #load_in_8bit=eight_bit,
34
  low_cpu_mem_usage=True,
35
  load_in_8bit=False,
36
- cache_dir="cache"
 
37
  ).cuda()
38
 
39
  generator = model.generate
 
6
  from accelerate import Accelerator
7
  import accelerate
8
  import time
9
+ from huggingface_hub import login
10
+
11
+
12
+
13
+ # 🟢 (Option 2 - recommended for Render/Colab)
14
+ # Save your token as an environment variable called HUGGINGFACE_HUB_TOKEN
15
+ # Then this will automatically pick it up:
16
+ hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
17
 
18
  model = None
19
  tokenizer = None
 
31
  gpu_count = torch.cuda.device_count()
32
  print('gpu_count', gpu_count)
33
 
34
+ tokenizer = transformers.LlamaTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
35
  model = transformers.LlamaForCausalLM.from_pretrained(
36
  model_name,
37
  #device_map=device_map,
 
41
  #load_in_8bit=eight_bit,
42
  low_cpu_mem_usage=True,
43
  load_in_8bit=False,
44
+ cache_dir="cache",
45
+ use_auth_token=hf_token
46
  ).cuda()
47
 
48
  generator = model.generate