jiminaa committed on
Commit
e22b2cf
·
1 Parent(s): 327245d
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +5 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py CHANGED
@@ -8,6 +8,9 @@ from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
  import json
10
  from typing import List, Literal
 
 
 
11
 
12
  MODEL = "meta-llama/Llama-3.2-1B-Instruct"
13
 
@@ -16,6 +19,7 @@ app = FastAPI()
16
  # base model and tokenizer
17
  base_model = AutoModelForCausalLM.from_pretrained(
18
  MODEL,
 
19
  torch_dtype=torch.float32, #huggingface free tier only has cpu
20
  device_map="cpu",
21
  low_cpu_mem_usage=True
@@ -23,7 +27,7 @@ base_model = AutoModelForCausalLM.from_pretrained(
23
 
24
  base_model.config.use_cache = True
25
 
26
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
27
 
28
  if tokenizer.pad_token is None:
29
  tokenizer.pad_token = tokenizer.eos_token
 
8
  from pydantic import BaseModel
9
  import json
10
  from typing import List, Literal
11
+ import os
12
+
13
+ HF_TOKEN = os.getenv("HF_TOKEN")
14
 
15
  MODEL = "meta-llama/Llama-3.2-1B-Instruct"
16
 
 
19
  # base model and tokenizer
20
  base_model = AutoModelForCausalLM.from_pretrained(
21
  MODEL,
22
+ token=HF_TOKEN,
23
  torch_dtype=torch.float32, #huggingface free tier only has cpu
24
  device_map="cpu",
25
  low_cpu_mem_usage=True
 
27
 
28
  base_model.config.use_cache = True
29
 
30
+ tokenizer = AutoTokenizer.from_pretrained(MODEL, token=HF_TOKEN)
31
 
32
  if tokenizer.pad_token is None:
33
  tokenizer.pad_token = tokenizer.eos_token