AndaiMD committed
Commit 57d206e · 1 Parent(s): c7256c1
Files changed (2)
  1. Dockerfile +1 -1
  2. app/model_loader.py +45 -44
Dockerfile CHANGED
@@ -1,4 +1,4 @@
-FROM python:3.9
+FROM python:3.10
 
 WORKDIR /code
 
app/model_loader.py CHANGED
@@ -1,43 +1,13 @@
 
-# import os
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# from peft import PeftModel
-
-# # Define a safe cache location
-# HF_CACHE = "/tmp/huggingface_cache"
-# os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
-# os.environ["HF_HOME"] = HF_CACHE
-# os.makedirs(HF_CACHE, exist_ok=True)
-
-# def load_model():
-#     hf_token = os.getenv("HF_TOKEN")
-#     if not hf_token:
-#         raise RuntimeError("HF_TOKEN not set.")
-
-#     base_model = AutoModelForCausalLM.from_pretrained(
-#         "meta-llama/Llama-2-7b-chat-hf",
-#         use_auth_token=hf_token,
-#         cache_dir="/tmp/hf_cache",
-#         torch_dtype="auto",
-#         device_map="auto"
-#     )
-#     model = PeftModel.from_pretrained(
-#         base_model,
-#         "BrainGPT/BrainGPT-7B-v0.1",
-#         use_auth_token=hf_token,
-#         cache_dir="/tmp/hf_cache"
-#     )
-#     tokenizer = AutoTokenizer.from_pretrained(
-#         "meta-llama/Llama-2-7b-chat-hf",
-#         use_auth_token=hf_token,
-#         cache_dir="/tmp/hf_cache"
-#     )
-#     return model, tokenizer
-
 import os
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 
 def load_model():
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise RuntimeError("HF_TOKEN not set.")
+
     # Use a user-writable cache directory (important for Docker non-root)
     HF_CACHE = os.path.expanduser("~/.cache/huggingface")
     os.makedirs(HF_CACHE, exist_ok=True)
@@ -45,16 +15,47 @@ def load_model():
     os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
     os.environ["HF_HOME"] = HF_CACHE
 
-    model_name = "gpt2"
-
+    base_model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-2-7b-chat-hf",
+        use_auth_token=hf_token,
+        cache_dir="/tmp/hf_cache",
+        torch_dtype="auto",
+        device_map="auto"
+    )
+    model = PeftModel.from_pretrained(
+        base_model,
+        "BrainGPT/BrainGPT-7B-v0.1",
+        use_auth_token=hf_token,
+        cache_dir="/tmp/hf_cache"
+    )
     tokenizer = AutoTokenizer.from_pretrained(
-        model_name,
-        cache_dir=HF_CACHE
+        "meta-llama/Llama-2-7b-chat-hf",
+        use_auth_token=hf_token,
+        cache_dir="/tmp/hf_cache"
     )
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        cache_dir=HF_CACHE
-    )
+    return model, tokenizer
 
-    return model, tokenizer
+# import os
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# def load_model():
+#     # Use a user-writable cache directory (important for Docker non-root)
+#     HF_CACHE = os.path.expanduser("~/.cache/huggingface")
+#     os.makedirs(HF_CACHE, exist_ok=True)
+
+#     os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
+#     os.environ["HF_HOME"] = HF_CACHE
+
+#     model_name = "gpt2"
+
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         model_name,
+#         cache_dir=HF_CACHE
+#     )
+
+#     model = AutoModelForCausalLM.from_pretrained(
+#         model_name,
+#         cache_dir=HF_CACHE
+#     )
+
+#     return model, tokenizer
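For readers trying out the loader this commit switches on, here is a minimal usage sketch. It is not part of the commit: the `app.model_loader` import path is assumed from the repo layout, and the prompt and generation settings are illustrative choices.

```python
# Hypothetical usage sketch, not code from this repository.
# Assumes HF_TOKEN is exported, access to meta-llama/Llama-2-7b-chat-hf has
# been granted on the Hub, and there is enough memory for the 7B base model
# plus the BrainGPT PEFT adapter.
from app.model_loader import load_model  # import path assumed from repo layout

model, tokenizer = load_model()

prompt = "Question: What does the hippocampus do?\nAnswer:"  # illustrative
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# max_new_tokens=128 is an illustrative choice, not a value from the commit.
output_ids = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```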