AndaiMD committed
Commit 5afad7e · 1 Parent(s): 7b23d6d
Files changed (2):
  1. Dockerfile +1 -2
  2. app/model_loader.py +47 -47
Dockerfile CHANGED
@@ -5,8 +5,7 @@ WORKDIR /code
 COPY ./requirements.txt /code/requirements.txt
 
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-RUN apt-get update && apt-get install -y libbitsandbytes-dev
-
+# RUN apt-get update && apt-get install -y libbitsandbytes-dev
 
 RUN useradd -m -u 1000 user
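A note on the dropped apt line: bitsandbytes is distributed on PyPI and is normally pulled in through requirements.txt by the pip step above rather than through apt, which is presumably why the `libbitsandbytes-dev` install is being disabled here. A minimal sketch of a startup check (a hypothetical helper, not part of this commit) that fails fast if 8-bit loading is expected but the library never made it into the image:

    # Hypothetical sanity check: confirm bitsandbytes is importable before
    # any from_pretrained(..., load_in_8bit=True) call is attempted.
    import importlib.util

    def has_bitsandbytes() -> bool:
        # find_spec returns None when the module is not installed
        return importlib.util.find_spec("bitsandbytes") is not None

    if __name__ == "__main__":
        print("bitsandbytes available:", has_bitsandbytes())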
 
app/model_loader.py CHANGED
@@ -1,46 +1,13 @@
 
-import os
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-
-def load_model():
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        raise RuntimeError("HF_TOKEN not set.")
-
-    # Use a user-writable cache directory (important for Docker non-root)
-    HF_CACHE = os.path.expanduser("~/.cache/huggingface")
-    os.makedirs(HF_CACHE, exist_ok=True)
-
-    os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
-    os.environ["HF_HOME"] = HF_CACHE
-
-    base_model = AutoModelForCausalLM.from_pretrained(
-        "meta-llama/Llama-2-7b-chat-hf",
-        use_auth_token=hf_token,
-        cache_dir="/tmp/hf_cache",
-        torch_dtype="auto",
-        device_map="auto",
-        load_in_8bit=True # <-- Try enabling 8-bit
-    )
-    model = PeftModel.from_pretrained(
-        base_model,
-        "BrainGPT/BrainGPT-7B-v0.1",
-        use_auth_token=hf_token,
-        cache_dir="/tmp/hf_cache"
-    )
-    tokenizer = AutoTokenizer.from_pretrained(
-        "meta-llama/Llama-2-7b-chat-hf",
-        use_auth_token=hf_token,
-        cache_dir="/tmp/hf_cache"
-    )
-    return model, tokenizer
-
-## GPT 2 Model
 # import os
 # from transformers import AutoModelForCausalLM, AutoTokenizer
+# from peft import PeftModel
 
 # def load_model():
+#     hf_token = os.getenv("HF_TOKEN")
+#     if not hf_token:
+#         raise RuntimeError("HF_TOKEN not set.")
+
 #     # Use a user-writable cache directory (important for Docker non-root)
 #     HF_CACHE = os.path.expanduser("~/.cache/huggingface")
 #     os.makedirs(HF_CACHE, exist_ok=True)
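The 8-bit path removed above relies on the legacy `load_in_8bit=True` keyword and the deprecated `use_auth_token` argument; recent `transformers` releases route quantization through `BitsAndBytesConfig` and authentication through `token`. If the Llama-2 loader is ever revived, a hedged sketch of the modern equivalent (same model IDs as above, assuming a current transformers with bitsandbytes installed):

    import os
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    def load_llama_8bit():
        # token= replaces the deprecated use_auth_token=
        hf_token = os.getenv("HF_TOKEN")
        # quantization_config replaces the bare load_in_8bit=True kwarg
        quant = BitsAndBytesConfig(load_in_8bit=True)
        return AutoModelForCausalLM.from_pretrained(
            "meta-llama/Llama-2-7b-chat-hf",
            token=hf_token,
            quantization_config=quant,
            device_map="auto",
        )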
@@ -48,16 +15,49 @@ def load_model():
 #     os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
 #     os.environ["HF_HOME"] = HF_CACHE
 
-#     model_name = "gpt2"
-
+#     base_model = AutoModelForCausalLM.from_pretrained(
+#         "meta-llama/Llama-2-7b-chat-hf",
+#         use_auth_token=hf_token,
+#         cache_dir="/tmp/hf_cache",
+#         torch_dtype="auto",
+#         device_map="auto",
+#         load_in_8bit=True # <-- Try enabling 8-bit
+#     )
+#     model = PeftModel.from_pretrained(
+#         base_model,
+#         "BrainGPT/BrainGPT-7B-v0.1",
+#         use_auth_token=hf_token,
+#         cache_dir="/tmp/hf_cache"
+#     )
 #     tokenizer = AutoTokenizer.from_pretrained(
-#         model_name,
-#         cache_dir=HF_CACHE
+#         "meta-llama/Llama-2-7b-chat-hf",
+#         use_auth_token=hf_token,
+#         cache_dir="/tmp/hf_cache"
 #     )
+#     return model, tokenizer
 
-#     model = AutoModelForCausalLM.from_pretrained(
-#         model_name,
-#         cache_dir=HF_CACHE
-#     )
+## GPT 2 Model
+import os
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-#     return model, tokenizer
+def load_model():
+    # Use a user-writable cache directory (important for Docker non-root)
+    HF_CACHE = os.path.expanduser("~/.cache/huggingface")
+    os.makedirs(HF_CACHE, exist_ok=True)
+
+    os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
+    os.environ["HF_HOME"] = HF_CACHE
+
+    model_name = "gpt2"
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name,
+        cache_dir=HF_CACHE
+    )
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        cache_dir=HF_CACHE
+    )
+
+    return model, tokenizer
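For completeness, a small usage sketch of the now-active GPT-2 loader; the import path, prompt, and generation settings are illustrative only and are not part of this commit:

    # Usage sketch: assumes this file is importable as app.model_loader.
    from app.model_loader import load_model

    model, tokenizer = load_model()
    inputs = tokenizer("Hello, I'm a language model,", return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))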