ArthurLin committed on
Commit 19a3384 · verified · 1 Parent(s): 83b2e38

Update model.py

Files changed (1)
  1. model.py +2 -16
model.py CHANGED
@@ -12,21 +12,7 @@ bnb_config = BitsAndBytesConfig(
     llm_int8_skip_modules=None
 )
 
-def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
-    # Do not pass a torch.device into pipeline; use device_map="auto" instead
-    pipe = pipeline(
-        "text-generation",
-        model=model_path,
-        model_kwargs={
-            "quantization_config": bnb_config,
-            "device_map": "auto",
-            "torch_dtype": torch.float16
-        },
-        token=hf_token
-    )
-    return pipe
 
-'''
 def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -34,10 +20,10 @@ def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
         "text-generation",
         model=model_path,
         model_kwargs={"torch_dtype": torch.float16} if torch.cuda.is_available() else {},
-        quantization_config=bnb_config,
+        # quantization_config=bnb_config,
         device=device,
         token=hf_token
     )
     return pipe
-'''
+
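
For context, a minimal usage sketch of the loader this commit keeps. It assumes model.py supplies the real hf_token and the transformers/torch imports; the token value, prompt, and max_new_tokens below are placeholders added for illustration, not part of the commit.

import torch
from transformers import pipeline

hf_token = "hf_..."  # placeholder; model.py defines the real token

def load_model(model_path="meta-llama/Meta-Llama-3-8B-Instruct"):
    # Pick an explicit device instead of device_map="auto"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pipe = pipeline(
        "text-generation",
        model=model_path,
        # fp16 weights only when a GPU is available
        model_kwargs={"torch_dtype": torch.float16} if torch.cuda.is_available() else {},
        # quantization_config=bnb_config,  # left disabled by this commit
        device=device,
        token=hf_token
    )
    return pipe

pipe = load_model()
print(pipe("Hello!", max_new_tokens=20)[0]["generated_text"])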