emya committed on
Commit
8b0674e
·
1 Parent(s): f87d353

use quantization config

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
import gradio as gr
import torch
# PeftModel comes from the `peft` package — it was referenced but never
# imported in the original, which would raise NameError at runtime.
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)

# 8-bit quantization config.
# - load_in_8bit belongs inside BitsAndBytesConfig (passing it separately to
#   from_pretrained alongside quantization_config is redundant/deprecated).
# - The correct flag is llm_int8_enable_fp32_cpu_offload; the original used a
#   non-existent "fp16" variant, which BitsAndBytesConfig does not accept.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

model_name = "lmsys/vicuna-7b-v1.5"

# Load the quantized base model. device_map="auto" lets accelerate place
# layers on available devices; with fp32 CPU offload enabled, modules that do
# not fit on the GPU are kept on CPU in float32 (int8 layers cannot run on
# CPU, so device_map="cpu" would fail here).
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    device_map="auto",
    quantization_config=quantization_config,
)

# Attach the fine-tuned LoRA adapter on top of the quantized base model.
# Note: load_in_8bit is not a PeftModel.from_pretrained argument — the base
# model is already quantized, so no extra flag is needed here.
new_model = "emya/vicuna-7b-v1.5-steve-jobs-8bit-v1"
model = PeftModel.from_pretrained(base_model, new_model)