DimasMP3 committed
Commit 959b2d1 · 1 Parent(s): c6791fe

feat: Configure model loading with `BitsAndBytesConfig` for 4-bit quantization.

Files changed (1)
  1. app.py +11 -3
app.py CHANGED
@@ -1,16 +1,24 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 from threading import Thread
 
 MODEL_ID = "DimasMP3/qwen2.5-math-finetuned-7b"
 
+
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16
+)
+
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float16,
+    quantization_config=bnb_config,
     device_map="auto",
-    load_in_4bit=True,
     low_cpu_mem_usage=True
 )
 
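
Since app.py already imports TextIteratorStreamer and Thread, the quantized model is presumably driven with streamed generation elsewhere in the file. The snippet below is a minimal, illustrative sketch (not part of this commit) of how the 4-bit model loaded above could be used that way; the prompt text and generation parameters are assumptions, and 4-bit loading additionally requires a CUDA GPU with the bitsandbytes and accelerate packages installed.

# Illustrative only: streamed generation with the 4-bit quantized model,
# reusing the `tokenizer` and `model` objects created in app.py.
from threading import Thread
from transformers import TextIteratorStreamer

# Example prompt; the real app would build its messages from user input.
messages = [{"role": "user", "content": "What is the derivative of x^2?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stream decoded tokens as they are produced instead of waiting for the full output.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=512)

# generate() blocks, so run it in a background thread and consume the streamer here.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()

Routing the settings through quantization_config keeps the 4-bit options (NF4, double quantization, float16 compute dtype) in one object, and recent transformers releases steer users toward this form rather than the bare load_in_4bit flag on from_pretrained.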