calvinchaochao committed on
Commit
8ab373d
·
1 Parent(s): d652d06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig
3
  from transformers.generation import GenerationConfig
4
  quantization_config = BitsAndBytesConfig(
5
  load_in_4bit=True,
6
- bnb_4bit_quant_type='nf4',
7
  bnb_4bit_compute_dtype=torch.bfloat16)
8
  # Note: The default behavior now has injection attack prevention off.
9
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
 
3
  from transformers.generation import GenerationConfig
4
  quantization_config = BitsAndBytesConfig(
5
  load_in_4bit=True,
6
+ bnb_4bit_quant_type='int8',
7
  bnb_4bit_compute_dtype=torch.bfloat16)
8
  # Note: The default behavior now has injection attack prevention off.
9
  tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)