eduard76 committed on
Commit
8aa7a71
·
verified ·
1 Parent(s): bd0ad23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -4,18 +4,18 @@ import gradio as gr
4
 
5
  model_id = "eduard76/Llama3-8b-good-new"
6
 
7
- quant_config = BitsAndBytesConfig(
8
- load_in_4bit=True,
9
- bnb_4bit_compute_dtype=torch.float16,
10
- bnb_4bit_use_double_quant=True,
11
- bnb_4bit_quant_type="nf4"
12
- )
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
15
  model = AutoModelForCausalLM.from_pretrained(
16
  model_id,
17
  device_map="auto",
18
- torch_dtype=torch.float16,
19
  quantization_config=quant_config,
20
  trust_remote_code=True
21
  )
@@ -40,7 +40,7 @@ AI:"""
40
 
41
  response = pipe(
42
  prompt,
43
- max_new_tokens=200,
44
  do_sample=False,
45
  temperature=0.0,
46
  repetition_penalty=1.2,
 
4
 
5
  model_id = "eduard76/Llama3-8b-good-new"
6
 
7
+ #quant_config = BitsAndBytesConfig(
8
+ # load_in_4bit=True,
9
+ # bnb_4bit_compute_dtype=torch.float16,
10
+ # bnb_4bit_use_double_quant=True,
11
+ # bnb_4bit_quant_type="nf4"
12
+ #)
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
15
  model = AutoModelForCausalLM.from_pretrained(
16
  model_id,
17
  device_map="auto",
18
+ #torch_dtype=torch.float16,
19
  quantization_config=quant_config,
20
  trust_remote_code=True
21
  )
 
40
 
41
  response = pipe(
42
  prompt,
43
+ max_new_tokens=256,
44
  do_sample=False,
45
  temperature=0.0,
46
  repetition_penalty=1.2,