YCWTG committed on
Commit
cc43e88
·
verified ·
1 Parent(s): 6cab9f6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -180,8 +180,8 @@ def chat_loop(model, tokenizer):
180
  input_ids=input_ids,
181
  max_new_tokens=max_new_tokens,
182
  do_sample=True,
183
- temperature=0.9,
184
- top_p=0.9,
185
  top_k=40,
186
  use_cache=False,
187
  pad_token_id=tokenizer.pad_token_id,
@@ -222,15 +222,15 @@ layer_config = {}
222
  for i in range(48): # 48 layers
223
  prefix = f"model.layers.{i}"
224
 
225
- # Attention layers -> 8-bit
226
  if i in [3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47]: # self_attn layers
227
  for proj in ["q_proj", "k_proj", "v_proj", "o_proj"]:
228
  layer_config[f"{prefix}.self_attn.{proj}"] = {"bits": 16}
229
- else: # linear_attn layers
230
  for proj in ["in_proj_qkvz", "in_proj_ba", "out_proj"]:
231
  layer_config[f"{prefix}.linear_attn.{proj}"] = {"bits": 16}
232
 
233
- # MLP gate -> 8-bit
234
  layer_config[f"{prefix}.mlp.gate"] = {"bits": 16}
235
 
236
  # shared_expert_gate -> 16-bit (skipped)
 
180
  input_ids=input_ids,
181
  max_new_tokens=max_new_tokens,
182
  do_sample=True,
183
+ temperature=1.0,
184
+ top_p=0.95,
185
  top_k=40,
186
  use_cache=False,
187
  pad_token_id=tokenizer.pad_token_id,
 
222
  for i in range(48): # 48 layers
223
  prefix = f"model.layers.{i}"
224
 
225
+ # Attention layers -> 16-bit
226
  if i in [3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47]: # self_attn layers
227
  for proj in ["q_proj", "k_proj", "v_proj", "o_proj"]:
228
  layer_config[f"{prefix}.self_attn.{proj}"] = {"bits": 16}
229
+ else: # linear_attn layers -> 16-bit
230
  for proj in ["in_proj_qkvz", "in_proj_ba", "out_proj"]:
231
  layer_config[f"{prefix}.linear_attn.{proj}"] = {"bits": 16}
232
 
233
+ # MLP gate -> 16-bit
234
  layer_config[f"{prefix}.mlp.gate"] = {"bits": 16}
235
 
236
  # shared_expert_gate -> 16-bit (skipped)