tuandunghcmut committed on
Commit
626a9f3
·
verified ·
1 Parent(s): 4c137c1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -2
README.md CHANGED
@@ -57,14 +57,12 @@ tokenizer = AutoTokenizer.from_pretrained(
57
  trust_remote_code=True
58
  )
59
 
60
- # IMPORTANT: Load with flash-attention for correct behavior
61
  model, tokenizer = FastLanguageModel.from_pretrained(
62
  model_name=model_id,
63
  token=hf_token,
64
  max_seq_length=2048, # Adjust based on your memory constraints
65
  dtype=None, # Auto-detect best dtype
66
  load_in_4bit=True, # Use 4-bit quantization for efficiency
67
- use_flash_attention=True # REQUIRED for correct results
68
  )
69
 
70
  # Enable fast inference mode
 
57
  trust_remote_code=True
58
  )
59
 
 
60
  model, tokenizer = FastLanguageModel.from_pretrained(
61
  model_name=model_id,
62
  token=hf_token,
63
  max_seq_length=2048, # Adjust based on your memory constraints
64
  dtype=None, # Auto-detect best dtype
65
  load_in_4bit=True, # Use 4-bit quantization for efficiency
 
66
  )
67
 
68
  # Enable fast inference mode