tuandunghcmut
/

Qwen25_Coder_MultipleChoice

Text Generation

Model card · Files and versions

tuandunghcmut committed on Mar 31, 2025

Commit

626a9f3

·

verified ·

1 Parent(s): 4c137c1

Update README.md

Files changed (1) hide show

README.md +0 -2

README.md CHANGED Viewed

@@ -57,14 +57,12 @@ tokenizer = AutoTokenizer.from_pretrained(
     trust_remote_code=True
 )
-# IMPORTANT: Load with flash-attention for correct behavior
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_id,
     token=hf_token,
     max_seq_length=2048,  # Adjust based on your memory constraints
     dtype=None,  # Auto-detect best dtype
     load_in_4bit=True,  # Use 4-bit quantization for efficiency
-    use_flash_attention=True  # REQUIRED for correct results
 )
 # Enable fast inference mode

     trust_remote_code=True
 )
 model, tokenizer = FastLanguageModel.from_pretrained(
     model_name=model_id,
     token=hf_token,
     max_seq_length=2048,  # Adjust based on your memory constraints
     dtype=None,  # Auto-detect best dtype
     load_in_4bit=True,  # Use 4-bit quantization for efficiency
 )
 # Enable fast inference mode