Spaces:

saravanatanjiro
/

Openenv

Paused

kavin57447 committed on Apr 25

Commit

e9dea07

1 Parent(s): 332efeb

Replace flash-attn with PyTorch built-in SDPA (no CUDA compile needed)

Files changed (2) hide show

cloud_arena/llm_training.py CHANGED Viewed

@@ -183,7 +183,7 @@ def train_llm(model_name="meta-llama/Llama-3.1-8B",
     tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
     model = AutoModelForCausalLM.from_pretrained(
         model_name, torch_dtype=torch.bfloat16, token=hf_token,
-        attn_implementation="flash_attention_2",
     ).to(DEVICE)
     lora_config = LoraConfig(

     tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
     model = AutoModelForCausalLM.from_pretrained(
         model_name, torch_dtype=torch.bfloat16, token=hf_token,
+        attn_implementation="sdpa",  # PyTorch built-in, no flash-attn package needed
     ).to(DEVICE)
     lora_config = LoraConfig(

requirements.txt CHANGED Viewed

@@ -14,4 +14,3 @@ peft==0.12.0
 accelerate==0.33.0
 bitsandbytes>=0.43.0
 sentencepiece
-flash-attn>=2.5.0

 accelerate==0.33.0
 bitsandbytes>=0.43.0
 sentencepiece