peterproofpath committed on
Commit
c53b700
·
verified ·
1 Parent(s): 784a62f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +1 -1
handler.py CHANGED
@@ -57,7 +57,7 @@ class EndpointHandler:
57
  model_id,
58
  trust_remote_code=True,
59
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
60
- attn_implementation="flash_attention_2" if torch.cuda.is_available() else "sdpa",
61
  device_map="auto" if torch.cuda.is_available() else None,
62
  token=hf_token,
63
  )
 
57
  model_id,
58
  trust_remote_code=True,
59
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
60
+ attn_implementation="sdpa", # Use SDPA - flash_attn not installed in HF containers
61
  device_map="auto" if torch.cuda.is_available() else None,
62
  token=hf_token,
63
  )