Spaces:
Running
Running
Patryk Studzinski
committed on
Commit
·
9153886
1
Parent(s):
eaa2e37
Fix: Remove unsupported use_xformers_attention parameter
Browse files
app/models/huggingface_local.py
CHANGED
|
@@ -149,7 +149,6 @@ class HuggingFaceLocal(BaseLLM):
|
|
| 149 |
temperature=temperature,
|
| 150 |
top_p=top_p,
|
| 151 |
use_cache=True, # CRITICAL: Enable KV cache
|
| 152 |
-
use_xformers_attention=False, # CPU doesn't support this
|
| 153 |
eos_token_id=self.tokenizer.eos_token_id,
|
| 154 |
pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.pad_token_id is None else self.tokenizer.pad_token_id,
|
| 155 |
)
|
|
|
|
| 149 |
temperature=temperature,
|
| 150 |
top_p=top_p,
|
| 151 |
use_cache=True, # CRITICAL: Enable KV cache
|
|
|
|
| 152 |
eos_token_id=self.tokenizer.eos_token_id,
|
| 153 |
pad_token_id=self.tokenizer.eos_token_id if self.tokenizer.pad_token_id is None else self.tokenizer.pad_token_id,
|
| 154 |
)
|