Spaces:
Build error
Build error
updated with Flashattention
Browse files
app.py
CHANGED
|
@@ -16,6 +16,7 @@ token = os.environ["HF_TOKEN"]
|
|
| 16 |
|
| 17 |
|
| 18 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 19 |
"microsoft/Phi-3-mini-128k-instruct", token=token,trust_remote_code=True
|
| 20 |
)
|
| 21 |
tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
# Load the Phi-3 mini 128k instruct checkpoint with FlashAttention-2 enabled.
# The repo id is the only positional argument, so it must come first: Python
# raises a SyntaxError when a positional argument follows keyword arguments.
# NOTE(review): attn_implementation="flash_attention_2" presumably requires the
# flash-attn package, a CUDA GPU and an fp16/bf16 dtype at runtime — confirm
# against the Space's hardware and requirements before relying on it.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    token=token,
    trust_remote_code=True,
    use_cache=False,
    attn_implementation="flash_attention_2",
)
|
| 22 |
tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)
|