Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,3 @@
|
|
| 1 |
-
import subprocess
|
| 2 |
-
subprocess.run(
|
| 3 |
-
'pip install flash-attn --no-build-isolation',
|
| 4 |
-
env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
|
| 5 |
-
shell=True
|
| 6 |
-
)
|
| 7 |
import os
|
| 8 |
import time
|
| 9 |
import spaces
|
|
@@ -43,7 +37,6 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
|
| 43 |
model = AutoModelForCausalLM.from_pretrained(
|
| 44 |
MODEL,
|
| 45 |
torch_dtype=torch.bfloat16,
|
| 46 |
-
attn_implementation="flash_attention_2",
|
| 47 |
device_map="auto",
|
| 48 |
ignore_mismatched_sizes=True)
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import time
|
| 3 |
import spaces
|
|
|
|
| 37 |
model = AutoModelForCausalLM.from_pretrained(
|
| 38 |
MODEL,
|
| 39 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 40 |
device_map="auto",
|
| 41 |
ignore_mismatched_sizes=True)
|
| 42 |
|