Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -61,9 +61,9 @@ vqllm = AutoModelForCausalLM.from_pretrained(
     model_id,
     attn_implementation='flash_attention_2',
     torch_dtype=torch.bfloat16,
-    load_in_8bit=True,
-    max_memory={0: "40GiB" },
-)
+    # load_in_8bit=True,
+    # max_memory={0: "40GiB" },
+).to("cuda:0")
 
 stop_flag = False
 