Tim Luka Horstmann committed on
Commit ·
a79e01b
1
Parent(s): 09c93a8
Faster model
Browse files
app.py
CHANGED
|
@@ -80,11 +80,13 @@ try:
|
|
| 80 |
model_path=model_path,
|
| 81 |
n_ctx=3072,
|
| 82 |
n_threads=2,
|
| 83 |
-
n_batch=
|
| 84 |
n_gpu_layers=0,
|
| 85 |
use_mlock=True,
|
| 86 |
f16_kv=True,
|
| 87 |
verbose=True,
|
|
|
|
|
|
|
| 88 |
)
|
| 89 |
logger.info(f"{filename} model loaded")
|
| 90 |
|
|
|
|
| 80 |
model_path=model_path,
|
| 81 |
n_ctx=3072,
|
| 82 |
n_threads=2,
|
| 83 |
+
n_batch=64,
|
| 84 |
n_gpu_layers=0,
|
| 85 |
use_mlock=True,
|
| 86 |
f16_kv=True,
|
| 87 |
verbose=True,
|
| 88 |
+
batch_prefill=True,
|
| 89 |
+
prefill_logits=False,
|
| 90 |
)
|
| 91 |
logger.info(f"{filename} model loaded")
|
| 92 |
|