Spaces:
Sleeping
Sleeping
handle batched response for inference
Browse files
- gemmademo/_model.py +1 -0
gemmademo/_model.py
CHANGED
@@ -101,6 +101,7 @@ class LlamaCppGemmaModel:
 n_threads=os.cpu_count(),
 n_ctx=n_ctx,
 n_gpu_layers=n_gpu_layers,
+n_batch=8,
 )
 return self
 