aadya1762 committed on
Commit
28295c6
·
1 Parent(s): b709bb5

handle batched response for inference

Browse files
Files changed (1) hide show
  1. gemmademo/_model.py +1 -0
gemmademo/_model.py CHANGED
@@ -101,6 +101,7 @@ class LlamaCppGemmaModel:
101
  n_threads=os.cpu_count(),
102
  n_ctx=n_ctx,
103
  n_gpu_layers=n_gpu_layers,
 
104
  )
105
  return self
106
 
 
101
  n_threads=os.cpu_count(),
102
  n_ctx=n_ctx,
103
  n_gpu_layers=n_gpu_layers,
104
+ n_batch=8,
105
  )
106
  return self
107