visamram02 committed on
Commit
6299d73
·
verified ·
1 Parent(s): 9ac4dcc

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -11,8 +11,10 @@ model_path = "model.gguf"
11
  print(f"Loading model from {model_path}...")
12
  llm = Llama(
13
  model_path=model_path,
14
- n_ctx=4096,
15
- n_threads=4,
 
 
16
  verbose=False
17
  )
18
 
 
11
  print(f"Loading model from {model_path}...")
12
  llm = Llama(
13
  model_path=model_path,
14
+ n_ctx=1024, # Drastically reduced context size (saves memory/time on CPU)
15
+ n_threads=8, # Maximize all available vCPUs
16
+ n_threads_batch=8, # Speed up prompt processing
17
+ n_batch=256, # Optimize batch size for prompt evaluation
18
  verbose=False
19
  )
20