devray11 committed on
Commit
4d7f8c5
·
verified ·
1 Parent(s): 0dd1fb3

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +3 -3
main.py CHANGED
@@ -32,9 +32,9 @@ def load_model():
32
 
33
  llm = Llama(
34
  model_path=model_path,
35
- n_ctx=512, # Reduced for low RAM
36
  n_threads=2, # HF free CPU = 2 cores
37
- n_batch=128,
38
  use_mmap=True,
39
  use_mlock=False
40
  )
@@ -64,7 +64,7 @@ async def generate(query: Query):
64
  try:
65
  output = llm(
66
  f"### Instruction:\n{query.prompt}\n\n### Response:\n",
67
- max_tokens=128, # Reduced for speed
68
  stop=["###"],
69
  echo=False
70
  )
 
32
 
33
  llm = Llama(
34
  model_path=model_path,
35
+ n_ctx=128, # Reduced for low RAM
36
  n_threads=2, # HF free CPU = 2 cores
37
+ n_batch=16,
38
  use_mmap=True,
39
  use_mlock=False
40
  )
 
64
  try:
65
  output = llm(
66
  f"### Instruction:\n{query.prompt}\n\n### Response:\n",
67
+ max_tokens=64, # Reduced for speed
68
  stop=["###"],
69
  echo=False
70
  )