kenlkehl committed on
Commit
edce79d
·
verified ·
1 Parent(s): 2a8c032

Update app.py: raise vLLM gpu_memory_utilization from 0.20 to 0.60 and max_model_len from 10000 to 15000

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -336,8 +336,8 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
- gpu_memory_utilization=0.20,
340
- max_model_len=10000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
343
  return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
 
336
  state.llm_model = LLM(
337
  model=model_path,
338
  tensor_parallel_size=tp_size,
339
+ gpu_memory_utilization=0.60,
340
+ max_model_len=15000
341
  )
342
  state.llm_tokenizer = state.llm_model.get_tokenizer()
343
  return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""