Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -336,8 +336,8 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
|
|
| 336 |
state.llm_model = LLM(
|
| 337 |
model=model_path,
|
| 338 |
tensor_parallel_size=tp_size,
|
| 339 |
-
gpu_memory_utilization=0.
|
| 340 |
-
max_model_len=
|
| 341 |
)
|
| 342 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|
| 343 |
return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
|
|
|
|
| 336 |
state.llm_model = LLM(
|
| 337 |
model=model_path,
|
| 338 |
tensor_parallel_size=tp_size,
|
| 339 |
+
gpu_memory_utilization=0.60,
|
| 340 |
+
max_model_len=15000
|
| 341 |
)
|
| 342 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|
| 343 |
return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
|