Spaces:

kenlkehl
/

mm-ai-demo

Sleeping

kenlkehl committed on Nov 3

Commit

edce79d

verified ·

1 Parent(s): 2a8c032

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -336,8 +336,8 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
             state.llm_model = LLM(
                 model=model_path,
                 tensor_parallel_size=tp_size,
-                gpu_memory_utilization=0.20,
-                max_model_len=10000
             )
             state.llm_tokenizer = state.llm_model.get_tokenizer()
             return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""

             state.llm_model = LLM(
                 model=model_path,
                 tensor_parallel_size=tp_size,
+                gpu_memory_utilization=0.60,
+                max_model_len=15000
             )
             state.llm_tokenizer = state.llm_model.get_tokenizer()
             return f"✓ LLM loaded from {model_path} (vLLM, tp={tp_size})", ""