Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -276,7 +276,7 @@ def load_tagger_model(model_path: str) -> Tuple[str, str]:
|
|
| 276 |
device=0 if state.device == "cuda" else -1,
|
| 277 |
truncation=True,
|
| 278 |
padding="max_length",
|
| 279 |
-
max_length=
|
| 280 |
)
|
| 281 |
return f"β Tagger model loaded from {model_path}", ""
|
| 282 |
except Exception as e:
|
|
@@ -336,8 +336,8 @@ def load_llm_model(model_path: str) -> Tuple[str, str]:
|
|
| 336 |
state.llm_model = LLM(
|
| 337 |
model=model_path,
|
| 338 |
tensor_parallel_size=tp_size,
|
| 339 |
-
gpu_memory_utilization=0.
|
| 340 |
-
max_model_len=
|
| 341 |
)
|
| 342 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|
| 343 |
return f"β LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
|
|
|
|
| 276 |
device=0 if state.device == "cuda" else -1,
|
| 277 |
truncation=True,
|
| 278 |
padding="max_length",
|
| 279 |
+
max_length=128
|
| 280 |
)
|
| 281 |
return f"β Tagger model loaded from {model_path}", ""
|
| 282 |
except Exception as e:
|
|
|
|
| 336 |
state.llm_model = LLM(
|
| 337 |
model=model_path,
|
| 338 |
tensor_parallel_size=tp_size,
|
| 339 |
+
gpu_memory_utilization=0.50,
|
| 340 |
+
max_model_len=15000
|
| 341 |
)
|
| 342 |
state.llm_tokenizer = state.llm_model.get_tokenizer()
|
| 343 |
return f"β LLM loaded from {model_path} (vLLM, tp={tp_size})", ""
|