Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
6a53027
1
Parent(s):
0b82992
refactor: simplify LLM initialization by removing gpu_memory_utilization parameter
Browse files
- generate_summaries_uv.py +1 -3
generate_summaries_uv.py
CHANGED
|
@@ -111,9 +111,7 @@ def generate_summaries(
|
|
| 111 |
|
| 112 |
# Initialize model and tokenizer from local path
|
| 113 |
logger.info(f"Initializing vLLM model from local path: {local_model_path}")
|
| 114 |
-
llm = LLM(
|
| 115 |
-
model=local_model_path, gpu_memory_utilization=0.98, enable_chunked_prefill=True
|
| 116 |
-
)
|
| 117 |
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 118 |
sampling_params = SamplingParams(
|
| 119 |
temperature=temperature,
|
|
|
|
| 111 |
|
| 112 |
# Initialize model and tokenizer from local path
|
| 113 |
logger.info(f"Initializing vLLM model from local path: {local_model_path}")
|
| 114 |
+
llm = LLM(model=local_model_path, enable_chunked_prefill=True)
|
|
|
|
|
|
|
| 115 |
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 116 |
sampling_params = SamplingParams(
|
| 117 |
temperature=temperature,
|