Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,10 +49,13 @@ def load_models():
|
|
| 49 |
logger.info("🚀 Loading vLLM model on GPU...")
|
| 50 |
model = LLM(
|
| 51 |
model=LLASA_MODEL_ID,
|
| 52 |
-
gpu_memory_utilization=0.
|
| 53 |
-
max_model_len=2048,
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
if codec_model is None:
|
|
|
|
| 49 |
logger.info("🚀 Loading vLLM model on GPU...")
|
| 50 |
model = LLM(
|
| 51 |
model=LLASA_MODEL_ID,
|
| 52 |
+
gpu_memory_utilization=0.90,
|
| 53 |
+
max_model_len=2048,
|
| 54 |
+
enable_prefix_caching=True,
|
| 55 |
+
dtype='auto',
|
| 56 |
+
quantization=None,
|
| 57 |
+
enforce_eager=False,
|
| 58 |
+
kv_cache_dtype='auto'
|
| 59 |
)
|
| 60 |
|
| 61 |
if codec_model is None:
|