ASLP-lab committed on
Commit
d4f7955
·
verified ·
1 Parent(s): 07cdf55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -49,10 +49,13 @@ def load_models():
49
  logger.info("🚀 Loading vLLM model on GPU...")
50
  model = LLM(
51
  model=LLASA_MODEL_ID,
52
- gpu_memory_utilization=0.8,
53
- max_model_len=2048,
54
- enforce_eager=True,
55
- device="cuda"
 
 
 
56
  )
57
 
58
  if codec_model is None:
 
49
  logger.info("🚀 Loading vLLM model on GPU...")
50
  model = LLM(
51
  model=LLASA_MODEL_ID,
52
+ gpu_memory_utilization=0.90,
53
+ max_model_len=2048,
54
+ enable_prefix_caching=True,
55
+ dtype='auto',
56
+ quantization=None,
57
+ enforce_eager=False,
58
+ kv_cache_dtype='auto'
59
  )
60
 
61
  if codec_model is None: