Nightwalkx committed on
Commit
d8d5def
·
1 Parent(s): 0cadc91
Files changed (1) hide show
  1. app.py +2 -10
app.py CHANGED
@@ -337,19 +337,11 @@ def http_bot(
337
 
338
 
339
  title_markdown = """
340
- # 🌋 LLaVA: Large Language and Vision Assistant
341
  [[Code]](https://github.com/xi-jiajun/Spatial-LLaVA) [[Model]](https://huggingface.co/rogerxi/Spatial-LLaVA-7B)
342
 
343
  ONLY WORKS WITH GPU!
344
 
345
- You can load the model with 4-bit or 8-bit quantization to make it fit in smaller hardwares. Setting the environment variable `bits` to control the quantization.
346
- *Note: 8-bit seems to be slower than both 4-bit/16-bit. Although it has enough VRAM to support 8-bit, until we figure out the inference speed issue, we recommend 4-bit for A10G for the best efficiency.*
347
-
348
- Recommended configurations:
349
- | Hardware | T4-Small (16G) | A10G-Small (24G) | A100-Large (40G) |
350
- |-------------------|-----------------|------------------|------------------|
351
- | **Bits** | 4 (default) | 4 | 16 |
352
-
353
  """
354
 
355
  tos_markdown = """
@@ -611,7 +603,7 @@ if __name__ == "__main__":
611
  logger.info(f"args: {args}")
612
 
613
  model_path = "rogerxi/Spatial-LLaVA-7B"
614
- bits = int(os.getenv("bits", 8))
615
 
616
  controller_proc = start_controller()
617
  worker_proc = start_worker(model_path, bits=bits)
 
337
 
338
 
339
  title_markdown = """
340
+ # 🗺️ Spatial-LLaVA
341
  [[Code]](https://github.com/xi-jiajun/Spatial-LLaVA) [[Model]](https://huggingface.co/rogerxi/Spatial-LLaVA-7B)
342
 
343
  ONLY WORKS WITH GPU!
344
 
 
 
 
 
 
 
 
 
345
  """
346
 
347
  tos_markdown = """
 
603
  logger.info(f"args: {args}")
604
 
605
  model_path = "rogerxi/Spatial-LLaVA-7B"
606
+ bits = int(os.getenv("bits", 16))
607
 
608
  controller_proc = start_controller()
609
  worker_proc = start_worker(model_path, bits=bits)