Nightwalkx committed on
Commit
cf60d18
·
1 Parent(s): 9f0990f
Files changed (3) hide show
  1. app.py +5 -27
  2. install.sh +0 -4
  3. requirements.txt +2 -5
app.py CHANGED
@@ -558,40 +558,20 @@ def start_worker(model_path: str, bits=16):
558
  worker_command = [
559
  "python",
560
  "-m",
561
- "llava.serve.sglang_worker",
562
  "--host",
563
  "0.0.0.0",
564
  "--controller",
565
  "http://localhost:10000",
566
- "--port",
567
- "30000",
568
- "--worker",
569
- "http://localhost:40000",
570
- "--sgl-endpoint",
571
- "http://127.0.0.1:30000",
572
  ]
573
  if bits != 16:
574
  worker_command += [f"--load-{bits}bit"]
575
  return subprocess.Popen(worker_command)
576
 
577
- def start_sglang_backend_worker(model_path: str, bits=16):
578
- logger.info(f"Starting the model worker for the model {model_path}")
579
- model_name = model_path.strip("/").split("/")[-1]
580
- assert bits in [4, 8, 16], "It can be only loaded with 16-bit, 8-bit, and 4-bit."
581
- if bits != 16:
582
- model_name += f"-{bits}bit"
583
- worker_command = [
584
- "python3",
585
- "-m",
586
- "sglang.launch_server",
587
- "--model-path",
588
- model_path,
589
- "--tokenizer-path",
590
- "llava-hf/llava-1.5-7b-hf",
591
- "--port",
592
- "30000",
593
- ]
594
- return subprocess.Popen(worker_command)
595
 
596
  def get_args():
597
  parser = argparse.ArgumentParser()
@@ -626,7 +606,6 @@ if __name__ == "__main__":
626
  bits = int(os.getenv("bits", 16))
627
 
628
  controller_proc = start_controller()
629
- sglang_backend_worker_proc = start_sglang_backend_worker(model_path, bits=bits)
630
  worker_proc = start_worker(model_path, bits=bits)
631
 
632
  # Wait for worker and controller to start
@@ -640,7 +619,6 @@ if __name__ == "__main__":
640
  exit_status = 1
641
  finally:
642
  worker_proc.kill()
643
- sglang_backend_worker_proc.kill()
644
  controller_proc.kill()
645
 
646
  sys.exit(exit_status)
 
558
  worker_command = [
559
  "python",
560
  "-m",
561
+ "llava.serve.model_worker",
562
  "--host",
563
  "0.0.0.0",
564
  "--controller",
565
  "http://localhost:10000",
566
+ "--model-path",
567
+ model_path,
568
+ "--model-name",
569
+ model_name,
 
 
570
  ]
571
  if bits != 16:
572
  worker_command += [f"--load-{bits}bit"]
573
  return subprocess.Popen(worker_command)
574
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
  def get_args():
577
  parser = argparse.ArgumentParser()
 
606
  bits = int(os.getenv("bits", 16))
607
 
608
  controller_proc = start_controller()
 
609
  worker_proc = start_worker(model_path, bits=bits)
610
 
611
  # Wait for worker and controller to start
 
619
  exit_status = 1
620
  finally:
621
  worker_proc.kill()
 
622
  controller_proc.kill()
623
 
624
  sys.exit(exit_status)
install.sh DELETED
@@ -1,4 +0,0 @@
1
-
2
- #!/usr/bin/env bash
3
- pip uninstall -y numpy
4
- pip install --no-deps --prefer-binary numpy==1.23.5
 
 
 
 
 
requirements.txt CHANGED
@@ -1,19 +1,16 @@
1
  --extra-index-url https://download.pytorch.org/whl/cu121
2
  pip
3
- numpy==1.23.5
4
  einops
5
  fastapi
6
  gradio==3.35.2
7
  markdown2[all]
 
8
  requests
9
  sentencepiece
10
  tokenizers>=0.12.1
11
  torch==2.1.0+cu121
12
  torchvision==0.16.0+cu121
13
- sglang[all]>=0.4.6.post2
14
  uvicorn
15
- uvloop
16
- pyzmq
17
  wandb
18
  shortuuid
19
  httpx==0.24.0
@@ -27,4 +24,4 @@ sentencepiece==0.1.99
27
  einops==0.6.1
28
  einops-exts==0.0.4
29
  timm==0.6.13
30
- gradio_client==0.2.9
 
1
  --extra-index-url https://download.pytorch.org/whl/cu121
2
  pip
 
3
  einops
4
  fastapi
5
  gradio==3.35.2
6
  markdown2[all]
7
+ numpy<2.0.0
8
  requests
9
  sentencepiece
10
  tokenizers>=0.12.1
11
  torch==2.1.0+cu121
12
  torchvision==0.16.0+cu121
 
13
  uvicorn
 
 
14
  wandb
15
  shortuuid
16
  httpx==0.24.0
 
24
  einops==0.6.1
25
  einops-exts==0.0.4
26
  timm==0.6.13
27
+ gradio_client==0.2.9