Spaces:
Sleeping
Sleeping
Nightwalkx
committed on
Commit
·
cf60d18
1
Parent(s):
9f0990f
update
Browse files- app.py +5 -27
- install.sh +0 -4
- requirements.txt +2 -5
app.py
CHANGED
|
@@ -558,40 +558,20 @@ def start_worker(model_path: str, bits=16):
|
|
| 558 |
worker_command = [
|
| 559 |
"python",
|
| 560 |
"-m",
|
| 561 |
-
"llava.serve.
|
| 562 |
"--host",
|
| 563 |
"0.0.0.0",
|
| 564 |
"--controller",
|
| 565 |
"http://localhost:10000",
|
| 566 |
-
"--
|
| 567 |
-
|
| 568 |
-
"--
|
| 569 |
-
|
| 570 |
-
"--sgl-endpoint",
|
| 571 |
-
"http://127.0.0.1:30000",
|
| 572 |
]
|
| 573 |
if bits != 16:
|
| 574 |
worker_command += [f"--load-{bits}bit"]
|
| 575 |
return subprocess.Popen(worker_command)
|
| 576 |
|
| 577 |
-
def start_sglang_backend_worker(model_path: str, bits=16):
|
| 578 |
-
logger.info(f"Starting the model worker for the model {model_path}")
|
| 579 |
-
model_name = model_path.strip("/").split("/")[-1]
|
| 580 |
-
assert bits in [4, 8, 16], "It can be only loaded with 16-bit, 8-bit, and 4-bit."
|
| 581 |
-
if bits != 16:
|
| 582 |
-
model_name += f"-{bits}bit"
|
| 583 |
-
worker_command = [
|
| 584 |
-
"python3",
|
| 585 |
-
"-m",
|
| 586 |
-
"sglang.launch_server",
|
| 587 |
-
"--model-path",
|
| 588 |
-
model_path,
|
| 589 |
-
"--tokenizer-path",
|
| 590 |
-
"llava-hf/llava-1.5-7b-hf",
|
| 591 |
-
"--port",
|
| 592 |
-
"30000",
|
| 593 |
-
]
|
| 594 |
-
return subprocess.Popen(worker_command)
|
| 595 |
|
| 596 |
def get_args():
|
| 597 |
parser = argparse.ArgumentParser()
|
|
@@ -626,7 +606,6 @@ if __name__ == "__main__":
|
|
| 626 |
bits = int(os.getenv("bits", 16))
|
| 627 |
|
| 628 |
controller_proc = start_controller()
|
| 629 |
-
sglang_backend_worker_proc = start_sglang_backend_worker(model_path, bits=bits)
|
| 630 |
worker_proc = start_worker(model_path, bits=bits)
|
| 631 |
|
| 632 |
# Wait for worker and controller to start
|
|
@@ -640,7 +619,6 @@ if __name__ == "__main__":
|
|
| 640 |
exit_status = 1
|
| 641 |
finally:
|
| 642 |
worker_proc.kill()
|
| 643 |
-
sglang_backend_worker_proc.kill()
|
| 644 |
controller_proc.kill()
|
| 645 |
|
| 646 |
sys.exit(exit_status)
|
|
|
|
| 558 |
worker_command = [
|
| 559 |
"python",
|
| 560 |
"-m",
|
| 561 |
+
"llava.serve.model_worker",
|
| 562 |
"--host",
|
| 563 |
"0.0.0.0",
|
| 564 |
"--controller",
|
| 565 |
"http://localhost:10000",
|
| 566 |
+
"--model-path",
|
| 567 |
+
model_path,
|
| 568 |
+
"--model-name",
|
| 569 |
+
model_name,
|
|
|
|
|
|
|
| 570 |
]
|
| 571 |
if bits != 16:
|
| 572 |
worker_command += [f"--load-{bits}bit"]
|
| 573 |
return subprocess.Popen(worker_command)
|
| 574 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
|
| 576 |
def get_args():
|
| 577 |
parser = argparse.ArgumentParser()
|
|
|
|
| 606 |
bits = int(os.getenv("bits", 16))
|
| 607 |
|
| 608 |
controller_proc = start_controller()
|
|
|
|
| 609 |
worker_proc = start_worker(model_path, bits=bits)
|
| 610 |
|
| 611 |
# Wait for worker and controller to start
|
|
|
|
| 619 |
exit_status = 1
|
| 620 |
finally:
|
| 621 |
worker_proc.kill()
|
|
|
|
| 622 |
controller_proc.kill()
|
| 623 |
|
| 624 |
sys.exit(exit_status)
|
install.sh
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
#!/usr/bin/env bash
|
| 3 |
-
pip uninstall -y numpy
|
| 4 |
-
pip install --no-deps --prefer-binary numpy==1.23.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,19 +1,16 @@
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
pip
|
| 3 |
-
numpy==1.23.5
|
| 4 |
einops
|
| 5 |
fastapi
|
| 6 |
gradio==3.35.2
|
| 7 |
markdown2[all]
|
|
|
|
| 8 |
requests
|
| 9 |
sentencepiece
|
| 10 |
tokenizers>=0.12.1
|
| 11 |
torch==2.1.0+cu121
|
| 12 |
torchvision==0.16.0+cu121
|
| 13 |
-
sglang[all]>=0.4.6.post2
|
| 14 |
uvicorn
|
| 15 |
-
uvloop
|
| 16 |
-
pyzmq
|
| 17 |
wandb
|
| 18 |
shortuuid
|
| 19 |
httpx==0.24.0
|
|
@@ -27,4 +24,4 @@ sentencepiece==0.1.99
|
|
| 27 |
einops==0.6.1
|
| 28 |
einops-exts==0.0.4
|
| 29 |
timm==0.6.13
|
| 30 |
-
gradio_client==0.2.9
|
|
|
|
| 1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 2 |
pip
|
|
|
|
| 3 |
einops
|
| 4 |
fastapi
|
| 5 |
gradio==3.35.2
|
| 6 |
markdown2[all]
|
| 7 |
+
numpy<2.0.0
|
| 8 |
requests
|
| 9 |
sentencepiece
|
| 10 |
tokenizers>=0.12.1
|
| 11 |
torch==2.1.0+cu121
|
| 12 |
torchvision==0.16.0+cu121
|
|
|
|
| 13 |
uvicorn
|
|
|
|
|
|
|
| 14 |
wandb
|
| 15 |
shortuuid
|
| 16 |
httpx==0.24.0
|
|
|
|
| 24 |
einops==0.6.1
|
| 25 |
einops-exts==0.0.4
|
| 26 |
timm==0.6.13
|
| 27 |
+
gradio_client==0.2.9
|