Spaces:
Paused
Paused
lock vllm v0.4.3
Browse files
- Dockerfile +1 -1
- api_server.py +1 -5
Dockerfile
CHANGED
|
@@ -14,7 +14,7 @@ RUN pip3 install "torch==2.1.1"
|
|
| 14 |
# This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
|
| 15 |
# RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
|
| 16 |
RUN pip3 install -U openai
|
| 17 |
-
RUN pip3 install
|
| 18 |
RUN pip3 install -U pydantic
|
| 19 |
RUN pip3 install -U aioprometheus
|
| 20 |
|
|
|
|
| 14 |
# This build is slow but NVIDIA does not provide binaries. Increase MAX_JOBS as needed.
|
| 15 |
# RUN pip3 install "git+https://github.com/stanford-futuredata/megablocks.git"
|
| 16 |
RUN pip3 install -U openai
|
| 17 |
+
RUN pip3 install vllm==0.4.3
|
| 18 |
RUN pip3 install -U pydantic
|
| 19 |
RUN pip3 install -U aioprometheus
|
| 20 |
|
api_server.py
CHANGED
|
@@ -29,7 +29,6 @@ from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
|
| 29 |
from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
|
| 30 |
from vllm.logger import init_logger
|
| 31 |
from vllm.usage.usage_lib import UsageContext
|
| 32 |
-
from vllm.utils import FlexibleArgumentParser
|
| 33 |
|
| 34 |
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
| 35 |
|
|
@@ -60,11 +59,8 @@ async def lifespan(app: fastapi.FastAPI):
|
|
| 60 |
|
| 61 |
app = fastapi.FastAPI(lifespan=lifespan)
|
| 62 |
|
| 63 |
-
|
| 64 |
def parse_args():
|
| 65 |
-
parser_text = FlexibleArgumentParser(
|
| 66 |
-
description="vLLM OpenAI-Compatible RESTful API server.")
|
| 67 |
-
parser = make_arg_parser(parser_text)
|
| 68 |
return parser.parse_args()
|
| 69 |
|
| 70 |
|
|
|
|
| 29 |
from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
|
| 30 |
from vllm.logger import init_logger
|
| 31 |
from vllm.usage.usage_lib import UsageContext
|
|
|
|
| 32 |
|
| 33 |
TIMEOUT_KEEP_ALIVE = 5 # seconds
|
| 34 |
|
|
|
|
| 59 |
|
| 60 |
app = fastapi.FastAPI(lifespan=lifespan)
|
| 61 |
|
|
|
|
| 62 |
def parse_args():
|
| 63 |
+
parser = make_arg_parser()
|
|
|
|
|
|
|
| 64 |
return parser.parse_args()
|
| 65 |
|
| 66 |
|