leideng/QCFuse / test /doc_patch.py
leideng's picture
download
raw
1.61 kB
"""
Do some monkey patch to make the documentation compilation faster and more reliable.
- Avoid port conflicts
- Reduce the server launch time
"""
import weakref
import nest_asyncio
nest_asyncio.apply()
import sglang.srt.server_args as server_args_mod
from sglang.utils import execute_shell_command, reserve_port
# Value assigned to ServerArgs.max_running_requests when it is left as None,
# and passed as --max-running-requests by launch_server_cmd.
DEFAULT_MAX_RUNNING_REQUESTS = 128
# Value assigned to ServerArgs.max_total_tokens when it is left as None,
# and passed as --max-total-tokens by launch_server_cmd.
DEFAULT_MAX_TOTAL_TOKENS = 20480 # To allow multiple servers on the same machine
# Keep a handle on the unpatched ServerArgs.__post_init__ so that
# patched_post_init can delegate to it before applying its overrides.
_original_post_init = server_args_mod.ServerArgs.__post_init__
def patched_post_init(self):
    """
    Replacement for ``ServerArgs.__post_init__`` used during doc builds.

    Calls the original ``__post_init__`` first, then fills in
    ``max_running_requests`` and ``max_total_tokens`` with the module
    defaults when they are still ``None``, and finally forces
    ``cuda_graph_max_bs`` to 4 regardless of its previous value.
    """
    _original_post_init(self)

    # Only fill in values that are still unset; explicit settings are kept.
    fallbacks = (
        ("max_running_requests", DEFAULT_MAX_RUNNING_REQUESTS),
        ("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
    )
    for attr_name, fallback in fallbacks:
        if getattr(self, attr_name) is None:
            setattr(self, attr_name, fallback)

    # Unconditional override, matching the --cuda-graph-max-bs 4 flag that
    # launch_server_cmd appends to every command.
    self.cuda_graph_max_bs = 4
# Install the monkey patch: ServerArgs.__post_init__ now resolves to
# patched_post_init, which wraps the original implementation.
server_args_mod.ServerArgs.__post_init__ = patched_post_init
# Maps each launched process object to the socket returned by reserve_port.
# Keys are held weakly, so an entry (and its reference to the socket) is
# dropped once the process object is garbage-collected.
# NOTE(review): presumably this is what keeps the reserved port locked while
# the process is alive -- confirm against sglang.utils.reserve_port.
process_socket_map = weakref.WeakKeyDictionary()
def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
    """
    Start a server process from ``command`` with the doc-build flags appended.

    When ``port`` is omitted, a port is obtained from ``reserve_port(host)``;
    the socket returned alongside it is stored in ``process_socket_map``
    keyed by the launched process. When ``port`` is given, it is used as-is
    and nothing is recorded in the map.

    The command is extended with ``--port``, ``--max-running-requests``,
    ``--max-total-tokens`` and ``--cuda-graph-max-bs 4`` before being handed
    to ``execute_shell_command``.

    Returns:
        A ``(process, port)`` tuple, where ``process`` is the value returned
        by ``execute_shell_command`` and ``port`` is the port placed on the
        command line.
    """
    reserved_socket = None
    if port is None:
        port, reserved_socket = reserve_port(host)

    # Same limits that patched_post_init applies, passed explicitly on the
    # command line of the spawned server.
    doc_flags = " ".join(
        (
            f"--max-running-requests {DEFAULT_MAX_RUNNING_REQUESTS}",
            f"--max-total-tokens {DEFAULT_MAX_TOTAL_TOKENS}",
            "--cuda-graph-max-bs 4",
        )
    )
    server_process = execute_shell_command(f"{command} --port {port} {doc_flags}")

    # Tie the reservation socket to the process object so it stays referenced
    # for as long as the process object does.
    if reserved_socket is not None:
        process_socket_map[server_process] = reserved_socket

    return server_process, port

Xet Storage Details

Size:
1.61 kB
·
Xet hash:
5a0c6248f8da126c40bc125689ae14e7666cddda4ae430838ba8a80ef0835207

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.