File size: 1,590 Bytes
61ba51e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | # SPDX-License-Identifier: Apache-2.0
"""
Saves each worker's model state dict directly to a checkpoint, which enables a
fast load path for large tensor-parallel models where each worker only needs to
read its own shard rather than the entire checkpoint.
Example usage:
python save_remote_state.py \
--model-path /path/to/load \
--tensor-parallel-size 8 \
--remote-model-save-url [protocol]://[host]:[port]/[model_name] \
Then, the model can be loaded with
llm = Engine(
model_path="[protocol]://[host]:[port]/[model_name]",
tensor_parallel_size=8,
)
"""
import dataclasses
from argparse import ArgumentParser
from pathlib import Path
from sglang import Engine, ServerArgs
parser = ArgumentParser()
ServerArgs.add_cli_args(parser)
parser.add_argument(
"--remote-model-save-url",
required=True,
type=str,
help="remote address to store model weights",
)
parser.add_argument(
"--remote-draft-model-save-url",
default=None,
type=str,
help="remote address to store draft model weights",
)
def main(args):
engine_args = ServerArgs.from_cli_args(args)
model_path = engine_args.model_path
if not Path(model_path).is_dir():
raise ValueError("model path must be a local directory")
# Create LLM instance from arguments
llm = Engine(**dataclasses.asdict(engine_args))
llm.save_remote_model(
url=args.remote_model_save_url, draft_url=args.remote_draft_model_save_url
)
print("save remote (draft) model successfully")
if __name__ == "__main__":
args = parser.parse_args()
main(args)
|