# File: Hanrui/sglang/test/registered/spec/eagle/test_eagle_constrained_decoding.py
# Repository page header (Lekr0): "Add files using upload-large-folder tool" -- a402b9b, verified
import unittest
from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.kits.json_constrained_kit import TestJSONConstrainedMixin
from sglang.test.kits.regex_constrained_kit import TestRegexConstrainedMixin
from sglang.test.test_utils import (
DEFAULT_DRAFT_MODEL_EAGLE,
DEFAULT_TARGET_MODEL_EAGLE,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# Register this test file with the CUDA CI under the "stage-b-test-large-1-gpu"
# suite. est_time=100 is presumably the expected runtime in seconds -- confirm
# against sglang.test.ci.ci_register.
register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
class TestEagleConstrainedDecoding(
    CustomTestCase, TestRegexConstrainedMixin, TestJSONConstrainedMixin
):
    """Constrained-decoding tests against a server launched with EAGLE.

    ``setUpClass`` starts one server process for the whole class using the
    class attributes below; ``tearDownClass`` kills that process tree. The
    test methods themselves are expected to come from the regex/JSON
    constrained mixins (not visible in this file).

    Subclasses customise the launch by overriding the class attributes rather
    than ``setUpClass``.
    """

    # --- launch configuration (override in subclasses) ---
    max_running_requests = 64
    attention_backend = "triton"
    spec_steps = 5
    spec_topk = 1
    spec_draft_tokens = 6
    page_size = 1
    # Extra CLI arguments appended verbatim after the generated ones. Only
    # read here, never mutated, so sharing the list across subclasses is safe.
    other_launch_args = []
    model = DEFAULT_TARGET_MODEL_EAGLE
    draft_model = DEFAULT_DRAFT_MODEL_EAGLE
    grammar_backend = "xgrammar"
    # Value handed to the SGLANG_ENABLE_SPEC_V2 override during launch.
    spec_v2 = False

    @classmethod
    def setUpClass(cls):
        """Launch the server shared by every test in this class."""
        cls.base_url = DEFAULT_URL_FOR_TEST
        # Every numeric option is stringified so the argv list is uniformly
        # ``str`` (page size and max-running-requests already were; the
        # speculative-* values previously went in as raw ints).
        launch_args = [
            "--trust-remote-code",
            "--attention-backend",
            cls.attention_backend,
            "--speculative-algorithm",
            "EAGLE",
            "--speculative-draft-model",
            cls.draft_model,
            "--speculative-num-steps",
            str(cls.spec_steps),
            "--speculative-eagle-topk",
            str(cls.spec_topk),
            "--speculative-num-draft-tokens",
            str(cls.spec_draft_tokens),
            "--page-size",
            str(cls.page_size),
            "--mem-fraction-static",
            "0.75",
            "--max-running-requests",
            str(cls.max_running_requests),
            "--grammar-backend",
            cls.grammar_backend,
        ]
        launch_args.extend(str(arg) for arg in cls.other_launch_args)

        # The overrides are active only while the server is being launched;
        # presumably the child process inherits them at spawn time -- confirm
        # against envs / popen_launch_server.
        with envs.SGLANG_ENABLE_SPEC_V2.override(
            cls.spec_v2
        ), envs.SGLANG_SPEC_NAN_DETECTION.override(
            True
        ), envs.SGLANG_SPEC_OOB_DETECTION.override(
            True
        ):
            cls.process = popen_launch_server(
                cls.model,
                cls.base_url,
                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                other_args=launch_args,
            )

    @classmethod
    def tearDownClass(cls):
        """Kill the server process started in ``setUpClass`` and its children."""
        kill_process_tree(cls.process.pid)
class TestEagleConstrainedDecodingV2(TestEagleConstrainedDecoding):
    """Same tests as the parent class, launched with ``spec_v2`` set to True.

    The parent's ``setUpClass`` passes ``spec_v2`` to the
    ``SGLANG_ENABLE_SPEC_V2`` override while starting the server.
    """

    spec_v2 = True
# Allow running this file directly as a script; unittest discovers and runs
# the test classes defined above.
if __name__ == "__main__":
    unittest.main()