# Hanrui/sglang/test/registered/constrained_decoding/test_constrained_decoding.py
# Lekr0's picture
# Add files using upload-large-folder tool
# a402b9b verified
import unittest
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.kits.ebnf_constrained_kit import TestEBNFConstrainedMixin
from sglang.test.kits.json_constrained_kit import TestJSONConstrainedMixin
from sglang.test.kits.regex_constrained_kit import TestRegexConstrainedMixin
from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
popen_launch_server,
)
# Register this test module with the CUDA and AMD CI suites at import time.
# NOTE(review): est_time is presumably the expected runtime in seconds — confirm
# against sglang.test.ci.ci_register.
register_cuda_ci(est_time=111, suite="stage-b-test-small-1-gpu")
register_amd_ci(est_time=179, suite="stage-b-test-small-1-gpu-amd")
class ServerWithGrammar(CustomTestCase):
    """Class-level fixture that launches a server with a chosen grammar backend.

    Class attributes:
        backend: Value passed to the server's ``--grammar-backend`` flag.
        disable_overlap: When true, ``--disable-overlap-schedule`` is added to
            the launch arguments.

    Subclasses override these attributes; the launched process handle is
    stored on the class as ``process``.
    """

    backend = "xgrammar"
    disable_overlap = False

    @classmethod
    def setUpClass(cls):
        """Start the server once for the whole test class."""
        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST

        # Flags common to every backend; the backend itself comes from the
        # (possibly overridden) class attribute.
        server_flags = [
            "--max-running-requests",
            "10",
            "--grammar-backend",
            cls.backend,
        ]
        if cls.disable_overlap:
            server_flags.append("--disable-overlap-schedule")

        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=server_flags,
        )

    @classmethod
    def tearDownClass(cls):
        """Stop the server started in ``setUpClass`` by pid."""
        server_pid = cls.process.pid
        kill_process_tree(server_pid)
class TestXGrammarBackend(
    ServerWithGrammar,
    TestJSONConstrainedMixin,
    TestEBNFConstrainedMixin,
    TestRegexConstrainedMixin,
):
    """Combine the JSON, EBNF and regex mixins with an ``xgrammar`` server.

    NOTE(review): the mixins presumably contribute the actual test methods;
    they are defined in sglang.test.kits and not visible here.
    """

    # Passed to the server as the --grammar-backend value.
    backend = "xgrammar"
class TestOutlinesBackend(ServerWithGrammar, TestJSONConstrainedMixin):
    """Combine the JSON mixin with an ``outlines`` server.

    NOTE(review): only the JSON mixin is used here, unlike the other backend
    classes in this file; presumably EBNF/regex are not covered for outlines —
    confirm.
    """

    # Passed to the server as the --grammar-backend value.
    backend = "outlines"
class TestLLGuidanceBackend(
    ServerWithGrammar,
    TestJSONConstrainedMixin,
    TestEBNFConstrainedMixin,
    TestRegexConstrainedMixin,
):
    """Combine the JSON, EBNF and regex mixins with an ``llguidance`` server.

    NOTE(review): the mixins presumably contribute the actual test methods;
    they are defined in sglang.test.kits and not visible here.
    """

    # Passed to the server as the --grammar-backend value.
    backend = "llguidance"
# Allow running this file directly as a script via unittest's CLI runner.
if __name__ == "__main__":
    unittest.main()