import json import unittest import openai from sglang.srt.environ import envs from sglang.srt.utils import kill_process_tree from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, popen_launch_server, ) # Constrained decoding with EAGLE3 speculative reasoning (tp=2) register_cuda_ci(est_time=60, suite="stage-b-test-large-2-gpu") class ServerWithGrammar(CustomTestCase): json_schema = json.dumps( { "type": "object", "properties": { "name": {"type": "string", "pattern": "^[\\w]+$"}, "population": {"type": "integer"}, "languages": { "type": "array", "items": {"type": "string"}, "minItems": 1, }, "has_held_olympics": {"type": "boolean"}, }, "required": ["name", "population", "languages", "has_held_olympics"], "additionalProperties": False, } ) @classmethod def setUpClass(cls): cls.model = "openai/gpt-oss-120b" cls.base_url = DEFAULT_URL_FOR_TEST launch_args = [ "--trust-remote-code", "--tp=2", "--reasoning-parser=gpt-oss", "--speculative-algorithm=EAGLE3", "--speculative-draft-model-path=lmsys/EAGLE3-gpt-oss-120b-bf16", "--speculative-num-steps=5", "--speculative-eagle-topk=4", "--speculative-num-draft-tokens=8", ] with envs.SGLANG_SPEC_NAN_DETECTION.override( True ), envs.SGLANG_SPEC_OOB_DETECTION.override(True): cls.process = popen_launch_server( cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, other_args=launch_args, ) @classmethod def tearDownClass(cls): kill_process_tree(cls.process.pid) def test_json_openai(self): client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1") response = client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a helpful AI assistant"}, { "role": "user", "content": "Introduce the capital of France. Return in a JSON format. " "The JSON Schema is: " + json.dumps(self.json_schema), }, ], temperature=0, max_tokens=1024, response_format={ "type": "json_schema", "json_schema": {"name": "foo", "schema": json.loads(self.json_schema)}, }, ) text = response.choices[0].message.content print("\n=== Reasoning Content ===") reasoning_content = response.choices[0].message.reasoning_content assert reasoning_content is not None and len(reasoning_content) > 0 print(reasoning_content) try: js_obj = json.loads(text) print("\n=== Parsed JSON Content ===") print(json.dumps(js_obj)) except (TypeError, json.decoder.JSONDecodeError): print("JSONDecodeError", text) raise self.assertIsInstance(js_obj["name"], str) self.assertIsInstance(js_obj["population"], int) if __name__ == "__main__": unittest.main()