| import unittest |
| from types import SimpleNamespace |
|
|
| import requests |
|
|
| from sglang.srt.utils import kill_process_tree |
| from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci |
| from sglang.test.run_eval import run_eval |
| from sglang.test.test_utils import ( |
| DEFAULT_MODEL_NAME_FOR_TEST, |
| DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, |
| DEFAULT_URL_FOR_TEST, |
| CustomTestCase, |
| popen_launch_server, |
| ) |
|
|
| register_cuda_ci(est_time=66, suite="stage-b-test-small-1-gpu") |
| register_amd_ci(est_time=66, suite="stage-b-test-small-1-gpu-amd") |
|
|
|
|
| class TestPyTorchSamplingBackend(CustomTestCase): |
| @classmethod |
| def setUpClass(cls): |
| cls.model = DEFAULT_MODEL_NAME_FOR_TEST |
| cls.base_url = DEFAULT_URL_FOR_TEST |
| cls.process = popen_launch_server( |
| cls.model, |
| cls.base_url, |
| timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, |
| other_args=["--sampling-backend", "pytorch", "--disable-radix-cache"], |
| ) |
|
|
| @classmethod |
| def tearDownClass(cls): |
| kill_process_tree(cls.process.pid) |
|
|
| def test_mmlu(self): |
| args = SimpleNamespace( |
| base_url=self.base_url, |
| model=self.model, |
| eval_name="mmlu", |
| num_examples=64, |
| num_threads=32, |
| temperature=0.1, |
| ) |
|
|
| metrics = run_eval(args) |
| self.assertGreaterEqual(metrics["score"], 0.65) |
|
|
| def test_greedy(self): |
|
|
| first_text = None |
|
|
| |
| for _ in range(5): |
| response_single = requests.post( |
| self.base_url + "/generate", |
| json={ |
| "text": "The capital of Germany is", |
| "sampling_params": { |
| "temperature": 0, |
| "max_new_tokens": 32, |
| }, |
| }, |
| ).json() |
| text = response_single["text"] |
| if first_text is None: |
| first_text = text |
|
|
| self.assertEqual(text, first_text) |
|
|
| first_text = None |
|
|
| response_batch = requests.post( |
| self.base_url + "/generate", |
| json={ |
| "text": ["The capital of Germany is"] * 10, |
| "sampling_params": { |
| "temperature": 0, |
| "max_new_tokens": 32, |
| }, |
| }, |
| ).json() |
|
|
| |
| for i in range(10): |
| text = response_batch[i]["text"] |
| if first_text is None: |
| first_text = text |
| self.assertEqual(text, first_text) |
|
|
|
|
| if __name__ == "__main__": |
| unittest.main() |
|
|