| import unittest |
| from unittest.mock import MagicMock |
|
|
| from sglang.srt.configs.model_config import ModelConfig |
| from sglang.test.ci.ci_register import register_cpu_ci |
| from sglang.test.test_utils import CustomTestCase |
|
|
# Module-level CI registration: calls register_cpu_ci with est_time=5 and
# suite="stage-a-cpu-only" when this file is imported.
# NOTE(review): est_time is presumably a runtime estimate in seconds — confirm
# against sglang.test.ci.ci_register.
register_cpu_ci(est_time=5, suite="stage-a-cpu-only")
|
|
|
|
class TestQuantLogString(CustomTestCase):
    """Tests for ``ModelConfig.get_quantization_config_log_str``.

    Every test feeds a hand-written quantization config dict (or ``None``)
    through a stubbed ``_parse_quant_hf_config`` and checks the string that
    ``get_quantization_config_log_str`` returns for it.
    """

    @staticmethod
    def _log_str_for(quant_config):
        """Return the quantization log string produced for ``quant_config``.

        Args:
            quant_config: The value the stubbed ``_parse_quant_hf_config``
                should return -- a quantization config dict, or ``None``.

        Returns:
            Whatever ``get_quantization_config_log_str`` returns for that
            stubbed config.
        """
        # ``__new__`` creates the instance without running ``__init__``, so
        # only the attribute stubbed below is set on the object.
        model_config = ModelConfig.__new__(ModelConfig)
        model_config._parse_quant_hf_config = MagicMock(return_value=quant_config)
        return model_config.get_quantization_config_log_str()

    def test_qwen_fp8_config(self):
        """An fp8 config with extra keys is logged as method plus ``fmt``."""
        quant_config = {
            "activation_scheme": "dynamic",
            "modules_to_not_convert": ["lm_head"],
            "fmt": "e4m3",
            "quant_method": "fp8",
            "weight_block_size": [128, 128],
        }
        self.assertEqual(self._log_str_for(quant_config), "quant=fp8, fmt=e4m3")

    def test_llama_gptq_int4_config(self):
        """A GPTQ config is logged as method plus ``bits``."""
        quant_config = {"bits": 4, "quant_method": "gptq", "group_size": 128}
        self.assertEqual(self._log_str_for(quant_config), "quant=gptq, bits=4")

    def test_awq_config(self):
        """An AWQ config is logged as method plus ``bits``."""
        quant_config = {
            "quant_method": "awq",
            "bits": 4,
            "group_size": 128,
        }
        self.assertEqual(self._log_str_for(quant_config), "quant=awq, bits=4")

    def test_modelopt_nvfp4(self):
        """A ModelOpt FP4 config is logged as method plus ``quant_algo``."""
        quant_config = {"quant_method": "modelopt_fp4", "quant_algo": "NVFP4"}
        self.assertEqual(
            self._log_str_for(quant_config),
            "quant=modelopt_fp4, quant_algo=NVFP4",
        )

    def test_no_quant_config(self):
        """With no quantization config, the log string is ``None``."""
        self.assertIsNone(self._log_str_for(None))
|
|
|
|
# Run this module's tests through unittest when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
|