import unittest from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.kits.gsm8k_accuracy_kit import GSM8KMixin from sglang.test.kits.spec_decoding_kit import SpecDecodingMixin from sglang.test.server_fixtures.default_fixture import DefaultServerBase register_cuda_ci(est_time=200, suite="stage-c-test-8-gpu-h200") class TestMiMoV2Flash(GSM8KMixin, SpecDecodingMixin, DefaultServerBase): gsm8k_accuracy_thres = 0.75 gsm8k_num_questions = 1319 gsm8k_parallel = 1319 model = "XiaomiMiMo/MiMo-V2-Flash" other_args = [ "--tp", "4", "--dp", "2", "--enable-dp-attention", "--trust-remote-code", "--attention-backend", "fa3", "--max-running-requests", "128", "--cuda-graph-max-bs", "64", "--mem-fraction-static", "0.75", "--speculative-algorithm", "EAGLE", "--speculative-num-steps", "3", "--speculative-eagle-topk", "1", "--speculative-num-draft-tokens", "4", "--enable-multi-layer-eagle", "--model-loader-extra-config", '{"enable_multithread_load": true,"num_threads": 64}', ] bs_1_speed_thres = 170 accept_length_thres = 3.2 if __name__ == "__main__": unittest.main()