FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / sglang /test /registered /8-gpu-models /test_mimo_models.py

Lekr0's picture

Add files using upload-large-folder tool

61ba51e verified about 2 months ago

history blame contribute delete

1.33 kB

	import unittest

	from sglang.test.ci.ci_register import register_cuda_ci
	from sglang.test.kits.gsm8k_accuracy_kit import GSM8KMixin
	from sglang.test.kits.spec_decoding_kit import SpecDecodingMixin
	from sglang.test.server_fixtures.default_fixture import DefaultServerBase

	# Register this test module with the CUDA CI under the named suite.
	# NOTE(review): est_time is presumably an estimated runtime used for
	# scheduling (unit not visible here) -- confirm against ci_register.
	# NOTE(review): CI tooling may read this call statically, so keep the
	# arguments as plain literals -- confirm before refactoring.
	register_cuda_ci(est_time=200, suite="stage-c-test-8-gpu-h200")


	class TestMiMoV2Flash(GSM8KMixin, SpecDecodingMixin, DefaultServerBase):
	    """Configuration for the XiaomiMiMo/MiMo-V2-Flash server test case.

	    The class defines no methods of its own; it only sets class attributes.
	    The test logic and server lifecycle presumably come from the GSM8K,
	    speculative-decoding and default-server base classes -- confirm there
	    how each attribute below is consumed.
	    """

	    # Model identifier handed to the server fixture.
	    model = "XiaomiMiMo/MiMo-V2-Flash"

	    # Extra command-line arguments for the server launch. Each flag is kept
	    # on the same line as its value so the pairs are easy to audit.
	    # fmt: off
	    other_args = [
	        # Parallelism layout.
	        "--tp", "4",
	        "--dp", "2",
	        "--enable-dp-attention",
	        "--trust-remote-code",
	        # Attention kernel, scheduling and memory limits.
	        "--attention-backend", "fa3",
	        "--max-running-requests", "128",
	        "--cuda-graph-max-bs", "64",
	        "--mem-fraction-static", "0.75",
	        # Speculative decoding (EAGLE) settings.
	        "--speculative-algorithm", "EAGLE",
	        "--speculative-num-steps", "3",
	        "--speculative-eagle-topk", "1",
	        "--speculative-num-draft-tokens", "4",
	        "--enable-multi-layer-eagle",
	        # Weight-loading options, passed as a JSON string.
	        "--model-loader-extra-config",
	        '{"enable_multithread_load": true,"num_threads": 64}',
	    ]
	    # fmt: on

	    # GSM8K evaluation settings.
	    # NOTE(review): the threshold is presumably a minimum accuracy -- confirm
	    # against GSM8KMixin.
	    gsm8k_accuracy_thres = 0.75
	    gsm8k_num_questions = 1319
	    gsm8k_parallel = 1319

	    # Speculative-decoding thresholds.
	    # NOTE(review): presumably lower bounds on batch-size-1 speed and mean
	    # accept length; units are not visible here -- confirm against
	    # SpecDecodingMixin.
	    bs_1_speed_thres = 170
	    accept_length_thres = 3.2


	if __name__ == "__main__":
	    # Run the tests with unittest's command-line runner when this file is
	    # executed directly as a script.
	    unittest.main()