# Metric values keyed by model / test-configuration name.
# Each top-level key maps to a flat mapping of metric name -> numeric value.

# NOTE(review): `qwen2.5-7b-hf` and `Qwen2.5-7B_hf` below carry identical
# values; keys are case-sensitive so both are kept — confirm both are needed.
qwen2.5-7b-hf:
  demo_gsm8k_accuracy: 78.12
  race-middle_accuracy: 90.46
  race-high_accuracy: 86.54

internlm3-8b-instruct-lmdeploy:
  demo_gsm8k_accuracy: 75.00
  race-middle_accuracy: 93.31
  race-high_accuracy: 90.28

internlm3-8b-instruct_hf-lmdeploy:
  demo_gsm8k_accuracy: 73.44
  race-middle_accuracy: 93.38
  race-high_accuracy: 90.34

Qwen2.5-7B_hf:
  demo_gsm8k_accuracy: 78.12
  race-middle_accuracy: 90.46
  race-high_accuracy: 86.54

Qwen3-0.6B_hf-vllm:
  demo_gsm8k_accuracy: 53.12
  race-middle_accuracy: 38.23
  race-high_accuracy: 28.07

lmdeploy-api-test:
  IFEval_Prompt-level-strict-accuracy: 81.25
  hle_llmjudge_accuracy: 75.00
  mmlu_pro_math_accuracy: 18.75
  mmlu_pro_other_accuracy: 25.00
  race-middle_accuracy: 87.50
  race-high_accuracy: 81.25
  gsm8k_accuracy: 18.75

lmdeploy-api-streaming-test:
  IFEval_Prompt-level-strict-accuracy: 68.75
  hle_llmjudge_accuracy: 81.25
  mmlu_pro_math_accuracy: 18.75
  mmlu_pro_other_accuracy: 31.25
  race-middle_accuracy: 81.25
  race-high_accuracy: 75.00
  gsm8k_accuracy: 25.00

# Some values in the next two entries are written without two-decimal padding
# (75, 37.5, 0); they are kept exactly as found so parsed types do not change.
lmdeploy-api-streaming-test-chunk:
  IFEval_Prompt-level-strict-accuracy: 81.25
  hle_llmjudge_accuracy: 75
  mmlu_pro_math_accuracy: 18.75
  mmlu_pro_other_accuracy: 37.50
  race-middle_accuracy: 75
  race-high_accuracy: 81.25
  gsm8k_accuracy: 37.5

lmdeploy-api-test-maxlen:
  IFEval_Prompt-level-strict-accuracy: 93.75
  hle_llmjudge_accuracy: 56.25
  mmlu_pro_math_accuracy: 62.50
  mmlu_pro_other_accuracy: 43.75
  race-middle_accuracy: 81.25
  race-high_accuracy: 81.25
  gsm8k_accuracy: 31.25

lmdeploy-api-test-maxlen-mid:
  IFEval_Prompt-level-strict-accuracy: 12.50
  hle_llmjudge_accuracy: 81.25
  mmlu_pro_math_accuracy: 0
  mmlu_pro_other_accuracy: 0
  race-middle_accuracy: 18.75
  race-high_accuracy: 12.50
  gsm8k_accuracy: 43.75

lmdeploy-api-test-nothink:
  IFEval_Prompt-level-strict-accuracy: 93.75
  hle_llmjudge_accuracy: 62.50
  mmlu_pro_math_accuracy: 62.50
  mmlu_pro_other_accuracy: 50.00
  race-middle_accuracy: 87.50
  race-high_accuracy: 87.50
  gsm8k_accuracy: 31.25

# NOTE(review): unlike the other lmdeploy-api-* entries, no gsm8k_accuracy is
# present for this one — confirm whether that is intentional.
lmdeploy-api-test-chat-template:
  IFEval_Prompt-level-strict-accuracy: 68.75
  hle_llmjudge_accuracy: 62.50
  mmlu_pro_math_accuracy: 18.75
  mmlu_pro_other_accuracy: 6.25
  race-middle_accuracy: 81.25
  race-high_accuracy: 68.75