RAMEN-MISO
MISO ใฏใๆพๅฐพ็ LLM ้็บใณใณใ 2025 ใซใใใฆ Team RAMEN (Reasoning AI Model Engineering Network) ใ้็บใใๅคง่ฆๆจก่จ่ชใขใใซใงใใใ้ซ้ฃๅบฆ้ ๅใซใใใๆจ่ซๆง่ฝใฎๆๅคงๅใ็ฎ็ใจใใฆใQwen3 ็ณป Mixture-of-Experts (MoE) ใๅบ็คใซ Chain-of-Thought Supervised Fine-Tuning (CoT-SFT) ใงๆ้ฉๅใใฆใใใๆฐ็ใป่ช็ถ็งๅญฆใปไบบๆ็คพไผใชใฉๅคๆงใชใใกใคใณใซใใใ้ทๆใใค้ซ่ฒ ่ทใชๆจ่ซใๅๆใซ่จญ่จใใใ
1. ใขใใซไปๆง
- ใใผในใขใใซ: https://huggingface.co/Qwen/Qwen3-235B-A22B
- ๆจ่ซ
- ใชใฝใผใน: H100 80GB ร 16 (2 node) ใพใใฏ H100 80GB ร 8 (1 node)
- ๅญฆ็ฟ
- ๆๆณ: CoT-SFT
- ใชใฝใผใน: H100 ร 16 (2 node)
- ใใผใฟ: ๅคงๅญฆ้ขใฌใใซไปฅไธใฎๆฐ็ใป่ช็ถ็งๅญฆใปไบบๆ็คพไผ็ณปใฎๅ ฌ้๏ผๅๆใใผใฟ
https://huggingface.co/datasets/weblab-llm-competition-2025-bridge/RAMEN-phase1
2. ่ฉไพกๆนๆณ
- ๅ ฑ้ๅฎ่ก็ฐๅข: vLLM 0.10.1.1๏ผๆฌ README ่จ่ผใฎๆจๅฅจๆงๆใงๆค่จผๆธใฟ๏ผ
2.1 Humanityโs Last Exam๏ผHLE๏ผ
- ่ฉไพกใณใผใ: https://github.com/matsuolab/llm_bridge_prod/tree/master/eval_hle
- ้็จใกใข: 2401 ไปถ๏ผtext-only๏ผใฎๅ จๅใไธๅบฆใงๅฎไบใใชใๅ ดๅใฏใ่คๆฐๅใซๅใใฆๅฎ่กใใฆใใ ใใใ
- ใพใ 2ใใผใ ใ่ฉฆใใๅคฑๆใใใ 1ใใผใ ใงๅฎ่กใใฆใใ ใใใ
- ๆฌ README ใซ่จ่ผใฎ่จญๅฎใใกใคใซใใใณ Slurm ใใณใใฌใผใใฏใ้ๅถๆไพใฎๅ ฌๅผใณใผใใๅบใซๆ้ฉๅใใๆจๅฅจๆงๆใงใใ่ฉไพกใๅฎๆฝใใ้ใฏใๆฌ่จญๅฎใๅฉ็จใใฆใใ ใใใ
่จญๅฎใใกใคใซ๏ผconf/config.yaml๏ผ
# HLE evaluation settings (conf/config.yaml) for the eval_hle harness.
dataset: cais/hle
provider: vllm # [vllm]
# OpenAI-compatible endpoint served by the local vLLM instance below.
base_url: http://localhost:8000/v1
model: weblab-llm-competition-2025-bridge/RAMEN-MISO
# Per-sample generation budget; long chain-of-thought outputs need a large cap.
max_completion_tokens: 35000
reasoning: true
# sample with multimodal is 2500, so text-only sample is about 2400
# NOTE(review): num_workers equals max_samples — presumably one worker per
# sample; confirm against the harness's concurrency handling.
num_workers: 2500
max_samples: 2500
# Model used as the automatic judge.
judge: o3-mini-2025-01-31
Slurm ใใณใใฌใผใ
1ใใผใๅฎ่ก
#!/bin/bash
# HLE evaluation: 1-node / 8-GPU Slurm job.
# Starts a local vLLM server, waits for it to become healthy, runs
# predict.py against it, then judges the predictions with judge.py.
#SBATCH --job-name=qwen3_8gpu
#SBATCH --partition=P01
#SBATCH --nodelist=osk-gpu51
#SBATCH --nodes=1
#SBATCH --gpus-per-node=8
#SBATCH --cpus-per-task=240
#SBATCH --time=24:00:00
#SBATCH --output=/home/Competition2025/adm/X006/logs/%x-%j.out
#SBATCH --error=/home/Competition2025/adm/X006/logs/%x-%j.err
#SBATCH --export=OPENAI_API_KEY="openai_api_keyใใใใซ"
#--- Modules & Conda ------------------------------------------------
module purge
module load cuda/12.6 miniconda/24.7.1-py312
module load cudnn/9.6.0
module load nccl/2.24.3
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate llmbench
# Hugging Face authentication.
# BUGFIX: removed the space after '=' — `export HF_TOKEN= "<token>"`
# exports an EMPTY HF_TOKEN and passes the token string to `export` as a
# (invalid) variable name, so downstream HF downloads would fail auth.
export HF_TOKEN="<huggingface_tokenใใใใซ>"
export HF_HOME="${SLURM_TMPDIR:-$HOME}/.hf_cache"
export TRANSFORMERS_CACHE="$HF_HOME"
export HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
mkdir -p "$HF_HOME"
echo "HF cache dir : $HF_HOME" # for debugging
#--- Memory / performance tuning ------------------------------------
# Recommended on the tested configuration: expandable segments reduce
# CUDA allocator fragmentation.
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
#--- GPU monitoring -------------------------------------------------
nvidia-smi -i 0,1,2,3,4,5,6,7 -l 3 > nvidia-smi.log &
pid_nvsmi=$!
#--- Start vLLM (8 GPUs) --------------------------------------------
# Tested configuration: no rope-scaling / no reasoning-parser.
# - tensor-parallel=4, pipeline-parallel=2
# - enable-expert-parallel
# - disable-custom-all-reduce
# - gpu-memory-utilization=0.90
# BUGFIX: model id was misspelled "REMEN-MISO"; the repository is
# weblab-llm-competition-2025-bridge/RAMEN-MISO (as used everywhere else).
vllm serve weblab-llm-competition-2025-bridge/RAMEN-MISO \
  --tensor-parallel-size 4 \
  --pipeline-parallel-size 2 \
  --enable-expert-parallel \
  --gpu-memory-utilization 0.90 \
  --disable-custom-all-reduce \
  > vllm.log 2>&1 &
pid_vllm=$!
#--- Health check ---------------------------------------------------
# Abort instead of spinning forever if the server process dies on startup.
until curl -s http://127.0.0.1:8000/health >/dev/null; do
  if ! kill -0 "$pid_vllm" 2>/dev/null; then
    echo "vLLM exited before becoming healthy; see vllm.log" >&2
    kill "$pid_nvsmi" 2>/dev/null
    exit 1
  fi
  echo "$(date +%T) vLLM starting โฆ"
  sleep 10
done
echo "vLLM READY"
#--- Inference ------------------------------------------------------
python predict.py > predict.log 2>&1
#--- Judging --------------------------------------------------------
# NOTE(review): this inline assignment overrides the OPENAI_API_KEY
# exported via #SBATCH above — confirm "xxx" is replaced with a real key.
OPENAI_API_KEY=xxx python judge.py
#--- Cleanup --------------------------------------------------------
kill "$pid_vllm"
kill "$pid_nvsmi"
wait
2ใใผใๅฎ่ก๏ผRay ใฏใฉในใฟๆนๅผใป้ๅถๆ็คบๆบๆ ๏ผjobs/ray_cluster.shใไฝฟ็จใใฆray clusterใ่ตทๅใใฆใใ ใใใใใฎๆpartitionใnodelistใใญใฐใใกใคใซใ่จญๅฎใใฆใใ ใใใ
ใใใฆsshใงๅบๅใใใheadใใผใใฎใใผใใซๆฅ็ถใใใขใธใฅใผใซใจcondaใ่ชญใฟ่พผใฟใvLLMใใใคใ้ใ่ตทๅใใฆใใ ใใใray clusterใ่ชๅใง่ช่ญใใพใใ
ใใฎๅพใไธ่จในใฏใชใใใใvLLM่ตทๅใจใใซในใใงใใฏใๅ้คใใconfigใไฟฎๆญฃใใฆใใๆจ่ซใใฆใใ ใใใ
2.2 Do-Not-Answer๏ผDNA๏ผ
- ่ฉไพกใณใผใ: https://github.com/matsuolab/llm_bridge_prod/tree/master/eval_dna
Slurm ใใณใใฌใผใ
#!/bin/bash
# Do-Not-Answer (DNA) evaluation: 1-node / 8-GPU Slurm job.
# Starts a local vLLM server, waits for it to become healthy, then runs
# the eval_dna harness against the OpenAI-compatible endpoint.
#SBATCH --job-name=qwen3_8gpu
#SBATCH --partition=P01
#SBATCH --nodelist=osk-gpu51
#SBATCH --nodes=1
#SBATCH --gpus-per-node=8
#SBATCH --cpus-per-task=240
#SBATCH --time=24:00:00
#SBATCH --output=/home/Competition2025/adm/X006/logs/%x-%j.out
#SBATCH --error=/home/Competition2025/adm/X006/logs/%x-%j.err
#SBATCH --export=OPENAI_API_KEY="openai_api_keyใใใใซ"
#--- Modules & Conda ------------------------------------------------
module purge
module load cuda/12.6 miniconda/24.7.1-py312
module load cudnn/9.6.0
module load nccl/2.24.3
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate llmbench
# Hugging Face authentication.
# BUGFIX: removed the space after '=' — `export HF_TOKEN= "<token>"`
# exports an EMPTY HF_TOKEN and passes the token string to `export` as a
# (invalid) variable name, so downstream HF downloads would fail auth.
export HF_TOKEN="<huggingface_tokenใใใใซ>"
export HF_HOME="${SLURM_TMPDIR:-$HOME}/.hf_cache"
export TRANSFORMERS_CACHE="$HF_HOME"
export HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
mkdir -p "$HF_HOME"
echo "HF cache dir : $HF_HOME" # for debugging
#--- Memory / performance tuning ------------------------------------
# Recommended on the tested configuration: expandable segments reduce
# CUDA allocator fragmentation.
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
#--- GPU monitoring -------------------------------------------------
nvidia-smi -i 0,1,2,3,4,5,6,7 -l 3 > nvidia-smi.log &
pid_nvsmi=$!
#--- Create required directories ------------------------------------
mkdir -p evaluation_results
#--- Start vLLM (8 GPUs) --------------------------------------------
# Tested configuration: no rope-scaling / no reasoning-parser.
# - tensor-parallel=4, pipeline-parallel=2
# - enable-expert-parallel
# - disable-custom-all-reduce
# - gpu-memory-utilization=0.90
# BUGFIX: model id was misspelled "REMEN-MISO"; the repository is
# weblab-llm-competition-2025-bridge/RAMEN-MISO (as used everywhere else).
vllm serve weblab-llm-competition-2025-bridge/RAMEN-MISO \
  --tensor-parallel-size 4 \
  --pipeline-parallel-size 2 \
  --enable-expert-parallel \
  --gpu-memory-utilization 0.90 \
  --disable-custom-all-reduce \
  > vllm.log 2>&1 &
pid_vllm=$!
#--- Health check ---------------------------------------------------
# Abort instead of spinning forever if the server process dies on startup.
until curl -s http://127.0.0.1:8000/health >/dev/null; do
  if ! kill -0 "$pid_vllm" 2>/dev/null; then
    echo "vLLM exited before becoming healthy; see vllm.log" >&2
    kill "$pid_nvsmi" 2>/dev/null
    exit 1
  fi
  echo "$(date +%T) vLLM starting โฆ"
  sleep 10
done
echo "vLLM READY"
#--- Inference ------------------------------------------------------
python llm-compe-eval/evaluate_huggingface_models.py \
  --model_name "weblab-llm-competition-2025-bridge/RAMEN-MISO" \
  --dataset_path datasets/Instruction/do_not_answer_en.csv \
  --output_dir evaluation_results \
  --use_vllm \
  --max_questions 939 \
  --vllm_base_url http://localhost:8000/v1 > predict.log 2>&1
#--- Cleanup --------------------------------------------------------
kill "$pid_vllm"
kill "$pid_nvsmi"
wait
3. ่ฉไพก็ตๆ
| Benchmark | DeepSeek R1 0528 Qwen3 8B | Qwen3 235B A22B | MISO |
|---|---|---|---|
| Humanity's Last Exam (text-only) | 6.46 ยฑ1.96 | 11.75 ยฑ1.36 | 11.12 |
| Humanity's Last Exam Extract120 (text-only) | 4.85 ยฑ4.15 | 11.54 ยฑ6.14 | 19.33 ยฑ7.10 |
| Do-Not-Answer | 97.2 | 97.9 | 92.0 |
ๆณจ: HLE-Extract120 ใฏใๅ ฌๅผใใผใฟใปใใ๏ผcais/hle๏ผใฎ text-only ๅ้กใใใซใใดใชๆฏ็ใ็ถญๆใใฆ 120ๅ ใๅฑคๅๆฝๅบใใใตใใปใใใงใใ
- Downloads last month
- -
Model tree for weblab-llm-competition-2025-bridge/RAMEN-MISO
Base model
Qwen/Qwen3-235B-A22B