Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +1 -0
- ccevolve/baselines/thetaevolve/adapted/__pycache__/ac1.cpython-313.pyc +0 -0
- ccevolve/baselines/thetaevolve/adapted/__pycache__/circle_packing.cpython-313.pyc +0 -0
- ccevolve/baselines/thetaevolve/adapted/ac1.py +21 -0
- ccevolve/baselines/thetaevolve/adapted/circle_packing.py +41 -0
- ccevolve/baselines/thetaevolve/adapted_openevolve/ac1.py +27 -0
- ccevolve/baselines/thetaevolve/adapted_openevolve/ac1_ae.py +27 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile +118 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm +363 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm_MI350-5 +252 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_9a48ba0.rocm +361 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_c22f55b.rocm +374 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile_Aug_10_2025_9a48ba0.rocm +361 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile_after_c22f55b_Aug_10_2025.rocm +374 -0
- ccevolve/baselines/thetaevolve/docker/Dockerfile_b200 +64 -0
- ccevolve/baselines/thetaevolve/docker/README.md +27 -0
- ccevolve/baselines/thetaevolve/docker/justfile +40 -0
- ccevolve/baselines/thetaevolve/docker/version.txt +1 -0
- ccevolve/baselines/thetaevolve/docs/README.md +31 -0
- ccevolve/baselines/thetaevolve/docs/build.sh +13 -0
- ccevolve/baselines/thetaevolve/docs/build_all.sh +44 -0
- ccevolve/baselines/thetaevolve/docs/conf.py +262 -0
- ccevolve/baselines/thetaevolve/docs/requirements.txt +20 -0
- ccevolve/baselines/thetaevolve/docs/serve.sh +29 -0
- ccevolve/baselines/thetaevolve/eval_results/ac1/correct.json +4 -0
- ccevolve/baselines/thetaevolve/eval_results/ac1/metrics.json +22 -0
- ccevolve/baselines/thetaevolve/eval_results/circle_packing/correct.json +4 -0
- ccevolve/baselines/thetaevolve/eval_results/circle_packing/metrics.json +15 -0
- ccevolve/baselines/thetaevolve/examples/__init__.py +0 -0
- ccevolve/baselines/thetaevolve/examples/fully_async/README.md +45 -0
- ccevolve/baselines/thetaevolve/examples/fully_async/fully_async_rollout.py +247 -0
- ccevolve/baselines/thetaevolve/examples/fully_async/run-qwen3-4b-fully_async.sh +135 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/README.md +53 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/__init__.py +0 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/agent_system.py +273 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/prompts.py +90 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/rollout_with_multi_agents.py +33 -0
- ccevolve/baselines/thetaevolve/examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh +159 -0
- ccevolve/baselines/thetaevolve/examples/reproducibility/README.md +53 -0
- ccevolve/baselines/thetaevolve/examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh +140 -0
- ccevolve/baselines/thetaevolve/examples/retool/requirements.txt +3 -0
- ccevolve/baselines/thetaevolve/examples/retool/retool_qwen3_4b_rl.sh +157 -0
- ccevolve/baselines/thetaevolve/examples/retool/sft_data_processing.py +31 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/README.md +75 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/README_zh.md +77 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/generate_with_search.py +169 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/google_search_server.py +150 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/qa_em_format.py +208 -0
- ccevolve/baselines/thetaevolve/examples/search-r1/run_qwen2.5_3B.sh +137 -0
- ccevolve/baselines/thetaevolve/openevolve_adapted/.gitignore +61 -0
.gitattributes
CHANGED
|
@@ -1271,3 +1271,4 @@ examples_deprecated/circle_packing/results/results_baseline_gemini3_flash_gen200
|
|
| 1271 |
examples_deprecated/circle_packing/results/results_mmv1_2_gemini3_flash_gen200_periodic10_20260211_003248/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
| 1272 |
examples_deprecated/circle_packing/results/results_full_gen200_plateau10_20260208_010426/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
| 1273 |
examples_deprecated/circle_packing/results/results_mmv1_1_gen200_periodic10_20260208_083104/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 1271 |
examples_deprecated/circle_packing/results/results_mmv1_2_gemini3_flash_gen200_periodic10_20260211_003248/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
| 1272 |
examples_deprecated/circle_packing/results/results_full_gen200_plateau10_20260208_010426/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
| 1273 |
examples_deprecated/circle_packing/results/results_mmv1_1_gen200_periodic10_20260208_083104/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
|
| 1274 |
+
examples_deprecated/circle_packing/results/results_full_gen200_period10_20260206_062935/best/results/circle_packing_visualization.png filter=lfs diff=lfs merge=lfs -text
|
ccevolve/baselines/thetaevolve/adapted/__pycache__/ac1.cpython-313.pyc
ADDED
|
Binary file (1.41 kB). View file
|
|
|
ccevolve/baselines/thetaevolve/adapted/__pycache__/circle_packing.cpython-313.pyc
ADDED
|
Binary file (2.3 kB). View file
|
|
|
ccevolve/baselines/thetaevolve/adapted/ac1.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ThetaEvolve AC1 — uses the embedded AlphaEvolve v2 sequence from ac1_data.py in TTT-Discover.
|
| 2 |
+
|
| 3 |
+
ThetaEvolve's AC1 programs depend on internal imports (openevolve, ref.sota_alphaevolve2)
|
| 4 |
+
and are search algorithms, not fixed solutions. The data.json in FirstAutoCorrIneq
|
| 5 |
+
doesn't contain a usable best sequence in a simple format.
|
| 6 |
+
|
| 7 |
+
We use the AlphaEvolve v2 (150316) sequence from TTT-Discover's ac1_data.py as the
|
| 8 |
+
baseline for comparison, since ThetaEvolve claims to improve upon it.
|
| 9 |
+
"""
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 14 |
+
# Import from TTT-Discover's ac1_data.py
|
| 15 |
+
sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
|
| 16 |
+
from ac1_data import alphaevolve_v2_150316
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def run(seed=42, budget_s=1000, **kwargs):
    """Provide the AlphaEvolve v2 AC1 sequence used as the ThetaEvolve baseline.

    ``seed``, ``budget_s`` and any extra keyword arguments are accepted but
    not used by this function; the result is always a new list built from
    ``alphaevolve_v2_150316``.
    """
    # Unpack into a fresh list so callers never share the imported object.
    return [*alphaevolve_v2_150316]
|
ccevolve/baselines/thetaevolve/adapted/circle_packing.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""ThetaEvolve best circle packing solution (from data.json, '8B-w_RL@65' entry)."""
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 7 |
+
_DATA_PATH = os.path.join(_DIR, "..", "Results", "CirclePacking", "data.json")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def run_packing(data_path=None):
    """Return the best pre-computed packing in ShinkaEvolve eval format.

    Loads the results JSON, keeps only entries whose ``"list"`` is a flat
    list of exactly 26 ``[x, y, r]`` triples, and selects the entry with the
    largest sum of radii.

    Args:
        data_path: Optional path to the results JSON file. When omitted, the
            module-level ``_DATA_PATH`` is used.

    Returns:
        A tuple ``(centers, radii, sum_radii)`` where ``centers`` is an array
        of ``[x, y]`` pairs, ``radii`` is an array of the matching radii and
        ``sum_radii`` is their total as a Python float.

    Raises:
        ValueError: If no entry in the file passes the filters above.
    """
    path = _DATA_PATH if data_path is None else data_path
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # Find the entry with the highest sum of radii
    best_name = None
    best_sum = -1.0
    best_circles = None
    for entry in data:
        name = entry["name"]
        circles = entry["list"]
        # Skip empty entries and entries that are not a list of lists
        if not circles or not isinstance(circles[0], list):
            continue
        # Skip entries with nested lists (like Formal proofs); an empty inner
        # list is skipped too instead of raising IndexError
        if not circles[0] or isinstance(circles[0][0], list):
            continue
        if len(circles) != 26:
            continue
        s = sum(c[2] for c in circles)
        if s > best_sum:
            best_sum = s
            best_name = name
            best_circles = circles

    # Fail with a clear message instead of a TypeError on iterating None
    if best_circles is None:
        raise ValueError(f"No usable 26-circle packing entry found in {path}")

    print(f"Using ThetaEvolve entry: {best_name} (sum={best_sum:.10f})")

    centers = np.array([[c[0], c[1]] for c in best_circles])
    radii = np.array([c[2] for c in best_circles])
    sum_radii = float(np.sum(radii))

    return centers, radii, sum_radii
|
ccevolve/baselines/thetaevolve/adapted_openevolve/ac1.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AlphaEvolve v2 AC1 baseline adapted for OpenEvolve evaluator.
|
| 2 |
+
|
| 3 |
+
OpenEvolve AC1 evaluator expects: run() -> (f_values, c1_achieved, loss, n_points)
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 10 |
+
sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
|
| 11 |
+
from ac1_data import alphaevolve_v2_150316
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def run():
    """Score the AlphaEvolve v2 AC1 sequence and report it in OpenEvolve eval format.

    Returns:
        A 4-tuple ``(f_values, c1_achieved, loss, n_points)``: the sequence
        as a float64 array with negative entries clamped to zero, the
        computed ratio as a Python float, the loss (the same value as
        ``c1_achieved``), and the number of samples.
    """
    # Load the reference sequence and clamp negative entries to zero.
    samples = np.maximum(np.array(alphaevolve_v2_150316, dtype=np.float64), 0.0)
    n_points = len(samples)

    # Grid spacing used for both the convolution and the integral estimate.
    step = 0.5 / n_points

    # Scaled self-convolution, normalised by the squared (sum * step).
    self_conv = np.convolve(samples, samples, mode="full") * step
    norm = (np.sum(samples) * step) ** 2
    c1_achieved = float(np.max(self_conv / norm))

    # The loss reported to the evaluator is the achieved value itself.
    loss = c1_achieved
    return samples, c1_achieved, loss, n_points
|
ccevolve/baselines/thetaevolve/adapted_openevolve/ac1_ae.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AlphaEvolve v2 AC1 baseline adapted for OpenEvolve evaluator.
|
| 2 |
+
|
| 3 |
+
OpenEvolve AC1 evaluator expects: run() -> (f_values, c1_achieved, loss, n_points)
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 10 |
+
sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
|
| 11 |
+
from ac1_data import alphaevolve_v2_150316
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def run():
    """Compute the AC1 value of the AlphaEvolve v2 sequence for the OpenEvolve evaluator.

    Returns:
        ``(f_values, c1_achieved, loss, n_points)`` where ``f_values`` is the
        float64 sequence with negatives replaced by zero, ``c1_achieved`` is
        the maximum of the normalised self-convolution as a Python float,
        ``loss`` equals ``c1_achieved`` and ``n_points`` is the sequence length.
    """
    raw = np.array(alphaevolve_v2_150316, dtype=np.float64)
    clipped = np.maximum(raw, 0.0)  # negative entries are replaced by zero
    n_points = len(clipped)

    spacing = 0.5 / n_points
    conv_scaled = np.convolve(clipped, clipped, mode="full") * spacing
    denom = (np.sum(clipped) * spacing) ** 2

    # Same arithmetic order as before: divide element-wise, then take the max.
    c1_achieved = float(np.max(conv_scaled / denom))
    loss = c1_achieved

    return clipped, c1_achieved, loss, n_points
|
ccevolve/baselines/thetaevolve/docker/Dockerfile
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG SGLANG_IMAGE_TAG=nightly-dev-20260107-dce8b060
|
| 2 |
+
FROM slimerl/sglang:${SGLANG_IMAGE_TAG} AS sglang
|
| 3 |
+
|
| 4 |
+
# ======================================== Arguments =============================================
|
| 5 |
+
|
| 6 |
+
ARG PATCH_VERSION=latest
|
| 7 |
+
ARG MEGATRON_COMMIT=3714d81d418c9f1bca4594fc35f9e8289f652862
|
| 8 |
+
|
| 9 |
+
ARG ENABLE_CUDA_13=0
|
| 10 |
+
|
| 11 |
+
# ======================================== Setup =============================================
|
| 12 |
+
|
| 13 |
+
WORKDIR /root/
|
| 14 |
+
|
| 15 |
+
# ======================================== Apt dependencies =============================================
|
| 16 |
+
|
| 17 |
+
# Refresh the package index and install in the same layer so a cached,
# stale index can never be paired with a newer install step.
RUN apt-get update && \
    apt-get install -y nvtop rsync dnsutils
|
| 19 |
+
|
| 20 |
+
# ====================================== Python dependencies ============================================
|
| 21 |
+
|
| 22 |
+
# The compilation is slow, thus should be put at top
|
| 23 |
+
# TransformerEngine does not support newer FlashAttention-2 releases, so the version is pinned
|
| 24 |
+
RUN MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1 --no-build-isolation
|
| 25 |
+
|
| 26 |
+
# The compilation is slow, thus should be put at top
|
| 27 |
+
# Build the hopper/ variant of flash-attention from a pinned commit and copy
# its Python interface into a `flash_attn_3` package in site-packages.
# The final cleanup first returns to /root (the WORKDIR): the build runs inside
# flash-attention/hopper, where a relative `rm -rf flash-attention/` would
# remove nothing and leave the whole clone in this layer.
RUN git clone https://github.com/Dao-AILab/flash-attention.git && \
    cd flash-attention/ && git checkout fbf24f67cf7f6442c5cfb2c1057f4bfc57e72d89 && git submodule update --init && cd hopper/ && \
    MAX_JOBS=96 python setup.py install && \
    export python_path=`python -c "import site; print(site.getsitepackages()[0])"` && \
    mkdir -p $python_path/flash_attn_3 && \
    cp flash_attn_interface.py $python_path/flash_attn_3/flash_attn_interface.py && \
    cd /root && rm -rf flash-attention/
|
| 34 |
+
|
| 35 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10887887bc74853f89a4de258c0702932a1c --no-deps
|
| 36 |
+
|
| 37 |
+
RUN pip install flash-linear-attention==0.4.1
|
| 38 |
+
RUN pip install tilelang -f https://tile-ai.github.io/whl/nightly/cu128/
|
| 39 |
+
|
| 40 |
+
# TE does not have wheel on cuda 13 yet, thus need to install from source
|
| 41 |
+
RUN if [ "${ENABLE_CUDA_13}" = "1" ]; then \
|
| 42 |
+
pip install nvidia-mathdx==26.6.0 && \
|
| 43 |
+
pip -v install --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.10; \
|
| 44 |
+
else \
|
| 45 |
+
pip -v install --no-build-isolation "transformer_engine[pytorch]==2.10.0"; \
|
| 46 |
+
fi
|
| 47 |
+
|
| 48 |
+
RUN NVCC_APPEND_FLAGS="--threads 4" \
|
| 49 |
+
pip -v install --disable-pip-version-check --no-cache-dir \
|
| 50 |
+
--no-build-isolation \
|
| 51 |
+
--config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git@10417aceddd7d5d05d7cbf7b0fc2daad1105f8b4
|
| 52 |
+
|
| 53 |
+
RUN git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
|
| 54 |
+
cd Megatron-LM && git checkout ${MEGATRON_COMMIT} && \
|
| 55 |
+
pip install -e .
|
| 56 |
+
|
| 57 |
+
RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git@dc6876905830430b5054325fa4211ff302169c6b --no-cache-dir --force-reinstall
|
| 58 |
+
RUN pip install git+https://github.com/fzyzcjy/Megatron-Bridge.git@dev_rl --no-build-isolation
|
| 59 |
+
# Quote the requirement: unquoted, the shell treats `>=0.37.0` as an output
# redirection, dropping the version constraint and writing a file named `=0.37.0`.
RUN pip install "nvidia-modelopt[torch]>=0.37.0" --no-build-isolation
|
| 60 |
+
|
| 61 |
+
# This patch from masahi will be included in later Triton releases
|
| 62 |
+
RUN if [ "$ENABLE_CUDA_13" = "1" ]; then \
|
| 63 |
+
(cd /root && git clone -b feat/v350_plus_8045 https://github.com/fzyzcjy/triton.git && cd triton && pip install -r python/requirements.txt && pip install --verbose -e .); \
|
| 64 |
+
fi
|
| 65 |
+
|
| 66 |
+
COPY requirements.txt /tmp/requirements.txt
|
| 67 |
+
RUN pip install -r /tmp/requirements.txt
|
| 68 |
+
|
| 69 |
+
# Temporarily install another sgl-kernel version for GB300 without rebuilding the whole image
|
| 70 |
+
RUN if [ "$ENABLE_CUDA_13" = "1" ]; then \
|
| 71 |
+
SGL_KERNEL_VERSION=0.3.17.post2 && \
|
| 72 |
+
python3 -m pip install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps; \
|
| 73 |
+
fi
|
| 74 |
+
|
| 75 |
+
# https://github.com/pytorch/pytorch/issues/168167
|
| 76 |
+
RUN pip install nvidia-cudnn-cu12==9.16.0.29
|
| 77 |
+
|
| 78 |
+
# reinstall numpy 1.x for megatron
|
| 79 |
+
RUN pip install "numpy<2"
|
| 80 |
+
|
| 81 |
+
RUN rm -rf /root/.cache/pip /root/flash-attention
|
| 82 |
+
|
| 83 |
+
# ====================================== Patches ============================================
|
| 84 |
+
|
| 85 |
+
COPY docker/patch/${PATCH_VERSION}/megatron.patch /root/Megatron-LM/
|
| 86 |
+
RUN cd Megatron-LM && \
|
| 87 |
+
git update-index --refresh && \
|
| 88 |
+
git apply megatron.patch --3way && \
|
| 89 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 90 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 91 |
+
exit 1; \
|
| 92 |
+
fi && \
|
| 93 |
+
rm megatron.patch
|
| 94 |
+
|
| 95 |
+
# TODO temporarily skip patching for GB200/GB300 (and require users to bring their own sglang version). should add back later.
|
| 96 |
+
ARG ENABLE_SGLANG_PATCH=1
|
| 97 |
+
COPY docker/patch/${PATCH_VERSION}/sglang.patch /sgl-workspace/sglang/
|
| 98 |
+
RUN if [ "$ENABLE_SGLANG_PATCH" = "1" ]; then \
|
| 99 |
+
cd /sgl-workspace/sglang && \
|
| 100 |
+
git update-index --refresh && \
|
| 101 |
+
git apply sglang.patch --3way && \
|
| 102 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 103 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 104 |
+
exit 1; \
|
| 105 |
+
fi && \
|
| 106 |
+
rm sglang.patch; \
|
| 107 |
+
fi
|
| 108 |
+
|
| 109 |
+
# ====================================== Install main package ============================================
|
| 110 |
+
|
| 111 |
+
ARG SLIME_COMMIT=main
|
| 112 |
+
RUN git clone https://github.com/THUDM/slime.git /root/slime && \
|
| 113 |
+
cd /root/slime && \
|
| 114 |
+
git checkout ${SLIME_COMMIT} && \
|
| 115 |
+
pip install -e . --no-deps
|
| 116 |
+
|
| 117 |
+
RUN cd /root/slime/slime/backends/megatron_utils/kernels/int4_qat && \
|
| 118 |
+
pip install . --no-build-isolation
|
ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# Supports at least up to slime commit ID: 2710445 (Oct 9, 2025) - supported by amd_patch/sglv0.5.0rc0
|
| 5 |
+
# Still need to update amd_patch
|
| 6 |
+
|
| 7 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 8 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim`. The patch that mitigates the checkpoint loading issue was added manually (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449-1457: comment out the `if` check because of the mismatched number of dist checkpoints).
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# The Docker image built with this Dockerfile:
|
| 12 |
+
# PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports at least up to slime commit ID: d4a7741 (Sep 7, 2025)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 16 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim`. The patch that mitigates the checkpoint loading issue was added manually (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449-1457: comment out the `if` check because of the mismatched number of dist checkpoints).
|
| 17 |
+
|
| 18 |
+
# Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
|
| 19 |
+
|
| 20 |
+
# FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
|
| 21 |
+
FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
|
| 22 |
+
|
| 23 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 24 |
+
|
| 25 |
+
ARG MAX_JOBS=512
|
| 26 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 27 |
+
|
| 28 |
+
ENV PATH="/usr/local/python3.12/bin:$PATH"
|
| 29 |
+
RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
|
| 30 |
+
ln -sf /usr/bin/pip3.12 /usr/bin/pip
|
| 31 |
+
|
| 32 |
+
RUN apt-get update
|
| 33 |
+
RUN apt-get install -y pkg-config liblzma-dev
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
###########################################
|
| 37 |
+
##########Install TransformerEngine########
|
| 38 |
+
###########################################
|
| 39 |
+
WORKDIR /workspace/
|
| 40 |
+
|
| 41 |
+
RUN rm -rf TransformerEngine
|
| 42 |
+
# RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
|
| 43 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git
|
| 44 |
+
WORKDIR /workspace/TransformerEngine
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
RUN git checkout 236178e
|
| 48 |
+
# RUN git checkout bb061ad
|
| 49 |
+
# RUN git checkout 864405c
|
| 50 |
+
|
| 51 |
+
RUN git submodule update --init --recursive
|
| 52 |
+
|
| 53 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 54 |
+
ENV NVTE_ROCM_ARCH=gfx942
|
| 55 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 56 |
+
ENV NVTE_USE_ROCM=1
|
| 57 |
+
|
| 58 |
+
# export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
|
| 59 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 60 |
+
RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
|
| 61 |
+
WORKDIR /workspace/
|
| 62 |
+
###########################################
|
| 63 |
+
###########################################
|
| 64 |
+
###########################################
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
###########################################
|
| 69 |
+
##############Install SGLang###############
|
| 70 |
+
###########################################
|
| 71 |
+
|
| 72 |
+
# This is necessary for scope purpose
|
| 73 |
+
# ARG GPU_ARCH=gfx942
|
| 74 |
+
ENV GPU_ARCH=gfx942
|
| 75 |
+
|
| 76 |
+
# ===============================
|
| 77 |
+
# Base image 942 and args
|
| 78 |
+
# FROM $BASE_IMAGE_942 AS gfx942
|
| 79 |
+
ENV BUILD_VLLM="0"
|
| 80 |
+
ENV BUILD_TRITON="1"
|
| 81 |
+
ENV BUILD_AITER_ALL="1"
|
| 82 |
+
ENV AITER_COMMIT="v0.1.4"
|
| 83 |
+
|
| 84 |
+
# # ===============================
|
| 85 |
+
# # Base image 950 and args
|
| 86 |
+
# FROM $BASE_IMAGE_950 AS gfx950
|
| 87 |
+
# ENV BUILD_VLLM="0"
|
| 88 |
+
# ENV BUILD_TRITON="0"
|
| 89 |
+
# ENV BUILD_AITER_ALL="1"
|
| 90 |
+
# ENV AITER_COMMIT="v0.1.4"
|
| 91 |
+
|
| 92 |
+
# ===============================
|
| 93 |
+
# Chosen arch and args
|
| 94 |
+
# FROM ${GPU_ARCH}
|
| 95 |
+
|
| 96 |
+
# This is necessary for scope purpose, again
|
| 97 |
+
# ARG GPU_ARCH=gfx950
|
| 98 |
+
ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
|
| 99 |
+
|
| 100 |
+
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
|
| 101 |
+
ARG SGL_DEFAULT="main"
|
| 102 |
+
# ARG SGL_BRANCH=${SGL_DEFAULT}
|
| 103 |
+
ARG SGL_BRANCH="8ecf6b9d2480c3f600826c7d8fef6a16ed603c3f"
|
| 104 |
+
|
| 105 |
+
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
| 106 |
+
ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
|
| 107 |
+
|
| 108 |
+
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
WORKDIR /workspace
|
| 112 |
+
# -----------------------
|
| 113 |
+
# AITER
|
| 114 |
+
RUN pip uninstall -y aiter
|
| 115 |
+
RUN git clone ${AITER_REPO} \
|
| 116 |
+
&& cd aiter \
|
| 117 |
+
&& git checkout ${AITER_COMMIT} \
|
| 118 |
+
&& git submodule update --init --recursive
|
| 119 |
+
RUN cd aiter \
|
| 120 |
+
&& if [ "$BUILD_AITER_ALL" = "1" ]; then \
|
| 121 |
+
PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
|
| 122 |
+
else \
|
| 123 |
+
GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
|
| 124 |
+
fi
|
| 125 |
+
|
| 126 |
+
# -----------------------
|
| 127 |
+
# Triton
|
| 128 |
+
RUN if [ "$BUILD_TRITON" = "1" ]; then \
|
| 129 |
+
pip uninstall -y triton \
|
| 130 |
+
&& git clone ${TRITON_REPO} \
|
| 131 |
+
&& cd triton \
|
| 132 |
+
&& git checkout ${TRITON_COMMIT} \
|
| 133 |
+
&& cd python \
|
| 134 |
+
&& python setup.py install; \
|
| 135 |
+
fi
|
| 136 |
+
|
| 137 |
+
# -----------------------
|
| 138 |
+
# Build vLLM
|
| 139 |
+
ARG VLLM_REPO="https://github.com/ROCm/vllm.git"
|
| 140 |
+
ARG VLLM_BRANCH="9f6b92db47c3444b7a7d67451ba0c3a2d6af4c2c"
|
| 141 |
+
RUN if [ "$BUILD_VLLM" = "1" ]; then \
|
| 142 |
+
git clone ${VLLM_REPO} \
|
| 143 |
+
&& cd vllm \
|
| 144 |
+
&& git checkout ${VLLM_BRANCH} \
|
| 145 |
+
&& python -m pip install -r requirements/rocm.txt \
|
| 146 |
+
&& python setup.py clean --all \
|
| 147 |
+
&& python setup.py develop; \
|
| 148 |
+
fi
|
| 149 |
+
|
| 150 |
+
# -----------------------
|
| 151 |
+
# Build SGLang
|
| 152 |
+
ARG BUILD_TYPE=all
|
| 153 |
+
|
| 154 |
+
RUN pip install IPython \
|
| 155 |
+
&& pip install orjson \
|
| 156 |
+
&& pip install python-multipart \
|
| 157 |
+
&& pip install torchao \
|
| 158 |
+
&& pip install pybind11
|
| 159 |
+
|
| 160 |
+
RUN pip install "setuptools<70.0.0" --force-reinstall
|
| 161 |
+
RUN pip uninstall -y sgl_kernel sglang
|
| 162 |
+
RUN git clone ${SGL_REPO} \
|
| 163 |
+
&& cd sglang \
|
| 164 |
+
&& if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
|
| 165 |
+
echo "Using ${SGL_DEFAULT}, default branch."; \
|
| 166 |
+
git checkout ${SGL_DEFAULT}; \
|
| 167 |
+
else \
|
| 168 |
+
echo "Using ${SGL_BRANCH} branch."; \
|
| 169 |
+
git checkout ${SGL_BRANCH}; \
|
| 170 |
+
fi \
|
| 171 |
+
&& cd sgl-kernel \
|
| 172 |
+
&& rm -f pyproject.toml \
|
| 173 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 174 |
+
&& AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
|
| 175 |
+
&& cd .. \
|
| 176 |
+
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 177 |
+
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 178 |
+
else \
|
| 179 |
+
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 180 |
+
fi
|
| 181 |
+
|
| 182 |
+
RUN python -m pip cache purge
|
| 183 |
+
|
| 184 |
+
# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
|
| 185 |
+
RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 186 |
+
/workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 187 |
+
-type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
| 188 |
+
|
| 189 |
+
# Performance environment variable.
|
| 190 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 191 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 192 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 193 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 194 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 195 |
+
|
| 196 |
+
ENV SGLANG_USE_AITER=1
|
| 197 |
+
ENV SGLANG_MOE_PADDING=1
|
| 198 |
+
ENV VLLM_FP8_PADDING=1
|
| 199 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 200 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 201 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 202 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 203 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
# sglang patch
|
| 207 |
+
# COPY patch/${SGLANG_VERSION}/sglang.patch /sgl-workspace/sglang/
|
| 208 |
+
COPY amd_patch/sglv0.5.0rc0 /workspace/patch
|
| 209 |
+
RUN cd /workspace/sglang && \
|
| 210 |
+
git apply /workspace/patch/sglang.patch && \
|
| 211 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 212 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 213 |
+
exit 1; \
|
| 214 |
+
fi
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
RUN pip install sglang-router --force-reinstall
|
| 219 |
+
|
| 220 |
+
###########################################
|
| 221 |
+
###########################################
|
| 222 |
+
###########################################
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
RUN pip install transformers==4.51.1
|
| 226 |
+
|
| 227 |
+
#########################################
|
| 228 |
+
#####Install vllm v0.8.5#################
|
| 229 |
+
#########################################
|
| 230 |
+
|
| 231 |
+
WORKDIR /workspace/
|
| 232 |
+
|
| 233 |
+
ENV VLLM_TARGET_DEVICE=rocm
|
| 234 |
+
ENV ROCM_PATH=/opt/rocm
|
| 235 |
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
|
| 236 |
+
|
| 237 |
+
RUN pip uninstall -y vllm || true
|
| 238 |
+
RUN rm -rf vllm-patch
|
| 239 |
+
|
| 240 |
+
# Fix importlib-metadata version conflict before vllm installation
|
| 241 |
+
RUN pip install "importlib-metadata>=6.0,<=8.0.0" --force-reinstall
|
| 242 |
+
|
| 243 |
+
RUN git clone https://github.com/RLFoundation/vllm-patch.git \
|
| 244 |
+
&& cd vllm-patch \
|
| 245 |
+
&& git checkout v0.8.5-sleep-numa \
|
| 246 |
+
&& rm -rf build/ dist/ *.egg-info \
|
| 247 |
+
&& ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
|
| 248 |
+
&& SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
|
| 249 |
+
|
| 250 |
+
WORKDIR /workspace/
|
| 251 |
+
###########################################
|
| 252 |
+
###########################################
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
#########################################
|
| 256 |
+
#### Install megatron-core###############
|
| 257 |
+
#########################################
|
| 258 |
+
# Can be removed just the current megatron-lm dependency
|
| 259 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 260 |
+
|
| 261 |
+
COPY amd_patch/sglv0.5.0rc0 /workspace/patch
|
| 262 |
+
|
| 263 |
+
RUN pip uninstall -y megatron-core && \
|
| 264 |
+
git clone https://github.com/NVIDIA/Megatron-LM && \
|
| 265 |
+
cd Megatron-LM && \
|
| 266 |
+
git checkout 48406695c4efcf1026a7ed70bb390793918dd97b && \
|
| 267 |
+
git apply /workspace/patch/amd_megatron_fused_kernels_init.patch && \
|
| 268 |
+
pip install -vvv -e . && \
|
| 269 |
+
cd /workspace/
|
| 270 |
+
|
| 271 |
+
# sandwich norm for GLM models
|
| 272 |
+
RUN cd Megatron-LM && \
|
| 273 |
+
git apply /workspace/patch/megatron.patch --3way && \
|
| 274 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 275 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 276 |
+
exit 1; \
|
| 277 |
+
fi
|
| 278 |
+
|
| 279 |
+
#########################################
|
| 280 |
+
#########################################
|
| 281 |
+
#########################################
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
#########################################
|
| 287 |
+
###Add torch_memory_saver################
|
| 288 |
+
#########################################
|
| 289 |
+
# # Set environment variables
|
| 290 |
+
# ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
|
| 291 |
+
# ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 292 |
+
# ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 293 |
+
# Install torch_memory_saver
|
| 294 |
+
# RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
|
| 295 |
+
# RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
|
| 296 |
+
RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
|
| 297 |
+
# pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
|
| 298 |
+
#########################################
|
| 299 |
+
#########################################
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
########################################
|
| 305 |
+
######Install ray#######################
|
| 306 |
+
########################################
|
| 307 |
+
# need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
|
| 308 |
+
RUN pip uninstall ray -y
|
| 309 |
+
# RUN pip install "ray[data,train,tune,serve]>=2.47.0"
|
| 310 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 311 |
+
########################################
|
| 312 |
+
########################################
|
| 313 |
+
########################################
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
### Need to verify whether this causes numerical/convergence issues
|
| 317 |
+
#######################################
|
| 318 |
+
################apex###################
|
| 319 |
+
#######################################
|
| 320 |
+
WORKDIR /workspace/
|
| 321 |
+
RUN pip uninstall -y apex && \
|
| 322 |
+
git clone https://github.com/ROCm/apex.git && \
|
| 323 |
+
cd apex && \
|
| 324 |
+
python setup.py install && \
|
| 325 |
+
cd /workspace/
|
| 326 |
+
#######################################
|
| 327 |
+
#######################################
|
| 328 |
+
#######################################
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
########################################
|
| 332 |
+
############ mbridge####################
|
| 333 |
+
########################################
|
| 334 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 335 |
+
########################################
|
| 336 |
+
########################################
|
| 337 |
+
########################################
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
########################################
|
| 342 |
+
########slime agent framework need######
|
| 343 |
+
########################################
|
| 344 |
+
RUN pip install pydra_config==0.0.15
|
| 345 |
+
RUN pip install together
|
| 346 |
+
RUN pip install google-generativeai
|
| 347 |
+
########################################
|
| 348 |
+
########################################
|
| 349 |
+
########################################
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
########################################
|
| 353 |
+
########Additional packages#############
|
| 354 |
+
########################################
|
| 355 |
+
RUN pip install tensorboard
|
| 356 |
+
########################################
|
| 357 |
+
########################################
|
| 358 |
+
########################################
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
WORKDIR /workspace/
|
| 362 |
+
|
| 363 |
+
CMD ["/usr/bin/bash"]
|
ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm_MI350-5
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image for ROCm 7 / gfx950 (MI355)
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# Base image: ROCm 7 with vllm pre-built for gfx950
|
| 5 |
+
# Target GPU: MI355 (gfx950)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
FROM rocm/sgl-dev:rocm7-vllm-20250904
|
| 9 |
+
|
| 10 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 11 |
+
|
| 12 |
+
ARG MAX_JOBS=128
|
| 13 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 14 |
+
|
| 15 |
+
# Set environment variables for gfx950
|
| 16 |
+
ENV GPU_ARCH=gfx950
|
| 17 |
+
ENV PYTORCH_ROCM_ARCH=gfx950
|
| 18 |
+
ENV GPU_ARCH_LIST=gfx950
|
| 19 |
+
ENV AMDGPU_TARGET=gfx950
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
###########################################
|
| 23 |
+
##############1. Install AITER#############
|
| 24 |
+
###########################################
|
| 25 |
+
WORKDIR /app
|
| 26 |
+
|
| 27 |
+
RUN pip uninstall -y aiter || true
|
| 28 |
+
RUN rm -rf aiter
|
| 29 |
+
RUN git clone https://github.com/ROCm/aiter.git \
|
| 30 |
+
&& cd aiter \
|
| 31 |
+
&& git checkout v0.1.7.post2 \
|
| 32 |
+
&& git submodule update --init --recursive \
|
| 33 |
+
&& GPU_ARCHS=gfx950 python setup.py develop
|
| 34 |
+
###########################################
|
| 35 |
+
###########################################
|
| 36 |
+
###########################################
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
###########################################
|
| 40 |
+
####2. Install TransformerEngine for gfx950
|
| 41 |
+
###########################################
|
| 42 |
+
WORKDIR /app
|
| 43 |
+
|
| 44 |
+
RUN rm -rf TransformerEngine
|
| 45 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git \
|
| 46 |
+
&& cd TransformerEngine \
|
| 47 |
+
&& git checkout 90c04bcdc3c109505b318f40a39680263af55edf \
|
| 48 |
+
&& git submodule update --init --recursive
|
| 49 |
+
|
| 50 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 51 |
+
ENV NVTE_ROCM_ARCH=gfx950
|
| 52 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 53 |
+
ENV NVTE_USE_ROCM=1
|
| 54 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 55 |
+
|
| 56 |
+
RUN cd TransformerEngine && pip install . -v
|
| 57 |
+
###########################################
|
| 58 |
+
###########################################
|
| 59 |
+
###########################################
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
#########################################
|
| 63 |
+
####3. Install Megatron-LM (NVIDIA version)
|
| 64 |
+
#########################################
|
| 65 |
+
WORKDIR /app
|
| 66 |
+
|
| 67 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 68 |
+
|
| 69 |
+
RUN pip uninstall -y megatron-core || true
|
| 70 |
+
RUN rm -rf Megatron-LM
|
| 71 |
+
RUN git clone https://github.com/NVIDIA/Megatron-LM \
|
| 72 |
+
&& cd Megatron-LM \
|
| 73 |
+
&& git checkout 48406695c4efcf1026a7ed70bb390793918dd97b \
|
| 74 |
+
&& pip install -e .
|
| 75 |
+
#########################################
|
| 76 |
+
#########################################
|
| 77 |
+
#########################################
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
########################################
|
| 81 |
+
############ 4. Install mbridge#########
|
| 82 |
+
########################################
|
| 83 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 84 |
+
########################################
|
| 85 |
+
########################################
|
| 86 |
+
########################################
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
########################################
|
| 90 |
+
######5. Install Ray####################
|
| 91 |
+
########################################
|
| 92 |
+
RUN pip uninstall ray -y || true
|
| 93 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 94 |
+
########################################
|
| 95 |
+
########################################
|
| 96 |
+
########################################
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
#########################################
|
| 100 |
+
###6. Install torch_memory_saver#########
|
| 101 |
+
#########################################
|
| 102 |
+
RUN pip install torch_memory_saver
|
| 103 |
+
#########################################
|
| 104 |
+
#########################################
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
#######################################
|
| 108 |
+
####7. Install Apex for ROCm###########
|
| 109 |
+
#######################################
|
| 110 |
+
WORKDIR /app
|
| 111 |
+
|
| 112 |
+
RUN pip uninstall -y apex || true
|
| 113 |
+
RUN rm -rf apex
|
| 114 |
+
RUN git clone https://github.com/ROCm/apex.git \
|
| 115 |
+
&& cd apex \
|
| 116 |
+
&& python setup.py install
|
| 117 |
+
#######################################
|
| 118 |
+
#######################################
|
| 119 |
+
#######################################
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
########################################
|
| 123 |
+
###8. Install slime agent framework deps
|
| 124 |
+
########################################
|
| 125 |
+
RUN pip install pydra_config==0.0.15
|
| 126 |
+
RUN pip install together
|
| 127 |
+
RUN pip install google-generativeai
|
| 128 |
+
RUN pip install tensorboard
|
| 129 |
+
########################################
|
| 130 |
+
########################################
|
| 131 |
+
########################################
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
########################################
|
| 135 |
+
###9. Set performance environment vars##
|
| 136 |
+
########################################
|
| 137 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 138 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 139 |
+
ENV SGLANG_USE_AITER=1
|
| 140 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 141 |
+
ENV SGLANG_MOE_PADDING=1
|
| 142 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 143 |
+
ENV SGLANG_ROCM_FUSED_DECODE_MLA=1
|
| 144 |
+
ENV SGLANG_USE_ROCM700A=1
|
| 145 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 146 |
+
ENV VLLM_FP8_PADDING=1
|
| 147 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 148 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 149 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 150 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 151 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 152 |
+
########################################
|
| 153 |
+
########################################
|
| 154 |
+
########################################
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
###########################################
|
| 158 |
+
##############Install SGLang###############
|
| 159 |
+
###########################################
|
| 160 |
+
WORKDIR /app
|
| 161 |
+
|
| 162 |
+
# Install prerequisites
|
| 163 |
+
RUN pip install IPython orjson python-multipart torchao==0.9.0 pybind11
|
| 164 |
+
|
| 165 |
+
# Clone SGLang
|
| 166 |
+
RUN pip uninstall -y sgl_kernel sglang || true
|
| 167 |
+
RUN rm -rf sglang
|
| 168 |
+
RUN git clone https://github.com/sgl-project/sglang.git \
|
| 169 |
+
&& cd sglang \
|
| 170 |
+
&& git checkout v0.5.6
|
| 171 |
+
|
| 172 |
+
# Build sgl-kernel for gfx950
|
| 173 |
+
RUN cd sglang/sgl-kernel \
|
| 174 |
+
&& rm -f pyproject.toml \
|
| 175 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 176 |
+
&& AMDGPU_TARGET=gfx950 python setup_rocm.py install
|
| 177 |
+
|
| 178 |
+
# Install SGLang
|
| 179 |
+
RUN cd sglang \
|
| 180 |
+
&& rm -rf python/pyproject.toml \
|
| 181 |
+
&& mv python/pyproject_other.toml python/pyproject.toml \
|
| 182 |
+
&& pip install -e "python[all_hip]"
|
| 183 |
+
|
| 184 |
+
# Test SGLang installation
|
| 185 |
+
RUN python -c "import sglang; import sgl_kernel; print('SGLang + sgl_kernel: OK')"
|
| 186 |
+
|
| 187 |
+
RUN python -m pip cache purge
|
| 188 |
+
###########################################
|
| 189 |
+
###########################################
|
| 190 |
+
###########################################
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
###########################################
|
| 194 |
+
#### APPLY PATCHES (gfx950/MI355) #########
|
| 195 |
+
###########################################
|
| 196 |
+
|
| 197 |
+
# Copy patches from slime repo
|
| 198 |
+
COPY amd_patch/latest /app/patch
|
| 199 |
+
|
| 200 |
+
# Apply Megatron patches
|
| 201 |
+
RUN cd /app/Megatron-LM \
|
| 202 |
+
&& git apply /app/patch/amd_megatron_fused_kernels_init.patch \
|
| 203 |
+
&& git apply /app/patch/megatron.patch --3way \
|
| 204 |
+
&& if grep -R -n '^<<<<<<< ' .; then \
|
| 205 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 206 |
+
exit 1; \
|
| 207 |
+
fi \
|
| 208 |
+
&& pip install -e . -v
|
| 209 |
+
|
| 210 |
+
# Apply SGLang patch
|
| 211 |
+
RUN cd /app/sglang \
|
| 212 |
+
&& git apply /app/patch/sglang.patch || echo "Check patch compatibility with v0.5.6" \
|
| 213 |
+
&& if grep -R -n '^<<<<<<< ' .; then \
|
| 214 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 215 |
+
exit 1; \
|
| 216 |
+
fi
|
| 217 |
+
|
| 218 |
+
# Copy MOE configs for gfx950/MI355
|
| 219 |
+
RUN find /app/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 220 |
+
/app/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 221 |
+
-type f -name '*MI300X*' 2>/dev/null | while read f; do \
|
| 222 |
+
cp "$f" "$(echo $f | sed 's/MI300X/MI300X_VF/')" 2>/dev/null || true; \
|
| 223 |
+
cp "$f" "$(echo $f | sed 's/MI300X/MI355/')" 2>/dev/null || true; \
|
| 224 |
+
done
|
| 225 |
+
|
| 226 |
+
###########################################
|
| 227 |
+
###########################################
|
| 228 |
+
###########################################
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
########################################
|
| 232 |
+
#### Install additional packages########
|
| 233 |
+
########################################
|
| 234 |
+
RUN pip install sglang-router --force-reinstall
|
| 235 |
+
########################################
|
| 236 |
+
########################################
|
| 237 |
+
########################################
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
########################################
|
| 241 |
+
# Fix click/ray incompatibility with Python 3.10
|
| 242 |
+
########################################
|
| 243 |
+
RUN pip install click==8.2.1
|
| 244 |
+
########################################
|
| 245 |
+
########################################
|
| 246 |
+
########################################
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
WORKDIR /app
|
| 250 |
+
|
| 251 |
+
CMD ["/usr/bin/bash"]
|
| 252 |
+
|
ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_9a48ba0.rocm
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 5 |
+
|
| 6 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 7 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of a mismatch in the number of dist checkpoints).
|
| 8 |
+
|
| 9 |
+
# Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
|
| 10 |
+
|
| 11 |
+
# FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
|
| 12 |
+
FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
|
| 13 |
+
|
| 14 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 15 |
+
|
| 16 |
+
ARG MAX_JOBS=512
|
| 17 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 18 |
+
|
| 19 |
+
ENV PATH="/usr/local/python3.12/bin:$PATH"
|
| 20 |
+
RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
|
| 21 |
+
ln -sf /usr/bin/pip3.12 /usr/bin/pip
|
| 22 |
+
|
| 23 |
+
RUN apt-get update
|
| 24 |
+
RUN apt-get install -y pkg-config liblzma-dev
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
###########################################
|
| 28 |
+
##########Install TransformerEngine########
|
| 29 |
+
###########################################
|
| 30 |
+
WORKDIR /workspace/
|
| 31 |
+
|
| 32 |
+
RUN rm -rf TransformerEngine
|
| 33 |
+
# RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
|
| 34 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git
|
| 35 |
+
WORKDIR /workspace/TransformerEngine
|
| 36 |
+
|
| 37 |
+
RUN git checkout 236178e
|
| 38 |
+
# RUN git checkout bb061ad
|
| 39 |
+
# RUN git checkout 864405c
|
| 40 |
+
|
| 41 |
+
RUN git submodule update --init --recursive
|
| 42 |
+
|
| 43 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 44 |
+
ENV NVTE_ROCM_ARCH=gfx942
|
| 45 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 46 |
+
ENV NVTE_USE_ROCM=1
|
| 47 |
+
|
| 48 |
+
# export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
|
| 49 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 50 |
+
RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
|
| 51 |
+
WORKDIR /workspace/
|
| 52 |
+
###########################################
|
| 53 |
+
###########################################
|
| 54 |
+
###########################################
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
###########################################################
|
| 59 |
+
####Install vllm - sglang require vllm 0.6.7 dependency####
|
| 60 |
+
# #########################################################
|
| 61 |
+
#### Require vllm 0.6.7 - checkout 113274a0
|
| 62 |
+
WORKDIR /workspace/
|
| 63 |
+
RUN rm -rf vllm
|
| 64 |
+
RUN pip uninstall -y vllm
|
| 65 |
+
# Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
|
| 66 |
+
RUN git clone https://github.com/ROCm/vllm.git
|
| 67 |
+
# git clone https://github.com/vllm-project/vllm.git
|
| 68 |
+
WORKDIR /workspace/vllm
|
| 69 |
+
RUN git checkout 113274a0
|
| 70 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 71 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 72 |
+
RUN pip install "boto3>=1.26.0"
|
| 73 |
+
RUN pip install setuptools_scm
|
| 74 |
+
# will add src into py. You can delete the repo
|
| 75 |
+
RUN python3 setup.py install
|
| 76 |
+
WORKDIR /workspace/
|
| 77 |
+
###########################################
|
| 78 |
+
###########################################
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
RUN pip install setuptools==75.8.0
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
###########################################
|
| 85 |
+
############build sglang###################
|
| 86 |
+
###########################################
|
| 87 |
+
# Set environment variables
|
| 88 |
+
ENV BASE_DIR=/workspace
|
| 89 |
+
# ENV BASE_DIR=/sgl-workspace
|
| 90 |
+
ENV BUILD_TYPE=all
|
| 91 |
+
ENV SGL_REPO=https://github.com/sgl-project/sglang
|
| 92 |
+
ENV SGL_BRANCH=v0.4.7
|
| 93 |
+
ENV TRITON_REPO=https://github.com/ROCm/triton.git
|
| 94 |
+
ENV TRITON_COMMIT=improve_fa_decode_3.0.0
|
| 95 |
+
ENV AITER_REPO=https://github.com/ROCm/aiter.git
|
| 96 |
+
ENV AITER_COMMIT=v0.1.2
|
| 97 |
+
# v0.1.2 version - commit id: 9d11f47
|
| 98 |
+
# ENV AITER_COMMIT=9d11f47
|
| 99 |
+
|
| 100 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 101 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 102 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 103 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 104 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 105 |
+
|
| 106 |
+
ENV SGLANG_USE_AITER=1
|
| 107 |
+
ENV SGLANG_MOE_PADDING=1
|
| 108 |
+
# ENV MOE_PADDING=1
|
| 109 |
+
ENV VLLM_FP8_PADDING=1
|
| 110 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 111 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 112 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 113 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 114 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 115 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
|
| 116 |
+
ENV AMDGPU_TARGETS=gfx942
|
| 117 |
+
ENV ROCM_ARCH=gfx942
|
| 118 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 119 |
+
|
| 120 |
+
# Switch to working directory
|
| 121 |
+
# WORKDIR /sgl-workspace
|
| 122 |
+
WORKDIR /workspace
|
| 123 |
+
|
| 124 |
+
# Clean and create directory
|
| 125 |
+
# RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
|
| 126 |
+
RUN rm -rf /workspace && mkdir -p /workspace
|
| 127 |
+
|
| 128 |
+
# # Clone and build sglang
|
| 129 |
+
# RUN git clone ${SGL_REPO} \
|
| 130 |
+
# && cd sglang \
|
| 131 |
+
# && git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 132 |
+
# && cd sgl-kernel \
|
| 133 |
+
# && rm -f pyproject.toml \
|
| 134 |
+
# && mv pyproject_rocm.toml pyproject.toml \
|
| 135 |
+
# && python setup_rocm.py install \
|
| 136 |
+
# && cd .. \
|
| 137 |
+
# && if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 138 |
+
# python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 139 |
+
# else \
|
| 140 |
+
# python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 141 |
+
# fi \
|
| 142 |
+
# && cd /sgl-workspace \
|
| 143 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 144 |
+
# && python -m pip cache purge
|
| 145 |
+
|
| 146 |
+
# Install common Python packages
|
| 147 |
+
RUN pip install IPython orjson python-multipart torchao pybind11
|
| 148 |
+
|
| 149 |
+
# Rebuild Triton
|
| 150 |
+
RUN pip uninstall -y triton || true \
|
| 151 |
+
&& git clone ${TRITON_REPO} \
|
| 152 |
+
&& cd triton \
|
| 153 |
+
&& git checkout ${TRITON_COMMIT} \
|
| 154 |
+
&& cd python \
|
| 155 |
+
&& python3 setup.py install \
|
| 156 |
+
&& cd /workspace
|
| 157 |
+
# && cd /sgl-workspace
|
| 158 |
+
|
| 159 |
+
# Build aiter
|
| 160 |
+
#version: Commit 9d11f47
|
| 161 |
+
# && git checkout ${AITER_COMMIT} \
|
| 162 |
+
RUN pip uninstall -y aiter || true
|
| 163 |
+
RUN git clone ${AITER_REPO} \
|
| 164 |
+
&& cd aiter \
|
| 165 |
+
&& git checkout ${AITER_COMMIT} \
|
| 166 |
+
&& git submodule sync \
|
| 167 |
+
&& git submodule update --init --recursive \
|
| 168 |
+
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 169 |
+
&& cd /workspace
|
| 170 |
+
# && cd /sgl-workspace
|
| 171 |
+
# && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
###########################################
|
| 175 |
+
# Clone and build sglang
|
| 176 |
+
RUN git clone ${SGL_REPO} \
|
| 177 |
+
&& cd sglang \
|
| 178 |
+
&& git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 179 |
+
&& cd sgl-kernel \
|
| 180 |
+
&& rm -f pyproject.toml \
|
| 181 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 182 |
+
&& python setup_rocm.py install \
|
| 183 |
+
&& cd .. \
|
| 184 |
+
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 185 |
+
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 186 |
+
else \
|
| 187 |
+
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 188 |
+
fi \
|
| 189 |
+
&& cd /workspace \
|
| 190 |
+
&& cp -r /workspace/sglang /sglang \
|
| 191 |
+
&& python -m pip cache purge
|
| 192 |
+
# && cd /sgl-workspace \
|
| 193 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 194 |
+
# && python -m pip cache purge
|
| 195 |
+
###########################################
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# Copy MI300X config
|
| 199 |
+
# RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 200 |
+
# /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 201 |
+
RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 202 |
+
/workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 203 |
+
-type f -name '*MI300X*' | \
|
| 204 |
+
xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
| 205 |
+
|
| 206 |
+
# Environment setup complete.
|
| 207 |
+
RUN echo "Environment setup complete."
|
| 208 |
+
WORKDIR /workspace/
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# # patch
|
| 212 |
+
# # Date: Jul 5, 2025
|
| 213 |
+
# ENV SLIME_COMMIT=9ddbdbd
|
| 214 |
+
# RUN git clone https://github.com/THUDM/slime.git \
|
| 215 |
+
# && cd slime \
|
| 216 |
+
# && git checkout ${SLIME_COMMIT} \
|
| 217 |
+
# && cp docker/patch/sglang.patch /workspace/sglang/
|
| 218 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 219 |
+
# # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
|
| 220 |
+
# # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
|
| 221 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 222 |
+
# # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 223 |
+
# WORKDIR /workspace/sglang/
|
| 224 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 225 |
+
# RUN git apply sglang.patch && rm sglang.patch
|
| 226 |
+
# # WORKDIR /workspace/
|
| 227 |
+
|
| 228 |
+
# # sgl-router
|
| 229 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 230 |
+
# RUN apt-get update && apt-get install -y pkg-config libssl-dev
|
| 231 |
+
# RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
| 232 |
+
# RUN pip install setuptools-rust wheel build
|
| 233 |
+
# RUN source $HOME/.cargo/env && \
|
| 234 |
+
# mkdir -p sgl-router && \
|
| 235 |
+
# cd sgl-router && \
|
| 236 |
+
# cargo build -j 64 && \
|
| 237 |
+
# python3 -m build && \
|
| 238 |
+
# pip install dist/*.whl --force-reinstall
|
| 239 |
+
|
| 240 |
+
RUN pip install sglang-router --force-reinstall
|
| 241 |
+
|
| 242 |
+
###########################################
|
| 243 |
+
###########################################
|
| 244 |
+
###########################################
|
| 245 |
+
|
| 246 |
+
RUN pip install transformers==4.51.1
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
#########################################
|
| 250 |
+
#####Install vllm v0.8.5#################
|
| 251 |
+
#########################################
|
| 252 |
+
|
| 253 |
+
WORKDIR /workspace/
|
| 254 |
+
|
| 255 |
+
ENV VLLM_TARGET_DEVICE=rocm
|
| 256 |
+
ENV ROCM_PATH=/opt/rocm
|
| 257 |
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
|
| 258 |
+
|
| 259 |
+
RUN pip uninstall -y vllm || true
|
| 260 |
+
RUN rm -rf vllm-patch
|
| 261 |
+
|
| 262 |
+
RUN git clone https://github.com/RLFoundation/vllm-patch.git \
|
| 263 |
+
&& cd vllm-patch \
|
| 264 |
+
&& git checkout v0.8.5-sleep-numa \
|
| 265 |
+
&& rm -rf build/ dist/ *.egg-info \
|
| 266 |
+
&& ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
|
| 267 |
+
&& SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
|
| 268 |
+
|
| 269 |
+
WORKDIR /workspace/
|
| 270 |
+
###########################################
|
| 271 |
+
###########################################
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
#########################################
|
| 275 |
+
#### Install megatron-core###############
|
| 276 |
+
#########################################
|
| 277 |
+
# Can be removed later; this is just a dependency of the current megatron-lm
|
| 278 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 279 |
+
|
| 280 |
+
RUN pip uninstall -y megatron-core && \
|
| 281 |
+
git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
|
| 282 |
+
cd Megatron-LM-amd_version && \
|
| 283 |
+
pip install -vvv -e . && \
|
| 284 |
+
cd /workspace/
|
| 285 |
+
#########################################
|
| 286 |
+
#########################################
|
| 287 |
+
#########################################
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
#########################################
|
| 293 |
+
###Add torch_memory_saver################
|
| 294 |
+
#########################################
|
| 295 |
+
# Set environment variables
|
| 296 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
|
| 297 |
+
ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 298 |
+
ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 299 |
+
# Install torch_memory_saver
|
| 300 |
+
# RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
|
| 301 |
+
# RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
|
| 302 |
+
RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
|
| 303 |
+
# pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
|
| 304 |
+
#########################################
|
| 305 |
+
#########################################
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
########################################
|
| 311 |
+
######Install ray#######################
|
| 312 |
+
########################################
|
| 313 |
+
# need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
|
| 314 |
+
RUN pip uninstall ray -y
|
| 315 |
+
# RUN pip install "ray[data,train,tune,serve]>=2.47.0"
|
| 316 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 317 |
+
########################################
|
| 318 |
+
########################################
|
| 319 |
+
########################################
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
### Need to verify whether this causes numerical/convergence issues
|
| 323 |
+
#######################################
|
| 324 |
+
################apex###################
|
| 325 |
+
#######################################
|
| 326 |
+
WORKDIR /workspace/
|
| 327 |
+
RUN pip uninstall -y apex && \
|
| 328 |
+
git clone https://github.com/ROCm/apex.git && \
|
| 329 |
+
cd apex && \
|
| 330 |
+
python setup.py install && \
|
| 331 |
+
cd /workspace/
|
| 332 |
+
#######################################
|
| 333 |
+
#######################################
|
| 334 |
+
#######################################
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
########################################
|
| 338 |
+
############ mbridge####################
|
| 339 |
+
########################################
|
| 340 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 341 |
+
########################################
|
| 342 |
+
########################################
|
| 343 |
+
########################################
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
########################################
|
| 348 |
+
########slime agent framework need######
|
| 349 |
+
########################################
|
| 350 |
+
RUN pip install pydra_config==0.0.15
|
| 351 |
+
RUN pip install together
|
| 352 |
+
RUN pip install google-generativeai
|
| 353 |
+
########################################
|
| 354 |
+
########################################
|
| 355 |
+
########################################
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
WORKDIR /workspace/
|
| 360 |
+
|
| 361 |
+
CMD ["/usr/bin/bash"]
|
ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_c22f55b.rocm
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 5 |
+
# Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
|
| 6 |
+
|
| 7 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 8 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim`. A patch was added manually to mitigate a checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of a mismatch in the number of dist checkpoints).
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# The Docker image built with this Dockerfile:
|
| 12 |
+
# PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 13 |
+
|
| 14 |
+
# Starts to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
|
| 15 |
+
|
| 16 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 17 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim`. A patch was added manually to mitigate a checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of a mismatch in the number of dist checkpoints).
|
| 18 |
+
|
| 19 |
+
# Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
|
| 20 |
+
|
| 21 |
+
# FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
|
| 22 |
+
FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
|
| 23 |
+
|
| 24 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 25 |
+
|
| 26 |
+
ARG MAX_JOBS=512
|
| 27 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 28 |
+
|
| 29 |
+
ENV PATH="/usr/local/python3.12/bin:$PATH"
|
| 30 |
+
RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
|
| 31 |
+
ln -sf /usr/bin/pip3.12 /usr/bin/pip
|
| 32 |
+
|
| 33 |
+
RUN apt-get update
|
| 34 |
+
RUN apt-get install -y pkg-config liblzma-dev
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
###########################################
|
| 38 |
+
##########Install TransformerEngine########
|
| 39 |
+
###########################################
|
| 40 |
+
WORKDIR /workspace/
|
| 41 |
+
|
| 42 |
+
RUN rm -rf TransformerEngine
|
| 43 |
+
# RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
|
| 44 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git
|
| 45 |
+
WORKDIR /workspace/TransformerEngine
|
| 46 |
+
|
| 47 |
+
RUN git checkout 236178e
|
| 48 |
+
# RUN git checkout bb061ad
|
| 49 |
+
# RUN git checkout 864405c
|
| 50 |
+
|
| 51 |
+
RUN git submodule update --init --recursive
|
| 52 |
+
|
| 53 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 54 |
+
ENV NVTE_ROCM_ARCH=gfx942
|
| 55 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 56 |
+
ENV NVTE_USE_ROCM=1
|
| 57 |
+
|
| 58 |
+
# export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
|
| 59 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 60 |
+
RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
|
| 61 |
+
WORKDIR /workspace/
|
| 62 |
+
###########################################
|
| 63 |
+
###########################################
|
| 64 |
+
###########################################
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
###########################################################
|
| 69 |
+
####Install vllm - sglang require vllm 0.6.7 dependency####
|
| 70 |
+
# #########################################################
|
| 71 |
+
#### Require vllm 0.6.7 - checkout 113274a0
|
| 72 |
+
WORKDIR /workspace/
|
| 73 |
+
RUN rm -rf vllm
|
| 74 |
+
RUN pip uninstall -y vllm
|
| 75 |
+
# Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
|
| 76 |
+
RUN git clone https://github.com/ROCm/vllm.git
|
| 77 |
+
# git clone https://github.com/vllm-project/vllm.git
|
| 78 |
+
WORKDIR /workspace/vllm
|
| 79 |
+
RUN git checkout 113274a0
|
| 80 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 81 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 82 |
+
RUN pip install "boto3>=1.26.0"
|
| 83 |
+
RUN pip install setuptools_scm
|
| 84 |
+
# will add src into py. You can delete the repo
|
| 85 |
+
RUN python3 setup.py install
|
| 86 |
+
WORKDIR /workspace/
|
| 87 |
+
###########################################
|
| 88 |
+
###########################################
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
RUN pip install setuptools==75.8.0
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
###########################################
|
| 95 |
+
############build sglang###################
|
| 96 |
+
###########################################
|
| 97 |
+
# Set environment variables
|
| 98 |
+
ENV BASE_DIR=/workspace
|
| 99 |
+
# ENV BASE_DIR=/sgl-workspace
|
| 100 |
+
ENV BUILD_TYPE=all
|
| 101 |
+
ENV SGL_REPO=https://github.com/sgl-project/sglang
|
| 102 |
+
# ENV SGL_BRANCH=v0.4.9
|
| 103 |
+
ENV SGL_BRANCH=0.4.9.post6
|
| 104 |
+
# ENV SGL_BRANCH=v0.4.10
|
| 105 |
+
ENV TRITON_REPO=https://github.com/ROCm/triton.git
|
| 106 |
+
ENV TRITON_COMMIT=improve_fa_decode_3.0.0
|
| 107 |
+
ENV AITER_REPO=https://github.com/ROCm/aiter.git
|
| 108 |
+
ENV AITER_COMMIT=v0.1.3
|
| 109 |
+
# ENV AITER_COMMIT=v0.1.4
|
| 110 |
+
# v0.1.2 version - commit id: 9d11f47
|
| 111 |
+
# ENV AITER_COMMIT=9d11f47
|
| 112 |
+
|
| 113 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 114 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 115 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 116 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 117 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 118 |
+
|
| 119 |
+
ENV SGLANG_USE_AITER=1
|
| 120 |
+
ENV SGLANG_MOE_PADDING=1
|
| 121 |
+
# ENV MOE_PADDING=1
|
| 122 |
+
ENV VLLM_FP8_PADDING=1
|
| 123 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 124 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 125 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 126 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 127 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 128 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
|
| 129 |
+
ENV AMDGPU_TARGETS=gfx942
|
| 130 |
+
ENV ROCM_ARCH=gfx942
|
| 131 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 132 |
+
|
| 133 |
+
# Switch to working directory
|
| 134 |
+
# WORKDIR /sgl-workspace
|
| 135 |
+
WORKDIR /workspace
|
| 136 |
+
|
| 137 |
+
# Clean and create directory
|
| 138 |
+
# RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
|
| 139 |
+
RUN rm -rf /workspace && mkdir -p /workspace
|
| 140 |
+
|
| 141 |
+
# # Clone and build sglang
|
| 142 |
+
# RUN git clone ${SGL_REPO} \
|
| 143 |
+
# && cd sglang \
|
| 144 |
+
# && git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 145 |
+
# && cd sgl-kernel \
|
| 146 |
+
# && rm -f pyproject.toml \
|
| 147 |
+
# && mv pyproject_rocm.toml pyproject.toml \
|
| 148 |
+
# && python setup_rocm.py install \
|
| 149 |
+
# && cd .. \
|
| 150 |
+
# && if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 151 |
+
# python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 152 |
+
# else \
|
| 153 |
+
# python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 154 |
+
# fi \
|
| 155 |
+
# && cd /sgl-workspace \
|
| 156 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 157 |
+
# && python -m pip cache purge
|
| 158 |
+
|
| 159 |
+
# Install common Python packages
|
| 160 |
+
RUN pip install IPython orjson python-multipart torchao pybind11
|
| 161 |
+
|
| 162 |
+
# Rebuild Triton
|
| 163 |
+
RUN pip uninstall -y triton || true \
|
| 164 |
+
&& git clone ${TRITON_REPO} \
|
| 165 |
+
&& cd triton \
|
| 166 |
+
&& git checkout ${TRITON_COMMIT} \
|
| 167 |
+
&& cd python \
|
| 168 |
+
&& python3 setup.py install \
|
| 169 |
+
&& cd /workspace
|
| 170 |
+
# && cd /sgl-workspace
|
| 171 |
+
|
| 172 |
+
# Build aiter
|
| 173 |
+
#version: Commit 9d11f47
|
| 174 |
+
# && git checkout ${AITER_COMMIT} \
|
| 175 |
+
RUN pip uninstall -y aiter || true
|
| 176 |
+
RUN git clone ${AITER_REPO} \
|
| 177 |
+
&& cd aiter \
|
| 178 |
+
&& git checkout ${AITER_COMMIT} \
|
| 179 |
+
&& git submodule sync \
|
| 180 |
+
&& git submodule update --init --recursive \
|
| 181 |
+
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 182 |
+
&& cd /workspace
|
| 183 |
+
# && cd /sgl-workspace
|
| 184 |
+
# && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
###########################################
|
| 188 |
+
# Clone and build sglang
|
| 189 |
+
RUN git clone ${SGL_REPO} \
|
| 190 |
+
&& cd sglang \
|
| 191 |
+
&& git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 192 |
+
&& cd sgl-kernel \
|
| 193 |
+
&& rm -f pyproject.toml \
|
| 194 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 195 |
+
&& python setup_rocm.py install \
|
| 196 |
+
&& cd .. \
|
| 197 |
+
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 198 |
+
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 199 |
+
else \
|
| 200 |
+
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 201 |
+
fi \
|
| 202 |
+
&& cd /workspace \
|
| 203 |
+
&& cp -r /workspace/sglang /sglang \
|
| 204 |
+
&& python -m pip cache purge
|
| 205 |
+
# && cd /sgl-workspace \
|
| 206 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 207 |
+
# && python -m pip cache purge
|
| 208 |
+
###########################################
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# Copy MI300X config
|
| 212 |
+
# RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 213 |
+
# /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 214 |
+
RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 215 |
+
/workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 216 |
+
-type f -name '*MI300X*' | \
|
| 217 |
+
xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
| 218 |
+
|
| 219 |
+
# Environment setup complete.
|
| 220 |
+
RUN echo "Environment setup complete."
|
| 221 |
+
WORKDIR /workspace/
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# # patch
|
| 225 |
+
# # Date: Jul 5, 2025
|
| 226 |
+
# ENV SLIME_COMMIT=9ddbdbd
|
| 227 |
+
# RUN git clone https://github.com/THUDM/slime.git \
|
| 228 |
+
# && cd slime \
|
| 229 |
+
# && git checkout ${SLIME_COMMIT} \
|
| 230 |
+
# && cp docker/patch/sglang.patch /workspace/sglang/
|
| 231 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 232 |
+
# # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
|
| 233 |
+
# # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
|
| 234 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 235 |
+
# # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 236 |
+
# WORKDIR /workspace/sglang/
|
| 237 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 238 |
+
# RUN git apply sglang.patch && rm sglang.patch
|
| 239 |
+
# # WORKDIR /workspace/
|
| 240 |
+
|
| 241 |
+
# # sgl-router
|
| 242 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 243 |
+
# RUN apt-get update && apt-get install -y pkg-config libssl-dev
|
| 244 |
+
# RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
| 245 |
+
# RUN pip install setuptools-rust wheel build
|
| 246 |
+
# RUN source $HOME/.cargo/env && \
|
| 247 |
+
# mkdir -p sgl-router && \
|
| 248 |
+
# cd sgl-router && \
|
| 249 |
+
# cargo build -j 64 && \
|
| 250 |
+
# python3 -m build && \
|
| 251 |
+
# pip install dist/*.whl --force-reinstall
|
| 252 |
+
|
| 253 |
+
RUN pip install sglang-router --force-reinstall
|
| 254 |
+
|
| 255 |
+
###########################################
|
| 256 |
+
###########################################
|
| 257 |
+
###########################################
|
| 258 |
+
|
| 259 |
+
RUN pip install transformers==4.51.1
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
#########################################
|
| 263 |
+
#####Install vllm v0.8.5#################
|
| 264 |
+
#########################################
|
| 265 |
+
|
| 266 |
+
WORKDIR /workspace/
|
| 267 |
+
|
| 268 |
+
ENV VLLM_TARGET_DEVICE=rocm
|
| 269 |
+
ENV ROCM_PATH=/opt/rocm
|
| 270 |
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
|
| 271 |
+
|
| 272 |
+
RUN pip uninstall -y vllm || true
|
| 273 |
+
RUN rm -rf vllm-patch
|
| 274 |
+
|
| 275 |
+
RUN git clone https://github.com/RLFoundation/vllm-patch.git \
|
| 276 |
+
&& cd vllm-patch \
|
| 277 |
+
&& git checkout v0.8.5-sleep-numa \
|
| 278 |
+
&& rm -rf build/ dist/ *.egg-info \
|
| 279 |
+
&& ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
|
| 280 |
+
&& SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
|
| 281 |
+
|
| 282 |
+
WORKDIR /workspace/
|
| 283 |
+
###########################################
|
| 284 |
+
###########################################
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
#########################################
|
| 288 |
+
#### Install megatron-core###############
|
| 289 |
+
#########################################
|
| 290 |
+
# Can be removed just the current megatron-lm dependency
|
| 291 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 292 |
+
|
| 293 |
+
RUN pip uninstall -y megatron-core && \
|
| 294 |
+
git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
|
| 295 |
+
cd Megatron-LM-amd_version && \
|
| 296 |
+
pip install -vvv -e . && \
|
| 297 |
+
cd /workspace/
|
| 298 |
+
#########################################
|
| 299 |
+
#########################################
|
| 300 |
+
#########################################
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
#########################################
|
| 306 |
+
###Add torch_memory_saver################
|
| 307 |
+
#########################################
|
| 308 |
+
# Set environment variables
|
| 309 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
|
| 310 |
+
ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 311 |
+
ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 312 |
+
# Install torch_memory_saver
|
| 313 |
+
# RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
|
| 314 |
+
# RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
|
| 315 |
+
RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
|
| 316 |
+
# pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
|
| 317 |
+
#########################################
|
| 318 |
+
#########################################
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
########################################
|
| 324 |
+
######Install ray#######################
|
| 325 |
+
########################################
|
| 326 |
+
# need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
|
| 327 |
+
RUN pip uninstall ray -y
|
| 328 |
+
# RUN pip install "ray[data,train,tune,serve]>=2.47.0"
|
| 329 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 330 |
+
########################################
|
| 331 |
+
########################################
|
| 332 |
+
########################################
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
### Need to verify whether there is a numerical/convergence issue
|
| 336 |
+
#######################################
|
| 337 |
+
################apex###################
|
| 338 |
+
#######################################
|
| 339 |
+
WORKDIR /workspace/
|
| 340 |
+
RUN pip uninstall -y apex && \
|
| 341 |
+
git clone https://github.com/ROCm/apex.git && \
|
| 342 |
+
cd apex && \
|
| 343 |
+
python setup.py install && \
|
| 344 |
+
cd /workspace/
|
| 345 |
+
#######################################
|
| 346 |
+
#######################################
|
| 347 |
+
#######################################
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
########################################
|
| 351 |
+
############ mbridge####################
|
| 352 |
+
########################################
|
| 353 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 354 |
+
########################################
|
| 355 |
+
########################################
|
| 356 |
+
########################################
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
########################################
|
| 361 |
+
########slime agent framework need######
|
| 362 |
+
########################################
|
| 363 |
+
RUN pip install pydra_config==0.0.15
|
| 364 |
+
RUN pip install together
|
| 365 |
+
RUN pip install google-generativeai
|
| 366 |
+
########################################
|
| 367 |
+
########################################
|
| 368 |
+
########################################
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
WORKDIR /workspace/
|
| 373 |
+
|
| 374 |
+
CMD ["/usr/bin/bash"]
|
ccevolve/baselines/thetaevolve/docker/Dockerfile_Aug_10_2025_9a48ba0.rocm
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 5 |
+
|
| 6 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 7 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim`. A patch was added manually to mitigate a checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of a mismatch in the number of dist checkpoints).
|
| 8 |
+
|
| 9 |
+
# Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
|
| 10 |
+
|
| 11 |
+
# FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
|
| 12 |
+
FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
|
| 13 |
+
|
| 14 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 15 |
+
|
| 16 |
+
ARG MAX_JOBS=512
|
| 17 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 18 |
+
|
| 19 |
+
ENV PATH="/usr/local/python3.12/bin:$PATH"
|
| 20 |
+
RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
|
| 21 |
+
ln -sf /usr/bin/pip3.12 /usr/bin/pip
|
| 22 |
+
|
| 23 |
+
RUN apt-get update
|
| 24 |
+
RUN apt-get install -y pkg-config liblzma-dev
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
###########################################
|
| 28 |
+
##########Install TransformerEngine########
|
| 29 |
+
###########################################
|
| 30 |
+
WORKDIR /workspace/
|
| 31 |
+
|
| 32 |
+
RUN rm -rf TransformerEngine
|
| 33 |
+
# RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
|
| 34 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git
|
| 35 |
+
WORKDIR /workspace/TransformerEngine
|
| 36 |
+
|
| 37 |
+
RUN git checkout 236178e
|
| 38 |
+
# RUN git checkout bb061ad
|
| 39 |
+
# RUN git checkout 864405c
|
| 40 |
+
|
| 41 |
+
RUN git submodule update --init --recursive
|
| 42 |
+
|
| 43 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 44 |
+
ENV NVTE_ROCM_ARCH=gfx942
|
| 45 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 46 |
+
ENV NVTE_USE_ROCM=1
|
| 47 |
+
|
| 48 |
+
# export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
|
| 49 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 50 |
+
RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
|
| 51 |
+
WORKDIR /workspace/
|
| 52 |
+
###########################################
|
| 53 |
+
###########################################
|
| 54 |
+
###########################################
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
###########################################################
|
| 59 |
+
####Install vllm - sglang require vllm 0.6.7 dependency####
|
| 60 |
+
# #########################################################
|
| 61 |
+
#### Require vllm 0.6.7 - checkout 113274a0
|
| 62 |
+
WORKDIR /workspace/
|
| 63 |
+
RUN rm -rf vllm
|
| 64 |
+
RUN pip uninstall -y vllm
|
| 65 |
+
# Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
|
| 66 |
+
RUN git clone https://github.com/ROCm/vllm.git
|
| 67 |
+
# git clone https://github.com/vllm-project/vllm.git
|
| 68 |
+
WORKDIR /workspace/vllm
|
| 69 |
+
RUN git checkout 113274a0
|
| 70 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 71 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 72 |
+
RUN pip install "boto3>=1.26.0"
|
| 73 |
+
RUN pip install setuptools_scm
|
| 74 |
+
# will add src into py. You can delete the repo
|
| 75 |
+
RUN python3 setup.py install
|
| 76 |
+
WORKDIR /workspace/
|
| 77 |
+
###########################################
|
| 78 |
+
###########################################
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
RUN pip install setuptools==75.8.0
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
###########################################
|
| 85 |
+
############build sglang###################
|
| 86 |
+
###########################################
|
| 87 |
+
# Set environment variables
|
| 88 |
+
ENV BASE_DIR=/workspace
|
| 89 |
+
# ENV BASE_DIR=/sgl-workspace
|
| 90 |
+
ENV BUILD_TYPE=all
|
| 91 |
+
ENV SGL_REPO=https://github.com/sgl-project/sglang
|
| 92 |
+
ENV SGL_BRANCH=v0.4.7
|
| 93 |
+
ENV TRITON_REPO=https://github.com/ROCm/triton.git
|
| 94 |
+
ENV TRITON_COMMIT=improve_fa_decode_3.0.0
|
| 95 |
+
ENV AITER_REPO=https://github.com/ROCm/aiter.git
|
| 96 |
+
ENV AITER_COMMIT=v0.1.2
|
| 97 |
+
# v0.1.2 version - commit id: 9d11f47
|
| 98 |
+
# ENV AITER_COMMIT=9d11f47
|
| 99 |
+
|
| 100 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 101 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 102 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 103 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 104 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 105 |
+
|
| 106 |
+
ENV SGLANG_USE_AITER=1
|
| 107 |
+
ENV SGLANG_MOE_PADDING=1
|
| 108 |
+
# ENV MOE_PADDING=1
|
| 109 |
+
ENV VLLM_FP8_PADDING=1
|
| 110 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 111 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 112 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 113 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 114 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 115 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
|
| 116 |
+
ENV AMDGPU_TARGETS=gfx942
|
| 117 |
+
ENV ROCM_ARCH=gfx942
|
| 118 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 119 |
+
|
| 120 |
+
# Switch to working directory
|
| 121 |
+
# WORKDIR /sgl-workspace
|
| 122 |
+
WORKDIR /workspace
|
| 123 |
+
|
| 124 |
+
# Clean and create directory
|
| 125 |
+
# RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
|
| 126 |
+
RUN rm -rf /workspace && mkdir -p /workspace
|
| 127 |
+
|
| 128 |
+
# # Clone and build sglang
|
| 129 |
+
# RUN git clone ${SGL_REPO} \
|
| 130 |
+
# && cd sglang \
|
| 131 |
+
# && git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 132 |
+
# && cd sgl-kernel \
|
| 133 |
+
# && rm -f pyproject.toml \
|
| 134 |
+
# && mv pyproject_rocm.toml pyproject.toml \
|
| 135 |
+
# && python setup_rocm.py install \
|
| 136 |
+
# && cd .. \
|
| 137 |
+
# && if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 138 |
+
# python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 139 |
+
# else \
|
| 140 |
+
# python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 141 |
+
# fi \
|
| 142 |
+
# && cd /sgl-workspace \
|
| 143 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 144 |
+
# && python -m pip cache purge
|
| 145 |
+
|
| 146 |
+
# Install common Python packages
|
| 147 |
+
RUN pip install IPython orjson python-multipart torchao pybind11
|
| 148 |
+
|
| 149 |
+
# Rebuild Triton
|
| 150 |
+
RUN pip uninstall -y triton || true \
|
| 151 |
+
&& git clone ${TRITON_REPO} \
|
| 152 |
+
&& cd triton \
|
| 153 |
+
&& git checkout ${TRITON_COMMIT} \
|
| 154 |
+
&& cd python \
|
| 155 |
+
&& python3 setup.py install \
|
| 156 |
+
&& cd /workspace
|
| 157 |
+
# && cd /sgl-workspace
|
| 158 |
+
|
| 159 |
+
# Build aiter
|
| 160 |
+
#version: Commit 9d11f47
|
| 161 |
+
# && git checkout ${AITER_COMMIT} \
|
| 162 |
+
RUN pip uninstall -y aiter || true
|
| 163 |
+
RUN git clone ${AITER_REPO} \
|
| 164 |
+
&& cd aiter \
|
| 165 |
+
&& git checkout ${AITER_COMMIT} \
|
| 166 |
+
&& git submodule sync \
|
| 167 |
+
&& git submodule update --init --recursive \
|
| 168 |
+
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 169 |
+
&& cd /workspace
|
| 170 |
+
# && cd /sgl-workspace
|
| 171 |
+
# && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
###########################################
|
| 175 |
+
# Clone and build sglang
|
| 176 |
+
RUN git clone ${SGL_REPO} \
|
| 177 |
+
&& cd sglang \
|
| 178 |
+
&& git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 179 |
+
&& cd sgl-kernel \
|
| 180 |
+
&& rm -f pyproject.toml \
|
| 181 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 182 |
+
&& python setup_rocm.py install \
|
| 183 |
+
&& cd .. \
|
| 184 |
+
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 185 |
+
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 186 |
+
else \
|
| 187 |
+
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 188 |
+
fi \
|
| 189 |
+
&& cd /workspace \
|
| 190 |
+
&& cp -r /workspace/sglang /sglang \
|
| 191 |
+
&& python -m pip cache purge
|
| 192 |
+
# && cd /sgl-workspace \
|
| 193 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 194 |
+
# && python -m pip cache purge
|
| 195 |
+
###########################################
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# Copy MI300X config
|
| 199 |
+
# RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 200 |
+
# /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 201 |
+
RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 202 |
+
/workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 203 |
+
-type f -name '*MI300X*' | \
|
| 204 |
+
xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
| 205 |
+
|
| 206 |
+
# Environment setup complete.
|
| 207 |
+
RUN echo "Environment setup complete."
|
| 208 |
+
WORKDIR /workspace/
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# # patch
|
| 212 |
+
# # Date: Jul 5, 2025
|
| 213 |
+
# ENV SLIME_COMMIT=9ddbdbd
|
| 214 |
+
# RUN git clone https://github.com/THUDM/slime.git \
|
| 215 |
+
# && cd slime \
|
| 216 |
+
# && git checkout ${SLIME_COMMIT} \
|
| 217 |
+
# && cp docker/patch/sglang.patch /workspace/sglang/
|
| 218 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 219 |
+
# # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
|
| 220 |
+
# # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
|
| 221 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 222 |
+
# # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 223 |
+
# WORKDIR /workspace/sglang/
|
| 224 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 225 |
+
# RUN git apply sglang.patch && rm sglang.patch
|
| 226 |
+
# # WORKDIR /workspace/
|
| 227 |
+
|
| 228 |
+
# # sgl-router
|
| 229 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 230 |
+
# RUN apt-get update && apt-get install -y pkg-config libssl-dev
|
| 231 |
+
# RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
| 232 |
+
# RUN pip install setuptools-rust wheel build
|
| 233 |
+
# RUN source $HOME/.cargo/env && \
|
| 234 |
+
# mkdir -p sgl-router && \
|
| 235 |
+
# cd sgl-router && \
|
| 236 |
+
# cargo build -j 64 && \
|
| 237 |
+
# python3 -m build && \
|
| 238 |
+
# pip install dist/*.whl --force-reinstall
|
| 239 |
+
|
| 240 |
+
RUN pip install sglang-router --force-reinstall
|
| 241 |
+
|
| 242 |
+
###########################################
|
| 243 |
+
###########################################
|
| 244 |
+
###########################################
|
| 245 |
+
|
| 246 |
+
RUN pip install transformers==4.51.1
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
#########################################
|
| 250 |
+
#####Install vllm v0.8.5#################
|
| 251 |
+
#########################################
|
| 252 |
+
|
| 253 |
+
WORKDIR /workspace/
|
| 254 |
+
|
| 255 |
+
ENV VLLM_TARGET_DEVICE=rocm
|
| 256 |
+
ENV ROCM_PATH=/opt/rocm
|
| 257 |
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
|
| 258 |
+
|
| 259 |
+
RUN pip uninstall -y vllm || true
|
| 260 |
+
RUN rm -rf vllm-patch
|
| 261 |
+
|
| 262 |
+
RUN git clone https://github.com/RLFoundation/vllm-patch.git \
|
| 263 |
+
&& cd vllm-patch \
|
| 264 |
+
&& git checkout v0.8.5-sleep-numa \
|
| 265 |
+
&& rm -rf build/ dist/ *.egg-info \
|
| 266 |
+
&& ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
|
| 267 |
+
&& SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
|
| 268 |
+
|
| 269 |
+
WORKDIR /workspace/
|
| 270 |
+
###########################################
|
| 271 |
+
###########################################
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
#########################################
|
| 275 |
+
#### Install megatron-core###############
|
| 276 |
+
#########################################
|
| 277 |
+
# Can be removed just the current megatron-lm dependency
|
| 278 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 279 |
+
|
| 280 |
+
RUN pip uninstall -y megatron-core && \
|
| 281 |
+
git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
|
| 282 |
+
cd Megatron-LM-amd_version && \
|
| 283 |
+
pip install -vvv -e . && \
|
| 284 |
+
cd /workspace/
|
| 285 |
+
#########################################
|
| 286 |
+
#########################################
|
| 287 |
+
#########################################
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
#########################################
|
| 293 |
+
###Add torch_memory_saver################
|
| 294 |
+
#########################################
|
| 295 |
+
# Set environment variables
|
| 296 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
|
| 297 |
+
ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 298 |
+
ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 299 |
+
# Install torch_memory_saver
|
| 300 |
+
# RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
|
| 301 |
+
# RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
|
| 302 |
+
RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
|
| 303 |
+
# pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
|
| 304 |
+
#########################################
|
| 305 |
+
#########################################
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
########################################
|
| 311 |
+
######Install ray#######################
|
| 312 |
+
########################################
|
| 313 |
+
# need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
|
| 314 |
+
RUN pip uninstall ray -y
|
| 315 |
+
# RUN pip install "ray[data,train,tune,serve]>=2.47.0"
|
| 316 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 317 |
+
########################################
|
| 318 |
+
########################################
|
| 319 |
+
########################################
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
### Need to verify whether this causes a numerical/convergence issue
|
| 323 |
+
#######################################
|
| 324 |
+
################apex###################
|
| 325 |
+
#######################################
|
| 326 |
+
WORKDIR /workspace/
|
| 327 |
+
RUN pip uninstall -y apex && \
|
| 328 |
+
git clone https://github.com/ROCm/apex.git && \
|
| 329 |
+
cd apex && \
|
| 330 |
+
python setup.py install && \
|
| 331 |
+
cd /workspace/
|
| 332 |
+
#######################################
|
| 333 |
+
#######################################
|
| 334 |
+
#######################################
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
########################################
|
| 338 |
+
############ mbridge####################
|
| 339 |
+
########################################
|
| 340 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 341 |
+
########################################
|
| 342 |
+
########################################
|
| 343 |
+
########################################
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
########################################
|
| 348 |
+
########slime agent framework need######
|
| 349 |
+
########################################
|
| 350 |
+
RUN pip install pydra_config==0.0.15
|
| 351 |
+
RUN pip install together
|
| 352 |
+
RUN pip install google-generativeai
|
| 353 |
+
########################################
|
| 354 |
+
########################################
|
| 355 |
+
########################################
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
WORKDIR /workspace/
|
| 360 |
+
|
| 361 |
+
CMD ["/usr/bin/bash"]
|
ccevolve/baselines/thetaevolve/docker/Dockerfile_after_c22f55b_Aug_10_2025.rocm
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#### Use the base image
|
| 2 |
+
|
| 3 |
+
# The Docker image built with this Dockerfile:
|
| 4 |
+
# Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 5 |
+
# Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
|
| 6 |
+
|
| 7 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 8 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` manually add the patch to mitigate checkpoint loading issue. (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py. Line: 1449 ~ 1457 - comment out if because of mismatched number of dist checkpoints)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# The Docker image built with this Dockerfile:
|
| 12 |
+
# PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
|
| 13 |
+
|
| 14 |
+
# Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
|
| 15 |
+
|
| 16 |
+
# You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
|
| 17 |
+
# Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` manually add the patch to mitigate checkpoint loading issue. (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py. Line: 1449 ~ 1457 - comment out if because of mismatched number of dist checkpoints)
|
| 18 |
+
|
| 19 |
+
# Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
|
| 20 |
+
|
| 21 |
+
# FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
|
| 22 |
+
FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
|
| 23 |
+
|
| 24 |
+
SHELL ["/bin/bash", "-ceuxo", "pipefail"]
|
| 25 |
+
|
| 26 |
+
ARG MAX_JOBS=512
|
| 27 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 28 |
+
|
| 29 |
+
ENV PATH="/usr/local/python3.12/bin:$PATH"
|
| 30 |
+
RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
|
| 31 |
+
ln -sf /usr/bin/pip3.12 /usr/bin/pip
|
| 32 |
+
|
| 33 |
+
RUN apt-get update
|
| 34 |
+
RUN apt-get install -y pkg-config liblzma-dev
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
###########################################
|
| 38 |
+
##########Install TransformerEngine########
|
| 39 |
+
###########################################
|
| 40 |
+
WORKDIR /workspace/
|
| 41 |
+
|
| 42 |
+
RUN rm -rf TransformerEngine
|
| 43 |
+
# RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
|
| 44 |
+
RUN git clone https://github.com/ROCm/TransformerEngine.git
|
| 45 |
+
WORKDIR /workspace/TransformerEngine
|
| 46 |
+
|
| 47 |
+
RUN git checkout 236178e
|
| 48 |
+
# RUN git checkout bb061ad
|
| 49 |
+
# RUN git checkout 864405c
|
| 50 |
+
|
| 51 |
+
RUN git submodule update --init --recursive
|
| 52 |
+
|
| 53 |
+
ENV NVTE_FRAMEWORK=pytorch
|
| 54 |
+
ENV NVTE_ROCM_ARCH=gfx942
|
| 55 |
+
ENV NVTE_USE_HIPBLASLT=1
|
| 56 |
+
ENV NVTE_USE_ROCM=1
|
| 57 |
+
|
| 58 |
+
# export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
|
| 59 |
+
ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
|
| 60 |
+
RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
|
| 61 |
+
WORKDIR /workspace/
|
| 62 |
+
###########################################
|
| 63 |
+
###########################################
|
| 64 |
+
###########################################
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
###########################################################
|
| 69 |
+
####Install vllm - sglang require vllm 0.6.7 dependency####
|
| 70 |
+
# #########################################################
|
| 71 |
+
#### Require vllm 0.6.7 - checkout 113274a0
|
| 72 |
+
WORKDIR /workspace/
|
| 73 |
+
RUN rm -rf vllm
|
| 74 |
+
RUN pip uninstall -y vllm
|
| 75 |
+
# Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
|
| 76 |
+
RUN git clone https://github.com/ROCm/vllm.git
|
| 77 |
+
# git clone https://github.com/vllm-project/vllm.git
|
| 78 |
+
WORKDIR /workspace/vllm
|
| 79 |
+
RUN git checkout 113274a0
|
| 80 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 81 |
+
ENV MAX_JOBS=${MAX_JOBS}
|
| 82 |
+
RUN pip install "boto3>=1.26.0"
|
| 83 |
+
RUN pip install setuptools_scm
|
| 84 |
+
# will add src into py. You can delete the repo
|
| 85 |
+
RUN python3 setup.py install
|
| 86 |
+
WORKDIR /workspace/
|
| 87 |
+
###########################################
|
| 88 |
+
###########################################
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
RUN pip install setuptools==75.8.0
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
###########################################
|
| 95 |
+
############build sglang###################
|
| 96 |
+
###########################################
|
| 97 |
+
# Set environment variables
|
| 98 |
+
ENV BASE_DIR=/workspace
|
| 99 |
+
# ENV BASE_DIR=/sgl-workspace
|
| 100 |
+
ENV BUILD_TYPE=all
|
| 101 |
+
ENV SGL_REPO=https://github.com/sgl-project/sglang
|
| 102 |
+
# ENV SGL_BRANCH=v0.4.9
|
| 103 |
+
ENV SGL_BRANCH=0.4.9.post6
|
| 104 |
+
# ENV SGL_BRANCH=v0.4.10
|
| 105 |
+
ENV TRITON_REPO=https://github.com/ROCm/triton.git
|
| 106 |
+
ENV TRITON_COMMIT=improve_fa_decode_3.0.0
|
| 107 |
+
ENV AITER_REPO=https://github.com/ROCm/aiter.git
|
| 108 |
+
ENV AITER_COMMIT=v0.1.3
|
| 109 |
+
# ENV AITER_COMMIT=v0.1.4
|
| 110 |
+
# v0.1.2 version - commit id: 9d11f47
|
| 111 |
+
# ENV AITER_COMMIT=9d11f47
|
| 112 |
+
|
| 113 |
+
ENV HIP_FORCE_DEV_KERNARG=1
|
| 114 |
+
ENV HSA_NO_SCRATCH_RECLAIM=1
|
| 115 |
+
ENV SGLANG_SET_CPU_AFFINITY=1
|
| 116 |
+
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
| 117 |
+
ENV NCCL_MIN_NCHANNELS=112
|
| 118 |
+
|
| 119 |
+
ENV SGLANG_USE_AITER=1
|
| 120 |
+
ENV SGLANG_MOE_PADDING=1
|
| 121 |
+
# ENV MOE_PADDING=1
|
| 122 |
+
ENV VLLM_FP8_PADDING=1
|
| 123 |
+
ENV VLLM_FP8_ACT_PADDING=1
|
| 124 |
+
ENV VLLM_FP8_WEIGHT_PADDING=1
|
| 125 |
+
ENV VLLM_FP8_REDUCE_CONV=1
|
| 126 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
|
| 127 |
+
ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
| 128 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
|
| 129 |
+
ENV AMDGPU_TARGETS=gfx942
|
| 130 |
+
ENV ROCM_ARCH=gfx942
|
| 131 |
+
ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
|
| 132 |
+
|
| 133 |
+
# Switch to working directory
|
| 134 |
+
# WORKDIR /sgl-workspace
|
| 135 |
+
WORKDIR /workspace
|
| 136 |
+
|
| 137 |
+
# Clean and create directory
|
| 138 |
+
# RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
|
| 139 |
+
RUN rm -rf /workspace && mkdir -p /workspace
|
| 140 |
+
|
| 141 |
+
# # Clone and build sglang
|
| 142 |
+
# RUN git clone ${SGL_REPO} \
|
| 143 |
+
# && cd sglang \
|
| 144 |
+
# && git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 145 |
+
# && cd sgl-kernel \
|
| 146 |
+
# && rm -f pyproject.toml \
|
| 147 |
+
# && mv pyproject_rocm.toml pyproject.toml \
|
| 148 |
+
# && python setup_rocm.py install \
|
| 149 |
+
# && cd .. \
|
| 150 |
+
# && if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 151 |
+
# python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 152 |
+
# else \
|
| 153 |
+
# python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 154 |
+
# fi \
|
| 155 |
+
# && cd /sgl-workspace \
|
| 156 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 157 |
+
# && python -m pip cache purge
|
| 158 |
+
|
| 159 |
+
# Install common Python packages
|
| 160 |
+
RUN pip install IPython orjson python-multipart torchao pybind11
|
| 161 |
+
|
| 162 |
+
# Rebuild Triton
|
| 163 |
+
RUN pip uninstall -y triton || true \
|
| 164 |
+
&& git clone ${TRITON_REPO} \
|
| 165 |
+
&& cd triton \
|
| 166 |
+
&& git checkout ${TRITON_COMMIT} \
|
| 167 |
+
&& cd python \
|
| 168 |
+
&& python3 setup.py install \
|
| 169 |
+
&& cd /workspace
|
| 170 |
+
# && cd /sgl-workspace
|
| 171 |
+
|
| 172 |
+
# Build aiter
|
| 173 |
+
#version: Commit 9d11f47
|
| 174 |
+
# && git checkout ${AITER_COMMIT} \
|
| 175 |
+
RUN pip uninstall -y aiter || true
|
| 176 |
+
RUN git clone ${AITER_REPO} \
|
| 177 |
+
&& cd aiter \
|
| 178 |
+
&& git checkout ${AITER_COMMIT} \
|
| 179 |
+
&& git submodule sync \
|
| 180 |
+
&& git submodule update --init --recursive \
|
| 181 |
+
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 182 |
+
&& cd /workspace
|
| 183 |
+
# && cd /sgl-workspace
|
| 184 |
+
# && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
###########################################
|
| 188 |
+
# Clone and build sglang
|
| 189 |
+
RUN git clone ${SGL_REPO} \
|
| 190 |
+
&& cd sglang \
|
| 191 |
+
&& git checkout ${SGL_BRANCH} || echo "Using default branch" \
|
| 192 |
+
&& cd sgl-kernel \
|
| 193 |
+
&& rm -f pyproject.toml \
|
| 194 |
+
&& mv pyproject_rocm.toml pyproject.toml \
|
| 195 |
+
&& python setup_rocm.py install \
|
| 196 |
+
&& cd .. \
|
| 197 |
+
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
| 198 |
+
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
| 199 |
+
else \
|
| 200 |
+
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
| 201 |
+
fi \
|
| 202 |
+
&& cd /workspace \
|
| 203 |
+
&& cp -r /workspace/sglang /sglang \
|
| 204 |
+
&& python -m pip cache purge
|
| 205 |
+
# && cd /sgl-workspace \
|
| 206 |
+
# && cp -r /sgl-workspace/sglang /sglang \
|
| 207 |
+
# && python -m pip cache purge
|
| 208 |
+
###########################################
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# Copy MI300X config
|
| 212 |
+
# RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 213 |
+
# /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 214 |
+
RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
| 215 |
+
/workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
|
| 216 |
+
-type f -name '*MI300X*' | \
|
| 217 |
+
xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
| 218 |
+
|
| 219 |
+
# Environment setup complete.
|
| 220 |
+
RUN echo "Environment setup complete."
|
| 221 |
+
WORKDIR /workspace/
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# # patch
|
| 225 |
+
# # Date: Jul 5, 2025
|
| 226 |
+
# ENV SLIME_COMMIT=9ddbdbd
|
| 227 |
+
# RUN git clone https://github.com/THUDM/slime.git \
|
| 228 |
+
# && cd slime \
|
| 229 |
+
# && git checkout ${SLIME_COMMIT} \
|
| 230 |
+
# && cp docker/patch/sglang.patch /workspace/sglang/
|
| 231 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 232 |
+
# # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
|
| 233 |
+
# # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
|
| 234 |
+
# # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 235 |
+
# # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
|
| 236 |
+
# WORKDIR /workspace/sglang/
|
| 237 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 238 |
+
# RUN git apply sglang.patch && rm sglang.patch
|
| 239 |
+
# # WORKDIR /workspace/
|
| 240 |
+
|
| 241 |
+
# # sgl-router
|
| 242 |
+
# # WORKDIR /sgl-workspace/sglang/
|
| 243 |
+
# RUN apt-get update && apt-get install -y pkg-config libssl-dev
|
| 244 |
+
# RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
| 245 |
+
# RUN pip install setuptools-rust wheel build
|
| 246 |
+
# RUN source $HOME/.cargo/env && \
|
| 247 |
+
# mkdir -p sgl-router && \
|
| 248 |
+
# cd sgl-router && \
|
| 249 |
+
# cargo build -j 64 && \
|
| 250 |
+
# python3 -m build && \
|
| 251 |
+
# pip install dist/*.whl --force-reinstall
|
| 252 |
+
|
| 253 |
+
RUN pip install sglang-router --force-reinstall
|
| 254 |
+
|
| 255 |
+
###########################################
|
| 256 |
+
###########################################
|
| 257 |
+
###########################################
|
| 258 |
+
|
| 259 |
+
RUN pip install transformers==4.51.1
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
#########################################
|
| 263 |
+
#####Install vllm v0.8.5#################
|
| 264 |
+
#########################################
|
| 265 |
+
|
| 266 |
+
WORKDIR /workspace/
|
| 267 |
+
|
| 268 |
+
ENV VLLM_TARGET_DEVICE=rocm
|
| 269 |
+
ENV ROCM_PATH=/opt/rocm
|
| 270 |
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
|
| 271 |
+
|
| 272 |
+
RUN pip uninstall -y vllm || true
|
| 273 |
+
RUN rm -rf vllm-patch
|
| 274 |
+
|
| 275 |
+
RUN git clone https://github.com/RLFoundation/vllm-patch.git \
|
| 276 |
+
&& cd vllm-patch \
|
| 277 |
+
&& git checkout v0.8.5-sleep-numa \
|
| 278 |
+
&& rm -rf build/ dist/ *.egg-info \
|
| 279 |
+
&& ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
|
| 280 |
+
&& SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
|
| 281 |
+
|
| 282 |
+
WORKDIR /workspace/
|
| 283 |
+
###########################################
|
| 284 |
+
###########################################
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
#########################################
|
| 288 |
+
#### Install megatron-core###############
|
| 289 |
+
#########################################
|
| 290 |
+
# Can be removed just the current megatron-lm dependency
|
| 291 |
+
RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
|
| 292 |
+
|
| 293 |
+
RUN pip uninstall -y megatron-core && \
|
| 294 |
+
git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
|
| 295 |
+
cd Megatron-LM-amd_version && \
|
| 296 |
+
pip install -vvv -e . && \
|
| 297 |
+
cd /workspace/
|
| 298 |
+
#########################################
|
| 299 |
+
#########################################
|
| 300 |
+
#########################################
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
#########################################
|
| 306 |
+
###Add torch_memory_saver################
|
| 307 |
+
#########################################
|
| 308 |
+
# Set environment variables
|
| 309 |
+
ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
|
| 310 |
+
ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 311 |
+
ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
|
| 312 |
+
# Install torch_memory_saver
|
| 313 |
+
# RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
|
| 314 |
+
# RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
|
| 315 |
+
RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
|
| 316 |
+
# pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
|
| 317 |
+
#########################################
|
| 318 |
+
#########################################
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
########################################
|
| 324 |
+
######Install ray#######################
|
| 325 |
+
########################################
|
| 326 |
+
# need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
|
| 327 |
+
RUN pip uninstall ray -y
|
| 328 |
+
# RUN pip install "ray[data,train,tune,serve]>=2.47.0"
|
| 329 |
+
RUN pip install "ray[data,train,tune,serve]==2.47.1"
|
| 330 |
+
########################################
|
| 331 |
+
########################################
|
| 332 |
+
########################################
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
### Need to verify whether this causes a numerical/convergence issue
|
| 336 |
+
#######################################
|
| 337 |
+
################apex###################
|
| 338 |
+
#######################################
|
| 339 |
+
WORKDIR /workspace/
|
| 340 |
+
RUN pip uninstall -y apex && \
|
| 341 |
+
git clone https://github.com/ROCm/apex.git && \
|
| 342 |
+
cd apex && \
|
| 343 |
+
python setup.py install && \
|
| 344 |
+
cd /workspace/
|
| 345 |
+
#######################################
|
| 346 |
+
#######################################
|
| 347 |
+
#######################################
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
########################################
|
| 351 |
+
############ mbridge####################
|
| 352 |
+
########################################
|
| 353 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 354 |
+
########################################
|
| 355 |
+
########################################
|
| 356 |
+
########################################
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
########################################
|
| 361 |
+
########slime agent framework need######
|
| 362 |
+
########################################
|
| 363 |
+
RUN pip install pydra_config==0.0.15
|
| 364 |
+
RUN pip install together
|
| 365 |
+
RUN pip install google-generativeai
|
| 366 |
+
########################################
|
| 367 |
+
########################################
|
| 368 |
+
########################################
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
WORKDIR /workspace/
|
| 373 |
+
|
| 374 |
+
CMD ["/usr/bin/bash"]
|
ccevolve/baselines/thetaevolve/docker/Dockerfile_b200
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG SGLANG_VERSION=v0.5.2rc2-cu128-b200
|
| 2 |
+
FROM lmsysorg/sglang:${SGLANG_VERSION} AS sglang
|
| 3 |
+
|
| 4 |
+
# an ARG declared before FROM is not visible inside the build stage, so declare it again after FROM
|
| 5 |
+
ARG SGLANG_VERSION
|
| 6 |
+
ARG MEGATRON_COMMIT=main
|
| 7 |
+
|
| 8 |
+
RUN apt update
|
| 9 |
+
RUN apt install -y nvtop
|
| 10 |
+
|
| 11 |
+
# TODO: change to pip install sglang-router after it has a new release
|
| 12 |
+
RUN pip install sglang-router --force-reinstall
|
| 13 |
+
RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall
|
| 14 |
+
RUN pip install ray[default]
|
| 15 |
+
RUN pip install httpx[http2] wandb pylatexenc blobfile accelerate "mcp[cli]"
|
| 16 |
+
RUN pip install git+https://github.com/zhuzilin/cumem_allocator.git
|
| 17 |
+
|
| 18 |
+
# mbridge
|
| 19 |
+
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
|
| 20 |
+
|
| 21 |
+
RUN TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0;9.0a;10.0" pip install git+https://github.com/fanshiqing/grouped_gemm@v1.1.4
|
| 22 |
+
# apex
|
| 23 |
+
RUN NVCC_APPEND_FLAGS="--threads 4" \
|
| 24 |
+
pip -v install --disable-pip-version-check --no-cache-dir \
|
| 25 |
+
--no-build-isolation \
|
| 26 |
+
--config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git
|
| 27 |
+
# transformer engine, we install with --no-deps to avoid installing torch and torch-extensions
|
| 28 |
+
RUN pip install pybind11
|
| 29 |
+
RUN pip -v install --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@stable
|
| 30 |
+
# flash attn
|
| 31 |
+
# the newest version megatron supports is v2.7.4.post1
|
| 32 |
+
RUN MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1
|
| 33 |
+
RUN git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention/ && git checkout 27f501d && cd hopper/ && python setup.py install
|
| 34 |
+
RUN python_path=`python -c "import site; print(site.getsitepackages()[0])"` && \
|
| 35 |
+
mkdir -p $python_path/flash_attn_3 && \
|
| 36 |
+
wget -P $python_path/flash_attn_3 https://raw.githubusercontent.com/Dao-AILab/flash-attention/27f501dbe011f4371bff938fe7e09311ab3002fa/hopper/flash_attn_interface.py
|
| 37 |
+
|
| 38 |
+
WORKDIR /root/
|
| 39 |
+
RUN git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
|
| 40 |
+
cd Megatron-LM && \
|
| 41 |
+
pip install -e .
|
| 42 |
+
|
| 43 |
+
# sandwich norm for GLM models
|
| 44 |
+
COPY patch/${SGLANG_VERSION}/megatron.patch /root/Megatron-LM/
|
| 45 |
+
RUN cd Megatron-LM && \
|
| 46 |
+
git checkout ${MEGATRON_COMMIT} && \
|
| 47 |
+
git apply megatron.patch --3way && \
|
| 48 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 49 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 50 |
+
exit 1; \
|
| 51 |
+
fi && \
|
| 52 |
+
rm megatron.patch
|
| 53 |
+
|
| 54 |
+
# sglang patch
|
| 55 |
+
COPY patch/${SGLANG_VERSION}/sglang.patch /sgl-workspace/sglang/
|
| 56 |
+
RUN cd /sgl-workspace/sglang && \
|
| 57 |
+
git apply sglang.patch && \
|
| 58 |
+
if grep -R -n '^<<<<<<< ' .; then \
|
| 59 |
+
echo "Patch failed to apply cleanly. Please resolve conflicts." && \
|
| 60 |
+
exit 1; \
|
| 61 |
+
fi && \
|
| 62 |
+
rm sglang.patch
|
| 63 |
+
|
| 64 |
+
RUN rm /root/.tmux.conf
|
ccevolve/baselines/thetaevolve/docker/README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Docker release rule
|
| 2 |
+
|
| 3 |
+
We will publish 2 kinds of docker images:
|
| 4 |
+
1. stable version, which is based on an official sglang release. We will store the patches for those versions.
|
| 5 |
+
2. latest version, which aligns to `lmsysorg/sglang:latest`.
|
| 6 |
+
|
| 7 |
+
The current stable version is:
|
| 8 |
+
- sglang v0.5.7 nightly-dev-20260107-dce8b060 (dce8b0606c06d3a191a24c7b8cbe8e238ab316c9), megatron dev 3714d81d418c9f1bca4594fc35f9e8289f652862
|
| 9 |
+
|
| 10 |
+
Previous versions:
|
| 11 |
+
- sglang v0.5.6 nightly-dev-20251208-5e2cda61 (5e2cda6158e670e64b926a9985d65826c537ac82), megatron v0.14.0 (23e00ed0963c35382dfe8a5a94fb3cda4d21e133)
|
| 12 |
+
- sglang v0.5.5.post1 (303cc957e62384044dfa8e52d7d8af8abe12f0ac), megatron v0.14.0 (23e00ed0963c35382dfe8a5a94fb3cda4d21e133)
|
| 13 |
+
- sglang v0.5.0rc0-cu126 (8ecf6b9d2480c3f600826c7d8fef6a16ed603c3f), megatron 48406695c4efcf1026a7ed70bb390793918dd97b
|
| 14 |
+
|
| 15 |
+
The command to build:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
just release
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
Before each update, we will test the following models with 64xH100:
|
| 22 |
+
|
| 23 |
+
- Qwen3-4B sync
|
| 24 |
+
- Qwen3-4B async
|
| 25 |
+
- Qwen3-30B-A3B sync
|
| 26 |
+
- Qwen3-30B-A3B fp8 sync
|
| 27 |
+
- GLM-4.5-355B-A32B sync
|
ccevolve/baselines/thetaevolve/docker/justfile
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
release-primary:
|
| 2 |
+
ARG_TAG_POSTFIX="" ARG_BUILD_EXTRA_ARGS="" just _release-raw
|
| 3 |
+
|
| 4 |
+
# Should be executed on ARM machines
|
| 5 |
+
release-cu129-arm64:
|
| 6 |
+
ARG_TAG_POSTFIX="-cu129-arm64" ARG_BUILD_EXTRA_ARGS='--build-arg SGLANG_IMAGE_TAG=v0.5.5.post3-cu129-arm64 --build-arg ENABLE_SGLANG_PATCH=0' just _release-raw
|
| 7 |
+
|
| 8 |
+
# Should be executed on ARM machines
|
| 9 |
+
release-cu13-arm64:
|
| 10 |
+
ARG_TAG_POSTFIX="-cu13-arm64" ARG_BUILD_EXTRA_ARGS='--build-arg SGLANG_IMAGE_TAG=dev-arm64-cu13-20251122 --build-arg ENABLE_CUDA_13=1 --build-arg ENABLE_SGLANG_PATCH=0' just _release-raw
|
| 11 |
+
|
| 12 |
+
_release-raw:
|
| 13 |
+
#!/bin/bash
|
| 14 |
+
set -euxo pipefail
|
| 15 |
+
cd ..
|
| 16 |
+
|
| 17 |
+
VERSION="$(cat docker/version.txt | tr -d '\n')"
|
| 18 |
+
IMAGE_TAG=${VERSION}${ARG_TAG_POSTFIX}
|
| 19 |
+
|
| 20 |
+
docker build -f docker/Dockerfile . --build-arg HTTP_PROXY="$http_proxy" --build-arg HTTPS_PROXY="$https_proxy" --build-arg NO_PROXY="localhost,127.0.0.1" $ARG_BUILD_EXTRA_ARGS -t slimerl/slime:$IMAGE_TAG
|
| 21 |
+
docker push slimerl/slime:$IMAGE_TAG
|
| 22 |
+
|
| 23 |
+
if [ -z "${ARG_TAG_POSTFIX}" ]; then
|
| 24 |
+
docker tag slimerl/slime:$IMAGE_TAG slimerl/slime:latest
|
| 25 |
+
docker push slimerl/slime:latest
|
| 26 |
+
fi
|
| 27 |
+
|
| 28 |
+
debug:
|
| 29 |
+
#!/bin/bash
|
| 30 |
+
set -euxo pipefail
|
| 31 |
+
cd ..
|
| 32 |
+
|
| 33 |
+
VERSION="$(cat docker/version.txt | tr -d '\n')"
|
| 34 |
+
IMAGE_TAG=${VERSION}
|
| 35 |
+
|
| 36 |
+
docker build -f docker/Dockerfile . --build-arg HTTP_PROXY="$http_proxy" --build-arg HTTPS_PROXY="$https_proxy" --build-arg NO_PROXY="localhost,127.0.0.1" -t slimerl/slime-test:$IMAGE_TAG
|
| 37 |
+
docker push slimerl/slime-test:$IMAGE_TAG
|
| 38 |
+
|
| 39 |
+
docker tag slimerl/slime-test:$IMAGE_TAG slimerl/slime-test:latest
|
| 40 |
+
docker push slimerl/slime-test:latest
|
ccevolve/baselines/thetaevolve/docker/version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
nightly-dev-20260202b
|
ccevolve/baselines/thetaevolve/docs/README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# slime Documentation
|
| 2 |
+
|
| 3 |
+
We recommend that new contributors start by writing documentation, which helps you quickly understand the SGLang codebase.
|
| 4 |
+
Most documentation files are located under the `docs/` folder.
|
| 5 |
+
|
| 6 |
+
## Docs Workflow
|
| 7 |
+
|
| 8 |
+
### Install Dependencies
|
| 9 |
+
|
| 10 |
+
```bash
|
| 11 |
+
apt-get update && apt-get install -y pandoc parallel retry
|
| 12 |
+
pip install -r requirements.txt
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
### Update Documentation
|
| 16 |
+
|
| 17 |
+
You can update the documentation in the en and zh folders by adding Markdown or Jupyter Notebook files to the appropriate subdirectories. If you create new files, make sure to update index.rst (or any other relevant .rst files) accordingly.
|
| 18 |
+
|
| 19 |
+
## Build and Render
|
| 20 |
+
|
| 21 |
+
```bash
|
| 22 |
+
# build english version
|
| 23 |
+
bash ./build.sh en
|
| 24 |
+
bash ./serve.sh en
|
| 25 |
+
|
| 26 |
+
# build chinese version
|
| 27 |
+
bash ./build.sh zh
|
| 28 |
+
bash ./serve.sh zh
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
You can then visit `http://localhost:8000` to view the documentation.
|
ccevolve/baselines/thetaevolve/docs/build.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
| 4 |
+
LANG=$1
|
| 5 |
+
|
| 6 |
+
# make sure language is only en or zh
|
| 7 |
+
if [ "$LANG" != "en" ] && [ "$LANG" != "zh" ]; then
|
| 8 |
+
echo "Language must be en or zh"
|
| 9 |
+
exit 1
|
| 10 |
+
fi
|
| 11 |
+
|
| 12 |
+
cd $SCRIPT_DIR
|
| 13 |
+
SLIME_DOC_LANG=$LANG sphinx-build -b html -D language=$LANG --conf-dir ./ ./$LANG ./build/$LANG
|
ccevolve/baselines/thetaevolve/docs/build_all.sh
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
set -euo pipefail
|
| 3 |
+
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
| 4 |
+
cd "$SCRIPT_DIR"
|
| 5 |
+
|
| 6 |
+
echo "[slime-docs] Building EN..."
|
| 7 |
+
./build.sh en
|
| 8 |
+
echo "[slime-docs] Building ZH..."
|
| 9 |
+
./build.sh zh
|
| 10 |
+
|
| 11 |
+
# Create a lightweight root index with auto redirect based on localStorage (done client side)
|
| 12 |
+
ROOT_INDEX=build/index.html
|
| 13 |
+
cat > "$ROOT_INDEX" <<'EOF'
|
| 14 |
+
<!DOCTYPE html>
|
| 15 |
+
<html lang="en">
|
| 16 |
+
<head>
|
| 17 |
+
<meta charset="utf-8" />
|
| 18 |
+
<title>slime docs</title>
|
| 19 |
+
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 20 |
+
<style>
|
| 21 |
+
body{font:14px/1.4 system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;padding:40px;max-width:720px;margin:auto;color:#222}
|
| 22 |
+
a{color:#0969da;text-decoration:none}a:hover{text-decoration:underline}
|
| 23 |
+
.lang-links{margin-top:1.2rem;display:flex;gap:1rem}
|
| 24 |
+
.note{margin-top:2rem;font-size:12px;color:#666}
|
| 25 |
+
</style>
|
| 26 |
+
<script>
|
| 27 |
+
(function(){
|
| 28 |
+
var stored = null;
|
| 29 |
+
try{stored = localStorage.getItem('slime-doc-lang');}catch(e){}
|
| 30 |
+
var path = (stored === 'zh') ? 'zh/' : (stored === 'en') ? 'en/' : null;
|
| 31 |
+
if(path){ window.location.replace(path); }
|
| 32 |
+
})();
|
| 33 |
+
</script>
|
| 34 |
+
</head>
|
| 35 |
+
<body>
|
| 36 |
+
<h1>slime Documentation</h1>
|
| 37 |
+
<p>Select language:</p>
|
| 38 |
+
<p class="lang-links"><a href="en/">English</a> <a href="zh/">中文</a></p>
|
| 39 |
+
<p class="note">Auto-redirect uses your last choice if stored; else pick above.</p>
|
| 40 |
+
</body>
|
| 41 |
+
</html>
|
| 42 |
+
EOF
|
| 43 |
+
|
| 44 |
+
echo "[slime-docs] Done. Root landing page at build/index.html"
|
ccevolve/baselines/thetaevolve/docs/conf.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import sys
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
sys.path.insert(0, os.path.abspath("../.."))
|
| 8 |
+
|
| 9 |
+
__version__ = "0.0.1"
|
| 10 |
+
|
| 11 |
+
project = "slime"
|
| 12 |
+
copyright = f"2025-{datetime.now().year}, slime"
|
| 13 |
+
author = "slime Team"
|
| 14 |
+
|
| 15 |
+
version = __version__
|
| 16 |
+
release = __version__
|
| 17 |
+
|
| 18 |
+
extensions = [
|
| 19 |
+
"sphinx.ext.autodoc",
|
| 20 |
+
"sphinx.ext.autosummary",
|
| 21 |
+
"sphinx.ext.napoleon",
|
| 22 |
+
"sphinx.ext.viewcode",
|
| 23 |
+
"sphinx.ext.autosectionlabel",
|
| 24 |
+
"sphinx.ext.intersphinx",
|
| 25 |
+
"sphinx_tabs.tabs",
|
| 26 |
+
"myst_parser",
|
| 27 |
+
"sphinx_copybutton",
|
| 28 |
+
"sphinxcontrib.mermaid",
|
| 29 |
+
"nbsphinx",
|
| 30 |
+
"sphinx.ext.mathjax",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
nbsphinx_allow_errors = True
|
| 34 |
+
nbsphinx_execute = "never"
|
| 35 |
+
|
| 36 |
+
autosectionlabel_prefix_document = True
|
| 37 |
+
nbsphinx_allow_directives = True
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
myst_enable_extensions = [
|
| 41 |
+
"dollarmath",
|
| 42 |
+
"amsmath",
|
| 43 |
+
"deflist",
|
| 44 |
+
"colon_fence",
|
| 45 |
+
"html_image",
|
| 46 |
+
"linkify",
|
| 47 |
+
"substitution",
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
myst_heading_anchors = 3
|
| 51 |
+
|
| 52 |
+
nbsphinx_kernel_name = "python3"
|
| 53 |
+
nbsphinx_execute_arguments = [
|
| 54 |
+
"--InlineBackend.figure_formats={'svg', 'pdf'}",
|
| 55 |
+
"--InlineBackend.rc={'figure.dpi': 96}",
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
nb_render_priority = {
|
| 60 |
+
"html": (
|
| 61 |
+
"application/vnd.jupyter.widget-view+json",
|
| 62 |
+
"application/javascript",
|
| 63 |
+
"text/html",
|
| 64 |
+
"image/svg+xml",
|
| 65 |
+
"image/png",
|
| 66 |
+
"image/jpeg",
|
| 67 |
+
"text/markdown",
|
| 68 |
+
"text/latex",
|
| 69 |
+
"text/plain",
|
| 70 |
+
)
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
myst_enable_extensions = [
|
| 74 |
+
"dollarmath",
|
| 75 |
+
"amsmath",
|
| 76 |
+
"deflist",
|
| 77 |
+
"colon_fence",
|
| 78 |
+
"html_image",
|
| 79 |
+
"linkify",
|
| 80 |
+
"substitution",
|
| 81 |
+
]
|
| 82 |
+
|
| 83 |
+
myst_heading_anchors = 3
|
| 84 |
+
myst_ref_domains = ["std", "py"]
|
| 85 |
+
|
| 86 |
+
templates_path = ["_templates"]
|
| 87 |
+
|
| 88 |
+
source_suffix = {
|
| 89 |
+
".rst": "restructuredtext",
|
| 90 |
+
".md": "markdown",
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
master_doc = "index"
|
| 94 |
+
|
| 95 |
+
language = os.environ.get("SLIME_DOC_LANG", "en")
|
| 96 |
+
|
| 97 |
+
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
| 98 |
+
|
| 99 |
+
pygments_style = "sphinx"
|
| 100 |
+
|
| 101 |
+
html_theme = "sphinx_book_theme"
|
| 102 |
+
html_logo = "_static/image/logo.jpg"
|
| 103 |
+
html_favicon = "_static/image/logo.ico"
|
| 104 |
+
html_title = project
|
| 105 |
+
html_copy_source = True
|
| 106 |
+
html_last_updated_fmt = ""
|
| 107 |
+
|
| 108 |
+
html_theme_options = {
|
| 109 |
+
"repository_url": "https://github.com/THUDM/slime",
|
| 110 |
+
"repository_branch": "main",
|
| 111 |
+
"show_navbar_depth": 3,
|
| 112 |
+
"max_navbar_depth": 4,
|
| 113 |
+
"collapse_navbar": True,
|
| 114 |
+
"use_edit_page_button": True,
|
| 115 |
+
"use_source_button": True,
|
| 116 |
+
"use_issues_button": True,
|
| 117 |
+
"use_repository_button": True,
|
| 118 |
+
"use_download_button": True,
|
| 119 |
+
"use_sidenotes": True,
|
| 120 |
+
"show_toc_level": 2,
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
html_context = {
|
| 124 |
+
"display_github": True,
|
| 125 |
+
"github_user": "sgl-project",
|
| 126 |
+
"github_repo": "sgl-project.github.io",
|
| 127 |
+
"github_version": "main",
|
| 128 |
+
"conf_py_path": "/docs/",
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
html_static_path = ["_static"]
|
| 132 |
+
html_css_files = ["css/custom_log.css"]
|
| 133 |
+
# Add custom javascript for language toggle (en <-> zh)
|
| 134 |
+
html_js_files = [
|
| 135 |
+
"js/lang-toggle.js",
|
| 136 |
+
]
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _sync_examples(app):
|
| 140 |
+
"""Sync top-level examples into language-specific doc trees.
|
| 141 |
+
|
| 142 |
+
Policy:
|
| 143 |
+
- README.md -> English docs/en/_examples_synced/<example>/README.md
|
| 144 |
+
- README_zh.md -> Chinese docs/zh/_examples_synced/<example>/README.md (falls back to README.md if README_zh.md is missing)
|
| 145 |
+
- If no suitable README exists for a language, that example is simply skipped for that language.
|
| 146 |
+
"""
|
| 147 |
+
docs_root = Path(__file__).resolve().parent
|
| 148 |
+
src_dir = docs_root.parent / "examples"
|
| 149 |
+
if not src_dir.exists():
|
| 150 |
+
return
|
| 151 |
+
|
| 152 |
+
lang_cfgs = {
|
| 153 |
+
"en": {
|
| 154 |
+
"dir": docs_root / "en",
|
| 155 |
+
"readme_name": "README.md",
|
| 156 |
+
},
|
| 157 |
+
"zh": {
|
| 158 |
+
"dir": docs_root / "zh",
|
| 159 |
+
# primary preferred name; will fall back to README.md
|
| 160 |
+
"readme_name": "README_zh.md",
|
| 161 |
+
},
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
for lang, cfg in lang_cfgs.items():
|
| 165 |
+
lang_dir = cfg["dir"]
|
| 166 |
+
if not lang_dir.exists():
|
| 167 |
+
continue
|
| 168 |
+
out_dir = lang_dir / "_examples_synced"
|
| 169 |
+
if out_dir.exists():
|
| 170 |
+
shutil.rmtree(out_dir)
|
| 171 |
+
out_dir.mkdir(parents=True, exist_ok=True)
|
| 172 |
+
|
| 173 |
+
entries = [] # (example_name, readme_rel_path)
|
| 174 |
+
for d in sorted(src_dir.iterdir()):
|
| 175 |
+
if not d.is_dir():
|
| 176 |
+
continue
|
| 177 |
+
# language-specific selection with fallback for zh
|
| 178 |
+
if lang == "zh":
|
| 179 |
+
primary = d / cfg["readme_name"] # README_zh.md
|
| 180 |
+
fallback = d / "README.md"
|
| 181 |
+
candidate = primary if primary.exists() else fallback
|
| 182 |
+
else:
|
| 183 |
+
candidate = d / cfg["readme_name"]
|
| 184 |
+
if not candidate.exists():
|
| 185 |
+
continue # skip entirely if nothing suitable
|
| 186 |
+
target_dir = out_dir / d.name
|
| 187 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 188 |
+
shutil.copy2(candidate, target_dir / "README.md")
|
| 189 |
+
entries.append((d.name, f"_examples_synced/{d.name}/README.md"))
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def setup(app):
|
| 193 |
+
# ensure examples are synced before reading source files
|
| 194 |
+
app.connect("builder-inited", _sync_examples)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
myst_enable_extensions = [
|
| 198 |
+
"dollarmath",
|
| 199 |
+
"amsmath",
|
| 200 |
+
"deflist",
|
| 201 |
+
"colon_fence",
|
| 202 |
+
]
|
| 203 |
+
myst_heading_anchors = 5
|
| 204 |
+
|
| 205 |
+
htmlhelp_basename = "slimedoc"
|
| 206 |
+
|
| 207 |
+
latex_elements = {}
|
| 208 |
+
|
| 209 |
+
latex_documents = [
|
| 210 |
+
(master_doc, "slime.tex", "slime Documentation", "slime Team", "manual"),
|
| 211 |
+
]
|
| 212 |
+
|
| 213 |
+
man_pages = [(master_doc, "slime", "slime Documentation", [author], 1)]
|
| 214 |
+
|
| 215 |
+
texinfo_documents = [
|
| 216 |
+
(
|
| 217 |
+
master_doc,
|
| 218 |
+
"slime",
|
| 219 |
+
"slime Documentation",
|
| 220 |
+
author,
|
| 221 |
+
"slime",
|
| 222 |
+
"One line description of project.",
|
| 223 |
+
"Miscellaneous",
|
| 224 |
+
),
|
| 225 |
+
]
|
| 226 |
+
|
| 227 |
+
epub_title = project
|
| 228 |
+
|
| 229 |
+
epub_exclude_files = ["search.html"]
|
| 230 |
+
|
| 231 |
+
copybutton_prompt_text = r">>> |\.\.\. "
|
| 232 |
+
copybutton_prompt_is_regexp = True
|
| 233 |
+
|
| 234 |
+
autodoc_preserve_defaults = True
|
| 235 |
+
navigation_with_keys = False
|
| 236 |
+
|
| 237 |
+
autodoc_mock_imports = [
|
| 238 |
+
"torch",
|
| 239 |
+
"transformers",
|
| 240 |
+
"triton",
|
| 241 |
+
]
|
| 242 |
+
|
| 243 |
+
intersphinx_mapping = {
|
| 244 |
+
"python": ("https://docs.python.org/3.12", None),
|
| 245 |
+
"typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
|
| 246 |
+
"pillow": ("https://pillow.readthedocs.io/en/stable", None),
|
| 247 |
+
"numpy": ("https://numpy.org/doc/stable", None),
|
| 248 |
+
"torch": ("https://pytorch.org/docs/stable", None),
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
html_theme = "sphinx_book_theme"
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
nbsphinx_prolog = """
|
| 255 |
+
.. raw:: html
|
| 256 |
+
|
| 257 |
+
<style>
|
| 258 |
+
.output_area.stderr, .output_area.stdout {
|
| 259 |
+
color: #d3d3d3 !important; /* light gray */
|
| 260 |
+
}
|
| 261 |
+
</style>
|
| 262 |
+
"""
|
ccevolve/baselines/thetaevolve/docs/requirements.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gguf>=0.10.0
|
| 2 |
+
ipykernel
|
| 3 |
+
ipywidgets
|
| 4 |
+
jupyter_client
|
| 5 |
+
markdown>=3.4.0
|
| 6 |
+
matplotlib
|
| 7 |
+
myst-parser
|
| 8 |
+
nbconvert
|
| 9 |
+
nbsphinx
|
| 10 |
+
nbstripout
|
| 11 |
+
pandoc
|
| 12 |
+
pillow
|
| 13 |
+
pydantic
|
| 14 |
+
sphinx
|
| 15 |
+
sphinx-autobuild
|
| 16 |
+
sphinx-book-theme
|
| 17 |
+
sphinx-copybutton
|
| 18 |
+
sphinx-tabs
|
| 19 |
+
sphinxcontrib-mermaid
|
| 20 |
+
urllib3<2.0.0
|
ccevolve/baselines/thetaevolve/docs/serve.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
| 4 |
+
LANG="${1:-all}"
|
| 5 |
+
PORT="${PORT:-8000}"
|
| 6 |
+
|
| 7 |
+
cd "$SCRIPT_DIR"
|
| 8 |
+
|
| 9 |
+
if [ "$LANG" = "all" ]; then
|
| 10 |
+
# Expect both builds present
|
| 11 |
+
if [ ! -d build/en ] || [ ! -d build/zh ]; then
|
| 12 |
+
echo "[serve] Missing build/en or build/zh. Run ./build_all.sh first." >&2
|
| 13 |
+
fi
|
| 14 |
+
echo "[serve] Serving multi-language docs root on http://localhost:$PORT (en/, zh/)"
|
| 15 |
+
python -m http.server -d ./build "$PORT"
|
| 16 |
+
exit $?
|
| 17 |
+
fi
|
| 18 |
+
|
| 19 |
+
if [ "$LANG" != "en" ] && [ "$LANG" != "zh" ]; then
|
| 20 |
+
echo "Usage: $0 [en|zh|all]" >&2
|
| 21 |
+
exit 1
|
| 22 |
+
fi
|
| 23 |
+
|
| 24 |
+
if [ ! -d "build/$LANG" ]; then
|
| 25 |
+
echo "[serve] build/$LANG not found. Run ./build.sh $LANG first." >&2
|
| 26 |
+
exit 1
|
| 27 |
+
fi
|
| 28 |
+
echo "[serve] Serving $LANG docs on http://localhost:$PORT"
|
| 29 |
+
python -m http.server -d ./build/$LANG "$PORT"
|
ccevolve/baselines/thetaevolve/eval_results/ac1/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
ccevolve/baselines/thetaevolve/eval_results/ac1/metrics.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": -1.503163554681561,
|
| 3 |
+
"public": {
|
| 4 |
+
"best_value": 1.503163554681561,
|
| 5 |
+
"best_length": 1319,
|
| 6 |
+
"num_runs": 1
|
| 7 |
+
},
|
| 8 |
+
"private": {
|
| 9 |
+
"all_values": [
|
| 10 |
+
1.503163554681561
|
| 11 |
+
],
|
| 12 |
+
"all_lengths": [
|
| 13 |
+
1319
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
"text_feedback": "Lower evaluate_sequence value is better. combined_score = -best_value.",
|
| 17 |
+
"execution_time_mean": 0.021179363131523132,
|
| 18 |
+
"execution_time_std": 0.0,
|
| 19 |
+
"num_valid_runs": 1,
|
| 20 |
+
"num_invalid_runs": 0,
|
| 21 |
+
"all_validation_errors": []
|
| 22 |
+
}
|
ccevolve/baselines/thetaevolve/eval_results/circle_packing/correct.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"correct": true,
|
| 3 |
+
"error": null
|
| 4 |
+
}
|
ccevolve/baselines/thetaevolve/eval_results/circle_packing/metrics.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"combined_score": 2.6359856612408987,
|
| 3 |
+
"public": {
|
| 4 |
+
"centers_str": " centers[0] = (0.1112, 0.1112)\n centers[1] = (0.2406, 0.2370)\n centers[2] = (0.4048, 0.2580)\n centers[3] = (0.5966, 0.2576)\n centers[4] = (0.7603, 0.2363)\n centers[5] = (0.8892, 0.1108)\n centers[6] = (0.0962, 0.3179)\n centers[7] = (0.3141, 0.0926)\n centers[8] = (0.5006, 0.0939)\n centers[9] = (0.6869, 0.0924)\n centers[10] = (0.9043, 0.3167)\n centers[11] = (0.1035, 0.5174)\n centers[12] = (0.2731, 0.4040)\n centers[13] = (0.5013, 0.4700)\n centers[14] = (0.7284, 0.4024)\n centers[15] = (0.8969, 0.5154)\n centers[16] = (0.1052, 0.7260)\n centers[17] = (0.2974, 0.6183)\n centers[18] = (0.5045, 0.7247)\n centers[19] = (0.7053, 0.6131)\n centers[20] = (0.8932, 0.7252)\n centers[21] = (0.0849, 0.9151)\n centers[22] = (0.2977, 0.8667)\n centers[23] = (0.5027, 0.9211)\n centers[24] = (0.7054, 0.8698)\n centers[25] = (0.9154, 0.9154)",
|
| 5 |
+
"num_circles": 26
|
| 6 |
+
},
|
| 7 |
+
"private": {
|
| 8 |
+
"reported_sum_of_radii": 2.6359856612408987
|
| 9 |
+
},
|
| 10 |
+
"execution_time_mean": 0.025406131520867348,
|
| 11 |
+
"execution_time_std": 0.0,
|
| 12 |
+
"num_valid_runs": 1,
|
| 13 |
+
"num_invalid_runs": 0,
|
| 14 |
+
"all_validation_errors": []
|
| 15 |
+
}
|
ccevolve/baselines/thetaevolve/examples/__init__.py
ADDED
|
File without changes
|
ccevolve/baselines/thetaevolve/examples/fully_async/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Fully Asynchronous Rollout Example
|
| 2 |
+
|
| 3 |
+
This example shows a simple way to make rollout generation **fully asynchronous**: a single global worker is created once and then keeps running in the background, continuously pulling prompts and launching generation tasks. Training only needs to fetch already finished results. This removes the per‑step wait that happens in the normal synchronous style.
|
| 4 |
+
|
| 5 |
+
### Files
|
| 6 |
+
* `fully_async_rollout.py`: global async worker + `generate_rollout_fully_async` entry.
|
| 7 |
+
* `run-qwen3-4b-fully_async.sh`: example launch script with Qwen3‑4B.
|
| 8 |
+
|
| 9 |
+
### Prerequisite
|
| 10 |
+
First set up model & environment following the Qwen3-4B example.
|
| 11 |
+
|
| 12 |
+
### Quick Start
|
| 13 |
+
```bash
|
| 14 |
+
cd slime
|
| 15 |
+
bash examples/fully_async/run-qwen3-4b-fully_async.sh
|
| 16 |
+
```
|
| 17 |
+
You should see log lines like:
|
| 18 |
+
```
|
| 19 |
+
Creating new global async worker...
|
| 20 |
+
Continuous async rollout worker started
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### How It Works (Very Short)
|
| 24 |
+
* First call: create `AsyncRolloutWorker` (thread + asyncio loop).
|
| 25 |
+
* Loop keeps up to `--rollout-batch-size` tasks in flight using `generate_and_rm_group`.
|
| 26 |
+
* Completed groups are pushed into a queue; caller drains until it has enough samples.
|
| 27 |
+
* Worker is stopped automatically at process exit.
|
| 28 |
+
|
| 29 |
+
### Limitations
|
| 30 |
+
* No evaluation mode.
|
| 31 |
+
* Ordering is best effort (sorted at the end by index).
|
| 32 |
+
* Minimal error handling.
|
| 33 |
+
|
| 34 |
+
### Config Differences (2 Key Points)
|
| 35 |
+
To enable the fully async pattern there are only two changes compared to a normal run:
|
| 36 |
+
|
| 37 |
+
1. Use the async training driver: `train_async.py` (not `train.py`).
|
| 38 |
+
2. Set the rollout function path:
|
| 39 |
+
```bash
|
| 40 |
+
--rollout-function-path fully_async_rollout.generate_rollout_fully_async
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
Why is it still "fully" async although `train_async.py` itself schedules rollouts step‑by‑step?
|
| 44 |
+
|
| 45 |
+
Because the real generation work is done by a **persistent background worker** created in `generate_rollout_fully_async`. Each call from `train_async.py` only drains already completed samples from the worker's output queue; the worker has been continuously generating since the first call. Thus rollout production (model inference) and training consumption happen in parallel with minimal waiting.
|
ccevolve/baselines/thetaevolve/examples/fully_async/fully_async_rollout.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import queue
|
| 3 |
+
import threading
|
| 4 |
+
import time
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
# Import core functions from sglang_rollout directly to avoid code duplication
|
| 8 |
+
from slime.rollout.sglang_rollout import GenerateState, generate_and_rm_group
|
| 9 |
+
from slime.utils.async_utils import run
|
| 10 |
+
from slime.utils.types import Sample
|
| 11 |
+
|
| 12 |
+
# Global worker manager
|
| 13 |
+
_global_worker = None
|
| 14 |
+
_worker_lock = threading.Lock()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_global_worker(args, data_buffer):
|
| 18 |
+
"""Get or create global worker"""
|
| 19 |
+
global _global_worker
|
| 20 |
+
with _worker_lock:
|
| 21 |
+
if _global_worker is None or not _global_worker.worker_thread.is_alive():
|
| 22 |
+
print("Creating new global async worker...")
|
| 23 |
+
_global_worker = AsyncRolloutWorker(args, data_buffer, concurrency=args.sglang_server_concurrency)
|
| 24 |
+
_global_worker.start()
|
| 25 |
+
return _global_worker
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def stop_global_worker():
|
| 29 |
+
"""Stop global worker"""
|
| 30 |
+
global _global_worker
|
| 31 |
+
with _worker_lock:
|
| 32 |
+
if _global_worker is not None:
|
| 33 |
+
_global_worker.stop()
|
| 34 |
+
_global_worker = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class AsyncRolloutWorker:
|
| 38 |
+
"""
|
| 39 |
+
Simplified asynchronous rollout worker, using threads instead of processes
|
| 40 |
+
Supports continuous running, independent of rollout function lifecycle
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
def __init__(self, args, data_buffer, concurrency=10):
|
| 44 |
+
self.args = args
|
| 45 |
+
self.data_buffer = data_buffer # Directly save data_buffer reference
|
| 46 |
+
self.concurrency = concurrency
|
| 47 |
+
self.running = True
|
| 48 |
+
self.output_queue = queue.Queue(maxsize=1000) # Continuous output queue
|
| 49 |
+
self.worker_thread = None
|
| 50 |
+
self.state = GenerateState(args)
|
| 51 |
+
|
| 52 |
+
async def continuous_worker_loop(self):
|
| 53 |
+
"""Continuous work loop - constantly get data from data_buffer and process"""
|
| 54 |
+
print("Continuous async rollout worker started")
|
| 55 |
+
|
| 56 |
+
active_tasks = set()
|
| 57 |
+
max_concurrent_tasks = self.args.rollout_batch_size
|
| 58 |
+
group_id_counter = 0
|
| 59 |
+
|
| 60 |
+
while self.running:
|
| 61 |
+
try:
|
| 62 |
+
# Clean up completed tasks
|
| 63 |
+
if active_tasks:
|
| 64 |
+
done_tasks = {task for task in active_tasks if task.done()}
|
| 65 |
+
for task in done_tasks:
|
| 66 |
+
try:
|
| 67 |
+
task.result() # Results are already handled in callbacks
|
| 68 |
+
except Exception as e:
|
| 69 |
+
print(f"Task failed with exception: {e}")
|
| 70 |
+
active_tasks -= done_tasks
|
| 71 |
+
|
| 72 |
+
# If active task count hasn't reached limit, try to get new data and start tasks
|
| 73 |
+
while len(active_tasks) < max_concurrent_tasks and self.running:
|
| 74 |
+
samples = self.data_buffer.get_samples(1)
|
| 75 |
+
|
| 76 |
+
for group in samples:
|
| 77 |
+
group_id = group_id_counter
|
| 78 |
+
group_id_counter += 1
|
| 79 |
+
|
| 80 |
+
# Create new async task
|
| 81 |
+
task = asyncio.create_task(
|
| 82 |
+
generate_and_rm_group(
|
| 83 |
+
self.args,
|
| 84 |
+
group,
|
| 85 |
+
sampling_params=self.state.sampling_params.copy(),
|
| 86 |
+
evaluation=False,
|
| 87 |
+
)
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
# Add completion callback
|
| 91 |
+
def make_callback(gid):
|
| 92 |
+
def task_done_callback(task):
|
| 93 |
+
result = task.result()
|
| 94 |
+
self.output_queue.put((gid, result))
|
| 95 |
+
|
| 96 |
+
return task_done_callback
|
| 97 |
+
|
| 98 |
+
task.add_done_callback(make_callback(group_id))
|
| 99 |
+
active_tasks.add(task)
|
| 100 |
+
break
|
| 101 |
+
|
| 102 |
+
# Brief sleep to avoid busy waiting
|
| 103 |
+
await asyncio.sleep(1)
|
| 104 |
+
|
| 105 |
+
except Exception as e:
|
| 106 |
+
print(f"Error in continuous worker loop: {e}")
|
| 107 |
+
await asyncio.sleep(1)
|
| 108 |
+
|
| 109 |
+
if active_tasks:
|
| 110 |
+
print(f"Waiting for {len(active_tasks)} continuous tasks to complete...")
|
| 111 |
+
await asyncio.wait(active_tasks)
|
| 112 |
+
|
| 113 |
+
print("Continuous async rollout worker stopped")
|
| 114 |
+
|
| 115 |
+
def worker_thread_func(self):
    """Thread target: drive ``continuous_worker_loop`` to completion via ``asyncio.run``."""
    loop_coroutine = self.continuous_worker_loop()
    asyncio.run(loop_coroutine)
|
| 118 |
+
|
| 119 |
+
def start(self):
    """Launch the background worker thread unless one is already running.

    The thread is a daemon thread whose target is ``self.worker_thread_func``.
    Calling this while the previous thread is still alive does nothing.
    """
    current = self.worker_thread
    if current is not None and current.is_alive():
        # A live worker already exists; do not spawn a second one.
        return

    self.worker_thread = threading.Thread(target=self.worker_thread_func, daemon=True)
    self.worker_thread.start()
    print("Started continuous async worker thread")
|
| 125 |
+
|
| 126 |
+
def stop(self):
    """Ask the worker loop to exit and wait up to 5 seconds for its thread.

    Sets ``self.running`` to ``False`` and then joins the worker thread, if
    one is alive, with a 5 second timeout. Success is only reported when
    the thread has actually finished; a thread that outlives the timeout
    is reported as such instead of being announced as stopped.
    """
    self.running = False

    thread = self.worker_thread
    if thread and thread.is_alive():
        thread.join(timeout=5)
        if thread.is_alive():
            # join() returns silently on timeout, so re-check before claiming success.
            print("Warning: async worker thread did not stop within 5 seconds")
            return

    print("Stopped async worker thread")
|
| 132 |
+
|
| 133 |
+
def get_completed_groups(self) -> List[tuple]:
    """Drain ``self.output_queue`` without blocking.

    Returns:
        Every item currently in the queue, in queue order. The list is
        empty when nothing is queued.
    """
    drained = []
    try:
        while True:
            drained.append(self.output_queue.get_nowait())
    except queue.Empty:
        # The queue is exhausted; everything available has been collected.
        pass
    return drained
|
| 143 |
+
|
| 144 |
+
def get_queue_size(self) -> int:
    """Return the number of items ``self.output_queue.qsize()`` currently reports."""
    pending = self.output_queue.qsize()
    return pending
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
async def generate_rollout_async(args, rollout_id: int, data_buffer) -> List[List[Sample]]:
    """
    Collect ``args.rollout_batch_size`` finished groups from the global continuous worker.

    Groups are taken from the worker's output queue one at a time. Earlier
    code drained the whole queue into a local dict and dropped whatever
    exceeded the target when the function returned; taking one group per
    iteration leaves any surplus queued for the next call.

    Args:
        args: Run configuration; ``rollout_global_dataset`` must be truthy and
            ``rollout_batch_size`` is the number of groups to collect.
        rollout_id: Not used by this function.
        data_buffer: Passed through to ``get_global_worker``.

    Returns:
        The collected groups, sorted by the ``index`` of each group's first sample.
    """
    assert args.rollout_global_dataset

    # Get global worker, which will run continuously
    worker = get_global_worker(args, data_buffer)

    # Simplified: directly use rollout_batch_size as target
    target_data_size = args.rollout_batch_size

    data = []

    print(f"Starting async rollout generation for {target_data_size} groups")
    print(f"Global worker queue size: {worker.get_queue_size()}")

    start_time = time.time()
    last_progress_time = start_time
    no_progress_timeout = 30.0  # Warn if no progress for 30 seconds

    while len(data) < target_data_size:
        try:
            # NOTE(review): relies on the worker exposing ``output_queue`` (the queue
            # its own get_completed_groups() reads) — confirm against get_global_worker.
            _group_id, group = worker.output_queue.get_nowait()
        except queue.Empty:
            current_time = time.time()
            if current_time - last_progress_time > no_progress_timeout:
                print(
                    f"Warning: No progress for {no_progress_timeout}s. "
                    f"Queue size: {worker.get_queue_size()}, "
                    f"Collected: {len(data)}/{target_data_size}"
                )
                # Restart the window so the warning repeats once per timeout, not every poll.
                last_progress_time = current_time
            # Nothing ready yet: brief sleep to avoid busy waiting
            await asyncio.sleep(0.01)
            continue

        last_progress_time = time.time()

        if not data:
            print(
                f"First rollout sample: {[group[0].prompt + group[0].response]}, "
                f"label: {group[0].label}, reward: {group[0].reward}",
                flush=True,
            )

        # Simplified: directly add samples, no filters used
        data.append(group)

    duration = time.time() - start_time
    print(f"Rollout completed in {duration:.2f}s! Global worker queue size: {worker.get_queue_size()}")

    if data:
        print(
            f"Finish rollout: {[data[-1][0].prompt + data[-1][0].response]}, "
            f"label: {data[-1][0].label}, reward: {data[-1][0].reward}",
            flush=True,
        )

    data = sorted(data, key=lambda group: group[0].index)
    return data
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def generate_rollout_fully_async(args, rollout_id, data_buffer, evaluation=False):
    """Synchronous entry point that runs ``generate_rollout_async`` through ``run``.

    Raises:
        ValueError: if ``evaluation`` is truthy; only training rollouts are supported.
    """
    if not evaluation:
        return run(generate_rollout_async(args, rollout_id, data_buffer))

    raise ValueError("Evaluation mode not supported in simple async rollout")
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# Register exit cleanup function
|
| 245 |
+
import atexit
|
| 246 |
+
|
| 247 |
+
atexit.register(stop_global_worker)
|
ccevolve/baselines/thetaevolve/examples/fully_async/run-qwen3-4b-fully_async.sh
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Launches fully-async GRPO training of Qwen3-4B on a single node:
# a Ray head is started locally and train_async.py is submitted as a Ray job.

# for rerun the task: kill leftovers from a previous run
# (these run before `set -e`, so a "no process found" exit status is ignored)
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# will prevent ray from buffering stdout/stderr
# (Python reads PYTHONUNBUFFERED; the previous PYTHONBUFFERED name is not a variable it recognizes)
export PYTHONUNBUFFERED=1

# Count NVLink entries in the GPU topology matrix; 0 when nvidia-smi is unavailable.
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# The model script is expected to define MODEL_ARGS, which is used in the job submission below.
source "${SCRIPT_DIR}/../../scripts/models/qwen3-4B.sh"

CKPT_ARGS=(
   --hf-checkpoint /root/Qwen3-4B
   #--hf-checkpoint /root/Qwen3-4B-FP8
   --ref-load /root/Qwen3-4B_torch_dist
   --load /root/Qwen3-4B_slime/
   --save /root/Qwen3-4B_slime/
   --save-interval 20
)

ROLLOUT_ARGS=(
   --rollout-function-path fully_async_rollout.generate_rollout_fully_async
   --prompt-data /mnt/o1_alicloud/personal/zzl/rl_data/dapo-math-17k.jsonl
   --input-key prompt
   --label-key label
   --apply-chat-template
   --rollout-shuffle
   --rm-type deepscaler
   --num-rollout 3000
   --rollout-batch-size 32
   --n-samples-per-prompt 8
   --rollout-max-response-len 8192
   --rollout-temperature 0.8

   --global-batch-size 256
   --balance-data
)

PERF_ARGS=(
   --tensor-model-parallel-size 2
   --sequence-parallel
   --pipeline-model-parallel-size 1
   --context-parallel-size 1
   --expert-model-parallel-size 1
   --expert-tensor-parallel-size 1

   --recompute-granularity full
   --recompute-method uniform
   --recompute-num-layers 1

   # --micro-batch-size 1
   --use-dynamic-batch-size
   --max-tokens-per-gpu 9216
)

GRPO_ARGS=(
   --advantage-estimator grpo
   --use-kl-loss
   --kl-loss-coef 0.00
   --kl-loss-type low_var_kl
   --entropy-coef 0.00
   --eps-clip 0.2
   --eps-clip-high 0.28

   --use-tis
)

OPTIMIZER_ARGS=(
   --optimizer adam
   --lr 1e-6
   --lr-decay-style constant
   --weight-decay 0.1
   --adam-beta1 0.9
   --adam-beta2 0.98
)

SGLANG_ARGS=(
   --rollout-num-gpus-per-engine 1
)

MISC_ARGS=(
   # default dropout in megatron is 0.1
   --attention-dropout 0.0
   --hidden-dropout 0.0
   # should be good for model performance
   --accumulate-allreduce-grads-in-fp32
   --attention-softmax-in-fp32
   # need to comment this when using model with MLA
   --attention-backend flash
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats

# SCRIPT_DIR is on PYTHONPATH so the rollout function path given in ROLLOUT_ARGS can be imported.
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Array expansions are quoted so each element is passed as exactly one argument.
# DISTRIBUTED_ARGS is not defined in this script; unless the sourced model
# script sets it, it expands to no arguments.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json="${RUNTIME_ENV_JSON}" \
   -- python3 train_async.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node 4 \
   --rollout-num-gpus 4 \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${DISTRIBUTED_ARGS[@]}" \
   "${PERF_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${MISC_ARGS[@]}"
|
ccevolve/baselines/thetaevolve/examples/multi_agent/README.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-Agent RL
|
| 2 |
+
|
| 3 |
+
This directory provides an example of running multi-agent reinforcement learning (RL) with slime.
|
| 4 |
+
|
| 5 |
+
## Environment Setup
|
| 6 |
+
|
| 7 |
+
The environment setup is identical to the standard RL setup used in slime.
|
| 8 |
+
|
| 9 |
+
## Running the Script
|
| 10 |
+
|
| 11 |
+
You can either define your own multi-agent system or use the provided default configuration, `MULTI_AGENT_CONFIGS` in `rollout_with_multi_agents.py`:
|
| 12 |
+
|
| 13 |
+
```python
|
| 14 |
+
MULTI_AGENT_CONFIGS = {
|
| 15 |
+
"custom_multi_agent_function_path": "examples.multi_agent.agent_system.run_agent_system",
|
| 16 |
+
"num_parallel": 5,
|
| 17 |
+
"incorrect_reward_weight": 0.8,
|
| 18 |
+
"correct_reward_weight": 1.2,
|
| 19 |
+
}
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
To start a run, execute:
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
cd slime/
|
| 26 |
+
bash examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## New Arguments
|
| 30 |
+
|
| 31 |
+
- Specify the agent rollout function with the `--custom-generate-function-path` argument.
|
| 32 |
+
- Set the `--rollout-max-context-len` argument according to your model’s context window.
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
ROLLOUT_ARGS=(
|
| 36 |
+
--custom-generate-function-path examples.multi_agent.rollout_with_multi_agents.generate_with_multi_agents
|
| 37 |
+
--prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
|
| 38 |
+
--input-key prompt
|
| 39 |
+
--label-key label
|
| 40 |
+
--apply-chat-template
|
| 41 |
+
--rollout-shuffle
|
| 42 |
+
--rm-type deepscaler
|
| 43 |
+
--num-rollout 3000
|
| 44 |
+
--rollout-batch-size 32
|
| 45 |
+
--n-samples-per-prompt 8
|
| 46 |
+
--rollout-max-context-len 16384
|
| 47 |
+
--rollout-max-response-len 8192
|
| 48 |
+
--rollout-temperature 0.8
|
| 49 |
+
|
| 50 |
+
--global-batch-size 256
|
| 51 |
+
--balance-data
|
| 52 |
+
)
|
| 53 |
+
```
|
ccevolve/baselines/thetaevolve/examples/multi_agent/__init__.py
ADDED
|
File without changes
|
ccevolve/baselines/thetaevolve/examples/multi_agent/agent_system.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import re
|
| 3 |
+
import time
|
| 4 |
+
import traceback
|
| 5 |
+
from copy import deepcopy
|
| 6 |
+
from typing import List
|
| 7 |
+
|
| 8 |
+
from slime.rollout.rm_hub import batched_async_rm
|
| 9 |
+
from slime.utils.http_utils import post
|
| 10 |
+
from slime.utils.types import Sample
|
| 11 |
+
|
| 12 |
+
from .prompts import SOLVER_PROMPT_TEMPLATE, generate_rewriter_template, generate_select_template
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def generate_response(args, prompt, key):
    """Generate one completion for ``prompt`` via the SGLang router and record it.

    A deep copy of ``args.sample`` is filled with the prompt, its token ids,
    the generated token ids and the generated text, and is appended to
    ``args.results_dict[key]``.

    Returns:
        The stripped text following ``</think>`` when the output (with
        ``<|user|>`` removed) contains exactly one ``</think>`` followed by
        non-empty text. ``None`` in every other case: no token budget left
        for generation (nothing is recorded then), output without that
        shape, or any exception, which is printed rather than raised.
    """
    try:
        base_params = args.sampling_params
        tokenizer = args.tokenizer
        context_limit = args.rollout_max_context_len
        sample = deepcopy(args.sample)

        url = f"http://{args.sglang_router_ip}:{args.sglang_router_port}/generate"

        prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)
        sample.tokens = prompt_ids
        sample.prompt = prompt

        # Cap generation so prompt + response fits inside the context window.
        request_params = deepcopy(base_params)
        request_params["max_new_tokens"] = min(base_params["max_new_tokens"], context_limit - len(prompt_ids))
        if request_params["max_new_tokens"] <= 0:
            return None

        output = await post(
            url,
            {"input_ids": prompt_ids, "sampling_params": request_params, "return_logprob": True},
        )
        meta = output["meta_info"]

        # Take token ids straight from the logprob entries to avoid re-tokenizing the text;
        # the key is absent when the request was aborted.
        if "output_token_logprobs" in meta:
            generated_ids = [entry[1] for entry in meta["output_token_logprobs"]]
        else:
            generated_ids = []

        sample.tokens = sample.tokens + generated_ids
        sample.response_length += len(generated_ids)
        sample.response = output["text"]

        # Any other finish reason leaves the status untouched.
        finish_type = meta["finish_reason"]["type"]
        if finish_type == "length":
            sample.status = Sample.Status.TRUNCATED
        elif finish_type == "stop":
            sample.status = Sample.Status.COMPLETED

        args.results_dict[key].append(sample)

        pieces = output["text"].replace("<|user|>", "").split("</think>")
        if len(pieces) == 2 and pieces[1] != "":
            reason_content = pieces[0].strip()
            response_content = pieces[1].strip()
            sample.reason_content = reason_content
            sample.response_content = response_content
            return response_content

        sample.reason_content = None
        sample.response_content = None
        return None
    except Exception as e:
        print(f"Error generating response: {e}")
        return None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class Agent:
    """A base class for our AI agents."""

    def __init__(self):
        pass

    async def run(self, args, prompt, max_retries: int = 1, key: str | None = None) -> str | None:
        """Send ``prompt`` to the LLM, retrying when the call raises.

        Args:
            args: Passed through to ``generate_response``.
            prompt: Prompt text to send.
            max_retries: Maximum number of attempts.
            key: ``results_dict`` bucket name forwarded to ``generate_response``.

        Returns:
            Whatever the first attempt that does not raise returns (this may be
            ``None``), or ``None`` once every attempt has raised.
        """
        for _attempt in range(max_retries):
            try:
                return await generate_response(args, prompt, key=key)
            except Exception as e:
                print(f"Error querying LLM: {e}")
                # Back off without blocking: time.sleep() here would freeze the event
                # loop and stall every other agent coroutine running concurrently.
                await asyncio.sleep(1)
        print(f"Failed to query LLM after {max_retries} retries")
        return None
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class SolverAgent(Agent):
    """Agent that produces a first solution attempt for a problem."""

    def __init__(self):
        super().__init__()

    async def generate_initial_solution(self, args, problem_statement) -> str:
        """Format the solver prompt for ``problem_statement`` and run it.

        The request goes through ``run`` with up to 3 attempts and is
        recorded under the ``"solver"`` key.
        """
        solver_prompt = SOLVER_PROMPT_TEMPLATE.format(problem_statement=problem_statement)
        first_attempt = await self.run(args, solver_prompt, max_retries=3, key="solver")
        return first_attempt
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class RewriterAgent(Agent):
    """Agent that writes a new solution from a set of earlier ones."""

    def __init__(self):
        super().__init__()

    async def rewrite(self, args, problem_statement, previous_solutions: List[str]) -> str:
        """Build the rewrite prompt from ``previous_solutions`` and run it.

        The template is generated for the number of solutions supplied and
        filled with ``solution1`` ... ``solutionN`` plus the problem statement.
        The request goes through ``run`` with a single attempt and is recorded
        under the ``"rewriter"`` key.
        """
        template = generate_rewriter_template(len(previous_solutions))

        # Placeholder names are 1-based: solution1, solution2, ...
        fields = {"problem_statement": problem_statement}
        fields.update({f"solution{n}": text for n, text in enumerate(previous_solutions, start=1)})

        rewrite_prompt = template.format(**fields)
        return await self.run(args, rewrite_prompt, max_retries=1, key="rewriter")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class SelectorAgent(Agent):
    """The agent responsible for selecting solutions."""

    # Matches the "Judgment: <index>" verdict the select prompt asks the model to emit.
    # Raw string: "\s" and "\d" are regex escapes, not string escapes.
    _JUDGMENT_PATTERN = re.compile(r"Judgment:\s*(\d+)")

    def __init__(self):
        super().__init__()

    async def select(self, args, problem_statement, candidate_solutions: List[str]) -> str:
        """Ask the LLM to judge ``candidate_solutions`` and return its response.

        The select template is generated for the number of candidates and
        filled with ``solution1`` ... ``solutionN`` plus the problem statement.
        The request goes through ``run`` with up to 10 attempts and is recorded
        under the ``"selector"`` key.
        """

        # Dynamically generate template
        template = generate_select_template(len(candidate_solutions))

        # Build parameter dictionary
        format_params = {"problem_statement": problem_statement}
        for i, solution in enumerate(candidate_solutions):
            format_params[f"solution{i+1}"] = solution

        prompt = template.format(**format_params)
        return await self.run(args, prompt, max_retries=10, key="selector")

    def extract_selected_solution_idx(self, response: str, candidate_solutions: List[str]) -> int | None:
        """Extract the selected solution's 0-based index from ``response``.

        The first ``Judgment: N`` occurrence is used, where ``N`` is 1-based.

        Returns:
            ``N - 1`` when it is a valid index into ``candidate_solutions``;
            ``None`` when no judgment is present or the index is out of range.
        """
        matched = self._JUDGMENT_PATTERN.findall(response)
        if not matched:
            print("extract_selected_solution_idx error: no 'Judgment: <index>' found in response")
            return None

        selected_id = int(matched[0]) - 1
        if 0 <= selected_id < len(candidate_solutions):
            return selected_id
        return None
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
async def rewrite_worker(args, previous_solutions, problem_statement, worker_id):
    """Run one ``RewriterAgent`` over ``previous_solutions`` and return its result.

    ``worker_id`` is accepted for symmetry with ``solver_worker`` and is not used.
    """
    return await RewriterAgent().rewrite(args, problem_statement, previous_solutions)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
async def solver_worker(args, problem_statement, worker_id):
    """
    Run a single solver attempt.

    Returns the solver's initial solution, or ``None`` if anything raises;
    the exception and its traceback are printed, tagged with ``worker_id``.
    """
    try:
        return await SolverAgent().generate_initial_solution(args, problem_statement)
    except Exception as e:
        print(f"[Worker-{worker_id}] exception: {e}")
        print(f"[Worker-{worker_id}] traceback: {traceback.format_exc()}")
        return None
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
async def run_agent_system(args, sample):
    """
    Run the solve -> rewrite -> select pipeline for one sample.

    ``args.num_parallel`` solver workers run concurrently, then the same number
    of rewriter workers rework the solver outputs, and a single selector picks
    one rewritten solution. Solver and rewriter samples are scored with
    ``batched_async_rm``; the selector sample inherits the reward of the
    rewriter sample it picked, or 0 when no pick could be extracted. Finally
    every reward is multiplied by ``args.correct_reward_weight`` when the
    selector's reward equals 1 and by ``args.incorrect_reward_weight``
    otherwise, including on the early exits.

    Returns:
        The recorded samples: solver only, solver + rewriter, or
        solver + rewriter + selector, depending on how far the pipeline got.
    """
    args = deepcopy(args)  # Deep copy args since it may be modified in rollout_with_multi_agents
    args.sample = sample
    args.results_dict = {"solver": [], "rewriter": [], "selector": []}
    solver_samples = args.results_dict["solver"]
    rewriter_samples = args.results_dict["rewriter"]
    selector_samples = args.results_dict["selector"]

    def apply_weight(weight, *sample_lists):
        # Scale the reward of every sample in each list, list by list.
        for bucket in sample_lists:
            for item in bucket:
                item.reward = item.reward * weight

    problem_statement = sample.prompt

    # Solving
    solver_jobs = [solver_worker(args, problem_statement, worker_id) for worker_id in range(args.num_parallel)]
    solver_outputs = await asyncio.gather(*solver_jobs, return_exceptions=True)

    solver_rewards = await batched_async_rm(args, solver_samples)
    for item, reward in zip(solver_samples, solver_rewards):
        item.reward = reward

    # Only string results are usable solutions; None and exceptions are dropped.
    previous_solutions = [out for out in solver_outputs if isinstance(out, str)]
    if len(previous_solutions) == 0:
        apply_weight(args.incorrect_reward_weight, solver_samples)
        return solver_samples

    # Rewriting
    rewrite_jobs = [
        rewrite_worker(args, previous_solutions, problem_statement, worker_id)
        for worker_id in range(args.num_parallel)
    ]
    rewrite_outputs = await asyncio.gather(*rewrite_jobs, return_exceptions=True)
    rewrited_solutions = [out for out in rewrite_outputs if isinstance(out, str)]

    rewriter_rewards = await batched_async_rm(args, rewriter_samples)
    for item, reward in zip(rewriter_samples, rewriter_rewards):
        item.reward = reward

    if len(rewrited_solutions) == 0:
        apply_weight(args.incorrect_reward_weight, solver_samples, rewriter_samples)
        return solver_samples + rewriter_samples

    # Selection
    selector = SelectorAgent()
    response = await selector.select(args, problem_statement, rewrited_solutions)
    if len(selector_samples) == 0:
        apply_weight(args.incorrect_reward_weight, solver_samples, rewriter_samples)
        return solver_samples + rewriter_samples

    assert (
        len(selector_samples) == 1
    ), f"selector should only return one solution, but got {len(selector_samples)}"
    selector_sample = selector_samples[0]

    selected_solution_idx = None
    if response is not None:
        selected_solution_idx = selector.extract_selected_solution_idx(response, rewrited_solutions)

    if selected_solution_idx is None:
        selector_sample.reward = 0
    else:
        # The selector inherits the reward of the first rewriter sample containing its pick.
        selected_solution = rewrited_solutions[selected_solution_idx]
        picked = next(
            (
                item
                for item in rewriter_samples
                if item.response_content is not None and selected_solution in item.response_content
            ),
            None,
        )
        if picked is not None:
            selector_sample.reward = picked.reward

    ## If final answer is correct, add positive reward to all; if incorrect, add negative penalty to all.
    if selector_sample.reward == 1:
        final_weight = args.correct_reward_weight
    else:
        final_weight = args.incorrect_reward_weight
    apply_weight(final_weight, solver_samples, rewriter_samples, selector_samples)

    return solver_samples + rewriter_samples + selector_samples
|
ccevolve/baselines/thetaevolve/examples/multi_agent/prompts.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Defines prompt templates for generating different prompts
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
SOLVER_PROMPT_TEMPLATE = """{problem_statement}"""
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def generate_rewriter_template(num_solutions: int) -> str:
    """Build the rewrite prompt template for ``num_solutions`` previous solutions.

    The returned string is a ``str.format`` template with the placeholders
    ``{problem_statement}`` and ``{solution1}`` ... ``{solutionN}``.
    """
    # Doubled braces survive the f-string as literal {solutionN} placeholders.
    solutions_text = "\n".join(
        f"#### Solution {n}\n{{solution{n}}}\n\n---" for n in range(1, num_solutions + 1)
    )

    return f"""### Task: Solution Rewriting Based on Previous Solutions ###
You are being reactivated to revise your mathematical proof. You are provided with two documents:
1. The problem you need to solve.
2. Your {num_solutions} different "Previous Solutions".

Your sole task is to generate a new, correct version of your solution based on your previous discoveries in the provided {num_solutions} solutions.

Refer to the following {num_solutions} solutions and solve the problem.
---

### Problem

{{problem_statement}}

---

### Candidates Solution
{solutions_text}
"""
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def generate_select_template(num_solutions: int) -> str:
    """Build the selection prompt template for ``num_solutions`` candidates.

    The returned string is a ``str.format`` template with the placeholders
    ``{problem_statement}`` and ``{solution1}`` ... ``{solutionN}``. It tells
    the model to end its answer with ``Judgment: IDX``.
    """
    # Doubled braces survive the f-string as literal {solutionN} placeholders.
    solutions_text = "\n".join(
        f"#### Solution {n}\n{{solution{n}}}\n\n---" for n in range(1, num_solutions + 1)
    )

    return f"""You will be given a challenging math problem followed by {num_solutions} solutions.
Your task is to systematically analyze these solutions to identify the most mathematically sound approach.

You are provided with two documents:
1. The problem you need to solve.
2. Your {num_solutions} "Candidate Solutions".

Evaluation Process:
1. Initial Screening
- Group solutions by their final answers
- Identify and explain mathematical contradictions between different answers
- Eliminate solutions with clear mathematical errors

2. Detailed Analysis
For remaining solutions, evaluate:
- Mathematical precision and accuracy
- Logical progression of steps
- Completeness of mathematical reasoning
- Handling of edge cases or special conditions
- For solutions containing and addressing errors, evaluate the error identification and correction methodology.

3. Solution Comparison
Compare viable solutions based on:
- Efficiency of approach
- Clarity of mathematical reasoning
- Sophistication of method
- Robustness of solution (works for all cases)

Your response should include:
1. Brief analysis of conflicting answers
2. Detailed evaluation of mathematically sound solutions
3. Justification for eliminating incorrect solutions
4. Clear explanation for selecting the best approach

End your evaluation with exactly:
Judgment: IDX
where IDX is the index 1-{num_solutions} of the best solution

### Problem

{{problem_statement}}

---

### Candidate Solutions
{solutions_text}
"""
|
ccevolve/baselines/thetaevolve/examples/multi_agent/rollout_with_multi_agents.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
from transformers import AutoTokenizer
|
| 4 |
+
|
| 5 |
+
from slime.utils.misc import load_function
|
| 6 |
+
from slime.utils.types import Sample
|
| 7 |
+
|
| 8 |
+
MULTI_AGENT_CONFIGS = {
|
| 9 |
+
"custom_multi_agent_function_path": "examples.multi_agent.agent_system.run_agent_system",
|
| 10 |
+
"num_parallel": 5,
|
| 11 |
+
"incorrect_reward_weight": 0.8,
|
| 12 |
+
"correct_reward_weight": 1.2,
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
async def generate_with_multi_agents(args, sample: Sample, sampling_params, evaluation=False) -> list[Sample]:
    """Run the configured multi-agent function on one sample.

    Before the agent function is called, ``args`` is mutated in place: the
    tokenizer, the sampling parameters, the selected context-length limit and
    every entry of ``MULTI_AGENT_CONFIGS`` are stored on it as attributes.

    Args:
        args: Run configuration. ``hf_checkpoint`` and one of
            ``rollout_max_context_len`` / ``eval_max_context_len`` are read.
        sample: The sample passed through to the agent function.
        sampling_params: Stored on ``args.sampling_params``.
        evaluation: When true, ``args.eval_max_context_len`` is used as the
            context limit instead of ``args.rollout_max_context_len``.

    Returns:
        The list returned by the agent function, shuffled in place.
    """
    # NOTE(review): the tokenizer is loaded again on every call.
    hf_tokenizer = AutoTokenizer.from_pretrained(args.hf_checkpoint, trust_remote_code=True)
    if evaluation:
        context_limit = args.eval_max_context_len
    else:
        context_limit = args.rollout_max_context_len

    args.sampling_params = sampling_params
    # The chosen limit is written back onto ``rollout_max_context_len`` itself.
    args.rollout_max_context_len = context_limit
    args.tokenizer = hf_tokenizer

    for option, setting in MULTI_AGENT_CONFIGS.items():
        setattr(args, option, setting)

    run_agents = load_function(args.custom_multi_agent_function_path)
    produced = await run_agents(args, sample)

    random.shuffle(produced)

    return produced
|
ccevolve/baselines/thetaevolve/examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# kill leftover processes from a previous run so the task can be rerun
|
| 4 |
+
pkill -9 sglang
|
| 5 |
+
sleep 3
|
| 6 |
+
ray stop --force
|
| 7 |
+
pkill -9 ray
|
| 8 |
+
pkill -9 python
|
| 9 |
+
sleep 3
|
| 10 |
+
pkill -9 ray
|
| 11 |
+
pkill -9 python
|
| 12 |
+
|
| 13 |
+
set -ex
|
| 14 |
+
|
| 15 |
+
# will prevent ray from buffering stdout/stderr
|
| 16 |
+
# CPython reads PYTHONUNBUFFERED (any non-empty value); PYTHONBUFFERED is not a recognized variable
export PYTHONUNBUFFERED=1
|
| 17 |
+
|
| 18 |
+
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
|
| 19 |
+
if [ "$NVLINK_COUNT" -gt 0 ]; then
|
| 20 |
+
HAS_NVLINK=1
|
| 21 |
+
else
|
| 22 |
+
HAS_NVLINK=0
|
| 23 |
+
fi
|
| 24 |
+
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"
|
| 25 |
+
|
| 26 |
+
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
|
| 27 |
+
source "/root/slime/scripts/models/qwen3-30B-A3B.sh"
|
| 28 |
+
|
| 29 |
+
CKPT_ARGS=(
|
| 30 |
+
--hf-checkpoint /root/Qwen3-30B-A3B
|
| 31 |
+
#--hf-checkpoint /root/Qwen3-30B-A3B-FP8
|
| 32 |
+
--ref-load /root/Qwen3-30B-A3B_torch_dist
|
| 33 |
+
--load /root/Qwen3-4B_slime/
|
| 34 |
+
--save /root/Qwen3-4B_slime/
|
| 35 |
+
--save-interval 20
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
ROLLOUT_ARGS=(
|
| 39 |
+
--custom-generate-function-path examples.multi_agent.rollout_with_multi_agents.generate_with_multi_agents
|
| 40 |
+
--prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
|
| 41 |
+
--input-key prompt
|
| 42 |
+
--label-key label
|
| 43 |
+
--apply-chat-template
|
| 44 |
+
--rollout-shuffle
|
| 45 |
+
--rm-type deepscaler
|
| 46 |
+
--num-rollout 3000
|
| 47 |
+
--rollout-batch-size 32
|
| 48 |
+
--n-samples-per-prompt 8
|
| 49 |
+
--rollout-max-context-len 16384
|
| 50 |
+
--rollout-max-response-len 8192
|
| 51 |
+
--rollout-temperature 0.8
|
| 52 |
+
|
| 53 |
+
--global-batch-size 256
|
| 54 |
+
--balance-data
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# multi-agent rollouts do not support eval for now
|
| 58 |
+
EVAL_ARGS=(
|
| 59 |
+
# --eval-interval 20
|
| 60 |
+
# --eval-prompt-data aime /root/aime-2024/aime-2024.jsonl
|
| 61 |
+
--n-samples-per-eval-prompt 16
|
| 62 |
+
--eval-max-response-len 16384
|
| 63 |
+
--eval-top-p 0.7
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
PERF_ARGS=(
|
| 67 |
+
--tensor-model-parallel-size 4
|
| 68 |
+
--sequence-parallel
|
| 69 |
+
--pipeline-model-parallel-size 1
|
| 70 |
+
--context-parallel-size 1
|
| 71 |
+
--expert-model-parallel-size 8
|
| 72 |
+
--expert-tensor-parallel-size 1
|
| 73 |
+
|
| 74 |
+
--recompute-granularity full
|
| 75 |
+
--recompute-method uniform
|
| 76 |
+
--recompute-num-layers 1
|
| 77 |
+
|
| 78 |
+
# --micro-batch-size 1
|
| 79 |
+
--use-dynamic-batch-size
|
| 80 |
+
--max-tokens-per-gpu 20480
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
GRPO_ARGS=(
|
| 84 |
+
--advantage-estimator grpo
|
| 85 |
+
--use-kl-loss
|
| 86 |
+
--kl-loss-coef 0.00
|
| 87 |
+
--kl-loss-type low_var_kl
|
| 88 |
+
--entropy-coef 0.00
|
| 89 |
+
--eps-clip 0.2
|
| 90 |
+
--eps-clip-high 0.28
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
OPTIMIZER_ARGS=(
|
| 94 |
+
--optimizer adam
|
| 95 |
+
--lr 1e-6
|
| 96 |
+
--lr-decay-style constant
|
| 97 |
+
--weight-decay 0.1
|
| 98 |
+
--adam-beta1 0.9
|
| 99 |
+
--adam-beta2 0.98
|
| 100 |
+
|
| 101 |
+
--optimizer-cpu-offload
|
| 102 |
+
--overlap-cpu-optimizer-d2h-h2d
|
| 103 |
+
--use-precision-aware-optimizer
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
WANDB_ARGS=(
|
| 107 |
+
#--use-wandb
|
| 108 |
+
# --wandb-project slime-dev
|
| 109 |
+
# --wandb-group qwen3-30B-A3B-test
|
| 110 |
+
# --wandb-key ${WANDB_KEY}
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
SGLANG_ARGS=(
|
| 114 |
+
--rollout-num-gpus-per-engine 8
|
| 115 |
+
--sglang-mem-fraction-static 0.7
|
| 116 |
+
--sglang-cuda-graph-bs 1 2 4 8 $(seq 16 8 256)
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
MISC_ARGS=(
|
| 120 |
+
# default dropout in megatron is 0.1
|
| 121 |
+
--attention-dropout 0.0
|
| 122 |
+
--hidden-dropout 0.0
|
| 123 |
+
# should be good for model performance
|
| 124 |
+
--accumulate-allreduce-grads-in-fp32
|
| 125 |
+
--attention-softmax-in-fp32
|
| 126 |
+
# need to comment this when using model with MLA
|
| 127 |
+
--attention-backend flash
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
# launch the master node of ray in container
|
| 131 |
+
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
|
| 132 |
+
ray start --head --node-ip-address ${MASTER_ADDR} --num-gpus 8 --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265
|
| 133 |
+
|
| 134 |
+
# Build the runtime environment JSON with proper variable substitution
|
| 135 |
+
RUNTIME_ENV_JSON="{
|
| 136 |
+
\"env_vars\": {
|
| 137 |
+
\"PYTHONPATH\": \"/root/Megatron-LM/\",
|
| 138 |
+
\"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
|
| 139 |
+
\"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
|
| 140 |
+
}
|
| 141 |
+
}"
|
| 142 |
+
|
| 143 |
+
ray job submit --address="http://127.0.0.1:8265" \
|
| 144 |
+
--runtime-env-json="${RUNTIME_ENV_JSON}" \
|
| 145 |
+
-- python3 train.py \
|
| 146 |
+
--actor-num-nodes 1 \
|
| 147 |
+
--actor-num-gpus-per-node 8 \
|
| 148 |
+
--colocate \
|
| 149 |
+
${MODEL_ARGS[@]} \
|
| 150 |
+
${CKPT_ARGS[@]} \
|
| 151 |
+
${ROLLOUT_ARGS[@]} \
|
| 152 |
+
${OPTIMIZER_ARGS[@]} \
|
| 153 |
+
${GRPO_ARGS[@]} \
|
| 154 |
+
${DISTRIBUTED_ARGS[@]} \
|
| 155 |
+
${WANDB_ARGS[@]} \
|
| 156 |
+
${PERF_ARGS[@]} \
|
| 157 |
+
${EVAL_ARGS[@]} \
|
| 158 |
+
${SGLANG_ARGS[@]} \
|
| 159 |
+
${MISC_ARGS[@]}
|
ccevolve/baselines/thetaevolve/examples/reproducibility/README.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reproducibility
|
| 2 |
+
|
| 3 |
+
Reproducibility is a bedrock of scientific progress. By combining the [deterministic inference](https://lmsys.org/blog/2025-09-22-sglang-deterministic/) of SGLang and the deterministic mode of Megatron-LM, slime supports bitwise experiment reproduction.
|
| 4 |
+
|
| 5 |
+
To enable deterministic training, you need to set:
|
| 6 |
+
```bash
|
| 7 |
+
# sglang config
|
| 8 |
+
--sglang-enable-deterministic-inference
|
| 9 |
+
--sglang-attention-backend flashinfer
|
| 10 |
+
|
| 11 |
+
# megatron config
|
| 12 |
+
--deterministic-mode
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
And set the following environment variables:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
"env_vars": {
|
| 19 |
+
...,
|
| 20 |
+
"NCCL_ALGO": "Ring",
|
| 21 |
+
"NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
|
| 22 |
+
"CUBLAS_WORKSPACE_CONFIG": ":4096:8"
|
| 23 |
+
}
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
You also need to set `--use-slime-router` until the PyPI wheel of sglang-router is updated.
|
| 27 |
+
|
| 28 |
+
Here we provide a script for fully deterministic RL training of the Qwen2.5 0.5B model on the GSM8K dataset.
|
| 29 |
+
|
| 30 |
+
For data and checkpoint preparation, please run:
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
# download
|
| 34 |
+
huggingface-cli download --repo-type dataset zhuzilin/gsm8k --local-dir /root/gsm8k
|
| 35 |
+
huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir /root/Qwen2.5-0.5B-Instruct
|
| 36 |
+
|
| 37 |
+
# convert ckpt
|
| 38 |
+
cd slime/
|
| 39 |
+
source scripts/models/qwen2.5-0.5B.sh
|
| 40 |
+
PYTHONPATH=/root/Megatron-LM/ python \
|
| 41 |
+
tools/convert_hf_to_torch_dist.py \
|
| 42 |
+
${MODEL_ARGS[@]} \
|
| 43 |
+
--hf-checkpoint /root/Qwen2.5-0.5B-Instruct \
|
| 44 |
+
--save /root/Qwen2.5-0.5B-Instruct_torch_dist/
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
And to run training,
|
| 48 |
+
|
| 49 |
+
```bash
|
| 50 |
+
bash examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
For screenshots of the wandb results, please refer to [pull#370](https://github.com/THUDM/slime/pull/370).
|
ccevolve/baselines/thetaevolve/examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# kill leftover processes from a previous run so the task can be rerun
|
| 4 |
+
pkill -9 sglang
|
| 5 |
+
sleep 3
|
| 6 |
+
ray stop --force
|
| 7 |
+
pkill -9 ray
|
| 8 |
+
pkill -9 python
|
| 9 |
+
sleep 3
|
| 10 |
+
pkill -9 ray
|
| 11 |
+
pkill -9 python
|
| 12 |
+
|
| 13 |
+
set -ex
|
| 14 |
+
|
| 15 |
+
# will prevent ray from buffering stdout/stderr
|
| 16 |
+
# CPython reads PYTHONUNBUFFERED (any non-empty value); PYTHONBUFFERED is not a recognized variable
export PYTHONUNBUFFERED=1
|
| 17 |
+
|
| 18 |
+
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
|
| 19 |
+
source "${SCRIPT_DIR}/../../scripts/models/qwen2.5-0.5B.sh"
|
| 20 |
+
|
| 21 |
+
CKPT_ARGS=(
|
| 22 |
+
--hf-checkpoint /root/Qwen2.5-0.5B-Instruct/
|
| 23 |
+
--ref-load /root/Qwen2.5-0.5B-Instruct_torch_dist/
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
ROLLOUT_ARGS=(
|
| 27 |
+
--prompt-data /root/gsm8k/train.parquet
|
| 28 |
+
--input-key messages
|
| 29 |
+
--label-key label
|
| 30 |
+
--apply-chat-template
|
| 31 |
+
--rollout-shuffle
|
| 32 |
+
--rm-type math
|
| 33 |
+
--num-rollout 100
|
| 34 |
+
--rollout-batch-size 32
|
| 35 |
+
--n-samples-per-prompt 8
|
| 36 |
+
--rollout-max-response-len 1024
|
| 37 |
+
--rollout-temperature 0.8
|
| 38 |
+
|
| 39 |
+
--global-batch-size 256
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
EVAL_ARGS=(
|
| 43 |
+
--eval-interval 20
|
| 44 |
+
--eval-prompt-data gsm8k /root/gsm8k/test.parquet
|
| 45 |
+
--n-samples-per-eval-prompt 1
|
| 46 |
+
--eval-max-response-len 1024
|
| 47 |
+
--eval-top-k 1
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
PERF_ARGS=(
|
| 51 |
+
--tensor-model-parallel-size 1
|
| 52 |
+
--sequence-parallel
|
| 53 |
+
--pipeline-model-parallel-size 1
|
| 54 |
+
--context-parallel-size 1
|
| 55 |
+
--expert-model-parallel-size 1
|
| 56 |
+
--expert-tensor-parallel-size 1
|
| 57 |
+
|
| 58 |
+
--use-dynamic-batch-size
|
| 59 |
+
--max-tokens-per-gpu 9216
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
GRPO_ARGS=(
|
| 63 |
+
--advantage-estimator grpo
|
| 64 |
+
--use-kl-loss
|
| 65 |
+
--kl-loss-coef 0.00
|
| 66 |
+
--kl-loss-type low_var_kl
|
| 67 |
+
--kl-coef 0.00
|
| 68 |
+
--entropy-coef 0.00
|
| 69 |
+
--eps-clip 0.2
|
| 70 |
+
--eps-clip-high 0.28
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
OPTIMIZER_ARGS=(
|
| 74 |
+
--optimizer adam
|
| 75 |
+
--lr 1e-6
|
| 76 |
+
--lr-decay-style constant
|
| 77 |
+
--weight-decay 0.1
|
| 78 |
+
--adam-beta1 0.9
|
| 79 |
+
--adam-beta2 0.98
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
WANDB_ARGS=(
|
| 83 |
+
--use-wandb
|
| 84 |
+
--wandb-host https://wandb.ai/
|
| 85 |
+
--wandb-team glm-zero
|
| 86 |
+
--wandb-project slime-dev
|
| 87 |
+
--wandb-group qwen2.5-0.5B-gsm8k-deterministic
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
SGLANG_ARGS=(
|
| 91 |
+
--rollout-num-gpus-per-engine 1
|
| 92 |
+
--sglang-mem-fraction-static 0.7
|
| 93 |
+
|
| 94 |
+
--sglang-enable-deterministic-inference
|
| 95 |
+
--sglang-attention-backend flashinfer
|
| 96 |
+
|
| 97 |
+
--deterministic-mode
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
MISC_ARGS=(
|
| 101 |
+
# default dropout in megatron is 0.1
|
| 102 |
+
--attention-dropout 0.0
|
| 103 |
+
--hidden-dropout 0.0
|
| 104 |
+
# should be good for model performance
|
| 105 |
+
--accumulate-allreduce-grads-in-fp32
|
| 106 |
+
--attention-softmax-in-fp32
|
| 107 |
+
# need to comment this when using model with MLA
|
| 108 |
+
--attention-backend flash
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# launch the master node of ray in container
|
| 112 |
+
ray start --head --node-ip-address 127.0.0.1 --num-gpus 8 --disable-usage-stats
|
| 113 |
+
|
| 114 |
+
ray job submit --address="http://127.0.0.1:8265" \
|
| 115 |
+
--runtime-env-json='{
|
| 116 |
+
"env_vars": {
|
| 117 |
+
"PYTHONPATH": "/root/Megatron-LM",
|
| 118 |
+
"CUDA_DEVICE_MAX_CONNECTIONS": "1",
|
| 119 |
+
"NCCL_ALGO": "Ring",
|
| 120 |
+
"NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
|
| 121 |
+
"CUBLAS_WORKSPACE_CONFIG": ":4096:8"
|
| 122 |
+
}
|
| 123 |
+
}' \
|
| 124 |
+
-- python3 train.py \
|
| 125 |
+
--actor-num-nodes 1 \
|
| 126 |
+
--actor-num-gpus-per-node 8 \
|
| 127 |
+
--colocate \
|
| 128 |
+
--calculate-per-token-loss \
|
| 129 |
+
--use-slime-router \
|
| 130 |
+
${MODEL_ARGS[@]} \
|
| 131 |
+
${CKPT_ARGS[@]} \
|
| 132 |
+
${ROLLOUT_ARGS[@]} \
|
| 133 |
+
${OPTIMIZER_ARGS[@]} \
|
| 134 |
+
${GRPO_ARGS[@]} \
|
| 135 |
+
${DISTRIBUTED_ARGS[@]} \
|
| 136 |
+
${WANDB_ARGS[@]} \
|
| 137 |
+
${PERF_ARGS[@]} \
|
| 138 |
+
${EVAL_ARGS[@]} \
|
| 139 |
+
${SGLANG_ARGS[@]} \
|
| 140 |
+
${MISC_ARGS[@]}
|
ccevolve/baselines/thetaevolve/examples/retool/requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
jinja2>=3.0.0
|
| 2 |
+
psutil>=5.8.0
|
| 3 |
+
pytest>=7.0.0
|
ccevolve/baselines/thetaevolve/examples/retool/retool_qwen3_4b_rl.sh
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# kill leftover processes from a previous run so the task can be rerun
|
| 4 |
+
pkill -9 sglang
|
| 5 |
+
sleep 3
|
| 6 |
+
ray stop --force
|
| 7 |
+
pkill -9 ray
|
| 8 |
+
pkill -9 python
|
| 9 |
+
sleep 3
|
| 10 |
+
pkill -9 ray
|
| 11 |
+
pkill -9 python
|
| 12 |
+
|
| 13 |
+
set -ex
|
| 14 |
+
|
| 15 |
+
# will prevent ray from buffering stdout/stderr
|
| 16 |
+
# CPython reads PYTHONUNBUFFERED (any non-empty value); PYTHONBUFFERED is not a recognized variable
export PYTHONUNBUFFERED=1
|
| 17 |
+
|
| 18 |
+
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
|
| 19 |
+
if [ "$NVLINK_COUNT" -gt 0 ]; then
|
| 20 |
+
HAS_NVLINK=1
|
| 21 |
+
else
|
| 22 |
+
HAS_NVLINK=0
|
| 23 |
+
fi
|
| 24 |
+
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"
|
| 25 |
+
|
| 26 |
+
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
|
| 27 |
+
source "/root/slime/scripts/models/qwen3-4B.sh"
|
| 28 |
+
|
| 29 |
+
CKPT_ARGS=(
|
| 30 |
+
--hf-checkpoint /root/font-info/qwen3-4b-sft
|
| 31 |
+
--ref-load /root/font-info/qwen3-4b-sft_torch_dist
|
| 32 |
+
# --load /root/Qwen3-4B_slime/
|
| 33 |
+
--save /root/font-info/qwen3-4b-sft/qwen3-4b-sft-multi-turn/
|
| 34 |
+
--save-interval 20
|
| 35 |
+
--rotary-base 5000000
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
ROLLOUT_ARGS=(
|
| 39 |
+
--prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
|
| 40 |
+
--input-key prompt
|
| 41 |
+
--label-key label
|
| 42 |
+
--apply-chat-template
|
| 43 |
+
--rollout-shuffle
|
| 44 |
+
--reward-key score
|
| 45 |
+
--num-rollout 3000
|
| 46 |
+
--rollout-batch-size 32
|
| 47 |
+
--n-samples-per-prompt 8
|
| 48 |
+
--rollout-max-response-len 8192
|
| 49 |
+
--rollout-temperature 0.8
|
| 50 |
+
|
| 51 |
+
--global-batch-size 256
|
| 52 |
+
--balance-data
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
EVAL_ARGS=(
|
| 56 |
+
--eval-interval 20
|
| 57 |
+
--eval-prompt-data aime /root/aime-2024/aime-2024.jsonl
|
| 58 |
+
--n-samples-per-eval-prompt 16
|
| 59 |
+
--eval-max-response-len 16384
|
| 60 |
+
--eval-top-p 0.7
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
PERF_ARGS=(
|
| 64 |
+
--tensor-model-parallel-size 2
|
| 65 |
+
--sequence-parallel
|
| 66 |
+
--pipeline-model-parallel-size 1
|
| 67 |
+
--context-parallel-size 1
|
| 68 |
+
--expert-model-parallel-size 1
|
| 69 |
+
--expert-tensor-parallel-size 1
|
| 70 |
+
|
| 71 |
+
--recompute-granularity full
|
| 72 |
+
--recompute-method uniform
|
| 73 |
+
--recompute-num-layers 1
|
| 74 |
+
|
| 75 |
+
# --micro-batch-size 1
|
| 76 |
+
--use-dynamic-batch-size
|
| 77 |
+
--max-tokens-per-gpu 9216
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
GRPO_ARGS=(
|
| 81 |
+
--advantage-estimator grpo
|
| 82 |
+
--use-kl-loss
|
| 83 |
+
--kl-loss-coef 0.00
|
| 84 |
+
--kl-loss-type low_var_kl
|
| 85 |
+
--entropy-coef 0.00
|
| 86 |
+
--eps-clip 0.2
|
| 87 |
+
--eps-clip-high 0.28
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
OPTIMIZER_ARGS=(
|
| 91 |
+
--optimizer adam
|
| 92 |
+
--lr 1e-6
|
| 93 |
+
--lr-decay-style constant
|
| 94 |
+
--weight-decay 0.1
|
| 95 |
+
--adam-beta1 0.9
|
| 96 |
+
--adam-beta2 0.98
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
WANDB_ARGS=(
|
| 100 |
+
--use-wandb
|
| 101 |
+
--wandb-project slime-dapo
|
| 102 |
+
--wandb-group qwen3-4B-test-multi-turn
|
| 103 |
+
--wandb-key ${WANDB_KEY}
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
SGLANG_ARGS=(
|
| 107 |
+
--rollout-num-gpus-per-engine 2
|
| 108 |
+
--sglang-mem-fraction-static 0.7
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
MISC_ARGS=(
|
| 112 |
+
# default dropout in megatron is 0.1
|
| 113 |
+
--attention-dropout 0.0
|
| 114 |
+
--hidden-dropout 0.0
|
| 115 |
+
# should be good for model performance
|
| 116 |
+
--accumulate-allreduce-grads-in-fp32
|
| 117 |
+
--attention-softmax-in-fp32
|
| 118 |
+
# need to comment this when using model with MLA
|
| 119 |
+
--attention-backend flash
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
CUSTOM_ARGS=(
|
| 123 |
+
--custom-generate-function-path generate_with_retool.generate
|
| 124 |
+
--custom-rm-path generate_with_retool.reward_func
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
# launch the master node of ray in container
|
| 128 |
+
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
|
| 129 |
+
ray start --head --node-ip-address ${MASTER_ADDR} --num-gpus 4 --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265
|
| 130 |
+
|
| 131 |
+
# Build the runtime environment JSON with proper variable substitution
|
| 132 |
+
RUNTIME_ENV_JSON="{
|
| 133 |
+
\"env_vars\": {
|
| 134 |
+
\"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}:/root/slime\",
|
| 135 |
+
\"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
|
| 136 |
+
\"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
|
| 137 |
+
}
|
| 138 |
+
}"
|
| 139 |
+
|
| 140 |
+
ray job submit --address="http://127.0.0.1:8265" \
|
| 141 |
+
--runtime-env-json="${RUNTIME_ENV_JSON}" \
|
| 142 |
+
-- python3 train.py \
|
| 143 |
+
--actor-num-nodes 1 \
|
| 144 |
+
--actor-num-gpus-per-node 4 \
|
| 145 |
+
--colocate \
|
| 146 |
+
${MODEL_ARGS[@]} \
|
| 147 |
+
${CKPT_ARGS[@]} \
|
| 148 |
+
${ROLLOUT_ARGS[@]} \
|
| 149 |
+
${OPTIMIZER_ARGS[@]} \
|
| 150 |
+
${GRPO_ARGS[@]} \
|
| 151 |
+
${DISTRIBUTED_ARGS[@]} \
|
| 152 |
+
${WANDB_ARGS[@]} \
|
| 153 |
+
${PERF_ARGS[@]} \
|
| 154 |
+
${EVAL_ARGS[@]} \
|
| 155 |
+
${SGLANG_ARGS[@]} \
|
| 156 |
+
${MISC_ARGS[@]} \
|
| 157 |
+
${CUSTOM_ARGS[@]}
|
ccevolve/baselines/thetaevolve/examples/retool/sft_data_processing.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datasets import load_dataset
|
| 2 |
+
|
| 3 |
+
ds = load_dataset("JoeYing/ReTool-SFT")["train"]
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def convert(sample):
    """Rebuild ``sample["messages"]`` as a list of role/content dicts.

    Each turn is reduced to its ``"role"`` and ``"content"`` entries; any
    other keys on a turn are not carried over.

    Args:
        sample: Mapping with a ``"messages"`` sequence of turns, each a
            mapping that has ``"role"`` and ``"content"`` entries.

    Returns:
        ``{"messages": [...]}`` with one ``{"role", "content"}`` dict per turn.

    Raises:
        ValueError: If a turn's role is not ``"user"``, ``"assistant"`` or
            ``"system"``.
    """
    known_roles = ("user", "assistant", "system")

    def convert_role(role):
        # Accepted roles map to themselves; anything else is rejected.
        for known in known_roles:
            if role == known:
                return known
        raise ValueError(f"Unknown role: {role}")

    normalized = []
    for turn in sample["messages"]:
        normalized.append(
            {
                "role": convert_role(turn["role"]),
                "content": turn["content"],
            }
        )

    return {"messages": normalized}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
ds = ds.map(convert)
|
| 31 |
+
ds.to_parquet("./data/retool/ReTool-SFT.parquet")
|
ccevolve/baselines/thetaevolve/examples/search-r1/README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Search-R1 lite
|
| 2 |
+
|
| 3 |
+
This is a minimal reproduction of [Search-R1](https://github.com/PeterGriffinJin/Search-R1) and an example of using multi-turn conversation and tool-calling in slime.
|
| 4 |
+
|
| 5 |
+
## Environment Setup
|
| 6 |
+
|
| 7 |
+
Use the `slimerl/slime:latest` image and initialize the environment required for Search-R1:
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
cd /root/
|
| 11 |
+
git clone https://github.com/THUDM/slime.git
|
| 12 |
+
cd slime/ && pip install -e .
|
| 13 |
+
# for Search R1
|
| 14 |
+
pip install chardet
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
Please refer to the script provided in Search-R1 to download the data:
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
git clone https://github.com/PeterGriffinJin/Search-R1.git
|
| 21 |
+
cd Search-R1/
|
| 22 |
+
python scripts/data_process/nq_search.py --local_dir /root/nq_search/
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
Initialize the Qwen2.5-3B model:
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
# hf checkpoint
|
| 29 |
+
huggingface-cli download Qwen/Qwen2.5-3B --local-dir /root/Qwen2.5-3B
|
| 30 |
+
|
| 31 |
+
# mcore checkpoint
|
| 32 |
+
cd /root/slime
|
| 33 |
+
source scripts/models/qwen2.5-3B.sh
|
| 34 |
+
PYTHONPATH=/root/Megatron-LM python tools/convert_hf_to_torch_dist.py \
|
| 35 |
+
${MODEL_ARGS[@]} \
|
| 36 |
+
--hf-checkpoint /root/Qwen2.5-3B \
|
| 37 |
+
--save /root/Qwen2.5-3B_torch_dist
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
## Running the Script
|
| 41 |
+
|
| 42 |
+
You need to configure your serper.dev API key in `generate_with_search.py`:
|
| 43 |
+
|
| 44 |
+
```python
|
| 45 |
+
SEARCH_R1_CONFIGS = {
|
| 46 |
+
"max_turns": 3,
|
| 47 |
+
"topk": 3,
|
| 48 |
+
"google_api_key": "YOUR_API_KEY", # Replace with your actual API key
|
| 49 |
+
"snippet_only": True, # Set to True to only return snippets
|
| 50 |
+
"proxy": None, # Set to your proxy if needed
|
| 51 |
+
"search_concurrency": 256,
|
| 52 |
+
# rm
|
| 53 |
+
"format_score": 0.2,
|
| 54 |
+
}
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
And run:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
cd slime/
|
| 61 |
+
bash examples/search-r1/run_qwen2.5_3B.sh
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Code Structure
|
| 65 |
+
|
| 66 |
+
To implement multi-turn conversation + tool-calling in slime, you only need to implement a custom data generation function and a reward model for the task. These correspond to the following 2 configuration items in the startup script:
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
CUSTOM_ARGS=(
|
| 70 |
+
--custom-generate-function-path generate_with_search.generate
|
| 71 |
+
--custom-rm-path generate_with_search.reward_func
|
| 72 |
+
)
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
These are the `generate` and `reward_func` functions in `generate_with_search.py`.
|
ccevolve/baselines/thetaevolve/examples/search-r1/README_zh.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Search-R1 lite
|
| 2 |
+
|
| 3 |
+
[English](./README.md)
|
| 4 |
+
|
| 5 |
+
这里是对 [Search-R1](https://github.com/PeterGriffinJin/Search-R1) 的一个简单复现,同时也是一个在 slime 中使用多轮对话和工具调用的样例。
|
| 6 |
+
|
| 7 |
+
## 配置环境
|
| 8 |
+
|
| 9 |
+
使用 `slimerl/slime:latest` 镜像,并初始化 Search-R1 需要的环境:
|
| 10 |
+
|
| 11 |
+
```bash
|
| 12 |
+
cd /root/
|
| 13 |
+
git clone https://github.com/THUDM/slime.git
|
| 14 |
+
cd slime/ && pip install -e .
|
| 15 |
+
# for Search R1
|
| 16 |
+
pip install chardet
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
请参照 Search-R1 中提供的脚本下载数据:
|
| 20 |
+
|
| 21 |
+
```bash
|
| 22 |
+
git clone https://github.com/PeterGriffinJin/Search-R1.git
|
| 23 |
+
cd Search-R1/
|
| 24 |
+
python scripts/data_process/nq_search.py --local_dir /root/nq_search/
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
初始化 Qwen2.5-3B 模型:
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
# hf checkpoint
|
| 31 |
+
huggingface-cli download Qwen/Qwen2.5-3B --local-dir /root/Qwen2.5-3B
|
| 32 |
+
|
| 33 |
+
# mcore checkpoint
|
| 34 |
+
cd /root/slime
|
| 35 |
+
source scripts/models/qwen2.5-3B.sh
|
| 36 |
+
PYTHONPATH=/root/Megatron-LM python tools/convert_hf_to_torch_dist.py \
|
| 37 |
+
${MODEL_ARGS[@]} \
|
| 38 |
+
--hf-checkpoint /root/Qwen2.5-3B \
|
| 39 |
+
--save /root/Qwen2.5-3B_torch_dist
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## 运行脚本
|
| 43 |
+
|
| 44 |
+
需要将你的 serper.dev API key 配置在 `generate_with_search.py` 中:
|
| 45 |
+
|
| 46 |
+
```python
|
| 47 |
+
SEARCH_R1_CONFIGS = {
|
| 48 |
+
"max_turns": 3,
|
| 49 |
+
"topk": 3,
|
| 50 |
+
"google_api_key": "YOUR_API_KEY", # Replace with your actual API key
|
| 51 |
+
"snippet_only": True, # Set to True to only return snippets
|
| 52 |
+
"proxy": None, # Set to your proxy if needed
|
| 53 |
+
"search_concurrency": 256,
|
| 54 |
+
# rm
|
| 55 |
+
"format_score": 0.2,
|
| 56 |
+
}
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
并运行:
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
cd slime/
|
| 63 |
+
bash examples/search-r1/run_qwen2.5_3B.sh
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## 代码结构
|
| 67 |
+
|
| 68 |
+
为了实现多轮 + 工具调用,在 slime 中只需要实现一个自定义的数据生成函数,以及一个任务所需的 reward model,对应启动脚本中的这 2 个配置项:
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
CUSTOM_ARGS=(
|
| 72 |
+
--custom-generate-function-path generate_with_search.generate
|
| 73 |
+
--custom-rm-path generate_with_search.reward_func
|
| 74 |
+
)
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
也就是 `generate_with_search.py` 中的 `generate` 和 `reward_func` 两个函数。
|
ccevolve/baselines/thetaevolve/examples/search-r1/generate_with_search.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapted from https://github.com/PeterGriffinJin/Search-R1/blob/ceee7b89655ed52f205b9beb98e1190c3eedcfb0/search_r1/llm_agent/generation.py
|
| 2 |
+
import asyncio
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
from google_search_server import google_search
|
| 6 |
+
from qa_em_format import compute_score_em
|
| 7 |
+
|
| 8 |
+
from slime.rollout.sglang_rollout import GenerateState
|
| 9 |
+
from slime.utils.http_utils import post
|
| 10 |
+
from slime.utils.types import Sample
|
| 11 |
+
|
| 12 |
+
SEARCH_R1_CONFIGS = {
|
| 13 |
+
"max_turns": 3,
|
| 14 |
+
"topk": 3,
|
| 15 |
+
"google_api_key": "YOUR_API_KEY", # Replace with your actual API key
|
| 16 |
+
"snippet_only": True, # Set to True to only return snippets
|
| 17 |
+
"proxy": None, # Set to your proxy if needed
|
| 18 |
+
"search_concurrency": 256,
|
| 19 |
+
# rm
|
| 20 |
+
"format_score": 0.2,
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
SEMAPHORE = asyncio.Semaphore(SEARCH_R1_CONFIGS["search_concurrency"])
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _passages2string(retrieval_result):
|
| 28 |
+
format_reference = ""
|
| 29 |
+
for idx, doc_item in enumerate(retrieval_result):
|
| 30 |
+
|
| 31 |
+
content = doc_item["document"]["contents"]
|
| 32 |
+
title = content.split("\n")[0]
|
| 33 |
+
text = "\n".join(content.split("\n")[1:])
|
| 34 |
+
format_reference += f"Doc {idx+1}(Title: {title}) {text}\n"
|
| 35 |
+
|
| 36 |
+
return format_reference
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def search(query: str) -> str:
    """Run ``google_search`` for ``query`` and format the result as reference text.

    The API key, result count, snippet-only flag and proxy are all taken from
    ``SEARCH_R1_CONFIGS``.

    Args:
        query: The search query string.

    Returns:
        The search result rendered by ``_passages2string``.
    """
    cfg = SEARCH_R1_CONFIGS
    hits = await google_search(
        cfg["google_api_key"],
        query,
        cfg["topk"],
        snippet_only=cfg["snippet_only"],
        proxy=cfg["proxy"],
    )
    return _passages2string(hits)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def postprocess_responses(resp: str) -> str:
    """Cut a raw model response right after its first closing action tag.

    ``</search>`` is checked before ``</answer>``: if the response contains a
    ``</search>`` anywhere, everything after the first one is dropped.
    Otherwise the same is done for ``</answer>``. With neither tag present the
    response is returned unchanged.
    """
    for closing_tag in ("</search>", "</answer>"):
        head, found, _ = resp.partition(closing_tag)
        if found:
            return head + closing_tag
    return resp
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def postprocess_predictions(prediction: str):
    """Extract the first well-formed ``<search>`` or ``<answer>`` action.

    Returns:
        ``(action, content)``. ``action`` is ``"search"`` or ``"answer"`` and
        ``content`` is the stripped text between the matching tags. When no
        matching tag pair exists the result is ``(None, "")``.
    """
    found = re.search(r"<(search|answer)>(.*?)</\1>", prediction, re.DOTALL)
    if found is None:
        return None, ""
    tag_name, inner = found.groups()
    return tag_name, inner.strip()
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
async def execute_predictions(prediction: str) -> tuple[str, bool]:
    """Act on one model turn and produce the observation to feed back.

    Args:
        prediction: The text generated in this turn.

    Returns:
        ``(next_obs, done)``:

        * ``search`` action: ``next_obs`` is the stripped search output wrapped
          in ``<information>`` tags, ``done`` is False.
        * ``answer`` action: ``next_obs`` is ``""`` and ``done`` is True.
        * anything else: ``next_obs`` is a corrective message telling the model
          how to format its next action, ``done`` is False.
    """
    action, content = postprocess_predictions(prediction)

    if action == "answer":
        return "", True

    if action == "search":
        # The module-level semaphore bounds how many searches run at once.
        async with SEMAPHORE:
            search_results = await search(content)
        return f"\n\n<information>{search_results.strip()}</information>\n\n", False

    # No well-formed action tag. The message is built from adjacent literals
    # (not backslash continuations) so source indentation can never leak into it.
    invalid_action_msg = (
        "\nMy previous action is invalid. "
        "If I want to search, I should put the query between <search> and </search>. "
        "If I want to give the final answer, I should put the answer between <answer> and </answer>. "
        "Let me try again.\n"
    )
    return invalid_action_msg, False
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
async def generate(args, sample: Sample, sampling_params) -> Sample:
    """Roll out one sample as a multi-turn generate/search loop.

    Each turn posts ``prompt + response`` to the router's ``/generate``
    endpoint, truncates the returned text with ``postprocess_responses`` and
    appends it to the running response. Unless the turn ended the episode, the
    observation returned by ``execute_predictions`` is appended as well.

    Args:
        args: Run arguments; this function reads ``partial_rollout``,
            ``sglang_router_ip`` and ``sglang_router_port`` and passes ``args``
            to ``GenerateState``.
        sample: The sample to roll out; it is updated in place and returned.
        sampling_params: Forwarded unchanged in every request payload.

    Returns:
        The same ``sample`` with ``tokens``, ``response_length``, ``response``
        and ``loss_mask`` filled in, and ``status`` set when the last finish
        reason is ``length`` or ``stop``. If a request reports ``abort``, the
        sample is returned immediately with only ``status`` set to ABORTED.
    """
    assert not args.partial_rollout, f"Partial rollout is not supported for this function at the moment."

    state = GenerateState(args)

    url = f"http://{args.sglang_router_ip}:{args.sglang_router_port}/generate"

    # Partial rollouts are rejected above, so generation always starts from the
    # bare prompt with an empty response.
    prompt = sample.prompt
    prompt_tokens_ids = state.tokenizer(sample.prompt, add_special_tokens=False)["input_ids"]
    response = ""
    response_token_ids = []
    loss_mask = []
    for _ in range(SEARCH_R1_CONFIGS["max_turns"]):
        # The whole conversation so far is re-sent as plain text each turn.
        payload = {
            "text": prompt + response,
            "sampling_params": sampling_params,
        }
        output = await post(url, payload)

        # abort: return right away without recording tokens for this sample
        if output["meta_info"]["finish_reason"]["type"] == "abort":
            sample.status = Sample.Status.ABORTED
            return sample

        cur_response = output["text"]
        # Drop anything generated after the first closing action tag.
        cur_response = postprocess_responses(cur_response)

        # The truncated text is re-tokenized here; the server's token ids are not used.
        cur_response_token_ids = state.tokenizer(cur_response, add_special_tokens=False)["input_ids"]
        response += cur_response
        response_token_ids += cur_response_token_ids
        # Model-generated tokens are marked 1 in the loss mask.
        loss_mask += [1] * len(cur_response_token_ids)

        # Generation hit the length limit: stop without executing an action.
        if output["meta_info"]["finish_reason"]["type"] == "length":
            break

        next_obs, done = await execute_predictions(cur_response)
        if done:
            break

        assert next_obs != "", "Next observation should not be empty."
        obs_tokens_ids = state.tokenizer(next_obs, add_special_tokens=False)["input_ids"]
        response += next_obs
        response_token_ids += obs_tokens_ids
        # Observation tokens are marked 0 in the loss mask
        # (presumably so they are excluded from the training loss; confirm in the trainer).
        loss_mask += [0] * len(obs_tokens_ids)

    sample.tokens = prompt_tokens_ids + response_token_ids
    sample.response_length = len(response_token_ids)
    sample.response = response
    sample.loss_mask = loss_mask
    # Map the finish reason of the LAST request to the sample status. Any other
    # finish reason leaves the status untouched.
    match output["meta_info"]["finish_reason"]["type"]:
        case "length":
            sample.status = Sample.Status.TRUNCATED
        case "abort":
            # An abort already returns from inside the loop above.
            sample.status = Sample.Status.ABORTED
        case "stop":
            sample.status = Sample.Status.COMPLETED

    return sample
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
async def reward_func(args, sample, **kwargs):
    """Score a finished rollout with the exact-match QA reward.

    Args:
        args: Run arguments (not read here).
        sample: The ``Sample`` to score; its ``prompt``, ``response`` and
            ``label["ground_truth"]`` are used.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``compute_score_em`` returns for the concatenated prompt and
        response.

    Raises:
        TypeError: If ``sample`` is not a ``Sample`` instance.
    """
    if not isinstance(sample, Sample):
        raise TypeError("Sample must be an instance of Sample class.")

    full_text = sample.prompt + sample.response
    return compute_score_em(
        solution_str=full_text,
        ground_truth=sample.label["ground_truth"],
        format_score=SEARCH_R1_CONFIGS["format_score"],
    )
|
ccevolve/baselines/thetaevolve/examples/search-r1/google_search_server.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import re
|
| 5 |
+
from typing import Dict, List
|
| 6 |
+
|
| 7 |
+
import aiohttp
|
| 8 |
+
import chardet
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# --- Utilities ---
|
| 12 |
+
def parse_snippet(snippet: str) -> List[str]:
    """Split a snippet on ``...`` and keep the stripped segments longer than five words."""
    kept = []
    for piece in snippet.split("..."):
        trimmed = piece.strip()
        # Short fragments (five whitespace-separated words or fewer) are discarded.
        if len(trimmed.split()) > 5:
            kept.append(trimmed)
    return kept
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def sanitize_search_query(query: str) -> str:
    """Reduce a query to word characters separated by single spaces.

    Every character that is neither a word character nor whitespace becomes a
    space, whitespace runs are collapsed to one space, and leading/trailing
    spaces are removed.
    """
    # Applied in order: symbols -> space, control whitespace -> space,
    # then collapse whitespace runs.
    substitutions = (
        (r"[^\w\s]", " "),
        (r"[\t\r\f\v\n]", " "),
        (r"\s+", " "),
    )
    cleaned = query
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def filter_links(search_results: List[Dict]) -> List[str]:
    """Collect the links that look like HTML pages from raw search results.

    Args:
        search_results: Result dicts, each optionally holding an ``"items"``
            list of dicts with a ``"link"`` entry.

    Returns:
        Links, in encounter order, whose URL path has no extension or one of
        ``.html``/``.htm``/``.shtml`` (case-insensitive). Items carrying a
        ``"mime"`` key and items without a usable ``"link"`` are skipped.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from urllib.parse import urlsplit

    html_like_exts = {"", ".html", ".htm", ".shtml"}
    links = []
    for result in search_results:
        for item in result.get("items", []):
            if "mime" in item:
                continue
            link = item.get("link")
            if not link:
                continue
            # Judge the extension on the URL *path* only. Splitting the whole
            # URL would read ".com" from "https://example.com" and would be
            # confused by dots inside a query string.
            ext = os.path.splitext(urlsplit(link).path)[1].lower()
            if ext in html_like_exts:
                links.append(link)
    return links
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
async def fetch(session: aiohttp.ClientSession, url: str, semaphore: asyncio.Semaphore) -> str:
    """Download ``url`` and return its body decoded to text.

    Args:
        session: Open aiohttp session used for the GET request.
        url: Page to download; an empty string short-circuits to ``""``.
        semaphore: Held for the duration of the request and decoding.

    Returns:
        The decoded body, or ``""`` when ``url`` is empty or the request
        raises ``aiohttp.ClientError`` or ``asyncio.TimeoutError``.
    """
    if url == "":
        return ""
    user_agents = [
        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P)...",
        "Mozilla/5.0 AppleWebKit/537.36...",
        "Mozilla/5.0 (compatible; Googlebot/2.1; +https://www.google.com/bot.html)",
    ]
    headers = {"User-Agent": random.choice(user_agents)}

    async with semaphore:
        try:
            async with session.get(url, headers=headers) as response:
                raw = await response.read()
                detected = chardet.detect(raw)
                encoding = detected["encoding"] or "utf-8"
                try:
                    return raw.decode(encoding, errors="ignore")
                except LookupError:
                    # The detector may name an encoding Python has no codec
                    # for. Fall back to UTF-8 rather than letting the error
                    # escape and fail every sibling fetch gathered with this one.
                    return raw.decode("utf-8", errors="ignore")
        except (aiohttp.ClientError, asyncio.TimeoutError):
            return ""
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
async def fetch_all(urls: List[str], limit: int = 8) -> List[str]:
    """Fetch all ``urls`` concurrently through one shared session.

    Args:
        urls: Pages to download.
        limit: Size of the semaphore handed to ``fetch`` and the connector's
            per-host connection limit.

    Returns:
        The results of ``fetch`` for each URL, as gathered by ``asyncio.gather``.
    """
    gate = asyncio.Semaphore(limit)
    session_options = {
        "timeout": aiohttp.ClientTimeout(total=5),
        "connector": aiohttp.TCPConnector(limit_per_host=limit, force_close=True),
    }
    async with aiohttp.ClientSession(**session_options) as session:
        pending = [fetch(session, target, gate) for target in urls]
        return await asyncio.gather(*pending)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def collect_context(snippet: str, doc: str) -> str:
    """Return the lines of ``doc`` that contain segments of ``snippet``.

    The snippet is split with ``parse_snippet``; each segment is searched for
    in ``doc`` (with newlines treated as spaces) and, when found, the enclosing
    newline-delimited span of ``doc`` is collected.

    Args:
        snippet: Search-result snippet text.
        doc: Full page text.

    Returns:
        The distinct matching spans, stripped, in first-seen order and joined
        by newlines. Empty string when nothing matches.
    """
    # Newlines are replaced one-for-one by spaces, so offsets found in the
    # flattened text are valid offsets into ``doc``. Built once, not per segment.
    flat_doc = doc.replace("\n", " ")
    ctx_paras = []

    for segment in parse_snippet(snippet):
        pos = flat_doc.find(segment)
        if pos == -1:
            continue
        # Widen the match back to the previous newline (or the start of the
        # document) and forward to the next newline (or the end).
        start = max(doc.rfind("\n", 0, pos + 1), 0)
        end = doc.find("\n", pos + len(segment))
        if end == -1:
            end = len(doc)
        para = doc[start:end].strip()
        if para not in ctx_paras:
            ctx_paras.append(para)

    return "\n".join(ctx_paras)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
async def google_search(api_key, query, top_k=5, timeout: int = 60, proxy=None, snippet_only=False) -> List[Dict]:
    """Query the Serper search endpoint and return the hits as context documents.

    Args:
        api_key: Value sent in the ``X-API-KEY`` header.
        query: Search query text.
        top_k: Number of results requested (``num`` in the request body).
        timeout: Total timeout in seconds for the search request.
        proxy: Optional proxy, forwarded to the client session when truthy.
        snippet_only: When True, contexts are built from result snippets only.
            Otherwise each result page is downloaded and the lines containing
            the snippet segments are extracted from it.

    Returns:
        A list of ``{"document": {"contents": '"<title>"\\n<context>'}}`` dicts,
        one per organic result that has a title or a non-empty context.

    Raises:
        Whatever ``resp.raise_for_status()`` raises for an error HTTP status.
    """
    timeout_obj = aiohttp.ClientTimeout(total=timeout)
    session_kwargs = {}
    if proxy:
        # NOTE(review): assumes the installed aiohttp accepts ``proxy`` as a
        # ClientSession argument; confirm against the pinned version.
        session_kwargs["proxy"] = proxy
    async with aiohttp.ClientSession(**session_kwargs) as session:
        async with session.post(
            "https://google.serper.dev/search",
            json={
                "q": query,
                "num": top_k,
                "gl": "us",
                "hl": "en",
            },
            headers={
                "Content-Type": "application/json",
                "X-API-KEY": api_key,
            },
            timeout=timeout_obj,
        ) as resp:
            resp.raise_for_status()
            response = await resp.json()
            items = response.get("organic", [])

    if snippet_only:
        bodies = [" ".join(parse_snippet(item.get("snippet", ""))) for item in items]
    else:
        # Keep exactly one URL per item (empty when the item has no link, which
        # ``fetch`` turns into an empty page) so that page i always belongs to
        # item i. Filtering link-less items here would shift the pairing.
        links = [item.get("link", "") for item in items]
        web_contents = await fetch_all(links)
        bodies = [
            collect_context(item.get("snippet", ""), page)
            for item, page in zip(items, web_contents)
        ]

    contexts = []
    for item, context in zip(items, bodies):
        title = item.get("title", "")
        # Results with neither a title nor any context are dropped.
        if title != "" or context != "":
            title = "No title." if not title else title
            context = "No snippet available." if not context else context
            contexts.append(
                {
                    "document": {"contents": f'"{title}"\n{context}'},
                }
            )

    return contexts
|
ccevolve/baselines/thetaevolve/examples/search-r1/qa_em_format.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapted from https://github.com/PeterGriffinJin/Search-R1/blob/ceee7b89655ed52f205b9beb98e1190c3eedcfb0/verl/utils/reward_score/qa_em_format.py
|
| 2 |
+
# Copyright 2024 Bytedance Ltd. and/or its affiliates
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
|
| 16 |
+
import random
|
| 17 |
+
import re
|
| 18 |
+
import string
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def normalize_answer(s):
    """Canonicalise an answer string for exact-match comparison.

    Steps, in order: lowercase, delete every ``string.punctuation`` character,
    replace the standalone words "a", "an" and "the" with a space, then
    collapse all whitespace runs to single spaces.
    """
    punctuation_free = s.lower().translate(str.maketrans("", "", string.punctuation))
    without_articles = re.sub(r"\b(a|an|the)\b", " ", punctuation_free)
    return " ".join(without_articles.split())
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def em_check(prediction, golden_answers):
    """Return 1 if the prediction exactly matches any golden answer, else 0.

    Both sides are compared after ``normalize_answer``. ``golden_answers`` may
    be a single string or an iterable of strings.
    """
    candidates = [golden_answers] if isinstance(golden_answers, str) else golden_answers
    target = normalize_answer(prediction)
    return int(any(normalize_answer(candidate) == target for candidate in candidates))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def is_valid_sequence(text):
    """Check that the assistant turn follows the expected tag protocol.

    Only the text after the first ``<|im_start|>assistant`` marker is checked.
    The accepted shape is ``<think>..</think>`` followed by either
    ``<search>..</search><information>..</information>`` (after which another
    ``<think>`` round starts) or a final ``<answer>..</answer>``. Only
    whitespace may appear between tags, and nothing may follow ``</answer>``.

    Returns:
        ``(is_valid, message)`` where ``message`` names the first problem
        found, or reads "Valid sequence format" on success.
    """
    marker = re.search(r"<\|im_start\|>assistant\s*", text)
    if marker is None:
        return False, "Missing assistant marker"

    content = text[marker.end():]

    # Every tag must be opened exactly as often as it is closed.
    for tag in ("think", "search", "information", "answer"):
        opening_count = content.count(f"<{tag}>")
        closing_count = content.count(f"</{tag}>")
        if opening_count != closing_count:
            return False, f"Mismatch in {tag} tags: {opening_count} opening vs {closing_count} closing tags"

    # (tag, state in which it is allowed) -> state after consuming the tag.
    transitions = {
        ("<think>", "start"): "in_think",
        ("<think>", "information"): "in_think",
        ("</think>", "in_think"): "after_think",
        ("<search>", "after_think"): "in_search",
        ("</search>", "in_search"): "after_search",
        ("<information>", "after_search"): "in_information",
        ("</information>", "in_information"): "information",
        ("<answer>", "after_think"): "in_answer",
        ("</answer>", "in_answer"): "end",
    }
    known_tags = {tag for tag, _ in transitions}
    inside_tag_states = {"in_think", "in_search", "in_information", "in_answer"}
    between_tag_states = {"start", "after_think", "after_search", "information"}

    state = "start"
    # Splitting on a capturing group keeps the tags as their own list entries.
    for part in re.split(r"(</?(?:think|search|information|answer)>)", content):
        stripped = part.strip()
        if not stripped:
            continue

        if part in known_tags:
            next_state = transitions.get((part, state))
            if next_state is None:
                return False, f"Unexpected tag {part} in state {state}"
            state = next_state
        elif state in inside_tag_states:
            # Free text is allowed inside any open tag.
            continue
        elif state in between_tag_states:
            return False, f"Unexpected content '{stripped}' between tags (state: {state})"
        else:
            return False, f"Unexpected content in state {state}"

    if state != "end":
        return False, f"Incomplete sequence, ended in state {state}"

    return True, "Valid sequence format"
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def extract_solution(solution_str):
    """Return the stripped text of the last ``<answer>`` block, or None.

    None is returned when the string contains fewer than two complete
    ``<answer>...</answer>`` blocks (presumably because the prompt itself
    carries one example block; TODO confirm against the prompt template).
    """
    answers = re.findall(r"<answer>(.*?)</answer>", solution_str, re.DOTALL)
    # Zero or one block counts as "no answer given".
    if len(answers) < 2:
        return None
    return answers[-1].strip()
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def extract_information_blocks(text: str) -> list[str]:
    """Return the stripped contents of every ``<information>...</information>`` block, in order."""
    blocks = []
    for found in re.finditer(r"<information>(.*?)</information>", text, re.DOTALL):
        blocks.append(found.group(1).strip())
    return blocks
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def is_retrieval_correct(text: str, golden_answers: list[str]) -> bool:
    """Tell whether any retrieved ``<information>`` block contains a golden answer.

    Args:
        text: Full rollout text containing ``<information>`` blocks.
        golden_answers: Accepted answers; a single string is treated as a
            one-element list, as ``em_check`` does.

    Returns:
        True if the normalised form of any golden answer is a substring of the
        normalised form of any information block, otherwise False.
    """
    seqs = extract_information_blocks(text)
    if not seqs:
        return False

    # A bare string would otherwise be iterated character by character.
    if isinstance(golden_answers, str):
        golden_answers = [golden_answers]
    normalized_goldens = [normalize_answer(answer) for answer in golden_answers]

    for seq in seqs:
        # Normalise each block once rather than once per golden answer.
        normalized_seq = normalize_answer(seq)
        if any(golden in normalized_seq for golden in normalized_goldens):
            return True
    return False
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def compute_score_em(
    solution_str,
    ground_truth,
    method="strict",
    structure_format_score=0,
    final_format_score=0,
    retrieval_score=0,
    format_score=0,
    score=1.0,
):
    """Score a rollout by exact match, with partial credit for format and retrieval.

    Args:
        solution_str: the text to score
        ground_truth: mapping whose ``"target"`` entry holds the golden answer(s)
        method: accepted but not read by this function
        structure_format_score: credit for a valid tag sequence without a
            correct answer; also subtracted from ``score`` when the answer is
            correct but the sequence is invalid
        final_format_score: returned when an answer was extracted but it is
            wrong and the sequence is invalid
        retrieval_score: added to ``structure_format_score`` when the sequence
            is valid and a golden answer appears in a retrieved block
        format_score: accepted but not read by this function
        score: returned for a correct answer in a valid sequence

    Returns:
        One of the values described above, or 0 when no answer could be
        extracted and the sequence is invalid.
    """
    is_valid_format, _ = is_valid_sequence(solution_str)
    # Retrieval is only evaluated for well-formed sequences.
    retrieval_correct = is_retrieval_correct(solution_str, ground_truth["target"]) if is_valid_format else False
    answer = extract_solution(solution_str=solution_str)

    # Print a sample of roughly one call in 64.
    if random.randint(1, 64) == 1:
        print("--------------------------------")
        print(f"Golden answers: {ground_truth['target']}")
        print(f"Extracted answer: {answer}")
        print(f"Solution string: {solution_str}")

    exact_match = answer is not None and em_check(answer, ground_truth["target"])
    if exact_match:
        return score if is_valid_format else score - structure_format_score

    if is_valid_format:
        if retrieval_correct:
            return structure_format_score + retrieval_score
        return structure_format_score

    # Invalid sequence and no correct answer.
    return 0 if answer is None else final_format_score
|
ccevolve/baselines/thetaevolve/examples/search-r1/run_qwen2.5_3B.sh
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# Clean up leftovers from a previous run so the task can be rerun.
# WARNING: this force-kills every sglang, ray and python process visible to
# the current user, not only the ones started by this script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# Prevent Python (and therefore ray workers) from buffering stdout/stderr.
# The variable Python reads is PYTHONUNBUFFERED; any non-empty value enables it.
export PYTHONUNBUFFERED=1

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# The sourced model script is expected to provide MODEL_ARGS.
source "${SCRIPT_DIR}/../../scripts/models/qwen2.5-3B.sh"

CKPT_ARGS=(
   --hf-checkpoint /root/Qwen2.5-3B/
   --ref-load /root/Qwen2.5-3B_torch_dist/
   --load /root/Qwen2.5-3B_slime/
   --save /root/Qwen2.5-3B_slime/
   --save-interval 20
)

ROLLOUT_ARGS=(
   --prompt-data /root/nq_search/train.parquet
   --input-key prompt
   --label-key reward_model
   --apply-chat-template
   --rollout-shuffle
   --num-rollout 3000
   --rollout-batch-size 32
   --n-samples-per-prompt 8
   --rollout-max-response-len 512
   --rollout-temperature 0.8

   --global-batch-size 256
   --balance-data
)

PERF_ARGS=(
   --tensor-model-parallel-size 2
   --sequence-parallel
   --pipeline-model-parallel-size 1
   --context-parallel-size 1
   --expert-model-parallel-size 1
   --expert-tensor-parallel-size 1

   --recompute-granularity full
   --recompute-method uniform
   --recompute-num-layers 1

   # --micro-batch-size 1
   --use-dynamic-batch-size
   --max-tokens-per-gpu 9216
)

GRPO_ARGS=(
   --advantage-estimator grpo
   --use-kl-loss
   --kl-loss-coef 0.00
   --kl-loss-type low_var_kl
   --entropy-coef 0.00
   --eps-clip 0.2
   --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
   --optimizer adam
   --lr 1e-6
   --lr-decay-style constant
   --weight-decay 0.1
   --adam-beta1 0.9
   --adam-beta2 0.98
)

WANDB_ARGS=(
   # --use-wandb
   # --wandb-project slime-dev
   # --wandb-group search-r1_qwen2.5-3B-test
   # --wandb-key ${WANDB_KEY}
)

SGLANG_ARGS=(
   --rollout-num-gpus-per-engine 2
   --sglang-mem-fraction-static 0.7
)

MISC_ARGS=(
   # default dropout in megatron is 0.1
   --attention-dropout 0.0
   --hidden-dropout 0.0
   # should be good for model performance
   --accumulate-allreduce-grads-in-fp32
   --attention-softmax-in-fp32
   # need to comment this when using model with MLA
   --attention-backend flash
)

# Custom rollout and reward functions; the module is found through the
# PYTHONPATH entry for SCRIPT_DIR set in RUNTIME_ENV_JSON below.
CUSTOM_ARGS=(
   --custom-generate-function-path generate_with_search.generate
   --custom-rm-path generate_with_search.reward_func
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats

RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\"
  }
}"

# Array expansions are quoted so each element is passed as exactly one
# argument; empty or unset arrays expand to nothing.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json="${RUNTIME_ENV_JSON}" \
   -- python3 train.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node 4 \
   --rollout-num-gpus 4 \
   --colocate \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${DISTRIBUTED_ARGS[@]}" \
   "${WANDB_ARGS[@]}" \
   "${PERF_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${MISC_ARGS[@]}" \
   "${CUSTOM_ARGS[@]}"
|
ccevolve/baselines/thetaevolve/openevolve_adapted/.gitignore
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
results/
|
| 2 |
+
examples/lm_eval/prompts/system_message.txt
|
| 3 |
+
examples/lm_eval/prompts/evaluator_system_message.txt
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
*.egg-info/
|
| 24 |
+
.installed.cfg
|
| 25 |
+
*.egg
|
| 26 |
+
MANIFEST
|
| 27 |
+
**/.ipynb_checkpoints/
|
| 28 |
+
# Virtual environments
|
| 29 |
+
venv/
|
| 30 |
+
env/
|
| 31 |
+
ENV/
|
| 32 |
+
|
| 33 |
+
# IDE
|
| 34 |
+
.idea/
|
| 35 |
+
.vscode/
|
| 36 |
+
*.swp
|
| 37 |
+
*.swo
|
| 38 |
+
|
| 39 |
+
# Output files
|
| 40 |
+
examples/*/output/
|
| 41 |
+
openevolve_output*/
|
| 42 |
+
*.log
|
| 43 |
+
|
| 44 |
+
# Test cache
|
| 45 |
+
.pytest_cache/
|
| 46 |
+
.coverage
|
| 47 |
+
htmlcov/
|
| 48 |
+
|
| 49 |
+
# Misc
|
| 50 |
+
.DS_Store
|
| 51 |
+
.venv
|
| 52 |
+
|
| 53 |
+
# For SR
|
| 54 |
+
secrets.yaml
|
| 55 |
+
problems
|
| 56 |
+
|
| 57 |
+
# Artifacts from running the evaluation
|
| 58 |
+
artifacts/
|
| 59 |
+
|
| 60 |
+
# All directories named like stress_test_output_xxx/
|
| 61 |
+
stress_test_output_*/
|