JustinTX committed on
Commit
d28330f
·
verified ·
1 Parent(s): 13d6258

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. ccevolve/baselines/thetaevolve/adapted/__pycache__/ac1.cpython-313.pyc +0 -0
  3. ccevolve/baselines/thetaevolve/adapted/__pycache__/circle_packing.cpython-313.pyc +0 -0
  4. ccevolve/baselines/thetaevolve/adapted/ac1.py +21 -0
  5. ccevolve/baselines/thetaevolve/adapted/circle_packing.py +41 -0
  6. ccevolve/baselines/thetaevolve/adapted_openevolve/ac1.py +27 -0
  7. ccevolve/baselines/thetaevolve/adapted_openevolve/ac1_ae.py +27 -0
  8. ccevolve/baselines/thetaevolve/docker/Dockerfile +118 -0
  9. ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm +363 -0
  10. ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm_MI350-5 +252 -0
  11. ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_9a48ba0.rocm +361 -0
  12. ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_c22f55b.rocm +374 -0
  13. ccevolve/baselines/thetaevolve/docker/Dockerfile_Aug_10_2025_9a48ba0.rocm +361 -0
  14. ccevolve/baselines/thetaevolve/docker/Dockerfile_after_c22f55b_Aug_10_2025.rocm +374 -0
  15. ccevolve/baselines/thetaevolve/docker/Dockerfile_b200 +64 -0
  16. ccevolve/baselines/thetaevolve/docker/README.md +27 -0
  17. ccevolve/baselines/thetaevolve/docker/justfile +40 -0
  18. ccevolve/baselines/thetaevolve/docker/version.txt +1 -0
  19. ccevolve/baselines/thetaevolve/docs/README.md +31 -0
  20. ccevolve/baselines/thetaevolve/docs/build.sh +13 -0
  21. ccevolve/baselines/thetaevolve/docs/build_all.sh +44 -0
  22. ccevolve/baselines/thetaevolve/docs/conf.py +262 -0
  23. ccevolve/baselines/thetaevolve/docs/requirements.txt +20 -0
  24. ccevolve/baselines/thetaevolve/docs/serve.sh +29 -0
  25. ccevolve/baselines/thetaevolve/eval_results/ac1/correct.json +4 -0
  26. ccevolve/baselines/thetaevolve/eval_results/ac1/metrics.json +22 -0
  27. ccevolve/baselines/thetaevolve/eval_results/circle_packing/correct.json +4 -0
  28. ccevolve/baselines/thetaevolve/eval_results/circle_packing/metrics.json +15 -0
  29. ccevolve/baselines/thetaevolve/examples/__init__.py +0 -0
  30. ccevolve/baselines/thetaevolve/examples/fully_async/README.md +45 -0
  31. ccevolve/baselines/thetaevolve/examples/fully_async/fully_async_rollout.py +247 -0
  32. ccevolve/baselines/thetaevolve/examples/fully_async/run-qwen3-4b-fully_async.sh +135 -0
  33. ccevolve/baselines/thetaevolve/examples/multi_agent/README.md +53 -0
  34. ccevolve/baselines/thetaevolve/examples/multi_agent/__init__.py +0 -0
  35. ccevolve/baselines/thetaevolve/examples/multi_agent/agent_system.py +273 -0
  36. ccevolve/baselines/thetaevolve/examples/multi_agent/prompts.py +90 -0
  37. ccevolve/baselines/thetaevolve/examples/multi_agent/rollout_with_multi_agents.py +33 -0
  38. ccevolve/baselines/thetaevolve/examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh +159 -0
  39. ccevolve/baselines/thetaevolve/examples/reproducibility/README.md +53 -0
  40. ccevolve/baselines/thetaevolve/examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh +140 -0
  41. ccevolve/baselines/thetaevolve/examples/retool/requirements.txt +3 -0
  42. ccevolve/baselines/thetaevolve/examples/retool/retool_qwen3_4b_rl.sh +157 -0
  43. ccevolve/baselines/thetaevolve/examples/retool/sft_data_processing.py +31 -0
  44. ccevolve/baselines/thetaevolve/examples/search-r1/README.md +75 -0
  45. ccevolve/baselines/thetaevolve/examples/search-r1/README_zh.md +77 -0
  46. ccevolve/baselines/thetaevolve/examples/search-r1/generate_with_search.py +169 -0
  47. ccevolve/baselines/thetaevolve/examples/search-r1/google_search_server.py +150 -0
  48. ccevolve/baselines/thetaevolve/examples/search-r1/qa_em_format.py +208 -0
  49. ccevolve/baselines/thetaevolve/examples/search-r1/run_qwen2.5_3B.sh +137 -0
  50. ccevolve/baselines/thetaevolve/openevolve_adapted/.gitignore +61 -0
.gitattributes CHANGED
@@ -1271,3 +1271,4 @@ examples_deprecated/circle_packing/results/results_baseline_gemini3_flash_gen200
1271
  examples_deprecated/circle_packing/results/results_mmv1_2_gemini3_flash_gen200_periodic10_20260211_003248/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
1272
  examples_deprecated/circle_packing/results/results_full_gen200_plateau10_20260208_010426/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
1273
  examples_deprecated/circle_packing/results/results_mmv1_1_gen200_periodic10_20260208_083104/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
 
 
1271
  examples_deprecated/circle_packing/results/results_mmv1_2_gemini3_flash_gen200_periodic10_20260211_003248/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
1272
  examples_deprecated/circle_packing/results/results_full_gen200_plateau10_20260208_010426/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
1273
  examples_deprecated/circle_packing/results/results_mmv1_1_gen200_periodic10_20260208_083104/evolution_db.sqlite filter=lfs diff=lfs merge=lfs -text
1274
+ examples_deprecated/circle_packing/results/results_full_gen200_period10_20260206_062935/best/results/circle_packing_visualization.png filter=lfs diff=lfs merge=lfs -text
ccevolve/baselines/thetaevolve/adapted/__pycache__/ac1.cpython-313.pyc ADDED
Binary file (1.41 kB). View file
 
ccevolve/baselines/thetaevolve/adapted/__pycache__/circle_packing.cpython-313.pyc ADDED
Binary file (2.3 kB). View file
 
ccevolve/baselines/thetaevolve/adapted/ac1.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ThetaEvolve AC1 — uses the embedded AlphaEvolve v2 sequence from ac1_data.py in TTT-Discover.
2
+
3
+ ThetaEvolve's AC1 programs depend on internal imports (openevolve, ref.sota_alphaevolve2)
4
+ and are search algorithms, not fixed solutions. The data.json in FirstAutoCorrIneq
5
+ doesn't contain a usable best sequence in a simple format.
6
+
7
+ We use the AlphaEvolve v2 (150316) sequence from TTT-Discover's ac1_data.py as the
8
+ baseline for comparison, since ThetaEvolve claims to improve upon it.
9
+ """
10
+ import sys
11
+ import os
12
+
13
+ _DIR = os.path.dirname(os.path.abspath(__file__))
14
+ # Import from TTT-Discover's ac1_data.py
15
+ sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
16
+ from ac1_data import alphaevolve_v2_150316
17
+
18
+
19
def run(seed=42, budget_s=1000, **kwargs):
    """Provide the AlphaEvolve v2 AC1 baseline sequence as a fresh list.

    ``seed``, ``budget_s`` and any extra keyword arguments are accepted but
    not used: the returned values are a copy of the imported
    ``alphaevolve_v2_150316`` constant, not the result of a search.
    """
    baseline = [*alphaevolve_v2_150316]
    return baseline
ccevolve/baselines/thetaevolve/adapted/circle_packing.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ThetaEvolve best circle packing solution (from data.json, '8B-w_RL@65' entry)."""
2
+ import json
3
+ import os
4
+ import numpy as np
5
+
6
+ _DIR = os.path.dirname(os.path.abspath(__file__))
7
+ _DATA_PATH = os.path.join(_DIR, "..", "Results", "CirclePacking", "data.json")
8
+
9
+
10
def run_packing():
    """Return the best pre-computed ThetaEvolve packing from ``data.json``.

    Loads the JSON file at ``_DATA_PATH`` and scans its entries. Each entry
    is expected to carry a ``"name"`` and a ``"list"`` of circles, where a
    usable circle is a flat ``[x, y, r]`` list. An entry is considered only
    when its ``"list"`` is non-empty, is a list of flat lists, and holds
    exactly 26 circles. Among those, the entry with the largest sum of
    radii wins.

    Returns:
        tuple: ``(centers, radii, sum_radii)`` where ``centers`` is a NumPy
        array of ``[x, y]`` pairs, ``radii`` is a NumPy array of the radii,
        and ``sum_radii`` is their sum as a Python ``float``.

    Raises:
        ValueError: if no entry in the data file qualifies.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(_DATA_PATH, encoding="utf-8") as f:
        data = json.load(f)

    # Find the entry with the highest sum of radii.
    best_name = None
    best_sum = -1.0
    best_circles = None
    for entry in data:
        name = entry["name"]
        circles = entry["list"]
        # Skip empty entries and entries that are not a list of lists.
        if not circles or not isinstance(circles[0], list):
            continue
        # Skip deeper nesting (like Formal proofs); an empty first circle is
        # rejected here too instead of raising IndexError.
        if not circles[0] or isinstance(circles[0][0], list):
            continue
        if len(circles) != 26:
            continue
        s = sum(c[2] for c in circles)
        if s > best_sum:
            best_sum = s
            best_name = name
            best_circles = circles

    # Fail loudly instead of printing "None" and crashing on iteration below.
    if best_circles is None:
        raise ValueError(
            f"No valid 26-circle packing entry found in {_DATA_PATH}"
        )

    print(f"Using ThetaEvolve entry: {best_name} (sum={best_sum:.10f})")

    centers = np.array([[c[0], c[1]] for c in best_circles])
    radii = np.array([c[2] for c in best_circles])
    sum_radii = float(np.sum(radii))

    return centers, radii, sum_radii
ccevolve/baselines/thetaevolve/adapted_openevolve/ac1.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AlphaEvolve v2 AC1 baseline adapted for OpenEvolve evaluator.
2
+
3
+ OpenEvolve AC1 evaluator expects: run() -> (f_values, c1_achieved, loss, n_points)
4
+ """
5
+ import sys
6
+ import os
7
+ import numpy as np
8
+
9
+ _DIR = os.path.dirname(os.path.abspath(__file__))
10
+ sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
11
+ from ac1_data import alphaevolve_v2_150316
12
+
13
+
14
def run():
    """Package the AlphaEvolve v2 AC1 sequence for the OpenEvolve evaluator.

    The imported ``alphaevolve_v2_150316`` values are converted to a float64
    array and negative samples are clamped to zero. The achieved constant is
    the maximum of the discrete self-convolution (scaled by the grid step)
    divided by the squared discrete integral of the samples.

    Returns:
        tuple: ``(f_values, c1_achieved, loss, n_points)`` where ``loss`` is
        the same number as ``c1_achieved``.
    """
    samples = np.maximum(
        np.array(alphaevolve_v2_150316, dtype=np.float64), 0.0
    )
    count = len(samples)

    # Grid step; presumably the samples cover an interval of length 0.5
    # (TODO confirm against the evaluator).
    step = 0.5 / count
    self_conv = np.convolve(samples, samples, mode="full") * step
    mass_squared = (np.sum(samples) * step) ** 2
    c1_achieved = float(np.max(self_conv / mass_squared))

    return samples, c1_achieved, c1_achieved, count
ccevolve/baselines/thetaevolve/adapted_openevolve/ac1_ae.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AlphaEvolve v2 AC1 baseline adapted for OpenEvolve evaluator.
2
+
3
+ OpenEvolve AC1 evaluator expects: run() -> (f_values, c1_achieved, loss, n_points)
4
+ """
5
+ import sys
6
+ import os
7
+ import numpy as np
8
+
9
+ _DIR = os.path.dirname(os.path.abspath(__file__))
10
+ sys.path.insert(0, os.path.join(_DIR, "..", "..", "ttt-discover", "results", "mathematics"))
11
+ from ac1_data import alphaevolve_v2_150316
12
+
13
+
14
def run():
    """Package the AlphaEvolve v2 AC1 sequence for the OpenEvolve evaluator.

    The imported ``alphaevolve_v2_150316`` values are converted to a float64
    array and negative samples are clamped to zero. The achieved constant is
    the maximum of the discrete self-convolution (scaled by the grid step)
    divided by the squared discrete integral of the samples.

    Returns:
        tuple: ``(f_values, c1_achieved, loss, n_points)`` where ``loss`` is
        the same number as ``c1_achieved``.
    """
    samples = np.maximum(
        np.array(alphaevolve_v2_150316, dtype=np.float64), 0.0
    )
    count = len(samples)

    # Grid step; presumably the samples cover an interval of length 0.5
    # (TODO confirm against the evaluator).
    step = 0.5 / count
    self_conv = np.convolve(samples, samples, mode="full") * step
    mass_squared = (np.sum(samples) * step) ** 2
    c1_achieved = float(np.max(self_conv / mass_squared))

    return samples, c1_achieved, c1_achieved, count
ccevolve/baselines/thetaevolve/docker/Dockerfile ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG SGLANG_IMAGE_TAG=nightly-dev-20260107-dce8b060
2
+ FROM slimerl/sglang:${SGLANG_IMAGE_TAG} AS sglang
3
+
4
+ # ======================================== Arguments =============================================
5
+
6
+ ARG PATCH_VERSION=latest
7
+ ARG MEGATRON_COMMIT=3714d81d418c9f1bca4594fc35f9e8289f652862
8
+
9
+ ARG ENABLE_CUDA_13=0
10
+
11
+ # ======================================== Setup =============================================
12
+
13
+ WORKDIR /root/
14
+
15
+ # ======================================== Apt dependencies =============================================
16
+
17
# Refresh the package index and install in the same layer, so a cached
# "apt update" layer can never be paired with a newer install step.
RUN apt update && apt install -y nvtop rsync dnsutils
19
+
20
+ # ====================================== Python dependencies ============================================
21
+
22
+ # The compilation is slow, thus should be put at top
23
+ # TransformerEngine does not support newer FlashAttention-2 (FA2) releases, so the version is pinned
24
+ RUN MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1 --no-build-isolation
25
+
26
+ # The compilation is slow, thus should be put at top
27
+ RUN git clone https://github.com/Dao-AILab/flash-attention.git && \
28
+ cd flash-attention/ && git checkout fbf24f67cf7f6442c5cfb2c1057f4bfc57e72d89 && git submodule update --init && cd hopper/ && \
29
+ MAX_JOBS=96 python setup.py install && \
30
+ export python_path=`python -c "import site; print(site.getsitepackages()[0])"` && \
31
+ mkdir -p $python_path/flash_attn_3 && \
32
+ cp flash_attn_interface.py $python_path/flash_attn_3/flash_attn_interface.py && \
33
+ rm -rf flash-attention/
34
+
35
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10887887bc74853f89a4de258c0702932a1c --no-deps
36
+
37
+ RUN pip install flash-linear-attention==0.4.1
38
+ RUN pip install tilelang -f https://tile-ai.github.io/whl/nightly/cu128/
39
+
40
+ # TE does not have wheel on cuda 13 yet, thus need to install from source
41
+ RUN if [ "${ENABLE_CUDA_13}" = "1" ]; then \
42
+ pip install nvidia-mathdx==26.6.0 && \
43
+ pip -v install --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@release_v2.10; \
44
+ else \
45
+ pip -v install --no-build-isolation "transformer_engine[pytorch]==2.10.0"; \
46
+ fi
47
+
48
+ RUN NVCC_APPEND_FLAGS="--threads 4" \
49
+ pip -v install --disable-pip-version-check --no-cache-dir \
50
+ --no-build-isolation \
51
+ --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git@10417aceddd7d5d05d7cbf7b0fc2daad1105f8b4
52
+
53
+ RUN git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
54
+ cd Megatron-LM && git checkout ${MEGATRON_COMMIT} && \
55
+ pip install -e .
56
+
57
+ RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git@dc6876905830430b5054325fa4211ff302169c6b --no-cache-dir --force-reinstall
58
+ RUN pip install git+https://github.com/fzyzcjy/Megatron-Bridge.git@dev_rl --no-build-isolation
59
# The requirement must be quoted: unquoted, the shell parses ">" as an output
# redirection to a file named "=0.37.0" and the version bound is dropped.
RUN pip install "nvidia-modelopt[torch]>=0.37.0" --no-build-isolation
60
+
61
+ # This patch from masahi will be included in later Triton releases
62
+ RUN if [ "$ENABLE_CUDA_13" = "1" ]; then \
63
+ (cd /root && git clone -b feat/v350_plus_8045 https://github.com/fzyzcjy/triton.git && cd triton && pip install -r python/requirements.txt && pip install --verbose -e .); \
64
+ fi
65
+
66
+ COPY requirements.txt /tmp/requirements.txt
67
+ RUN pip install -r /tmp/requirements.txt
68
+
69
+ # Temporarily install another sgl-kernel version for GB300 without rebuilding the whole image
70
+ RUN if [ "$ENABLE_CUDA_13" = "1" ]; then \
71
+ SGL_KERNEL_VERSION=0.3.17.post2 && \
72
+ python3 -m pip install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu130-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps; \
73
+ fi
74
+
75
+ # https://github.com/pytorch/pytorch/issues/168167
76
+ RUN pip install nvidia-cudnn-cu12==9.16.0.29
77
+
78
+ # reinstall numpy 1.x for megatron
79
+ RUN pip install "numpy<2"
80
+
81
+ RUN rm -rf /root/.cache/pip /root/flash-attention
82
+
83
+ # ====================================== Patches ============================================
84
+
85
+ COPY docker/patch/${PATCH_VERSION}/megatron.patch /root/Megatron-LM/
86
+ RUN cd Megatron-LM && \
87
+ git update-index --refresh && \
88
+ git apply megatron.patch --3way && \
89
+ if grep -R -n '^<<<<<<< ' .; then \
90
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
91
+ exit 1; \
92
+ fi && \
93
+ rm megatron.patch
94
+
95
+ # TODO temporarily skip patching for GB200/GB300 (and require users to bring their own sglang version). should add back later.
96
+ ARG ENABLE_SGLANG_PATCH=1
97
+ COPY docker/patch/${PATCH_VERSION}/sglang.patch /sgl-workspace/sglang/
98
+ RUN if [ "$ENABLE_SGLANG_PATCH" = "1" ]; then \
99
+ cd /sgl-workspace/sglang && \
100
+ git update-index --refresh && \
101
+ git apply sglang.patch --3way && \
102
+ if grep -R -n '^<<<<<<< ' .; then \
103
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
104
+ exit 1; \
105
+ fi && \
106
+ rm sglang.patch; \
107
+ fi
108
+
109
+ # ====================================== Install main package ============================================
110
+
111
+ ARG SLIME_COMMIT=main
112
+ RUN git clone https://github.com/THUDM/slime.git /root/slime && \
113
+ cd /root/slime && \
114
+ git checkout ${SLIME_COMMIT} && \
115
+ pip install -e . --no-deps
116
+
117
+ RUN cd /root/slime/slime/backends/megatron_utils/kernels/int4_qat && \
118
+ pip install . --no-build-isolation
ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # Supports at least up to slime commit ID: 2710445 (Oct 9, 2025) - supported by amd_patch/sglv0.5.0rc0
5
+ # Still need to update amd_patch
6
+
7
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
8
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449-1457: comment out the `if` because of the mismatched number of dist checkpoints).
9
+
10
+
11
+ # The Docker image built with this Dockerfile:
12
+ # PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports at least up to slime commit ID: d4a7741 (Sep 7, 2025)
13
+
14
+
15
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
16
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449-1457: comment out the `if` because of the mismatched number of dist checkpoints).
17
+
18
+ # Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
19
+
20
+ # FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
21
+ FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
22
+
23
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
24
+
25
+ ARG MAX_JOBS=512
26
+ ENV MAX_JOBS=${MAX_JOBS}
27
+
28
+ ENV PATH="/usr/local/python3.12/bin:$PATH"
29
+ RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
30
+ ln -sf /usr/bin/pip3.12 /usr/bin/pip
31
+
32
# Refresh the package index and install in the same layer, so a cached
# "apt-get update" layer can never be paired with a newer install step.
RUN apt-get update && apt-get install -y pkg-config liblzma-dev
34
+
35
+
36
+ ###########################################
37
+ ##########Install TransformerEngine########
38
+ ###########################################
39
+ WORKDIR /workspace/
40
+
41
+ RUN rm -rf TransformerEngine
42
+ # RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
43
+ RUN git clone https://github.com/ROCm/TransformerEngine.git
44
+ WORKDIR /workspace/TransformerEngine
45
+
46
+
47
+ RUN git checkout 236178e
48
+ # RUN git checkout bb061ad
49
+ # RUN git checkout 864405c
50
+
51
+ RUN git submodule update --init --recursive
52
+
53
+ ENV NVTE_FRAMEWORK=pytorch
54
+ ENV NVTE_ROCM_ARCH=gfx942
55
+ ENV NVTE_USE_HIPBLASLT=1
56
+ ENV NVTE_USE_ROCM=1
57
+
58
+ # export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
59
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
60
+ RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
61
+ WORKDIR /workspace/
62
+ ###########################################
63
+ ###########################################
64
+ ###########################################
65
+
66
+
67
+
68
+ ###########################################
69
+ ##############Install SGLang###############
70
+ ###########################################
71
+
72
+ # This is necessary for scope purpose
73
+ # ARG GPU_ARCH=gfx942
74
+ ENV GPU_ARCH=gfx942
75
+
76
+ # ===============================
77
+ # Base image 942 and args
78
+ # FROM $BASE_IMAGE_942 AS gfx942
79
+ ENV BUILD_VLLM="0"
80
+ ENV BUILD_TRITON="1"
81
+ ENV BUILD_AITER_ALL="1"
82
+ ENV AITER_COMMIT="v0.1.4"
83
+
84
+ # # ===============================
85
+ # # Base image 950 and args
86
+ # FROM $BASE_IMAGE_950 AS gfx950
87
+ # ENV BUILD_VLLM="0"
88
+ # ENV BUILD_TRITON="0"
89
+ # ENV BUILD_AITER_ALL="1"
90
+ # ENV AITER_COMMIT="v0.1.4"
91
+
92
+ # ===============================
93
+ # Chosen arch and args
94
+ # FROM ${GPU_ARCH}
95
+
96
+ # This is necessary for scope purpose, again
97
+ # ARG GPU_ARCH=gfx950
98
+ ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
99
+
100
+ ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
101
+ ARG SGL_DEFAULT="main"
102
+ # ARG SGL_BRANCH=${SGL_DEFAULT}
103
+ ARG SGL_BRANCH="8ecf6b9d2480c3f600826c7d8fef6a16ed603c3f"
104
+
105
+ ARG TRITON_REPO="https://github.com/ROCm/triton.git"
106
+ ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
107
+
108
+ ARG AITER_REPO="https://github.com/ROCm/aiter.git"
109
+
110
+
111
+ WORKDIR /workspace
112
+ # -----------------------
113
+ # AITER
114
+ RUN pip uninstall -y aiter
115
+ RUN git clone ${AITER_REPO} \
116
+ && cd aiter \
117
+ && git checkout ${AITER_COMMIT} \
118
+ && git submodule update --init --recursive
119
+ RUN cd aiter \
120
+ && if [ "$BUILD_AITER_ALL" = "1" ]; then \
121
+ PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
122
+ else \
123
+ GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
124
+ fi
125
+
126
+ # -----------------------
127
+ # Triton
128
+ RUN if [ "$BUILD_TRITON" = "1" ]; then \
129
+ pip uninstall -y triton \
130
+ && git clone ${TRITON_REPO} \
131
+ && cd triton \
132
+ && git checkout ${TRITON_COMMIT} \
133
+ && cd python \
134
+ && python setup.py install; \
135
+ fi
136
+
137
+ # -----------------------
138
+ # Build vLLM
139
+ ARG VLLM_REPO="https://github.com/ROCm/vllm.git"
140
+ ARG VLLM_BRANCH="9f6b92db47c3444b7a7d67451ba0c3a2d6af4c2c"
141
+ RUN if [ "$BUILD_VLLM" = "1" ]; then \
142
+ git clone ${VLLM_REPO} \
143
+ && cd vllm \
144
+ && git checkout ${VLLM_BRANCH} \
145
+ && python -m pip install -r requirements/rocm.txt \
146
+ && python setup.py clean --all \
147
+ && python setup.py develop; \
148
+ fi
149
+
150
+ # -----------------------
151
+ # Build SGLang
152
+ ARG BUILD_TYPE=all
153
+
154
+ RUN pip install IPython \
155
+ && pip install orjson \
156
+ && pip install python-multipart \
157
+ && pip install torchao \
158
+ && pip install pybind11
159
+
160
+ RUN pip install "setuptools<70.0.0" --force-reinstall
161
+ RUN pip uninstall -y sgl_kernel sglang
162
+ RUN git clone ${SGL_REPO} \
163
+ && cd sglang \
164
+ && if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
165
+ echo "Using ${SGL_DEFAULT}, default branch."; \
166
+ git checkout ${SGL_DEFAULT}; \
167
+ else \
168
+ echo "Using ${SGL_BRANCH} branch."; \
169
+ git checkout ${SGL_BRANCH}; \
170
+ fi \
171
+ && cd sgl-kernel \
172
+ && rm -f pyproject.toml \
173
+ && mv pyproject_rocm.toml pyproject.toml \
174
+ && AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
175
+ && cd .. \
176
+ && if [ "$BUILD_TYPE" = "srt" ]; then \
177
+ python -m pip --no-cache-dir install -e "python[srt_hip]"; \
178
+ else \
179
+ python -m pip --no-cache-dir install -e "python[all_hip]"; \
180
+ fi
181
+
182
+ RUN python -m pip cache purge
183
+
184
+ # Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
185
+ RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
186
+ /workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
187
+ -type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
188
+
189
+ # Performance environment variable.
190
+ ENV HIP_FORCE_DEV_KERNARG=1
191
+ ENV HSA_NO_SCRATCH_RECLAIM=1
192
+ ENV SGLANG_SET_CPU_AFFINITY=1
193
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
194
+ ENV NCCL_MIN_NCHANNELS=112
195
+
196
+ ENV SGLANG_USE_AITER=1
197
+ ENV SGLANG_MOE_PADDING=1
198
+ ENV VLLM_FP8_PADDING=1
199
+ ENV VLLM_FP8_ACT_PADDING=1
200
+ ENV VLLM_FP8_WEIGHT_PADDING=1
201
+ ENV VLLM_FP8_REDUCE_CONV=1
202
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
203
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
204
+
205
+
206
+ # sglang patch
207
+ # COPY patch/${SGLANG_VERSION}/sglang.patch /sgl-workspace/sglang/
208
+ COPY amd_patch/sglv0.5.0rc0 /workspace/patch
209
+ RUN cd /workspace/sglang && \
210
+ git apply /workspace/patch/sglang.patch && \
211
+ if grep -R -n '^<<<<<<< ' .; then \
212
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
213
+ exit 1; \
214
+ fi
215
+
216
+
217
+
218
+ RUN pip install sglang-router --force-reinstall
219
+
220
+ ###########################################
221
+ ###########################################
222
+ ###########################################
223
+
224
+
225
+ RUN pip install transformers==4.51.1
226
+
227
+ #########################################
228
+ #####Install vllm v0.8.5#################
229
+ #########################################
230
+
231
+ WORKDIR /workspace/
232
+
233
+ ENV VLLM_TARGET_DEVICE=rocm
234
+ ENV ROCM_PATH=/opt/rocm
235
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
236
+
237
+ RUN pip uninstall -y vllm || true
238
+ RUN rm -rf vllm-patch
239
+
240
+ # Fix importlib-metadata version conflict before vllm installation
241
+ RUN pip install "importlib-metadata>=6.0,<=8.0.0" --force-reinstall
242
+
243
+ RUN git clone https://github.com/RLFoundation/vllm-patch.git \
244
+ && cd vllm-patch \
245
+ && git checkout v0.8.5-sleep-numa \
246
+ && rm -rf build/ dist/ *.egg-info \
247
+ && ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
248
+ && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
249
+
250
+ WORKDIR /workspace/
251
+ ###########################################
252
+ ###########################################
253
+
254
+
255
+ #########################################
256
+ #### Install megatron-core###############
257
+ #########################################
258
+ # Can be removed later; presumably only needed by the current Megatron-LM dependency
259
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
260
+
261
+ COPY amd_patch/sglv0.5.0rc0 /workspace/patch
262
+
263
+ RUN pip uninstall -y megatron-core && \
264
+ git clone https://github.com/NVIDIA/Megatron-LM && \
265
+ cd Megatron-LM && \
266
+ git checkout 48406695c4efcf1026a7ed70bb390793918dd97b && \
267
+ git apply /workspace/patch/amd_megatron_fused_kernels_init.patch && \
268
+ pip install -vvv -e . && \
269
+ cd /workspace/
270
+
271
+ # sandwich norm for GLM models
272
+ RUN cd Megatron-LM && \
273
+ git apply /workspace/patch/megatron.patch --3way && \
274
+ if grep -R -n '^<<<<<<< ' .; then \
275
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
276
+ exit 1; \
277
+ fi
278
+
279
+ #########################################
280
+ #########################################
281
+ #########################################
282
+
283
+
284
+
285
+
286
+ #########################################
287
+ ###Add torch_memory_saver################
288
+ #########################################
289
+ # # Set environment variables
290
+ # ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
291
+ # ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
292
+ # ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
293
+ # Install torch_memory_saver
294
+ # RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
295
+ # RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
296
+ RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
297
+ # pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
298
+ #########################################
299
+ #########################################
300
+
301
+
302
+
303
+
304
+ ########################################
305
+ ######Install ray#######################
306
+ ########################################
307
+ # need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
308
+ RUN pip uninstall ray -y
309
+ # RUN pip install "ray[data,train,tune,serve]>=2.47.0"
310
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
311
+ ########################################
312
+ ########################################
313
+ ########################################
314
+
315
+
316
+ ### Need to verify whether this causes numerical/convergence issues
317
+ #######################################
318
+ ################apex###################
319
+ #######################################
320
+ WORKDIR /workspace/
321
+ RUN pip uninstall -y apex && \
322
+ git clone https://github.com/ROCm/apex.git && \
323
+ cd apex && \
324
+ python setup.py install && \
325
+ cd /workspace/
326
+ #######################################
327
+ #######################################
328
+ #######################################
329
+
330
+
331
+ ########################################
332
+ ############ mbridge####################
333
+ ########################################
334
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
335
+ ########################################
336
+ ########################################
337
+ ########################################
338
+
339
+
340
+
341
+ ########################################
342
+ ########slime agent framework needs#####
343
+ ########################################
344
+ RUN pip install pydra_config==0.0.15
345
+ RUN pip install together
346
+ RUN pip install google-generativeai
347
+ ########################################
348
+ ########################################
349
+ ########################################
350
+
351
+
352
+ ########################################
353
+ ########Additional packages#############
354
+ ########################################
355
+ RUN pip install tensorboard
356
+ ########################################
357
+ ########################################
358
+ ########################################
359
+
360
+
361
+ WORKDIR /workspace/
362
+
363
+ CMD ["/usr/bin/bash"]
ccevolve/baselines/thetaevolve/docker/Dockerfile.rocm_MI350-5 ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image for ROCm 7 / gfx950 (MI355)
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # Base image: ROCm 7 with vllm pre-built for gfx950
5
+ # Target GPU: MI355 (gfx950)
6
+
7
+
8
+ FROM rocm/sgl-dev:rocm7-vllm-20250904
9
+
10
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
11
+
12
+ ARG MAX_JOBS=128
13
+ ENV MAX_JOBS=${MAX_JOBS}
14
+
15
+ # Set environment variables for gfx950
16
+ ENV GPU_ARCH=gfx950
17
+ ENV PYTORCH_ROCM_ARCH=gfx950
18
+ ENV GPU_ARCH_LIST=gfx950
19
+ ENV AMDGPU_TARGET=gfx950
20
+
21
+
22
+ ###########################################
23
+ ##############1. Install AITER#############
24
+ ###########################################
25
+ WORKDIR /app
26
+
27
+ RUN pip uninstall -y aiter || true
28
+ RUN rm -rf aiter
29
+ RUN git clone https://github.com/ROCm/aiter.git \
30
+ && cd aiter \
31
+ && git checkout v0.1.7.post2 \
32
+ && git submodule update --init --recursive \
33
+ && GPU_ARCHS=gfx950 python setup.py develop
34
+ ###########################################
35
+ ###########################################
36
+ ###########################################
37
+
38
+
39
+ ###########################################
40
+ ####2. Install TransformerEngine for gfx950
41
+ ###########################################
42
+ WORKDIR /app
43
+
44
+ RUN rm -rf TransformerEngine
45
+ RUN git clone https://github.com/ROCm/TransformerEngine.git \
46
+ && cd TransformerEngine \
47
+ && git checkout 90c04bcdc3c109505b318f40a39680263af55edf \
48
+ && git submodule update --init --recursive
49
+
50
+ ENV NVTE_FRAMEWORK=pytorch
51
+ ENV NVTE_ROCM_ARCH=gfx950
52
+ ENV NVTE_USE_HIPBLASLT=1
53
+ ENV NVTE_USE_ROCM=1
54
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
55
+
56
+ RUN cd TransformerEngine && pip install . -v
57
+ ###########################################
58
+ ###########################################
59
+ ###########################################
60
+
61
+
62
+ #########################################
63
+ ####3. Install Megatron-LM (NVIDIA version)
64
+ #########################################
65
+ WORKDIR /app
66
+
67
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
68
+
69
+ RUN pip uninstall -y megatron-core || true
70
+ RUN rm -rf Megatron-LM
71
+ RUN git clone https://github.com/NVIDIA/Megatron-LM \
72
+ && cd Megatron-LM \
73
+ && git checkout 48406695c4efcf1026a7ed70bb390793918dd97b \
74
+ && pip install -e .
75
+ #########################################
76
+ #########################################
77
+ #########################################
78
+
79
+
80
+ ########################################
81
+ ############ 4. Install mbridge#########
82
+ ########################################
83
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
84
+ ########################################
85
+ ########################################
86
+ ########################################
87
+
88
+
89
+ ########################################
90
+ ######5. Install Ray####################
91
+ ########################################
92
+ RUN pip uninstall ray -y || true
93
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
94
+ ########################################
95
+ ########################################
96
+ ########################################
97
+
98
+
99
+ #########################################
100
+ ###6. Install torch_memory_saver#########
101
+ #########################################
102
+ RUN pip install torch_memory_saver
103
+ #########################################
104
+ #########################################
105
+
106
+
107
+ #######################################
108
+ ####7. Install Apex for ROCm###########
109
+ #######################################
110
+ WORKDIR /app
111
+
112
+ RUN pip uninstall -y apex || true
113
+ RUN rm -rf apex
114
+ RUN git clone https://github.com/ROCm/apex.git \
115
+ && cd apex \
116
+ && python setup.py install
117
+ #######################################
118
+ #######################################
119
+ #######################################
120
+
121
+
122
+ ########################################
123
+ ###8. Install slime agent framework deps
124
+ ########################################
125
+ RUN pip install pydra_config==0.0.15
126
+ RUN pip install together
127
+ RUN pip install google-generativeai
128
+ RUN pip install tensorboard
129
+ ########################################
130
+ ########################################
131
+ ########################################
132
+
133
+
134
+ ########################################
135
+ ###9. Set performance environment vars##
136
+ ########################################
137
+ ENV HIP_FORCE_DEV_KERNARG=1
138
+ ENV HSA_NO_SCRATCH_RECLAIM=1
139
+ ENV SGLANG_USE_AITER=1
140
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
141
+ ENV SGLANG_MOE_PADDING=1
142
+ ENV SGLANG_SET_CPU_AFFINITY=1
143
+ ENV SGLANG_ROCM_FUSED_DECODE_MLA=1
144
+ ENV SGLANG_USE_ROCM700A=1
145
+ ENV NCCL_MIN_NCHANNELS=112
146
+ ENV VLLM_FP8_PADDING=1
147
+ ENV VLLM_FP8_ACT_PADDING=1
148
+ ENV VLLM_FP8_WEIGHT_PADDING=1
149
+ ENV VLLM_FP8_REDUCE_CONV=1
150
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
151
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
152
+ ########################################
153
+ ########################################
154
+ ########################################
155
+
156
+
157
+ ###########################################
158
+ ##############Install SGLang###############
159
+ ###########################################
160
+ WORKDIR /app
161
+
162
+ # Install prerequisites
163
+ RUN pip install IPython orjson python-multipart torchao==0.9.0 pybind11
164
+
165
+ # Clone SGLang
166
+ RUN pip uninstall -y sgl_kernel sglang || true
167
+ RUN rm -rf sglang
168
+ RUN git clone https://github.com/sgl-project/sglang.git \
169
+ && cd sglang \
170
+ && git checkout v0.5.6
171
+
172
+ # Build sgl-kernel for gfx950
173
+ RUN cd sglang/sgl-kernel \
174
+ && rm -f pyproject.toml \
175
+ && mv pyproject_rocm.toml pyproject.toml \
176
+ && AMDGPU_TARGET=gfx950 python setup_rocm.py install
177
+
178
+ # Install SGLang
179
+ RUN cd sglang \
180
+ && rm -rf python/pyproject.toml \
181
+ && mv python/pyproject_other.toml python/pyproject.toml \
182
+ && pip install -e "python[all_hip]"
183
+
184
+ # Test SGLang installation
185
+ RUN python -c "import sglang; import sgl_kernel; print('SGLang + sgl_kernel: OK')"
186
+
187
+ RUN python -m pip cache purge
188
+ ###########################################
189
+ ###########################################
190
+ ###########################################
191
+
192
+
193
+ ###########################################
194
+ #### APPLY PATCHES (gfx950/MI355) #########
195
+ ###########################################
196
+
197
+ # Copy patches from slime repo
198
+ COPY amd_patch/latest /app/patch
199
+
200
+ # Apply Megatron patches
201
+ RUN cd /app/Megatron-LM \
202
+ && git apply /app/patch/amd_megatron_fused_kernels_init.patch \
203
+ && git apply /app/patch/megatron.patch --3way \
204
+ && if grep -R -n '^<<<<<<< ' .; then \
205
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
206
+ exit 1; \
207
+ fi \
208
+ && pip install -e . -v
209
+
210
+ # Apply SGLang patch
211
+ RUN cd /app/sglang \
212
+ && git apply /app/patch/sglang.patch || echo "Check patch compatibility with v0.5.6" \
213
+ && if grep -R -n '^<<<<<<< ' .; then \
214
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
215
+ exit 1; \
216
+ fi
217
+
218
+ # Copy each *MI300X* tuning config to MI300X_VF and MI355 names (read -r and quoted "$f" keep paths with spaces/brackets/backslashes intact)
218
+ RUN find /app/sglang/python/sglang/srt/layers/quantization/configs/ \
219
+ /app/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
220
+ -type f -name '*MI300X*' 2>/dev/null | while IFS= read -r f; do \
221
+ cp "$f" "$(echo "$f" | sed 's/MI300X/MI300X_VF/')" 2>/dev/null || true; \
222
+ cp "$f" "$(echo "$f" | sed 's/MI300X/MI355/')" 2>/dev/null || true; \
223
+ done
225
+
226
+ ###########################################
227
+ ###########################################
228
+ ###########################################
229
+
230
+
231
+ ########################################
232
+ #### Install additional packages########
233
+ ########################################
234
+ RUN pip install sglang-router --force-reinstall
235
+ ########################################
236
+ ########################################
237
+ ########################################
238
+
239
+
240
+ ########################################
241
+ # Fix click/ray incompatibility with Python 3.10
242
+ ########################################
243
+ RUN pip install click==8.2.1
244
+ ########################################
245
+ ########################################
246
+ ########################################
247
+
248
+
249
+ WORKDIR /app
250
+
251
+ CMD ["/usr/bin/bash"]
252
+
ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_9a48ba0.rocm ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
5
+
6
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
7
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - a patch was added manually to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of the mismatched number of dist checkpoints).
8
+
9
+ # Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
10
+
11
+ # FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
12
+ FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
13
+
14
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
15
+
16
+ ARG MAX_JOBS=512
17
+ ENV MAX_JOBS=${MAX_JOBS}
18
+
19
+ ENV PATH="/usr/local/python3.12/bin:$PATH"
20
+ RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
21
+ ln -sf /usr/bin/pip3.12 /usr/bin/pip
22
+
23
+ RUN apt-get update
24
+ RUN apt-get install -y pkg-config liblzma-dev
25
+
26
+
27
+ ###########################################
28
+ ##########Install TransformerEngine########
29
+ ###########################################
30
+ WORKDIR /workspace/
31
+
32
+ RUN rm -rf TransformerEngine
33
+ # RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
34
+ RUN git clone https://github.com/ROCm/TransformerEngine.git
35
+ WORKDIR /workspace/TransformerEngine
36
+
37
+ RUN git checkout 236178e
38
+ # RUN git checkout bb061ad
39
+ # RUN git checkout 864405c
40
+
41
+ RUN git submodule update --init --recursive
42
+
43
+ ENV NVTE_FRAMEWORK=pytorch
44
+ ENV NVTE_ROCM_ARCH=gfx942
45
+ ENV NVTE_USE_HIPBLASLT=1
46
+ ENV NVTE_USE_ROCM=1
47
+
48
+ # export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
49
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
50
+ RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
51
+ WORKDIR /workspace/
52
+ ###########################################
53
+ ###########################################
54
+ ###########################################
55
+
56
+
57
+
58
+ ###########################################################
59
+ ####Install vllm - sglang require vllm 0.6.7 dependency####
60
+ # #########################################################
61
+ #### Require vllm 0.6.7 - checkout 113274a0
62
+ WORKDIR /workspace/
63
+ RUN rm -rf vllm
64
+ RUN pip uninstall -y vllm
65
+ # Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
66
+ RUN git clone https://github.com/ROCm/vllm.git
67
+ # git clone https://github.com/vllm-project/vllm.git
68
+ WORKDIR /workspace/vllm
69
+ RUN git checkout 113274a0
70
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
71
+ ENV MAX_JOBS=${MAX_JOBS}
72
+ RUN pip install "boto3>=1.26.0"
73
+ RUN pip install setuptools_scm
74
+ # will add src into py. You can delete the repo
75
+ RUN python3 setup.py install
76
+ WORKDIR /workspace/
77
+ ###########################################
78
+ ###########################################
79
+
80
+
81
+ RUN pip install setuptools==75.8.0
82
+
83
+
84
+ ###########################################
85
+ ############build sglang###################
86
+ ###########################################
87
+ # Set environment variables
88
+ ENV BASE_DIR=/workspace
89
+ # ENV BASE_DIR=/sgl-workspace
90
+ ENV BUILD_TYPE=all
91
+ ENV SGL_REPO=https://github.com/sgl-project/sglang
92
+ ENV SGL_BRANCH=v0.4.7
93
+ ENV TRITON_REPO=https://github.com/ROCm/triton.git
94
+ ENV TRITON_COMMIT=improve_fa_decode_3.0.0
95
+ ENV AITER_REPO=https://github.com/ROCm/aiter.git
96
+ ENV AITER_COMMIT=v0.1.2
97
+ # v0.1.2 version - commit id: 9d11f47
98
+ # ENV AITER_COMMIT=9d11f47
99
+
100
+ ENV HIP_FORCE_DEV_KERNARG=1
101
+ ENV HSA_NO_SCRATCH_RECLAIM=1
102
+ ENV SGLANG_SET_CPU_AFFINITY=1
103
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
104
+ ENV NCCL_MIN_NCHANNELS=112
105
+
106
+ ENV SGLANG_USE_AITER=1
107
+ ENV SGLANG_MOE_PADDING=1
108
+ # ENV MOE_PADDING=1
109
+ ENV VLLM_FP8_PADDING=1
110
+ ENV VLLM_FP8_ACT_PADDING=1
111
+ ENV VLLM_FP8_WEIGHT_PADDING=1
112
+ ENV VLLM_FP8_REDUCE_CONV=1
113
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
114
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
115
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
116
+ ENV AMDGPU_TARGETS=gfx942
117
+ ENV ROCM_ARCH=gfx942
118
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
119
+
120
+ # Switch to working directory
121
+ # WORKDIR /sgl-workspace
122
+ WORKDIR /workspace
123
+
124
+ # Clean and create directory
125
+ # RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
126
+ RUN rm -rf /workspace && mkdir -p /workspace
127
+
128
+ # # Clone and build sglang
129
+ # RUN git clone ${SGL_REPO} \
130
+ # && cd sglang \
131
+ # && git checkout ${SGL_BRANCH} || echo "Using default branch" \
132
+ # && cd sgl-kernel \
133
+ # && rm -f pyproject.toml \
134
+ # && mv pyproject_rocm.toml pyproject.toml \
135
+ # && python setup_rocm.py install \
136
+ # && cd .. \
137
+ # && if [ "$BUILD_TYPE" = "srt" ]; then \
138
+ # python -m pip --no-cache-dir install -e "python[srt_hip]"; \
139
+ # else \
140
+ # python -m pip --no-cache-dir install -e "python[all_hip]"; \
141
+ # fi \
142
+ # && cd /sgl-workspace \
143
+ # && cp -r /sgl-workspace/sglang /sglang \
144
+ # && python -m pip cache purge
145
+
146
+ # Install common Python packages
147
+ RUN pip install IPython orjson python-multipart torchao pybind11
148
+
149
+ # Rebuild Triton
150
+ RUN pip uninstall -y triton || true \
151
+ && git clone ${TRITON_REPO} \
152
+ && cd triton \
153
+ && git checkout ${TRITON_COMMIT} \
154
+ && cd python \
155
+ && python3 setup.py install \
156
+ && cd /workspace
157
+ # && cd /sgl-workspace
158
+
159
+ # Build aiter
160
+ #version: Commit 9d11f47
161
+ # && git checkout ${AITER_COMMIT} \
162
+ RUN pip uninstall -y aiter || true
163
+ RUN git clone ${AITER_REPO} \
164
+ && cd aiter \
165
+ && git checkout ${AITER_COMMIT} \
166
+ && git submodule sync \
167
+ && git submodule update --init --recursive \
168
+ && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
169
+ && cd /workspace
170
+ # && cd /sgl-workspace
171
+ # && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
172
+
173
+
174
+ ###########################################
175
+ # Clone and build sglang
176
+ RUN git clone ${SGL_REPO} \
177
+ && cd sglang \
178
+ && git checkout ${SGL_BRANCH} || echo "Using default branch" \
179
+ && cd sgl-kernel \
180
+ && rm -f pyproject.toml \
181
+ && mv pyproject_rocm.toml pyproject.toml \
182
+ && python setup_rocm.py install \
183
+ && cd .. \
184
+ && if [ "$BUILD_TYPE" = "srt" ]; then \
185
+ python -m pip --no-cache-dir install -e "python[srt_hip]"; \
186
+ else \
187
+ python -m pip --no-cache-dir install -e "python[all_hip]"; \
188
+ fi \
189
+ && cd /workspace \
190
+ && cp -r /workspace/sglang /sglang \
191
+ && python -m pip cache purge
192
+ # && cd /sgl-workspace \
193
+ # && cp -r /sgl-workspace/sglang /sglang \
194
+ # && python -m pip cache purge
195
+ ###########################################
196
+
197
+
198
+ # Copy MI300X config
199
+ # RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
200
+ # /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
201
+ RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
202
+ /workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
203
+ -type f -name '*MI300X*' | \
204
+ xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
205
+
206
+ # Environment setup complete.
207
+ RUN echo "Environment setup complete."
208
+ WORKDIR /workspace/
209
+
210
+
211
+ # # patch
212
+ # # Date: Jul 5, 2025
213
+ # ENV SLIME_COMMIT=9ddbdbd
214
+ # RUN git clone https://github.com/THUDM/slime.git \
215
+ # && cd slime \
216
+ # && git checkout ${SLIME_COMMIT} \
217
+ # && cp docker/patch/sglang.patch /workspace/sglang/
218
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
219
+ # # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
220
+ # # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
221
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
222
+ # # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
223
+ # WORKDIR /workspace/sglang/
224
+ # # WORKDIR /sgl-workspace/sglang/
225
+ # RUN git apply sglang.patch && rm sglang.patch
226
+ # # WORKDIR /workspace/
227
+
228
+ # # sgl-router
229
+ # # WORKDIR /sgl-workspace/sglang/
230
+ # RUN apt-get update && apt-get install -y pkg-config libssl-dev
231
+ # RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
232
+ # RUN pip install setuptools-rust wheel build
233
+ # RUN source $HOME/.cargo/env && \
234
+ # mkdir -p sgl-router && \
235
+ # cd sgl-router && \
236
+ # cargo build -j 64 && \
237
+ # python3 -m build && \
238
+ # pip install dist/*.whl --force-reinstall
239
+
240
+ RUN pip install sglang-router --force-reinstall
241
+
242
+ ###########################################
243
+ ###########################################
244
+ ###########################################
245
+
246
+ RUN pip install transformers==4.51.1
247
+
248
+
249
+ #########################################
250
+ #####Install vllm v0.8.5#################
251
+ #########################################
252
+
253
+ WORKDIR /workspace/
254
+
255
+ ENV VLLM_TARGET_DEVICE=rocm
256
+ ENV ROCM_PATH=/opt/rocm
257
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
258
+
259
+ RUN pip uninstall -y vllm || true
260
+ RUN rm -rf vllm-patch
261
+
262
+ RUN git clone https://github.com/RLFoundation/vllm-patch.git \
263
+ && cd vllm-patch \
264
+ && git checkout v0.8.5-sleep-numa \
265
+ && rm -rf build/ dist/ *.egg-info \
266
+ && ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
267
+ && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
268
+
269
+ WORKDIR /workspace/
270
+ ###########################################
271
+ ###########################################
272
+
273
+
274
+ #########################################
275
+ #### Install megatron-core###############
276
+ #########################################
277
+ # Can be removed just the current megatron-lm dependency
278
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
279
+
280
+ RUN pip uninstall -y megatron-core && \
281
+ git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
282
+ cd Megatron-LM-amd_version && \
283
+ pip install -vvv -e . && \
284
+ cd /workspace/
285
+ #########################################
286
+ #########################################
287
+ #########################################
288
+
289
+
290
+
291
+
292
+ #########################################
293
+ ###Add torch_memory_saver################
294
+ #########################################
295
+ # Set environment variables
296
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
297
+ ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
298
+ ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
299
+ # Install torch_memory_saver
300
+ # RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
301
+ # RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
302
+ RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
303
+ # pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
304
+ #########################################
305
+ #########################################
306
+
307
+
308
+
309
+
310
+ ########################################
311
+ ######Install ray#######################
312
+ ########################################
313
+ # need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
314
+ RUN pip uninstall ray -y
315
+ # RUN pip install "ray[data,train,tune,serve]>=2.47.0"
316
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
317
+ ########################################
318
+ ########################################
319
+ ########################################
320
+
321
+
322
+ ### Need to verify whether numerical/convergence issue
323
+ #######################################
324
+ ################apex###################
325
+ #######################################
326
+ WORKDIR /workspace/
327
+ RUN pip uninstall -y apex && \
328
+ git clone https://github.com/ROCm/apex.git && \
329
+ cd apex && \
330
+ python setup.py install && \
331
+ cd /workspace/
332
+ #######################################
333
+ #######################################
334
+ #######################################
335
+
336
+
337
+ ########################################
338
+ ############ mbridge####################
339
+ ########################################
340
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
341
+ ########################################
342
+ ########################################
343
+ ########################################
344
+
345
+
346
+
347
+ ########################################
348
+ ########slime agent framework need######
349
+ ########################################
350
+ RUN pip install pydra_config==0.0.15
351
+ RUN pip install together
352
+ RUN pip install google-generativeai
353
+ ########################################
354
+ ########################################
355
+ ########################################
356
+
357
+
358
+
359
+ WORKDIR /workspace/
360
+
361
+ CMD ["/usr/bin/bash"]
ccevolve/baselines/thetaevolve/docker/Dockerfile_20250810_c22f55b.rocm ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
5
+ # Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
6
+
7
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
8
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - a patch was added manually to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of the mismatched number of dist checkpoints).
9
+
10
+
11
+ # The Docker image built with this Dockerfile:
12
+ # PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
13
+
14
+ # Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
15
+
16
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
17
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - a patch was added manually to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457: comment out the `if` because of the mismatched number of dist checkpoints).
18
+
19
+ # Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
20
+
21
+ # FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
22
+ FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
23
+
24
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
25
+
26
+ ARG MAX_JOBS=512
27
+ ENV MAX_JOBS=${MAX_JOBS}
28
+
29
+ ENV PATH="/usr/local/python3.12/bin:$PATH"
30
+ RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
31
+ ln -sf /usr/bin/pip3.12 /usr/bin/pip
32
+
33
+ RUN apt-get update
34
+ RUN apt-get install -y pkg-config liblzma-dev
35
+
36
+
37
+ ###########################################
38
+ ##########Install TransformerEngine########
39
+ ###########################################
40
+ WORKDIR /workspace/
41
+
42
+ RUN rm -rf TransformerEngine
43
+ # RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
44
+ RUN git clone https://github.com/ROCm/TransformerEngine.git
45
+ WORKDIR /workspace/TransformerEngine
46
+
47
+ RUN git checkout 236178e
48
+ # RUN git checkout bb061ad
49
+ # RUN git checkout 864405c
50
+
51
+ RUN git submodule update --init --recursive
52
+
53
+ ENV NVTE_FRAMEWORK=pytorch
54
+ ENV NVTE_ROCM_ARCH=gfx942
55
+ ENV NVTE_USE_HIPBLASLT=1
56
+ ENV NVTE_USE_ROCM=1
57
+
58
+ # export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
59
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
60
+ RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
61
+ WORKDIR /workspace/
62
+ ###########################################
63
+ ###########################################
64
+ ###########################################
65
+
66
+
67
+
68
+ ###########################################################
69
+ ####Install vllm - sglang require vllm 0.6.7 dependency####
70
+ # #########################################################
71
+ #### Require vllm 0.6.7 - checkout 113274a0
72
+ WORKDIR /workspace/
73
+ RUN rm -rf vllm
74
+ RUN pip uninstall -y vllm
75
+ # Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
76
+ RUN git clone https://github.com/ROCm/vllm.git
77
+ # git clone https://github.com/vllm-project/vllm.git
78
+ WORKDIR /workspace/vllm
79
+ RUN git checkout 113274a0
80
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
81
+ ENV MAX_JOBS=${MAX_JOBS}
82
+ RUN pip install "boto3>=1.26.0"
83
+ RUN pip install setuptools_scm
84
+ # will add src into py. You can delete the repo
85
+ RUN python3 setup.py install
86
+ WORKDIR /workspace/
87
+ ###########################################
88
+ ###########################################
89
+
90
+
91
+ RUN pip install setuptools==75.8.0
92
+
93
+
94
+ ###########################################
95
+ ############build sglang###################
96
+ ###########################################
97
+ # Set environment variables
98
+ ENV BASE_DIR=/workspace
99
+ # ENV BASE_DIR=/sgl-workspace
100
+ ENV BUILD_TYPE=all
101
+ ENV SGL_REPO=https://github.com/sgl-project/sglang
102
+ # ENV SGL_BRANCH=v0.4.9
103
+ ENV SGL_BRANCH=0.4.9.post6
104
+ # ENV SGL_BRANCH=v0.4.10
105
+ ENV TRITON_REPO=https://github.com/ROCm/triton.git
106
+ ENV TRITON_COMMIT=improve_fa_decode_3.0.0
107
+ ENV AITER_REPO=https://github.com/ROCm/aiter.git
108
+ ENV AITER_COMMIT=v0.1.3
109
+ # ENV AITER_COMMIT=v0.1.4
110
+ # v0.1.2 version - commit id: 9d11f47
111
+ # ENV AITER_COMMIT=9d11f47
112
+
113
+ ENV HIP_FORCE_DEV_KERNARG=1
114
+ ENV HSA_NO_SCRATCH_RECLAIM=1
115
+ ENV SGLANG_SET_CPU_AFFINITY=1
116
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
117
+ ENV NCCL_MIN_NCHANNELS=112
118
+
119
+ ENV SGLANG_USE_AITER=1
120
+ ENV SGLANG_MOE_PADDING=1
121
+ # ENV MOE_PADDING=1
122
+ ENV VLLM_FP8_PADDING=1
123
+ ENV VLLM_FP8_ACT_PADDING=1
124
+ ENV VLLM_FP8_WEIGHT_PADDING=1
125
+ ENV VLLM_FP8_REDUCE_CONV=1
126
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
127
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
128
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
129
+ ENV AMDGPU_TARGETS=gfx942
130
+ ENV ROCM_ARCH=gfx942
131
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
132
+
133
+ # Switch to working directory
134
+ # WORKDIR /sgl-workspace
135
+ WORKDIR /workspace
136
+
137
+ # Clean and create directory
138
+ # RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
139
+ RUN rm -rf /workspace && mkdir -p /workspace
140
+
141
+ # # Clone and build sglang
142
+ # RUN git clone ${SGL_REPO} \
143
+ # && cd sglang \
144
+ # && git checkout ${SGL_BRANCH} || echo "Using default branch" \
145
+ # && cd sgl-kernel \
146
+ # && rm -f pyproject.toml \
147
+ # && mv pyproject_rocm.toml pyproject.toml \
148
+ # && python setup_rocm.py install \
149
+ # && cd .. \
150
+ # && if [ "$BUILD_TYPE" = "srt" ]; then \
151
+ # python -m pip --no-cache-dir install -e "python[srt_hip]"; \
152
+ # else \
153
+ # python -m pip --no-cache-dir install -e "python[all_hip]"; \
154
+ # fi \
155
+ # && cd /sgl-workspace \
156
+ # && cp -r /sgl-workspace/sglang /sglang \
157
+ # && python -m pip cache purge
158
+
159
+ # Install common Python packages
160
+ RUN pip install IPython orjson python-multipart torchao pybind11
161
+
162
+ # Rebuild Triton
163
+ RUN pip uninstall -y triton || true \
164
+ && git clone ${TRITON_REPO} \
165
+ && cd triton \
166
+ && git checkout ${TRITON_COMMIT} \
167
+ && cd python \
168
+ && python3 setup.py install \
169
+ && cd /workspace
170
+ # && cd /sgl-workspace
171
+
172
+ # Build aiter
173
+ #version: Commit 9d11f47
174
+ # && git checkout ${AITER_COMMIT} \
175
+ RUN pip uninstall -y aiter || true
176
+ RUN git clone ${AITER_REPO} \
177
+ && cd aiter \
178
+ && git checkout ${AITER_COMMIT} \
179
+ && git submodule sync \
180
+ && git submodule update --init --recursive \
181
+ && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
182
+ && cd /workspace
183
+ # && cd /sgl-workspace
184
+ # && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
185
+
186
+
187
+ ###########################################
188
+ # Clone and build sglang
189
+ RUN git clone ${SGL_REPO} \
190
+ && cd sglang \
191
+ && git checkout ${SGL_BRANCH} || echo "Using default branch" \
192
+ && cd sgl-kernel \
193
+ && rm -f pyproject.toml \
194
+ && mv pyproject_rocm.toml pyproject.toml \
195
+ && python setup_rocm.py install \
196
+ && cd .. \
197
+ && if [ "$BUILD_TYPE" = "srt" ]; then \
198
+ python -m pip --no-cache-dir install -e "python[srt_hip]"; \
199
+ else \
200
+ python -m pip --no-cache-dir install -e "python[all_hip]"; \
201
+ fi \
202
+ && cd /workspace \
203
+ && cp -r /workspace/sglang /sglang \
204
+ && python -m pip cache purge
205
+ # && cd /sgl-workspace \
206
+ # && cp -r /sgl-workspace/sglang /sglang \
207
+ # && python -m pip cache purge
208
+ ###########################################
209
+
210
+
211
+ # Copy MI300X config
212
+ # RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
213
+ # /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
214
+ RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
215
+ /workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
216
+ -type f -name '*MI300X*' | \
217
+ xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
218
+
219
+ # Environment setup complete.
220
+ RUN echo "Environment setup complete."
221
+ WORKDIR /workspace/
222
+
223
+
224
+ # # patch
225
+ # # Date: Jul 5, 2025
226
+ # ENV SLIME_COMMIT=9ddbdbd
227
+ # RUN git clone https://github.com/THUDM/slime.git \
228
+ # && cd slime \
229
+ # && git checkout ${SLIME_COMMIT} \
230
+ # && cp docker/patch/sglang.patch /workspace/sglang/
231
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
232
+ # # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
233
+ # # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
234
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
235
+ # # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
236
+ # WORKDIR /workspace/sglang/
237
+ # # WORKDIR /sgl-workspace/sglang/
238
+ # RUN git apply sglang.patch && rm sglang.patch
239
+ # # WORKDIR /workspace/
240
+
241
+ # # sgl-router
242
+ # # WORKDIR /sgl-workspace/sglang/
243
+ # RUN apt-get update && apt-get install -y pkg-config libssl-dev
244
+ # RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
245
+ # RUN pip install setuptools-rust wheel build
246
+ # RUN source $HOME/.cargo/env && \
247
+ # mkdir -p sgl-router && \
248
+ # cd sgl-router && \
249
+ # cargo build -j 64 && \
250
+ # python3 -m build && \
251
+ # pip install dist/*.whl --force-reinstall
252
+
253
+ RUN pip install sglang-router --force-reinstall
254
+
255
+ ###########################################
256
+ ###########################################
257
+ ###########################################
258
+
259
+ RUN pip install transformers==4.51.1
260
+
261
+
262
+ #########################################
263
+ #####Install vllm v0.8.5#################
264
+ #########################################
265
+
266
+ WORKDIR /workspace/
267
+
268
+ ENV VLLM_TARGET_DEVICE=rocm
269
+ ENV ROCM_PATH=/opt/rocm
270
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
271
+
272
+ RUN pip uninstall -y vllm || true
273
+ RUN rm -rf vllm-patch
274
+
275
+ RUN git clone https://github.com/RLFoundation/vllm-patch.git \
276
+ && cd vllm-patch \
277
+ && git checkout v0.8.5-sleep-numa \
278
+ && rm -rf build/ dist/ *.egg-info \
279
+ && ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
280
+ && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
281
+
282
+ WORKDIR /workspace/
283
+ ###########################################
284
+ ###########################################
285
+
286
+
287
+ #########################################
288
+ #### Install megatron-core###############
289
+ #########################################
290
+ # Can be removed just the current megatron-lm dependency
291
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
292
+
293
+ RUN pip uninstall -y megatron-core && \
294
+ git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
295
+ cd Megatron-LM-amd_version && \
296
+ pip install -vvv -e . && \
297
+ cd /workspace/
298
+ #########################################
299
+ #########################################
300
+ #########################################
301
+
302
+
303
+
304
+
305
+ #########################################
306
+ ###Add torch_memory_saver################
307
+ #########################################
308
+ # Set environment variables
309
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
310
+ ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
311
+ ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
312
+ # Install torch_memory_saver
313
+ # RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
314
+ # RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
315
+ RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
316
+ # pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
317
+ #########################################
318
+ #########################################
319
+
320
+
321
+
322
+
323
+ ########################################
324
+ ######Install ray#######################
325
+ ########################################
326
+ # need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
327
+ RUN pip uninstall ray -y
328
+ # RUN pip install "ray[data,train,tune,serve]>=2.47.0"
329
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
330
+ ########################################
331
+ ########################################
332
+ ########################################
333
+
334
+
335
+ ### Need to verify whether this causes a numerical/convergence issue
336
+ #######################################
337
+ ################apex###################
338
+ #######################################
339
+ WORKDIR /workspace/
340
+ RUN pip uninstall -y apex && \
341
+ git clone https://github.com/ROCm/apex.git && \
342
+ cd apex && \
343
+ python setup.py install && \
344
+ cd /workspace/
345
+ #######################################
346
+ #######################################
347
+ #######################################
348
+
349
+
350
+ ########################################
351
+ ############ mbridge####################
352
+ ########################################
353
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
354
+ ########################################
355
+ ########################################
356
+ ########################################
357
+
358
+
359
+
360
+ ########################################
361
+ ########slime agent framework need######
362
+ ########################################
363
+ RUN pip install pydra_config==0.0.15
364
+ RUN pip install together
365
+ RUN pip install google-generativeai
366
+ ########################################
367
+ ########################################
368
+ ########################################
369
+
370
+
371
+
372
+ WORKDIR /workspace/
373
+
374
+ CMD ["/usr/bin/bash"]
ccevolve/baselines/thetaevolve/docker/Dockerfile_Aug_10_2025_9a48ba0.rocm ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
5
+
6
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
7
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457 - comment out if because of mismatched number of dist checkpoints).
8
+
9
+ # Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
10
+
11
+ # FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
12
+ FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
13
+
14
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
15
+
16
+ ARG MAX_JOBS=512
17
+ ENV MAX_JOBS=${MAX_JOBS}
18
+
19
+ ENV PATH="/usr/local/python3.12/bin:$PATH"
20
+ RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
21
+ ln -sf /usr/bin/pip3.12 /usr/bin/pip
22
+
23
+ RUN apt-get update
24
+ RUN apt-get install -y pkg-config liblzma-dev
25
+
26
+
27
+ ###########################################
28
+ ##########Install TransformerEngine########
29
+ ###########################################
30
+ WORKDIR /workspace/
31
+
32
+ RUN rm -rf TransformerEngine
33
+ # RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
34
+ RUN git clone https://github.com/ROCm/TransformerEngine.git
35
+ WORKDIR /workspace/TransformerEngine
36
+
37
+ RUN git checkout 236178e
38
+ # RUN git checkout bb061ad
39
+ # RUN git checkout 864405c
40
+
41
+ RUN git submodule update --init --recursive
42
+
43
+ ENV NVTE_FRAMEWORK=pytorch
44
+ ENV NVTE_ROCM_ARCH=gfx942
45
+ ENV NVTE_USE_HIPBLASLT=1
46
+ ENV NVTE_USE_ROCM=1
47
+
48
+ # export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
49
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
50
+ RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
51
+ WORKDIR /workspace/
52
+ ###########################################
53
+ ###########################################
54
+ ###########################################
55
+
56
+
57
+
58
+ ###########################################################
59
+ ####Install vllm - sglang require vllm 0.6.7 dependency####
60
+ # #########################################################
61
+ #### Require vllm 0.6.7 - checkout 113274a0
62
+ WORKDIR /workspace/
63
+ RUN rm -rf vllm
64
+ RUN pip uninstall -y vllm
65
+ # Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
66
+ RUN git clone https://github.com/ROCm/vllm.git
67
+ # git clone https://github.com/vllm-project/vllm.git
68
+ WORKDIR /workspace/vllm
69
+ RUN git checkout 113274a0
70
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
71
+ ENV MAX_JOBS=${MAX_JOBS}
72
+ RUN pip install "boto3>=1.26.0"
73
+ RUN pip install setuptools_scm
74
+ # will add src into py. You can delete the repo
75
+ RUN python3 setup.py install
76
+ WORKDIR /workspace/
77
+ ###########################################
78
+ ###########################################
79
+
80
+
81
+ RUN pip install setuptools==75.8.0
82
+
83
+
84
+ ###########################################
85
+ ############build sglang###################
86
+ ###########################################
87
+ # Set environment variables
88
+ ENV BASE_DIR=/workspace
89
+ # ENV BASE_DIR=/sgl-workspace
90
+ ENV BUILD_TYPE=all
91
+ ENV SGL_REPO=https://github.com/sgl-project/sglang
92
+ ENV SGL_BRANCH=v0.4.7
93
+ ENV TRITON_REPO=https://github.com/ROCm/triton.git
94
+ ENV TRITON_COMMIT=improve_fa_decode_3.0.0
95
+ ENV AITER_REPO=https://github.com/ROCm/aiter.git
96
+ ENV AITER_COMMIT=v0.1.2
97
+ # v0.1.2 version - commit id: 9d11f47
98
+ # ENV AITER_COMMIT=9d11f47
99
+
100
+ ENV HIP_FORCE_DEV_KERNARG=1
101
+ ENV HSA_NO_SCRATCH_RECLAIM=1
102
+ ENV SGLANG_SET_CPU_AFFINITY=1
103
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
104
+ ENV NCCL_MIN_NCHANNELS=112
105
+
106
+ ENV SGLANG_USE_AITER=1
107
+ ENV SGLANG_MOE_PADDING=1
108
+ # ENV MOE_PADDING=1
109
+ ENV VLLM_FP8_PADDING=1
110
+ ENV VLLM_FP8_ACT_PADDING=1
111
+ ENV VLLM_FP8_WEIGHT_PADDING=1
112
+ ENV VLLM_FP8_REDUCE_CONV=1
113
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
114
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
115
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
116
+ ENV AMDGPU_TARGETS=gfx942
117
+ ENV ROCM_ARCH=gfx942
118
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
119
+
120
+ # Switch to working directory
121
+ # WORKDIR /sgl-workspace
122
+ WORKDIR /workspace
123
+
124
+ # Clean and create directory
125
+ # RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
126
+ RUN rm -rf /workspace && mkdir -p /workspace
127
+
128
+ # # Clone and build sglang
129
+ # RUN git clone ${SGL_REPO} \
130
+ # && cd sglang \
131
+ # && git checkout ${SGL_BRANCH} || echo "Using default branch" \
132
+ # && cd sgl-kernel \
133
+ # && rm -f pyproject.toml \
134
+ # && mv pyproject_rocm.toml pyproject.toml \
135
+ # && python setup_rocm.py install \
136
+ # && cd .. \
137
+ # && if [ "$BUILD_TYPE" = "srt" ]; then \
138
+ # python -m pip --no-cache-dir install -e "python[srt_hip]"; \
139
+ # else \
140
+ # python -m pip --no-cache-dir install -e "python[all_hip]"; \
141
+ # fi \
142
+ # && cd /sgl-workspace \
143
+ # && cp -r /sgl-workspace/sglang /sglang \
144
+ # && python -m pip cache purge
145
+
146
+ # Install common Python packages
147
+ RUN pip install IPython orjson python-multipart torchao pybind11
148
+
149
+ # Rebuild Triton
150
+ RUN pip uninstall -y triton || true \
151
+ && git clone ${TRITON_REPO} \
152
+ && cd triton \
153
+ && git checkout ${TRITON_COMMIT} \
154
+ && cd python \
155
+ && python3 setup.py install \
156
+ && cd /workspace
157
+ # && cd /sgl-workspace
158
+
159
+ # Build aiter
160
+ #version: Commit 9d11f47
161
+ # && git checkout ${AITER_COMMIT} \
162
+ RUN pip uninstall -y aiter || true
163
+ RUN git clone ${AITER_REPO} \
164
+ && cd aiter \
165
+ && git checkout ${AITER_COMMIT} \
166
+ && git submodule sync \
167
+ && git submodule update --init --recursive \
168
+ && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
169
+ && cd /workspace
170
+ # && cd /sgl-workspace
171
+ # && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
172
+
173
+
174
+ ###########################################
175
+ # Clone and build sglang
176
+ RUN git clone ${SGL_REPO} \
177
+ && cd sglang \
178
+ && git checkout ${SGL_BRANCH} || echo "Using default branch" \
179
+ && cd sgl-kernel \
180
+ && rm -f pyproject.toml \
181
+ && mv pyproject_rocm.toml pyproject.toml \
182
+ && python setup_rocm.py install \
183
+ && cd .. \
184
+ && if [ "$BUILD_TYPE" = "srt" ]; then \
185
+ python -m pip --no-cache-dir install -e "python[srt_hip]"; \
186
+ else \
187
+ python -m pip --no-cache-dir install -e "python[all_hip]"; \
188
+ fi \
189
+ && cd /workspace \
190
+ && cp -r /workspace/sglang /sglang \
191
+ && python -m pip cache purge
192
+ # && cd /sgl-workspace \
193
+ # && cp -r /sgl-workspace/sglang /sglang \
194
+ # && python -m pip cache purge
195
+ ###########################################
196
+
197
+
198
+ # Copy MI300X config
199
+ # RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
200
+ # /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
201
+ RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
202
+ /workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
203
+ -type f -name '*MI300X*' | \
204
+ xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
205
+
206
+ # Environment setup complete.
207
+ RUN echo "Environment setup complete."
208
+ WORKDIR /workspace/
209
+
210
+
211
+ # # patch
212
+ # # Date: Jul 5, 2025
213
+ # ENV SLIME_COMMIT=9ddbdbd
214
+ # RUN git clone https://github.com/THUDM/slime.git \
215
+ # && cd slime \
216
+ # && git checkout ${SLIME_COMMIT} \
217
+ # && cp docker/patch/sglang.patch /workspace/sglang/
218
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
219
+ # # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
220
+ # # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
221
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
222
+ # # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
223
+ # WORKDIR /workspace/sglang/
224
+ # # WORKDIR /sgl-workspace/sglang/
225
+ # RUN git apply sglang.patch && rm sglang.patch
226
+ # # WORKDIR /workspace/
227
+
228
+ # # sgl-router
229
+ # # WORKDIR /sgl-workspace/sglang/
230
+ # RUN apt-get update && apt-get install -y pkg-config libssl-dev
231
+ # RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
232
+ # RUN pip install setuptools-rust wheel build
233
+ # RUN source $HOME/.cargo/env && \
234
+ # mkdir -p sgl-router && \
235
+ # cd sgl-router && \
236
+ # cargo build -j 64 && \
237
+ # python3 -m build && \
238
+ # pip install dist/*.whl --force-reinstall
239
+
240
+ RUN pip install sglang-router --force-reinstall
241
+
242
+ ###########################################
243
+ ###########################################
244
+ ###########################################
245
+
246
+ RUN pip install transformers==4.51.1
247
+
248
+
249
+ #########################################
250
+ #####Install vllm v0.8.5#################
251
+ #########################################
252
+
253
+ WORKDIR /workspace/
254
+
255
+ ENV VLLM_TARGET_DEVICE=rocm
256
+ ENV ROCM_PATH=/opt/rocm
257
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
258
+
259
+ RUN pip uninstall -y vllm || true
260
+ RUN rm -rf vllm-patch
261
+
262
+ RUN git clone https://github.com/RLFoundation/vllm-patch.git \
263
+ && cd vllm-patch \
264
+ && git checkout v0.8.5-sleep-numa \
265
+ && rm -rf build/ dist/ *.egg-info \
266
+ && ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
267
+ && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
268
+
269
+ WORKDIR /workspace/
270
+ ###########################################
271
+ ###########################################
272
+
273
+
274
+ #########################################
275
+ #### Install megatron-core###############
276
+ #########################################
277
+ # Can be removed just the current megatron-lm dependency
278
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
279
+
280
+ RUN pip uninstall -y megatron-core && \
281
+ git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
282
+ cd Megatron-LM-amd_version && \
283
+ pip install -vvv -e . && \
284
+ cd /workspace/
285
+ #########################################
286
+ #########################################
287
+ #########################################
288
+
289
+
290
+
291
+
292
+ #########################################
293
+ ###Add torch_memory_saver################
294
+ #########################################
295
+ # Set environment variables
296
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
297
+ ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
298
+ ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
299
+ # Install torch_memory_saver
300
+ # RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
301
+ # RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
302
+ RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
303
+ # pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
304
+ #########################################
305
+ #########################################
306
+
307
+
308
+
309
+
310
+ ########################################
311
+ ######Install ray#######################
312
+ ########################################
313
+ # need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
314
+ RUN pip uninstall ray -y
315
+ # RUN pip install "ray[data,train,tune,serve]>=2.47.0"
316
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
317
+ ########################################
318
+ ########################################
319
+ ########################################
320
+
321
+
322
+ ### Need to verify whether this causes a numerical/convergence issue
323
+ #######################################
324
+ ################apex###################
325
+ #######################################
326
+ WORKDIR /workspace/
327
+ RUN pip uninstall -y apex && \
328
+ git clone https://github.com/ROCm/apex.git && \
329
+ cd apex && \
330
+ python setup.py install && \
331
+ cd /workspace/
332
+ #######################################
333
+ #######################################
334
+ #######################################
335
+
336
+
337
+ ########################################
338
+ ############ mbridge####################
339
+ ########################################
340
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
341
+ ########################################
342
+ ########################################
343
+ ########################################
344
+
345
+
346
+
347
+ ########################################
348
+ ########slime agent framework need######
349
+ ########################################
350
+ RUN pip install pydra_config==0.0.15
351
+ RUN pip install together
352
+ RUN pip install google-generativeai
353
+ ########################################
354
+ ########################################
355
+ ########################################
356
+
357
+
358
+
359
+ WORKDIR /workspace/
360
+
361
+ CMD ["/usr/bin/bash"]
ccevolve/baselines/thetaevolve/docker/Dockerfile_after_c22f55b_Aug_10_2025.rocm ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### Use the base image
2
+
3
+ # The Docker image built with this Dockerfile:
4
+ # Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
5
+ # Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
6
+
7
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
8
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457 - comment out if because of mismatched number of dist checkpoints).
9
+
10
+
11
+ # The Docker image built with this Dockerfile:
12
+ # PR: commit ID 36711aa (Aug 22, 2025) dockerfile - Supports up to slime commit ID: 9a48ba0 (Aug 10, 2025)
13
+
14
+ # Start to fail from c22f55b (Aug 10, 2025) - Need to fix the bug from here
15
+
16
+ # You can find the latest pre-built Docker image from here: https://hub.docker.com/r/rlsys/slime/tags
17
+ # Current latest docker img: `rlsys/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch-vim` - manually add the patch to mitigate the checkpoint loading issue (vim /workspace/Megatron-LM-amd_version/megatron/training/checkpointing.py, lines 1449 ~ 1457 - comment out if because of mismatched number of dist checkpoints).
18
+
19
+ # Thanks to Yang Wang (https://www.microsoft.com/en-us/research/people/yangwang5/) for working on the patch for this ROCm base Docker image to support virtual memory management on MI300X.
20
+
21
+ # FROM "rlfoundation.azurecr.io/rocm6.3.4:vllm-0.8.5-numa-patch-ubuntu-22.04"
22
+ FROM "rlsys/rocm-6.3.4-patch:rocm6.3.4-numa-patch_ubuntu-22.04"
23
+
24
+ SHELL ["/bin/bash", "-ceuxo", "pipefail"]
25
+
26
+ ARG MAX_JOBS=512
27
+ ENV MAX_JOBS=${MAX_JOBS}
28
+
29
+ ENV PATH="/usr/local/python3.12/bin:$PATH"
30
+ RUN ln -sf /usr/bin/python3.12 /usr/bin/python && \
31
+ ln -sf /usr/bin/pip3.12 /usr/bin/pip
32
+
33
+ RUN apt-get update
34
+ RUN apt-get install -y pkg-config liblzma-dev
35
+
36
+
37
+ ###########################################
38
+ ##########Install TransformerEngine########
39
+ ###########################################
40
+ WORKDIR /workspace/
41
+
42
+ RUN rm -rf TransformerEngine
43
+ # RUN git clone --recursive https://github.com/ROCm/TransformerEngine.git
44
+ RUN git clone https://github.com/ROCm/TransformerEngine.git
45
+ WORKDIR /workspace/TransformerEngine
46
+
47
+ RUN git checkout 236178e
48
+ # RUN git checkout bb061ad
49
+ # RUN git checkout 864405c
50
+
51
+ RUN git submodule update --init --recursive
52
+
53
+ ENV NVTE_FRAMEWORK=pytorch
54
+ ENV NVTE_ROCM_ARCH=gfx942
55
+ ENV NVTE_USE_HIPBLASLT=1
56
+ ENV NVTE_USE_ROCM=1
57
+
58
+ # export CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr:${CMAKE_PREFIX_PATH:-}"
59
+ ENV CMAKE_PREFIX_PATH="/opt/rocm:/opt/rocm/hip:/usr/local:/usr"
60
+ RUN MAX_JOBS=${MAX_JOBS} pip install . -vvv
61
+ WORKDIR /workspace/
62
+ ###########################################
63
+ ###########################################
64
+ ###########################################
65
+
66
+
67
+
68
+ ###########################################################
69
+ ####Install vllm - sglang require vllm 0.6.7 dependency####
70
+ # #########################################################
71
+ #### Require vllm 0.6.7 - checkout 113274a0
72
+ WORKDIR /workspace/
73
+ RUN rm -rf vllm
74
+ RUN pip uninstall -y vllm
75
+ # Refer to here (down-grade vllm to 0.6.3): https://docs.vllm.ai/en/v0.6.3/getting_started/amd-installation.html
76
+ RUN git clone https://github.com/ROCm/vllm.git
77
+ # git clone https://github.com/vllm-project/vllm.git
78
+ WORKDIR /workspace/vllm
79
+ RUN git checkout 113274a0
80
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
81
+ ENV MAX_JOBS=${MAX_JOBS}
82
+ RUN pip install "boto3>=1.26.0"
83
+ RUN pip install setuptools_scm
84
+ # will add src into py. You can delete the repo
85
+ RUN python3 setup.py install
86
+ WORKDIR /workspace/
87
+ ###########################################
88
+ ###########################################
89
+
90
+
91
+ RUN pip install setuptools==75.8.0
92
+
93
+
94
+ ###########################################
95
+ ############build sglang###################
96
+ ###########################################
97
+ # Set environment variables
98
+ ENV BASE_DIR=/workspace
99
+ # ENV BASE_DIR=/sgl-workspace
100
+ ENV BUILD_TYPE=all
101
+ ENV SGL_REPO=https://github.com/sgl-project/sglang
102
+ # ENV SGL_BRANCH=v0.4.9
103
+ ENV SGL_BRANCH=0.4.9.post6
104
+ # ENV SGL_BRANCH=v0.4.10
105
+ ENV TRITON_REPO=https://github.com/ROCm/triton.git
106
+ ENV TRITON_COMMIT=improve_fa_decode_3.0.0
107
+ ENV AITER_REPO=https://github.com/ROCm/aiter.git
108
+ ENV AITER_COMMIT=v0.1.3
109
+ # ENV AITER_COMMIT=v0.1.4
110
+ # v0.1.2 version - commit id: 9d11f47
111
+ # ENV AITER_COMMIT=9d11f47
112
+
113
+ ENV HIP_FORCE_DEV_KERNARG=1
114
+ ENV HSA_NO_SCRATCH_RECLAIM=1
115
+ ENV SGLANG_SET_CPU_AFFINITY=1
116
+ ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
117
+ ENV NCCL_MIN_NCHANNELS=112
118
+
119
+ ENV SGLANG_USE_AITER=1
120
+ ENV SGLANG_MOE_PADDING=1
121
+ # ENV MOE_PADDING=1
122
+ ENV VLLM_FP8_PADDING=1
123
+ ENV VLLM_FP8_ACT_PADDING=1
124
+ ENV VLLM_FP8_WEIGHT_PADDING=1
125
+ ENV VLLM_FP8_REDUCE_CONV=1
126
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE=1
127
+ ENV TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
128
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--offload-arch=gfx942"
129
+ ENV AMDGPU_TARGETS=gfx942
130
+ ENV ROCM_ARCH=gfx942
131
+ ENV PYTORCH_ROCM_ARCH="gfx90a;gfx942"
132
+
133
+ # Switch to working directory
134
+ # WORKDIR /sgl-workspace
135
+ WORKDIR /workspace
136
+
137
+ # Clean and create directory
138
+ # RUN rm -rf /sgl-workspace && mkdir -p /sgl-workspace
139
+ RUN rm -rf /workspace && mkdir -p /workspace
140
+
141
+ # # Clone and build sglang
142
+ # RUN git clone ${SGL_REPO} \
143
+ # && cd sglang \
144
+ # && git checkout ${SGL_BRANCH} || echo "Using default branch" \
145
+ # && cd sgl-kernel \
146
+ # && rm -f pyproject.toml \
147
+ # && mv pyproject_rocm.toml pyproject.toml \
148
+ # && python setup_rocm.py install \
149
+ # && cd .. \
150
+ # && if [ "$BUILD_TYPE" = "srt" ]; then \
151
+ # python -m pip --no-cache-dir install -e "python[srt_hip]"; \
152
+ # else \
153
+ # python -m pip --no-cache-dir install -e "python[all_hip]"; \
154
+ # fi \
155
+ # && cd /sgl-workspace \
156
+ # && cp -r /sgl-workspace/sglang /sglang \
157
+ # && python -m pip cache purge
158
+
159
+ # Install common Python packages
160
+ RUN pip install IPython orjson python-multipart torchao pybind11
161
+
162
+ # Rebuild Triton
163
+ RUN pip uninstall -y triton || true \
164
+ && git clone ${TRITON_REPO} \
165
+ && cd triton \
166
+ && git checkout ${TRITON_COMMIT} \
167
+ && cd python \
168
+ && python3 setup.py install \
169
+ && cd /workspace
170
+ # && cd /sgl-workspace
171
+
172
+ # Build aiter
173
+ #version: Commit 9d11f47
174
+ # && git checkout ${AITER_COMMIT} \
175
+ RUN pip uninstall -y aiter || true
176
+ RUN git clone ${AITER_REPO} \
177
+ && cd aiter \
178
+ && git checkout ${AITER_COMMIT} \
179
+ && git submodule sync \
180
+ && git submodule update --init --recursive \
181
+ && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
182
+ && cd /workspace
183
+ # && cd /sgl-workspace
184
+ # && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop \
185
+
186
+
187
+ ###########################################
188
+ # Clone and build sglang
189
+ RUN git clone ${SGL_REPO} \
190
+ && cd sglang \
191
+ && git checkout ${SGL_BRANCH} || echo "Using default branch" \
192
+ && cd sgl-kernel \
193
+ && rm -f pyproject.toml \
194
+ && mv pyproject_rocm.toml pyproject.toml \
195
+ && python setup_rocm.py install \
196
+ && cd .. \
197
+ && if [ "$BUILD_TYPE" = "srt" ]; then \
198
+ python -m pip --no-cache-dir install -e "python[srt_hip]"; \
199
+ else \
200
+ python -m pip --no-cache-dir install -e "python[all_hip]"; \
201
+ fi \
202
+ && cd /workspace \
203
+ && cp -r /workspace/sglang /sglang \
204
+ && python -m pip cache purge
205
+ # && cd /sgl-workspace \
206
+ # && cp -r /sgl-workspace/sglang /sglang \
207
+ # && python -m pip cache purge
208
+ ###########################################
209
+
210
+
211
+ # Copy MI300X config
212
+ # RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
213
+ # /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
214
+ RUN find /workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
215
+ /workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
216
+ -type f -name '*MI300X*' | \
217
+ xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
218
+
219
+ # Environment setup complete.
220
+ RUN echo "Environment setup complete."
221
+ WORKDIR /workspace/
222
+
223
+
224
+ # # patch
225
+ # # Date: Jul 5, 2025
226
+ # ENV SLIME_COMMIT=9ddbdbd
227
+ # RUN git clone https://github.com/THUDM/slime.git \
228
+ # && cd slime \
229
+ # && git checkout ${SLIME_COMMIT} \
230
+ # && cp docker/patch/sglang.patch /workspace/sglang/
231
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
232
+ # # && cp docker/patch/v0.4.10-cu126/sglang.patch /sgl-workspace/sglang/
233
+ # # && cp docker/patch/latest/sglang.patch /sgl-workspace/sglang/
234
+ # # && cp docker/patch/sglang.patch /sgl-workspace/sglang/
235
+ # # COPY /home/yushensu/projects/slime/docker/patch/sglang.patch /sgl-workspace/sglang/
236
+ # WORKDIR /workspace/sglang/
237
+ # # WORKDIR /sgl-workspace/sglang/
238
+ # RUN git apply sglang.patch && rm sglang.patch
239
+ # # WORKDIR /workspace/
240
+
241
+ # # sgl-router
242
+ # # WORKDIR /sgl-workspace/sglang/
243
+ # RUN apt-get update && apt-get install -y pkg-config libssl-dev
244
+ # RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
245
+ # RUN pip install setuptools-rust wheel build
246
+ # RUN source $HOME/.cargo/env && \
247
+ # mkdir -p sgl-router && \
248
+ # cd sgl-router && \
249
+ # cargo build -j 64 && \
250
+ # python3 -m build && \
251
+ # pip install dist/*.whl --force-reinstall
252
+
253
+ RUN pip install sglang-router --force-reinstall
254
+
255
+ ###########################################
256
+ ###########################################
257
+ ###########################################
258
+
259
+ RUN pip install transformers==4.51.1
260
+
261
+
262
+ #########################################
263
+ #####Install vllm v0.8.5#################
264
+ #########################################
265
+
266
+ WORKDIR /workspace/
267
+
268
+ ENV VLLM_TARGET_DEVICE=rocm
269
+ ENV ROCM_PATH=/opt/rocm
270
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev
271
+
272
+ RUN pip uninstall -y vllm || true
273
+ RUN rm -rf vllm-patch
274
+
275
+ RUN git clone https://github.com/RLFoundation/vllm-patch.git \
276
+ && cd vllm-patch \
277
+ && git checkout v0.8.5-sleep-numa \
278
+ && rm -rf build/ dist/ *.egg-info \
279
+ && ln -sf /opt/rocm/lib/libamdhip64.so /usr/lib/libamdhip64.so \
280
+ && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.5.dev PYTORCH_ROCM_ARCH="gfx90a;gfx942" MAX_JOBS=${MAX_JOBS} python3 setup.py install
281
+
282
+ WORKDIR /workspace/
283
+ ###########################################
284
+ ###########################################
285
+
286
+
287
+ #########################################
288
+ #### Install megatron-core###############
289
+ #########################################
290
+ # This numpy pin can be removed later; it is only needed by the current megatron-lm dependency
291
+ RUN pip install "numpy>=1.21.0,<2.0" --force-reinstall
292
+
293
+ RUN pip uninstall -y megatron-core && \
294
+ git clone https://github.com/yushengsu-thu/Megatron-LM-amd_version.git && \
295
+ cd Megatron-LM-amd_version && \
296
+ pip install -vvv -e . && \
297
+ cd /workspace/
298
+ #########################################
299
+ #########################################
300
+ #########################################
301
+
302
+
303
+
304
+
305
+ #########################################
306
+ ###Add torch_memory_saver################
307
+ #########################################
308
+ # Set environment variables
309
+ ENV HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
310
+ ENV CFLAGS="-D__HIP_PLATFORM_AMD__"
311
+ ENV CXXFLAGS="-D__HIP_PLATFORM_AMD__"
312
+ # Install torch_memory_saver
313
+ # RUN pip install git+https://github.com/YangWang92/torch_memory_saver_numa.git --no-deps
314
+ # RUN pip install "git+https://github.com/YangWang92/torch_memory_saver_numa.git@numa"
315
+ RUN pip install "git+https://github.com/yushengsu-thu/torch_memory_saver.git"
316
+ # pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-deps
317
+ #########################################
318
+ #########################################
319
+
320
+
321
+
322
+
323
+ ########################################
324
+ ######Install ray#######################
325
+ ########################################
326
+ # need to add this patch manually: https://github.com/ray-project/ray/pull/53531/files
327
+ RUN pip uninstall ray -y
328
+ # RUN pip install "ray[data,train,tune,serve]>=2.47.0"
329
+ RUN pip install "ray[data,train,tune,serve]==2.47.1"
330
+ ########################################
331
+ ########################################
332
+ ########################################
333
+
334
+
335
+ ### Need to verify whether this causes numerical/convergence issues
336
+ #######################################
337
+ ################apex###################
338
+ #######################################
339
+ WORKDIR /workspace/
340
+ RUN pip uninstall -y apex && \
341
+ git clone https://github.com/ROCm/apex.git && \
342
+ cd apex && \
343
+ python setup.py install && \
344
+ cd /workspace/
345
+ #######################################
346
+ #######################################
347
+ #######################################
348
+
349
+
350
+ ########################################
351
+ ############ mbridge####################
352
+ ########################################
353
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
354
+ ########################################
355
+ ########################################
356
+ ########################################
357
+
358
+
359
+
360
+ ########################################
361
+ ########slime agent framework needs#####
362
+ ########################################
363
+ RUN pip install pydra_config==0.0.15
364
+ RUN pip install together
365
+ RUN pip install google-generativeai
366
+ ########################################
367
+ ########################################
368
+ ########################################
369
+
370
+
371
+
372
+ WORKDIR /workspace/
373
+
374
+ CMD ["/usr/bin/bash"]
ccevolve/baselines/thetaevolve/docker/Dockerfile_b200 ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG SGLANG_VERSION=v0.5.2rc2-cu128-b200
2
+ FROM lmsysorg/sglang:${SGLANG_VERSION} AS sglang
3
+
4
+ # An ARG declared before FROM is not visible after it, so we need to declare it again here
5
+ ARG SGLANG_VERSION
6
+ ARG MEGATRON_COMMIT=main
7
+
8
+ RUN apt update
9
+ RUN apt install -y nvtop
10
+
11
+ # TODO: change to pip install sglang-router after it has a new release
12
+ RUN pip install sglang-router --force-reinstall
13
+ RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall
14
+ RUN pip install ray[default]
15
+ RUN pip install httpx[http2] wandb pylatexenc blobfile accelerate "mcp[cli]"
16
+ RUN pip install git+https://github.com/zhuzilin/cumem_allocator.git
17
+
18
+ # mbridge
19
+ RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
20
+
21
+ RUN TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0;9.0a;10.0" pip install git+https://github.com/fanshiqing/grouped_gemm@v1.1.4
22
+ # apex
23
+ RUN NVCC_APPEND_FLAGS="--threads 4" \
24
+ pip -v install --disable-pip-version-check --no-cache-dir \
25
+ --no-build-isolation \
26
+ --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git
27
+ # transformer engine; NOTE: the command below uses --no-build-isolation (not --no-deps), presumably to build against the torch already installed in the image
28
+ RUN pip install pybind11
29
+ RUN pip -v install --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@stable
30
+ # flash attn
31
+ # the newest version megatron supports is v2.7.4.post1
32
+ RUN MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1
33
+ RUN git clone https://github.com/Dao-AILab/flash-attention.git && cd flash-attention/ && git checkout 27f501d && cd hopper/ && python setup.py install
34
+ RUN python_path=`python -c "import site; print(site.getsitepackages()[0])"` && \
35
+ mkdir -p $python_path/flash_attn_3 && \
36
+ wget -P $python_path/flash_attn_3 https://raw.githubusercontent.com/Dao-AILab/flash-attention/27f501dbe011f4371bff938fe7e09311ab3002fa/hopper/flash_attn_interface.py
37
+
38
+ WORKDIR /root/
39
+ RUN git clone https://github.com/NVIDIA/Megatron-LM.git --recursive && \
40
+ cd Megatron-LM && \
41
+ pip install -e .
42
+
43
+ # sandwich norm for GLM models
44
+ COPY patch/${SGLANG_VERSION}/megatron.patch /root/Megatron-LM/
45
+ RUN cd Megatron-LM && \
46
+ git checkout ${MEGATRON_COMMIT} && \
47
+ git apply megatron.patch --3way && \
48
+ if grep -R -n '^<<<<<<< ' .; then \
49
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
50
+ exit 1; \
51
+ fi && \
52
+ rm megatron.patch
53
+
54
+ # sglang patch
55
+ COPY patch/${SGLANG_VERSION}/sglang.patch /sgl-workspace/sglang/
56
+ RUN cd /sgl-workspace/sglang && \
57
+ git apply sglang.patch && \
58
+ if grep -R -n '^<<<<<<< ' .; then \
59
+ echo "Patch failed to apply cleanly. Please resolve conflicts." && \
60
+ exit 1; \
61
+ fi && \
62
+ rm sglang.patch
63
+
64
+ RUN rm /root/.tmux.conf
ccevolve/baselines/thetaevolve/docker/README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker release rule
2
+
3
+ We will publish 2 kinds of docker images:
4
+ 1. stable version, which is based on an official sglang release. We will store the patches for those versions.
5
+ 2. latest version, which aligns to `lmsysorg/sglang:latest`.
6
+
7
+ current stable version is:
8
+ - sglang v0.5.7 nightly-dev-20260107-dce8b060 (dce8b0606c06d3a191a24c7b8cbe8e238ab316c9), megatron dev 3714d81d418c9f1bca4594fc35f9e8289f652862
9
+
10
+ previous versions:
11
+ - sglang v0.5.6 nightly-dev-20251208-5e2cda61 (5e2cda6158e670e64b926a9985d65826c537ac82), megatron v0.14.0 (23e00ed0963c35382dfe8a5a94fb3cda4d21e133)
12
+ - sglang v0.5.5.post1 (303cc957e62384044dfa8e52d7d8af8abe12f0ac), megatron v0.14.0 (23e00ed0963c35382dfe8a5a94fb3cda4d21e133)
13
+ - sglang v0.5.0rc0-cu126 (8ecf6b9d2480c3f600826c7d8fef6a16ed603c3f), megatron 48406695c4efcf1026a7ed70bb390793918dd97b
14
+
15
+ The command to build:
16
+
17
+ ```bash
18
+ just release
19
+ ```
20
+
21
+ Before each update, we will test the following models with 64xH100:
22
+
23
+ - Qwen3-4B sync
24
+ - Qwen3-4B async
25
+ - Qwen3-30B-A3B sync
26
+ - Qwen3-30B-A3B fp8 sync
27
+ - GLM-4.5-355B-A32B sync
ccevolve/baselines/thetaevolve/docker/justfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ release-primary:
2
+ ARG_TAG_POSTFIX="" ARG_BUILD_EXTRA_ARGS="" just _release-raw
3
+
4
+ # Should be executed on ARM machines
5
+ release-cu129-arm64:
6
+ ARG_TAG_POSTFIX="-cu129-arm64" ARG_BUILD_EXTRA_ARGS='--build-arg SGLANG_IMAGE_TAG=v0.5.5.post3-cu129-arm64 --build-arg ENABLE_SGLANG_PATCH=0' just _release-raw
7
+
8
+ # Should be executed on ARM machines
9
+ release-cu13-arm64:
10
+ ARG_TAG_POSTFIX="-cu13-arm64" ARG_BUILD_EXTRA_ARGS='--build-arg SGLANG_IMAGE_TAG=dev-arm64-cu13-20251122 --build-arg ENABLE_CUDA_13=1 --build-arg ENABLE_SGLANG_PATCH=0' just _release-raw
11
+
12
+ _release-raw:
13
+ #!/bin/bash
14
+ set -euxo pipefail
15
+ cd ..
16
+
17
+ VERSION="$(cat docker/version.txt | tr -d '\n')"
18
+ IMAGE_TAG=${VERSION}${ARG_TAG_POSTFIX}
19
+
20
+ docker build -f docker/Dockerfile . --build-arg HTTP_PROXY="$http_proxy" --build-arg HTTPS_PROXY="$https_proxy" --build-arg NO_PROXY="localhost,127.0.0.1" $ARG_BUILD_EXTRA_ARGS -t slimerl/slime:$IMAGE_TAG
21
+ docker push slimerl/slime:$IMAGE_TAG
22
+
23
+ if [ -z "${ARG_TAG_POSTFIX}" ]; then
24
+ docker tag slimerl/slime:$IMAGE_TAG slimerl/slime:latest
25
+ docker push slimerl/slime:latest
26
+ fi
27
+
28
+ debug:
29
+ #!/bin/bash
30
+ set -euxo pipefail
31
+ cd ..
32
+
33
+ VERSION="$(cat docker/version.txt | tr -d '\n')"
34
+ IMAGE_TAG=${VERSION}
35
+
36
+ docker build -f docker/Dockerfile . --build-arg HTTP_PROXY="$http_proxy" --build-arg HTTPS_PROXY="$https_proxy" --build-arg NO_PROXY="localhost,127.0.0.1" -t slimerl/slime-test:$IMAGE_TAG
37
+ docker push slimerl/slime-test:$IMAGE_TAG
38
+
39
+ docker tag slimerl/slime-test:$IMAGE_TAG slimerl/slime-test:latest
40
+ docker push slimerl/slime-test:latest
ccevolve/baselines/thetaevolve/docker/version.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ nightly-dev-20260202b
ccevolve/baselines/thetaevolve/docs/README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # slime Documentation
2
+
3
+ We recommend that new contributors start by writing documentation, which helps you quickly understand the slime codebase.
4
+ Most documentation files are located under the `docs/` folder.
5
+
6
+ ## Docs Workflow
7
+
8
+ ### Install Dependency
9
+
10
+ ```bash
11
+ apt-get update && apt-get install -y pandoc parallel retry
12
+ pip install -r requirements.txt
13
+ ```
14
+
15
+ ### Update Documentation
16
+
17
+ You can update the documentation in the en and zh folders by adding Markdown or Jupyter Notebook files to the appropriate subdirectories. If you create new files, make sure to update index.rst (or any other relevant .rst files) accordingly.
18
+
19
+ ## Build and Render
20
+
21
+ ```bash
22
+ # build english version
23
+ bash ./build.sh en
24
+ bash ./serve.sh en
25
+
26
+ # build chinese version
27
+ bash ./build.sh zh
28
+ bash ./serve.sh zh
29
+ ```
30
+
31
+ You can then visit `http://localhost:8000` to view the documentation.
ccevolve/baselines/thetaevolve/docs/build.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
4
+ LANG=$1
5
+
6
+ # make sure language is only en or zh
7
+ if [ "$LANG" != "en" ] && [ "$LANG" != "zh" ]; then
8
+ echo "Language must be en or zh"
9
+ exit 1
10
+ fi
11
+
12
+ cd $SCRIPT_DIR
13
+ SLIME_DOC_LANG=$LANG sphinx-build -b html -D language=$LANG --conf-dir ./ ./$LANG ./build/$LANG
ccevolve/baselines/thetaevolve/docs/build_all.sh ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
4
+ cd "$SCRIPT_DIR"
5
+
6
+ echo "[slime-docs] Building EN..."
7
+ ./build.sh en
8
+ echo "[slime-docs] Building ZH..."
9
+ ./build.sh zh
10
+
11
+ # Create a lightweight root index with auto redirect based on localStorage (done client side)
12
+ ROOT_INDEX=build/index.html
13
+ cat > "$ROOT_INDEX" <<'EOF'
14
+ <!DOCTYPE html>
15
+ <html lang="en">
16
+ <head>
17
+ <meta charset="utf-8" />
18
+ <title>slime docs</title>
19
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
20
+ <style>
21
+ body{font:14px/1.4 system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;padding:40px;max-width:720px;margin:auto;color:#222}
22
+ a{color:#0969da;text-decoration:none}a:hover{text-decoration:underline}
23
+ .lang-links{margin-top:1.2rem;display:flex;gap:1rem}
24
+ .note{margin-top:2rem;font-size:12px;color:#666}
25
+ </style>
26
+ <script>
27
+ (function(){
28
+ var stored = null;
29
+ try{stored = localStorage.getItem('slime-doc-lang');}catch(e){}
30
+ var path = (stored === 'zh') ? 'zh/' : (stored === 'en') ? 'en/' : null;
31
+ if(path){ window.location.replace(path); }
32
+ })();
33
+ </script>
34
+ </head>
35
+ <body>
36
+ <h1>slime Documentation</h1>
37
+ <p>Select language:</p>
38
+ <p class="lang-links"><a href="en/">English</a> <a href="zh/">中文</a></p>
39
+ <p class="note">Auto-redirect uses your last choice if stored; else pick above.</p>
40
+ </body>
41
+ </html>
42
+ EOF
43
+
44
+ echo "[slime-docs] Done. Root landing page at build/index.html"
ccevolve/baselines/thetaevolve/docs/conf.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import sys
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ sys.path.insert(0, os.path.abspath("../.."))
8
+
9
+ __version__ = "0.0.1"
10
+
11
+ project = "slime"
12
+ copyright = f"2025-{datetime.now().year}, slime"
13
+ author = "slime Team"
14
+
15
+ version = __version__
16
+ release = __version__
17
+
18
+ extensions = [
19
+ "sphinx.ext.autodoc",
20
+ "sphinx.ext.autosummary",
21
+ "sphinx.ext.napoleon",
22
+ "sphinx.ext.viewcode",
23
+ "sphinx.ext.autosectionlabel",
24
+ "sphinx.ext.intersphinx",
25
+ "sphinx_tabs.tabs",
26
+ "myst_parser",
27
+ "sphinx_copybutton",
28
+ "sphinxcontrib.mermaid",
29
+ "nbsphinx",
30
+ "sphinx.ext.mathjax",
31
+ ]
32
+
33
+ nbsphinx_allow_errors = True
34
+ nbsphinx_execute = "never"
35
+
36
+ autosectionlabel_prefix_document = True
37
+ nbsphinx_allow_directives = True
38
+
39
+
40
+ myst_enable_extensions = [
41
+ "dollarmath",
42
+ "amsmath",
43
+ "deflist",
44
+ "colon_fence",
45
+ "html_image",
46
+ "linkify",
47
+ "substitution",
48
+ ]
49
+
50
+ myst_heading_anchors = 3
51
+
52
+ nbsphinx_kernel_name = "python3"
53
+ nbsphinx_execute_arguments = [
54
+ "--InlineBackend.figure_formats={'svg', 'pdf'}",
55
+ "--InlineBackend.rc={'figure.dpi': 96}",
56
+ ]
57
+
58
+
59
+ nb_render_priority = {
60
+ "html": (
61
+ "application/vnd.jupyter.widget-view+json",
62
+ "application/javascript",
63
+ "text/html",
64
+ "image/svg+xml",
65
+ "image/png",
66
+ "image/jpeg",
67
+ "text/markdown",
68
+ "text/latex",
69
+ "text/plain",
70
+ )
71
+ }
72
+
73
+ myst_enable_extensions = [
74
+ "dollarmath",
75
+ "amsmath",
76
+ "deflist",
77
+ "colon_fence",
78
+ "html_image",
79
+ "linkify",
80
+ "substitution",
81
+ ]
82
+
83
+ myst_heading_anchors = 3
84
+ myst_ref_domains = ["std", "py"]
85
+
86
+ templates_path = ["_templates"]
87
+
88
+ source_suffix = {
89
+ ".rst": "restructuredtext",
90
+ ".md": "markdown",
91
+ }
92
+
93
+ master_doc = "index"
94
+
95
+ language = os.environ.get("SLIME_DOC_LANG", "en")
96
+
97
+ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
98
+
99
+ pygments_style = "sphinx"
100
+
101
+ html_theme = "sphinx_book_theme"
102
+ html_logo = "_static/image/logo.jpg"
103
+ html_favicon = "_static/image/logo.ico"
104
+ html_title = project
105
+ html_copy_source = True
106
+ html_last_updated_fmt = ""
107
+
108
+ html_theme_options = {
109
+ "repository_url": "https://github.com/THUDM/slime",
110
+ "repository_branch": "main",
111
+ "show_navbar_depth": 3,
112
+ "max_navbar_depth": 4,
113
+ "collapse_navbar": True,
114
+ "use_edit_page_button": True,
115
+ "use_source_button": True,
116
+ "use_issues_button": True,
117
+ "use_repository_button": True,
118
+ "use_download_button": True,
119
+ "use_sidenotes": True,
120
+ "show_toc_level": 2,
121
+ }
122
+
123
+ html_context = {
124
+ "display_github": True,
125
+ "github_user": "sgl-project",
126
+ "github_repo": "sgl-project.github.io",
127
+ "github_version": "main",
128
+ "conf_py_path": "/docs/",
129
+ }
130
+
131
+ html_static_path = ["_static"]
132
+ html_css_files = ["css/custom_log.css"]
133
+ # Add custom javascript for language toggle (en <-> zh)
134
+ html_js_files = [
135
+ "js/lang-toggle.js",
136
+ ]
137
+
138
+
139
+ def _sync_examples(app):
140
+ """Sync top-level examples into language-specific doc trees.
141
+
142
+ Policy:
143
+ - README.md -> English docs/en/_examples_synced/<example>/README.md
144
+ - README_zh.md -> Chinese docs/zh/_examples_synced/<example>/README.md (falls back to README.md when README_zh.md is absent)
145
+ - If no suitable README exists for a language, that example is simply skipped for that language.
146
+ """
147
+ docs_root = Path(__file__).resolve().parent
148
+ src_dir = docs_root.parent / "examples"
149
+ if not src_dir.exists():
150
+ return
151
+
152
+ lang_cfgs = {
153
+ "en": {
154
+ "dir": docs_root / "en",
155
+ "readme_name": "README.md",
156
+ },
157
+ "zh": {
158
+ "dir": docs_root / "zh",
159
+ # preferred name; falls back to README.md when it is missing
160
+ "readme_name": "README_zh.md",
161
+ },
162
+ }
163
+
164
+ for lang, cfg in lang_cfgs.items():
165
+ lang_dir = cfg["dir"]
166
+ if not lang_dir.exists():
167
+ continue
168
+ out_dir = lang_dir / "_examples_synced"
169
+ if out_dir.exists():
170
+ shutil.rmtree(out_dir)
171
+ out_dir.mkdir(parents=True, exist_ok=True)
172
+
173
+ entries = [] # (example_name, readme_rel_path)
174
+ for d in sorted(src_dir.iterdir()):
175
+ if not d.is_dir():
176
+ continue
177
+ # language-specific selection with fallback for zh
178
+ if lang == "zh":
179
+ primary = d / cfg["readme_name"] # README_zh.md
180
+ fallback = d / "README.md"
181
+ candidate = primary if primary.exists() else fallback
182
+ else:
183
+ candidate = d / cfg["readme_name"]
184
+ if not candidate.exists():
185
+ continue # skip entirely if nothing suitable
186
+ target_dir = out_dir / d.name
187
+ target_dir.mkdir(parents=True, exist_ok=True)
188
+ shutil.copy2(candidate, target_dir / "README.md")
189
+ entries.append((d.name, f"_examples_synced/{d.name}/README.md"))
190
+
191
+
192
+ def setup(app):
193
+ # ensure examples are synced before reading source files
194
+ app.connect("builder-inited", _sync_examples)
195
+
196
+
197
+ myst_enable_extensions = [
198
+ "dollarmath",
199
+ "amsmath",
200
+ "deflist",
201
+ "colon_fence",
202
+ ]
203
+ myst_heading_anchors = 5
204
+
205
+ htmlhelp_basename = "slimedoc"
206
+
207
+ latex_elements = {}
208
+
209
+ latex_documents = [
210
+ (master_doc, "slime.tex", "slime Documentation", "slime Team", "manual"),
211
+ ]
212
+
213
+ man_pages = [(master_doc, "slime", "slime Documentation", [author], 1)]
214
+
215
+ texinfo_documents = [
216
+ (
217
+ master_doc,
218
+ "slime",
219
+ "slime Documentation",
220
+ author,
221
+ "slime",
222
+ "One line description of project.",
223
+ "Miscellaneous",
224
+ ),
225
+ ]
226
+
227
+ epub_title = project
228
+
229
+ epub_exclude_files = ["search.html"]
230
+
231
+ copybutton_prompt_text = r">>> |\.\.\. "
232
+ copybutton_prompt_is_regexp = True
233
+
234
+ autodoc_preserve_defaults = True
235
+ navigation_with_keys = False
236
+
237
+ autodoc_mock_imports = [
238
+ "torch",
239
+ "transformers",
240
+ "triton",
241
+ ]
242
+
243
+ intersphinx_mapping = {
244
+ "python": ("https://docs.python.org/3.12", None),
245
+ "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
246
+ "pillow": ("https://pillow.readthedocs.io/en/stable", None),
247
+ "numpy": ("https://numpy.org/doc/stable", None),
248
+ "torch": ("https://pytorch.org/docs/stable", None),
249
+ }
250
+
251
+ html_theme = "sphinx_book_theme"
252
+
253
+
254
+ nbsphinx_prolog = """
255
+ .. raw:: html
256
+
257
+ <style>
258
+ .output_area.stderr, .output_area.stdout {
259
+ color: #d3d3d3 !important; /* light gray */
260
+ }
261
+ </style>
262
+ """
ccevolve/baselines/thetaevolve/docs/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gguf>=0.10.0
2
+ ipykernel
3
+ ipywidgets
4
+ jupyter_client
5
+ markdown>=3.4.0
6
+ matplotlib
7
+ myst-parser
8
+ nbconvert
9
+ nbsphinx
10
+ nbstripout
11
+ pandoc
12
+ pillow
13
+ pydantic
14
+ sphinx
15
+ sphinx-autobuild
16
+ sphinx-book-theme
17
+ sphinx-copybutton
18
+ sphinx-tabs
19
+ sphinxcontrib-mermaid
20
+ urllib3<2.0.0
ccevolve/baselines/thetaevolve/docs/serve.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
4
+ LANG="${1:-all}"
5
+ PORT="${PORT:-8000}"
6
+
7
+ cd "$SCRIPT_DIR"
8
+
9
+ if [ "$LANG" = "all" ]; then
10
+ # Expect both builds present
11
+ if [ ! -d build/en ] || [ ! -d build/zh ]; then
12
+ echo "[serve] Missing build/en or build/zh. Run ./build_all.sh first." >&2
13
+ fi
14
+ echo "[serve] Serving multi-language docs root on http://localhost:$PORT (en/, zh/)"
15
+ python -m http.server -d ./build "$PORT"
16
+ exit $?
17
+ fi
18
+
19
+ if [ "$LANG" != "en" ] && [ "$LANG" != "zh" ]; then
20
+ echo "Usage: $0 [en|zh|all]" >&2
21
+ exit 1
22
+ fi
23
+
24
+ if [ ! -d "build/$LANG" ]; then
25
+ echo "[serve] build/$LANG not found. Run ./build.sh $LANG first." >&2
26
+ exit 1
27
+ fi
28
+ echo "[serve] Serving $LANG docs on http://localhost:$PORT"
29
+ python -m http.server -d ./build/$LANG "$PORT"
ccevolve/baselines/thetaevolve/eval_results/ac1/correct.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "correct": true,
3
+ "error": null
4
+ }
ccevolve/baselines/thetaevolve/eval_results/ac1/metrics.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "combined_score": -1.503163554681561,
3
+ "public": {
4
+ "best_value": 1.503163554681561,
5
+ "best_length": 1319,
6
+ "num_runs": 1
7
+ },
8
+ "private": {
9
+ "all_values": [
10
+ 1.503163554681561
11
+ ],
12
+ "all_lengths": [
13
+ 1319
14
+ ]
15
+ },
16
+ "text_feedback": "Lower evaluate_sequence value is better. combined_score = -best_value.",
17
+ "execution_time_mean": 0.021179363131523132,
18
+ "execution_time_std": 0.0,
19
+ "num_valid_runs": 1,
20
+ "num_invalid_runs": 0,
21
+ "all_validation_errors": []
22
+ }
ccevolve/baselines/thetaevolve/eval_results/circle_packing/correct.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "correct": true,
3
+ "error": null
4
+ }
ccevolve/baselines/thetaevolve/eval_results/circle_packing/metrics.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "combined_score": 2.6359856612408987,
3
+ "public": {
4
+ "centers_str": " centers[0] = (0.1112, 0.1112)\n centers[1] = (0.2406, 0.2370)\n centers[2] = (0.4048, 0.2580)\n centers[3] = (0.5966, 0.2576)\n centers[4] = (0.7603, 0.2363)\n centers[5] = (0.8892, 0.1108)\n centers[6] = (0.0962, 0.3179)\n centers[7] = (0.3141, 0.0926)\n centers[8] = (0.5006, 0.0939)\n centers[9] = (0.6869, 0.0924)\n centers[10] = (0.9043, 0.3167)\n centers[11] = (0.1035, 0.5174)\n centers[12] = (0.2731, 0.4040)\n centers[13] = (0.5013, 0.4700)\n centers[14] = (0.7284, 0.4024)\n centers[15] = (0.8969, 0.5154)\n centers[16] = (0.1052, 0.7260)\n centers[17] = (0.2974, 0.6183)\n centers[18] = (0.5045, 0.7247)\n centers[19] = (0.7053, 0.6131)\n centers[20] = (0.8932, 0.7252)\n centers[21] = (0.0849, 0.9151)\n centers[22] = (0.2977, 0.8667)\n centers[23] = (0.5027, 0.9211)\n centers[24] = (0.7054, 0.8698)\n centers[25] = (0.9154, 0.9154)",
5
+ "num_circles": 26
6
+ },
7
+ "private": {
8
+ "reported_sum_of_radii": 2.6359856612408987
9
+ },
10
+ "execution_time_mean": 0.025406131520867348,
11
+ "execution_time_std": 0.0,
12
+ "num_valid_runs": 1,
13
+ "num_invalid_runs": 0,
14
+ "all_validation_errors": []
15
+ }
ccevolve/baselines/thetaevolve/examples/__init__.py ADDED
File without changes
ccevolve/baselines/thetaevolve/examples/fully_async/README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Fully Asynchronous Rollout Example
2
+
3
+ This example shows a simple way to make rollout generation **fully asynchronous**: a single global worker is created once and then keeps running in the background, continuously pulling prompts and launching generation tasks. Training only needs to fetch already finished results. This removes the per‑step wait that happens in the normal synchronous style.
4
+
5
+ ### Files
6
+ * `fully_async_rollout.py`: global async worker + `generate_rollout_fully_async` entry.
7
+ * `run-qwen3-4b-fully_async.sh`: example launch script with Qwen3‑4B.
8
+
9
+ ### Prerequisite
10
+ First set up model & environment following the Qwen3-4B example.
11
+
12
+ ### Quick Start
13
+ ```bash
14
+ cd slime
15
+ bash examples/fully_async/run-qwen3-4b-fully_async.sh
16
+ ```
17
+ You should see log lines like:
18
+ ```
19
+ Creating new global async worker...
20
+ Continuous async rollout worker started
21
+ ```
22
+
23
+ ### How It Works (Very Short)
24
+ * First call: create `AsyncRolloutWorker` (thread + asyncio loop).
25
+ * Loop keeps up to `--rollout-batch-size` tasks in flight using `generate_and_rm_group`.
26
+ * Completed groups are pushed into a queue; caller drains until it has enough samples.
27
+ * Worker is stopped automatically at process exit.
28
+
29
+ ### Limitations
30
+ * No evaluation mode.
31
+ * Ordering is best effort (sorted at the end by index).
32
+ * Minimal error handling.
33
+
34
+ ### Config Differences (2 Key Points)
35
+ To enable the fully async pattern there are only two changes compared to a normal run:
36
+
37
+ 1. Use the async training driver: `train_async.py` (not `train.py`).
38
+ 2. Set the rollout function path:
39
+ ```bash
40
+ --rollout-function-path fully_async_rollout.generate_rollout_fully_async
41
+ ```
42
+
43
+ Why is it still "fully" async although `train_async.py` itself schedules rollouts step‑by‑step?
44
+
45
+ Because the real generation work is done by a **persistent background worker** created in `generate_rollout_fully_async`. Each call from `train_async.py` only drains already completed samples from the worker's output queue; the worker has been continuously generating since the first call. Thus rollout production (model inference) and training consumption happen in parallel with minimal waiting.
ccevolve/baselines/thetaevolve/examples/fully_async/fully_async_rollout.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import queue
3
+ import threading
4
+ import time
5
+ from typing import List
6
+
7
+ # Import core functions from sglang_rollout directly to avoid code duplication
8
+ from slime.rollout.sglang_rollout import GenerateState, generate_and_rm_group
9
+ from slime.utils.async_utils import run
10
+ from slime.utils.types import Sample
11
+
12
+ # Global worker manager
13
+ _global_worker = None
14
+ _worker_lock = threading.Lock()
15
+
16
+
17
def get_global_worker(args, data_buffer):
    """Return the module-wide rollout worker, creating and starting one if needed.

    A new ``AsyncRolloutWorker`` is built when none exists yet or when the
    existing worker's thread is no longer alive. The check and the creation
    happen under ``_worker_lock``.
    """
    global _global_worker
    with _worker_lock:
        current = _global_worker
        if current is not None and current.worker_thread.is_alive():
            return current

        print("Creating new global async worker...")
        # Publish the new worker before starting it, exactly as the lookup above expects.
        _global_worker = AsyncRolloutWorker(args, data_buffer, concurrency=args.sglang_server_concurrency)
        _global_worker.start()
        return _global_worker
26
+
27
+
28
def stop_global_worker():
    """Stop the module-wide rollout worker, if one exists, and forget it."""
    global _global_worker
    with _worker_lock:
        if _global_worker is None:
            return
        _global_worker.stop()
        _global_worker = None
35
+
36
+
37
class AsyncRolloutWorker:
    """Thread-backed rollout generator that keeps running between rollout calls.

    A daemon thread runs an asyncio event loop which repeatedly pulls prompt
    groups from ``data_buffer`` and launches ``generate_and_rm_group`` tasks.
    Every successfully finished group is published on ``output_queue`` as a
    ``(group_id, result)`` tuple; consumers read it via ``get_completed_groups``.
    """

    def __init__(self, args, data_buffer, concurrency=10):
        self.args = args
        self.data_buffer = data_buffer  # shared reference; the loop calls get_samples() on it
        # Stored as given. The loop bounds in-flight groups by
        # ``args.rollout_batch_size``, not by this value.
        self.concurrency = concurrency
        self.running = True  # cleared by stop() to end the loop
        self.output_queue = queue.Queue(maxsize=1000)  # finished (group_id, result) tuples
        self.worker_thread = None
        self.state = GenerateState(args)

    def _make_done_callback(self, group_id):
        """Return a done-callback that publishes a successful result under ``group_id``."""

        def task_done_callback(task):
            # A failed or cancelled task has nothing to publish, and calling
            # result() on it would raise inside the event loop's callback
            # machinery. Such tasks are reported by _reap_finished instead.
            if task.cancelled() or task.exception() is not None:
                return
            # NOTE: put() blocks this event-loop thread while the queue is full.
            self.output_queue.put((group_id, task.result()))

        return task_done_callback

    @staticmethod
    def _reap_finished(tasks):
        """Report failed/cancelled tasks among ``tasks`` and return the set of finished ones."""
        finished = {task for task in tasks if task.done()}
        for task in finished:
            if task.cancelled():
                # result()/exception() would raise CancelledError here, which is
                # not an ``Exception`` subclass and would escape the loop's handler.
                print("Task was cancelled before completing")
                continue
            error = task.exception()
            if error is not None:
                print(f"Task failed with exception: {error}")
        return finished

    async def continuous_worker_loop(self):
        """Keep up to ``args.rollout_batch_size`` generation tasks in flight until stopped."""
        print("Continuous async rollout worker started")

        active_tasks = set()
        max_concurrent_tasks = self.args.rollout_batch_size
        group_id_counter = 0

        while self.running:
            try:
                # Drop finished tasks; successful results were already queued by their callbacks.
                active_tasks -= self._reap_finished(active_tasks)

                # Top up the in-flight set, one prompt group per get_samples() call.
                while self.running and len(active_tasks) < max_concurrent_tasks:
                    samples = self.data_buffer.get_samples(1)
                    group = next(iter(samples), None)
                    if group is None:
                        # Nothing to launch right now: leave the fill loop so the
                        # event loop gets control back instead of spinning here.
                        break

                    task = asyncio.create_task(
                        generate_and_rm_group(
                            self.args,
                            group,
                            sampling_params=self.state.sampling_params.copy(),
                            evaluation=False,
                        )
                    )
                    task.add_done_callback(self._make_done_callback(group_id_counter))
                    group_id_counter += 1
                    active_tasks.add(task)

                # Brief sleep to avoid busy waiting
                await asyncio.sleep(1)

            except Exception as e:
                print(f"Error in continuous worker loop: {e}")
                await asyncio.sleep(1)

        if active_tasks:
            print(f"Waiting for {len(active_tasks)} continuous tasks to complete...")
            await asyncio.wait(active_tasks)
            # Surface failures that happened during the shutdown wait as well.
            self._reap_finished(active_tasks)

        print("Continuous async rollout worker stopped")

    def worker_thread_func(self):
        """Thread entry point: run the worker loop in a fresh asyncio event loop."""
        asyncio.run(self.continuous_worker_loop())

    def start(self):
        """Start the background thread unless one is already alive."""
        if self.worker_thread is None or not self.worker_thread.is_alive():
            self.worker_thread = threading.Thread(target=self.worker_thread_func, daemon=True)
            self.worker_thread.start()
            print("Started continuous async worker thread")

    def stop(self):
        """Ask the loop to stop and wait up to 5 seconds for the thread to exit."""
        self.running = False
        thread = self.worker_thread
        if thread and thread.is_alive():
            thread.join(timeout=5)
            if thread.is_alive():
                # The loop only exits after its in-flight tasks finish, so the
                # join can time out; the thread is a daemon and will not block exit.
                print("Async worker thread still running after 5s; leaving it to finish in the background")
                return
        print("Stopped async worker thread")

    def get_completed_groups(self) -> List[tuple]:
        """Remove and return every ``(group_id, result)`` tuple currently queued."""
        completed = []
        while True:
            try:
                completed.append(self.output_queue.get_nowait())
            except queue.Empty:
                return completed

    def get_queue_size(self) -> int:
        """Return the (approximate) number of finished groups waiting in the queue."""
        return self.output_queue.qsize()
147
+
148
+
149
async def generate_rollout_async(args, rollout_id: int, data_buffer) -> List[List[Sample]]:
    """Collect ``args.rollout_batch_size`` finished groups from the global worker.

    The global worker (created on first use) generates continuously in the
    background; this coroutine only takes finished groups off its output queue.
    Groups are taken one at a time, so anything the worker has finished beyond
    the requested amount stays queued for the next call instead of being
    discarded.

    Args:
        args: run configuration; ``rollout_global_dataset`` must be truthy and
            ``rollout_batch_size`` gives the number of groups to return.
        rollout_id: accepted for interface compatibility; not used here.
        data_buffer: passed to the worker when it has to be created.

    Returns:
        The collected groups, sorted by the ``index`` of each group's first sample.
    """
    assert args.rollout_global_dataset

    # Get global worker, which will run continuously
    worker = get_global_worker(args, data_buffer)

    # Simplified: directly use rollout_batch_size as target
    target_data_size = args.rollout_batch_size
    data = []

    print(f"Starting async rollout generation for {target_data_size} groups")
    print(f"Global worker queue size: {worker.get_queue_size()}")

    start_time = time.time()
    last_progress_time = start_time
    no_progress_timeout = 30.0  # Warn if no progress for 30 seconds

    while len(data) < target_data_size:
        try:
            # Take exactly one group; the id is only the worker's launch counter.
            _group_id, group = worker.output_queue.get_nowait()
        except queue.Empty:
            current_time = time.time()
            if current_time - last_progress_time > no_progress_timeout:
                print(
                    f"Warning: No progress for {no_progress_timeout}s. "
                    f"Queue size: {worker.get_queue_size()}, "
                    f"Collected: {len(data)}/{target_data_size}"
                )
                last_progress_time = current_time
            # Nothing ready yet: brief sleep to avoid busy waiting
            await asyncio.sleep(0.01)
            continue

        last_progress_time = time.time()

        if not data:
            print(
                f"First rollout sample: {[group[0].prompt + group[0].response]}, "
                f"label: {group[0].label}, reward: {group[0].reward}",
                flush=True,
            )

        # Simplified: directly add samples, no filters used
        data.append(group)

    duration = time.time() - start_time
    print(f"Rollout completed in {duration:.2f}s! Global worker queue size: {worker.get_queue_size()}")

    if data:
        print(
            f"Finish rollout: {[data[-1][0].prompt + data[-1][0].response]}, "
            f"label: {data[-1][0].label}, reward: {data[-1][0].reward}",
            flush=True,
        )

    data = sorted(data, key=lambda group: group[0].index)
    return data
234
+
235
+
236
def generate_rollout_fully_async(args, rollout_id, data_buffer, evaluation=False):
    """Synchronous entry point that runs ``generate_rollout_async`` via ``run``.

    Raises:
        ValueError: if ``evaluation`` is truthy; evaluation is not supported here.
    """
    if not evaluation:
        return run(generate_rollout_async(args, rollout_id, data_buffer))
    raise ValueError("Evaluation mode not supported in simple async rollout")
242
+
243
+
244
+ # Register exit cleanup function
245
+ import atexit
246
+
247
+ atexit.register(stop_global_worker)
ccevolve/baselines/thetaevolve/examples/fully_async/run-qwen3-4b-fully_async.sh ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Example launcher: fully asynchronous GRPO training of Qwen3-4B.
# Run it from the repository root (train_async.py is resolved relative to the
# working directory).

# for rerun the task: kill whatever a previous run left behind
# WARNING: this force-kills every sglang, ray and python process it may signal.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# Prevent python (and the ray processes started below) from buffering
# stdout/stderr. The variable Python reads is PYTHONUNBUFFERED.
export PYTHONUNBUFFERED=1

NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${SCRIPT_DIR}/../../scripts/models/qwen3-4B.sh"

CKPT_ARGS=(
    --hf-checkpoint /root/Qwen3-4B
    #--hf-checkpoint /root/Qwen3-4B-FP8
    --ref-load /root/Qwen3-4B_torch_dist
    --load /root/Qwen3-4B_slime/
    --save /root/Qwen3-4B_slime/
    --save-interval 20
)

ROLLOUT_ARGS=(
    # Resolved through PYTHONPATH, which includes ${SCRIPT_DIR} (see RUNTIME_ENV_JSON).
    --rollout-function-path fully_async_rollout.generate_rollout_fully_async
    # Same dataset location as the multi-agent example; adjust to your setup.
    --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
    --input-key prompt
    --label-key label
    --apply-chat-template
    --rollout-shuffle
    --rm-type deepscaler
    --num-rollout 3000
    --rollout-batch-size 32
    --n-samples-per-prompt 8
    --rollout-max-response-len 8192
    --rollout-temperature 0.8

    --global-batch-size 256
    --balance-data
)

PERF_ARGS=(
    --tensor-model-parallel-size 2
    --sequence-parallel
    --pipeline-model-parallel-size 1
    --context-parallel-size 1
    --expert-model-parallel-size 1
    --expert-tensor-parallel-size 1

    --recompute-granularity full
    --recompute-method uniform
    --recompute-num-layers 1

    # --micro-batch-size 1
    --use-dynamic-batch-size
    --max-tokens-per-gpu 9216
)

GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.00
    --kl-loss-type low_var_kl
    --entropy-coef 0.00
    --eps-clip 0.2
    --eps-clip-high 0.28

    --use-tis
)

OPTIMIZER_ARGS=(
    --optimizer adam
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98
)

SGLANG_ARGS=(
    --rollout-num-gpus-per-engine 1
)

MISC_ARGS=(
    # default dropout in megatron is 0.1
    --attention-dropout 0.0
    --hidden-dropout 0.0
    # should be good for model performance
    --accumulate-allreduce-grads-in-fp32
    --attention-softmax-in-fp32
    # need to comment this when using model with MLA
    --attention-backend flash
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats

RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Array expansions are quoted so each element is passed as exactly one argument
# (no re-splitting or globbing). DISTRIBUTED_ARGS is not set in this script; it
# expands to nothing unless the sourced model script defines it.
ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json="${RUNTIME_ENV_JSON}" \
    -- python3 train_async.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node 4 \
    --rollout-num-gpus 4 \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${DISTRIBUTED_ARGS[@]}" \
    "${PERF_ARGS[@]}" \
    "${SGLANG_ARGS[@]}" \
    "${MISC_ARGS[@]}"
ccevolve/baselines/thetaevolve/examples/multi_agent/README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Multi-Agent RL
2
+
3
+ This directory provides an example of running multi-agent reinforcement learning (RL) with slime.
4
+
5
+ ## Environment Setup
6
+
7
+ The environment setup is identical to the standard RL setup used in slime.
8
+
9
+ ## Running the Script
10
+
11
+ You can either define your own multi-agent system or use the provided default configuration.
12
+
13
+ ```python
14
+ MULTI_AGENT_CONFIGS = {
15
+ "custom_multi_agent_function_path": "examples.multi_agent.agent_system.run_agent_system",
16
+ "num_parallel": 5,
17
+ "incorrect_reward_weight": 0.8,
18
+ "correct_reward_weight": 1.2,
19
+ }
20
+ ```
21
+
22
+ To start a run, execute:
23
+
24
+ ```bash
25
+ cd slime/
26
+ bash examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh
27
+ ```
28
+
29
+ ## New Arguments
30
+
31
+ - Specify the agent rollout function with the `--custom-generate-function-path` argument.
32
+ - Set the `--rollout-max-context-len` argument according to your model’s context window.
33
+
34
+ ```bash
35
+ ROLLOUT_ARGS=(
36
+ --custom-generate-function-path examples.multi_agent.rollout_with_multi_agents.generate_with_multi_agents
37
+ --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
38
+ --input-key prompt
39
+ --label-key label
40
+ --apply-chat-template
41
+ --rollout-shuffle
42
+ --rm-type deepscaler
43
+ --num-rollout 3000
44
+ --rollout-batch-size 32
45
+ --n-samples-per-prompt 8
46
+ --rollout-max-context-len 16384
47
+ --rollout-max-response-len 8192
48
+ --rollout-temperature 0.8
49
+
50
+ --global-batch-size 256
51
+ --balance-data
52
+ )
53
+ ```
ccevolve/baselines/thetaevolve/examples/multi_agent/__init__.py ADDED
File without changes
ccevolve/baselines/thetaevolve/examples/multi_agent/agent_system.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import re
3
+ import time
4
+ import traceback
5
+ from copy import deepcopy
6
+ from typing import List
7
+
8
+ from slime.rollout.rm_hub import batched_async_rm
9
+ from slime.utils.http_utils import post
10
+ from slime.utils.types import Sample
11
+
12
+ from .prompts import SOLVER_PROMPT_TEMPLATE, generate_rewriter_template, generate_select_template
13
+
14
+
15
async def generate_response(args, prompt, key):
    """Send ``prompt`` to the generation endpoint and record the resulting sample.

    A deep copy of ``args.sample`` is filled with the prompt tokens, the
    generated tokens and the response text, then appended to
    ``args.results_dict[key]``.

    Returns:
        The stripped text after ``</think>`` when the output contains exactly one
        ``</think>`` followed by non-empty text; otherwise ``None``. ``None`` is
        also returned when no room is left for new tokens within the context
        limit (nothing is recorded in that case) or when any exception occurs
        (it is printed, not raised).
    """
    try:
        base_params = args.sampling_params
        tokenizer = args.tokenizer
        context_limit = args.rollout_max_context_len
        sample = deepcopy(args.sample)

        endpoint = f"http://{args.sglang_router_ip}:{args.sglang_router_port}/generate"

        prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)
        sample.tokens = prompt_ids
        sample.prompt = prompt

        # Never ask for more tokens than still fit into the context window.
        request_params = deepcopy(base_params)
        request_params["max_new_tokens"] = min(base_params["max_new_tokens"], context_limit - len(prompt_ids))
        if request_params["max_new_tokens"] <= 0:
            return None

        output = await post(
            endpoint,
            {"input_ids": prompt_ids, "sampling_params": request_params, "return_logprob": True},
        )
        meta = output["meta_info"]

        # Take the generated token ids from the logprob entries; without them
        # (abort) no tokens are added.
        if "output_token_logprobs" in meta:
            generated_ids = [entry[1] for entry in meta["output_token_logprobs"]]
        else:
            generated_ids = []

        # Extend the sample with the returned ids directly, avoiding re-tokenization.
        sample.tokens = sample.tokens + generated_ids
        sample.response_length += len(generated_ids)
        sample.response = output["text"]

        # Any other finish reason leaves the copied sample's status untouched.
        finish_type = meta["finish_reason"]["type"]
        if finish_type == "length":
            sample.status = Sample.Status.TRUNCATED
        elif finish_type == "stop":
            sample.status = Sample.Status.COMPLETED

        args.results_dict[key].append(sample)

        reasoning = None
        answer = None
        final = output["text"].replace("<|user|>", "")
        if "</think>" in final:
            pieces = final.split("</think>")
            if len(pieces) == 2 and pieces[1] != "":
                reasoning = pieces[0].strip()
                answer = pieces[1].strip()

        sample.reason_content = reasoning
        sample.response_content = answer
        return answer
    except Exception as e:
        print(f"Error generating response: {e}")
        return None
78
+
79
+
80
+ class Agent:
81
+ """A base class for our AI agents."""
82
+
83
+ def __init__(self):
84
+ pass
85
+
86
+ async def run(self, args, prompt, max_retries: int = 1, key: str = None) -> str:
87
+ """Runs the agent by sending a prompt to the LLM."""
88
+ for i in range(max_retries):
89
+ try:
90
+ response = await generate_response(args, prompt, key=key)
91
+ return response
92
+ except Exception as e:
93
+ print(f"Error querying LLM: {e}")
94
+ time.sleep(1)
95
+ print(f"Failed to query LLM after {max_retries} retries")
96
+ return None
97
+
98
+
99
+ class SolverAgent(Agent):
100
+ """The agent responsible for generating and improving solutions."""
101
+
102
+ def __init__(self):
103
+ super().__init__()
104
+
105
+ async def generate_initial_solution(self, args, problem_statement) -> str:
106
+ """Generates the first solution attempt."""
107
+ prompt = SOLVER_PROMPT_TEMPLATE.format(problem_statement=problem_statement)
108
+ return await self.run(args, prompt, max_retries=3, key="solver")
109
+
110
+
111
+ class RewriterAgent(Agent):
112
+ """The agent responsible for rewriting solutions."""
113
+
114
+ def __init__(self):
115
+ super().__init__()
116
+
117
+ async def rewrite(self, args, problem_statement, previous_solutions: List[str]) -> str:
118
+ """Generates the rewrited solution."""
119
+
120
+ # Dynamically generate template
121
+ template = generate_rewriter_template(len(previous_solutions))
122
+
123
+ # Build parameter dictionary
124
+ format_params = {"problem_statement": problem_statement}
125
+ for i, solution in enumerate(previous_solutions):
126
+ format_params[f"solution{i+1}"] = solution
127
+
128
+ prompt = template.format(**format_params)
129
+ return await self.run(args, prompt, max_retries=1, key="rewriter")
130
+
131
+
132
+ class SelectorAgent(Agent):
133
+ """The agent responsible for selecting solutions."""
134
+
135
+ def __init__(self):
136
+ super().__init__()
137
+
138
+ async def select(self, args, problem_statement, candidate_solutions: List[str]) -> str:
139
+ """Generates the rewrited solution."""
140
+
141
+ # Dynamically generate template
142
+ template = generate_select_template(len(candidate_solutions))
143
+
144
+ # Build parameter dictionary
145
+ format_params = {"problem_statement": problem_statement}
146
+ for i, solution in enumerate(candidate_solutions):
147
+ format_params[f"solution{i+1}"] = solution
148
+
149
+ prompt = template.format(**format_params)
150
+ return await self.run(args, prompt, max_retries=10, key="selector")
151
+
152
+ def extract_selected_solution_idx(self, response: str, candidate_solutions: List[str]) -> int:
153
+ """Extracts the selected solution ID from the response."""
154
+ PATTERN = re.compile("Judgment:\s*(\d+)")
155
+ matched = PATTERN.findall(response)
156
+ try:
157
+ selected_id = int(matched[0]) - 1
158
+ if selected_id < len(candidate_solutions) and selected_id >= 0:
159
+ return selected_id
160
+ else:
161
+ return None
162
+ except Exception as e:
163
+ print(f"extract_selected_solution_idx error: {e}")
164
+ return None
165
+
166
+
167
async def rewrite_worker(args, previous_solutions, problem_statement, worker_id):
    """Run one rewriter pass over ``previous_solutions``; ``worker_id`` is not used."""
    return await RewriterAgent().rewrite(args, problem_statement, previous_solutions)
171
+
172
+
173
async def solver_worker(args, problem_statement, worker_id):
    """Run one solver attempt and return its solution.

    Any exception is printed together with its traceback, tagged with
    ``worker_id``, and turned into a ``None`` result.
    """
    try:
        return await SolverAgent().generate_initial_solution(args, problem_statement)
    except Exception as e:
        print(f"[Worker-{worker_id}] exception: {e}")
        print(f"[Worker-{worker_id}] traceback: {traceback.format_exc()}")
        return None
187
+
188
+
189
async def run_agent_system(args, sample):
    """Run the solve -> rewrite -> select pipeline for one prompt.

    ``args.num_parallel`` solver calls run concurrently, then as many rewriter
    calls that see all solver answers, then one selector call over the rewritten
    answers. Solver and rewriter samples are scored with ``batched_async_rm``.
    The selector sample takes the reward of the first rewriter sample whose
    answer contains the chosen solution, or 0 when no usable choice was made.

    Finally every returned reward is multiplied by ``args.correct_reward_weight``
    when the selector reward equals 1, and by ``args.incorrect_reward_weight``
    otherwise (including the early exits where a stage produced no usable answer).

    Returns:
        The samples recorded by the stages that ran, in solver, rewriter,
        selector order.
    """
    args = deepcopy(args)  # Deep copy args since it may be modified in rollout_with_multi_agents
    args.sample = sample
    args.results_dict = {"solver": [], "rewriter": [], "selector": []}
    results = args.results_dict

    def reward_adjustment(samples, reward_weight):
        for item in samples:
            item.reward = item.reward * reward_weight
        return samples

    def scaled(stages, reward_weight):
        """Scale the rewards of the given stages and return their samples as one list."""
        collected = []
        for stage in stages:
            collected += reward_adjustment(results[stage], reward_weight)
        return collected

    async def assign_rewards(stage):
        """Score the samples recorded for ``stage`` and store each reward on its sample."""
        rewards = await batched_async_rm(args, results[stage])
        for item, reward in zip(results[stage], rewards):
            item.reward = reward

    problem_statement = sample.prompt

    # Solving
    solver_outputs = await asyncio.gather(
        *(solver_worker(args, problem_statement, worker_id) for worker_id in range(args.num_parallel)),
        return_exceptions=True,
    )
    await assign_rewards("solver")

    # Failed attempts come back as None (or an exception object); keep real answers only.
    previous_solutions = [item for item in solver_outputs if isinstance(item, str)]
    if len(previous_solutions) == 0:
        return scaled(("solver",), args.incorrect_reward_weight)

    # Rewriting
    rewriter_outputs = await asyncio.gather(
        *(
            rewrite_worker(args, previous_solutions, problem_statement, worker_id)
            for worker_id in range(args.num_parallel)
        ),
        return_exceptions=True,
    )
    rewrited_solutions = [item for item in rewriter_outputs if isinstance(item, str)]
    await assign_rewards("rewriter")

    if len(rewrited_solutions) == 0:
        return scaled(("solver", "rewriter"), args.incorrect_reward_weight)

    # Selection
    selector = SelectorAgent()
    response = await selector.select(args, problem_statement, rewrited_solutions)
    if len(results["selector"]) == 0:
        return scaled(("solver", "rewriter"), args.incorrect_reward_weight)

    assert (
        len(results["selector"]) == 1
    ), f"selector should only return one solution, but got {len(results['selector'])}"

    verdict = results["selector"][0]
    # Default for every path without a usable choice, including the case where no
    # rewriter sample matches the chosen text; otherwise the reward would stay
    # unset and the scaling below would fail.
    verdict.reward = 0
    if response is not None:
        selected_solution_idx = selector.extract_selected_solution_idx(response, rewrited_solutions)
        if selected_solution_idx is not None:
            selected_solution = rewrited_solutions[selected_solution_idx]
            for candidate in results["rewriter"]:
                if candidate.response_content is not None and selected_solution in candidate.response_content:
                    verdict.reward = candidate.reward
                    break

    # A correct final answer scales all rewards by the "correct" weight, anything
    # else by the "incorrect" weight.
    if verdict.reward == 1:
        final_weight = args.correct_reward_weight
    else:
        final_weight = args.incorrect_reward_weight
    return scaled(("solver", "rewriter", "selector"), final_weight)
ccevolve/baselines/thetaevolve/examples/multi_agent/prompts.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Defines prompt templates for generating different prompts
2
+
3
+
4
+ SOLVER_PROMPT_TEMPLATE = """{problem_statement}"""
5
+
6
+
7
def generate_rewriter_template(num_solutions: int) -> str:
    """Build the rewriter prompt template for ``num_solutions`` previous solutions.

    The result is meant for ``str.format`` and contains the placeholders
    ``{problem_statement}`` and ``{solution1}`` .. ``{solutionN}``.
    """
    # One "#### Solution N" section per previous solution, each ending in a rule.
    solutions_text = "\n".join(
        "#### Solution %d\n{solution%d}\n\n---" % (number, number) for number in range(1, num_solutions + 1)
    )

    return f"""### Task: Solution Rewriting Based on Previous Solutions ###
You are being reactivated to revise your mathematical proof. You are provided with two documents:
1. The problem you need to solve.
2. Your {num_solutions} different "Previous Solutions".

Your sole task is to generate a new, correct version of your solution based on your previous discoveries in the provided {num_solutions} solutions.

Refer to the following {num_solutions} solutions and solve the problem.
---

### Problem

{{problem_statement}}

---

### Candidates Solution
{solutions_text}
"""
34
+
35
+
36
def generate_select_template(num_solutions: int) -> str:
    """Build the selector prompt template for ``num_solutions`` candidate solutions.

    The result is meant for ``str.format`` and contains the placeholders
    ``{problem_statement}`` and ``{solution1}`` .. ``{solutionN}``. The prompt
    asks the model to end its answer with ``Judgment: IDX``.
    """
    # One "#### Solution N" section per candidate, each ending in a rule.
    solutions_text = "\n".join(
        "#### Solution %d\n{solution%d}\n\n---" % (number, number) for number in range(1, num_solutions + 1)
    )

    return f"""You will be given a challenging math problem followed by {num_solutions} solutions.
Your task is to systematically analyze these solutions to identify the most mathematically sound approach.

You are provided with two documents:
1. The problem you need to solve.
2. Your {num_solutions} "Candidate Solutions".

Evaluation Process:
1. Initial Screening
- Group solutions by their final answers
- Identify and explain mathematical contradictions between different answers
- Eliminate solutions with clear mathematical errors

2. Detailed Analysis
For remaining solutions, evaluate:
- Mathematical precision and accuracy
- Logical progression of steps
- Completeness of mathematical reasoning
- Handling of edge cases or special conditions
- For solutions containing and addressing errors, evaluate the error identification and correction methodology.

3. Solution Comparison
Compare viable solutions based on:
- Efficiency of approach
- Clarity of mathematical reasoning
- Sophistication of method
- Robustness of solution (works for all cases)

Your response should include:
1. Brief analysis of conflicting answers
2. Detailed evaluation of mathematically sound solutions
3. Justification for eliminating incorrect solutions
4. Clear explanation for selecting the best approach

End your evaluation with exactly:
Judgment: IDX
where IDX is the index 1-{num_solutions} of the best solution

### Problem

{{problem_statement}}

---

### Candidate Solutions
{solutions_text}
"""
ccevolve/baselines/thetaevolve/examples/multi_agent/rollout_with_multi_agents.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+ from transformers import AutoTokenizer
4
+
5
+ from slime.utils.misc import load_function
6
+ from slime.utils.types import Sample
7
+
8
+ MULTI_AGENT_CONFIGS = {
9
+ "custom_multi_agent_function_path": "examples.multi_agent.agent_system.run_agent_system",
10
+ "num_parallel": 5,
11
+ "incorrect_reward_weight": 0.8,
12
+ "correct_reward_weight": 1.2,
13
+ }
14
+
15
+
16
# Tokenizers already loaded in this process, keyed by checkpoint path, so the
# tokenizer is not re-read from disk for every sample.
_TOKENIZER_CACHE = {}


async def generate_with_multi_agents(args, sample: Sample, sampling_params, evaluation=False) -> list[Sample]:
    """Generate samples for one prompt by running the configured multi-agent function.

    The multi-agent function receives a shallow copy of ``args`` extended with
    ``sampling_params``, ``tokenizer``, the applicable ``rollout_max_context_len``
    and every entry of ``MULTI_AGENT_CONFIGS``. The caller's ``args`` object is
    left untouched; in particular an evaluation call no longer overwrites the
    shared ``rollout_max_context_len`` with the evaluation limit.

    Returns:
        The samples produced by the multi-agent function, in shuffled order.
    """
    import copy  # local import: only this function needs it

    tokenizer = _TOKENIZER_CACHE.get(args.hf_checkpoint)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(args.hf_checkpoint, trust_remote_code=True)
        _TOKENIZER_CACHE[args.hf_checkpoint] = tokenizer

    max_context_length = args.eval_max_context_len if evaluation else args.rollout_max_context_len

    # Per-call view of the configuration, so concurrent calls and later training
    # rollouts never see each other's overrides.
    call_args = copy.copy(args)
    call_args.sampling_params = sampling_params
    call_args.rollout_max_context_len = max_context_length
    call_args.tokenizer = tokenizer

    for key, value in MULTI_AGENT_CONFIGS.items():
        setattr(call_args, key, value)

    custom_multi_agent_func = load_function(call_args.custom_multi_agent_function_path)
    samples = await custom_multi_agent_func(call_args, sample)

    random.shuffle(samples)

    return samples
ccevolve/baselines/thetaevolve/examples/multi_agent/run-qwen3-30B-A3B-multi-agent.sh ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Example launcher: multi-agent GRPO training of Qwen3-30B-A3B.
# Run it from the repository root (train.py and the examples.multi_agent
# package are resolved relative to the working directory).

# for rerun the task: kill whatever a previous run left behind
# WARNING: this force-kills every sglang, ray and python process it may signal.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# Prevent python (and the ray processes started below) from buffering
# stdout/stderr. The variable Python reads is PYTHONUNBUFFERED.
export PYTHONUNBUFFERED=1

NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

# Locate the model definition relative to this script instead of assuming the
# repository lives in /root/slime.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${SCRIPT_DIR}/../../scripts/models/qwen3-30B-A3B.sh"

CKPT_ARGS=(
    --hf-checkpoint /root/Qwen3-30B-A3B
    #--hf-checkpoint /root/Qwen3-30B-A3B-FP8
    --ref-load /root/Qwen3-30B-A3B_torch_dist
    # Checkpoint directory named after the model actually being trained.
    --load /root/Qwen3-30B-A3B_slime/
    --save /root/Qwen3-30B-A3B_slime/
    --save-interval 20
)

ROLLOUT_ARGS=(
    --custom-generate-function-path examples.multi_agent.rollout_with_multi_agents.generate_with_multi_agents
    --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
    --input-key prompt
    --label-key label
    --apply-chat-template
    --rollout-shuffle
    --rm-type deepscaler
    --num-rollout 3000
    --rollout-batch-size 32
    --n-samples-per-prompt 8
    --rollout-max-context-len 16384
    --rollout-max-response-len 8192
    --rollout-temperature 0.8

    --global-batch-size 256
    --balance-data
)

# multi-agent does not support eval for now
EVAL_ARGS=(
    # --eval-interval 20
    # --eval-prompt-data aime /root/aime-2024/aime-2024.jsonl
    --n-samples-per-eval-prompt 16
    --eval-max-response-len 16384
    --eval-top-p 0.7
)

PERF_ARGS=(
    --tensor-model-parallel-size 4
    --sequence-parallel
    --pipeline-model-parallel-size 1
    --context-parallel-size 1
    --expert-model-parallel-size 8
    --expert-tensor-parallel-size 1

    --recompute-granularity full
    --recompute-method uniform
    --recompute-num-layers 1

    # --micro-batch-size 1
    --use-dynamic-batch-size
    --max-tokens-per-gpu 20480
)

GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.00
    --kl-loss-type low_var_kl
    --entropy-coef 0.00
    --eps-clip 0.2
    --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
    --optimizer adam
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98

    --optimizer-cpu-offload
    --overlap-cpu-optimizer-d2h-h2d
    --use-precision-aware-optimizer
)

WANDB_ARGS=(
    #--use-wandb
    # --wandb-project slime-dev
    # --wandb-group qwen3-30B-A3B-test
    # --wandb-key ${WANDB_KEY}
)

SGLANG_ARGS=(
    --rollout-num-gpus-per-engine 8
    --sglang-mem-fraction-static 0.7
    --sglang-cuda-graph-bs 1 2 4 8 $(seq 16 8 256)
)

MISC_ARGS=(
    # default dropout in megatron is 0.1
    --attention-dropout 0.0
    --hidden-dropout 0.0
    # should be good for model performance
    --accumulate-allreduce-grads-in-fp32
    --attention-softmax-in-fp32
    # need to comment this when using model with MLA
    --attention-backend flash
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265

# Build the runtime environment JSON with proper variable substitution
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Array expansions are quoted so each element is passed as exactly one argument
# (no re-splitting or globbing). DISTRIBUTED_ARGS is not set in this script; it
# expands to nothing unless the sourced model script defines it.
ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json="${RUNTIME_ENV_JSON}" \
    -- python3 train.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node 8 \
    --colocate \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${DISTRIBUTED_ARGS[@]}" \
    "${WANDB_ARGS[@]}" \
    "${PERF_ARGS[@]}" \
    "${EVAL_ARGS[@]}" \
    "${SGLANG_ARGS[@]}" \
    "${MISC_ARGS[@]}"
ccevolve/baselines/thetaevolve/examples/reproducibility/README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reproducibility
2
+
3
+ Reproducibility is a bedrock of scientific progress. By combining the [deterministic inference](https://lmsys.org/blog/2025-09-22-sglang-deterministic/) of SGLang and the deterministic mode of Megatron-LM, slime supports bitwise experiment reproduction.
4
+
5
+ To enable deterministic training, you need to set:
6
+ ```bash
7
+ # sglang config
8
+ --sglang-enable-deterministic-inference
9
+ --sglang-attention-backend flashinfer
10
+
11
+ # megatron config
12
+ --deterministic-mode
13
+ ```
14
+
15
+ And set the following environment variables:
16
+
17
+ ```bash
18
+ "env_vars": {
19
+ ...,
20
+ "NCCL_ALGO": "Ring",
21
+ "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
22
+ "CUBLAS_WORKSPACE_CONFIG": ":4096:8"
23
+ }
24
+ ```
25
+
26
+ You also need to set `--use-slime-router` until the PyPI wheel of sglang-router is updated.
27
+
28
+ Here we provide a script that runs fully deterministic RL training of the Qwen2.5 0.5B model on the GSM8K dataset.
29
+
30
+ For data and checkpoint preparation, please run:
31
+
32
+ ```bash
33
+ # download
34
+ huggingface-cli download --repo-type dataset zhuzilin/gsm8k --local-dir /root/gsm8k
35
+ huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir /root/Qwen2.5-0.5B-Instruct
36
+
37
+ # convert ckpt
38
+ cd slime/
39
+ source scripts/models/qwen2.5-0.5B.sh
40
+ PYTHONPATH=/root/Megatron-LM/ python \
41
+ tools/convert_hf_to_torch_dist.py \
42
+ ${MODEL_ARGS[@]} \
43
+ --hf-checkpoint /root/Qwen2.5-0.5B-Instruct \
44
+ --save /root/Qwen2.5-0.5B-Instruct_torch_dist/
45
+ ```
46
+
47
+ And to run training,
48
+
49
+ ```bash
50
+ bash examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh
51
+ ```
52
+
53
+ For screenshots of the wandb runs, please refer to [pull#370](https://github.com/THUDM/slime/pull/370).
ccevolve/baselines/thetaevolve/examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
# Kill leftovers from a previous run so the script can be re-run.
# NOTE: `pkill -9 python` kills every matching Python process this user may
# signal, not only the ones started by this script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
# Second pass (presumably to catch processes that were still shutting down or
# were respawned during the first pass).
pkill -9 ray
pkill -9 python

# From here on, abort on the first failing command and echo every command.
set -ex

# Prevent Python (and therefore the ray workers) from buffering stdout/stderr.
# The variable the interpreter reads is PYTHONUNBUFFERED; the previously
# exported PYTHONBUFFERED is not recognised by Python and had no effect.
export PYTHONUNBUFFERED=1

# Load MODEL_ARGS for this model from the repository's shared model configs,
# resolved relative to this script so the caller's working directory is irrelevant.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${SCRIPT_DIR}/../../scripts/models/qwen2.5-0.5B.sh"
20
+
21
+ CKPT_ARGS=(
22
+ --hf-checkpoint /root/Qwen2.5-0.5B-Instruct/
23
+ --ref-load /root/Qwen2.5-0.5B-Instruct_torch_dist/
24
+ )
25
+
26
+ ROLLOUT_ARGS=(
27
+ --prompt-data /root/gsm8k/train.parquet
28
+ --input-key messages
29
+ --label-key label
30
+ --apply-chat-template
31
+ --rollout-shuffle
32
+ --rm-type math
33
+ --num-rollout 100
34
+ --rollout-batch-size 32
35
+ --n-samples-per-prompt 8
36
+ --rollout-max-response-len 1024
37
+ --rollout-temperature 0.8
38
+
39
+ --global-batch-size 256
40
+ )
41
+
42
+ EVAL_ARGS=(
43
+ --eval-interval 20
44
+ --eval-prompt-data gsm8k /root/gsm8k/test.parquet
45
+ --n-samples-per-eval-prompt 1
46
+ --eval-max-response-len 1024
47
+ --eval-top-k 1
48
+ )
49
+
50
+ PERF_ARGS=(
51
+ --tensor-model-parallel-size 1
52
+ --sequence-parallel
53
+ --pipeline-model-parallel-size 1
54
+ --context-parallel-size 1
55
+ --expert-model-parallel-size 1
56
+ --expert-tensor-parallel-size 1
57
+
58
+ --use-dynamic-batch-size
59
+ --max-tokens-per-gpu 9216
60
+ )
61
+
62
+ GRPO_ARGS=(
63
+ --advantage-estimator grpo
64
+ --use-kl-loss
65
+ --kl-loss-coef 0.00
66
+ --kl-loss-type low_var_kl
67
+ --kl-coef 0.00
68
+ --entropy-coef 0.00
69
+ --eps-clip 0.2
70
+ --eps-clip-high 0.28
71
+ )
72
+
73
+ OPTIMIZER_ARGS=(
74
+ --optimizer adam
75
+ --lr 1e-6
76
+ --lr-decay-style constant
77
+ --weight-decay 0.1
78
+ --adam-beta1 0.9
79
+ --adam-beta2 0.98
80
+ )
81
+
82
+ WANDB_ARGS=(
83
+ --use-wandb
84
+ --wandb-host https://wandb.ai/
85
+ --wandb-team glm-zero
86
+ --wandb-project slime-dev
87
+ --wandb-group qwen2.5-0.5B-gsm8k-deterministic
88
+ )
89
+
90
+ SGLANG_ARGS=(
91
+ --rollout-num-gpus-per-engine 1
92
+ --sglang-mem-fraction-static 0.7
93
+
94
+ --sglang-enable-deterministic-inference
95
+ --sglang-attention-backend flashinfer
96
+
97
+ --deterministic-mode
98
+ )
99
+
100
+ MISC_ARGS=(
101
+ # default dropout in megatron is 0.1
102
+ --attention-dropout 0.0
103
+ --hidden-dropout 0.0
104
+ # should be good for model performance
105
+ --accumulate-allreduce-grads-in-fp32
106
+ --attention-softmax-in-fp32
107
+ # need to comment this when using model with MLA
108
+ --attention-backend flash
109
+ )
110
+
111
# Launch the ray head node inside the container.
ray start --head --node-ip-address 127.0.0.1 --num-gpus 8 --disable-usage-stats

# Submit the training job. The env_vars below are the ones the deterministic
# setup relies on (NCCL_ALGO, NVTE_ALLOW_NONDETERMINISTIC_ALGO,
# CUBLAS_WORKSPACE_CONFIG).
# Every array is expanded as "${ARR[@]}" so that each element reaches train.py
# as exactly one argument, without word splitting or glob expansion. An unset
# or empty array (e.g. DISTRIBUTED_ARGS, which this script does not define)
# expands to zero arguments.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json='{
     "env_vars": {
        "PYTHONPATH": "/root/Megatron-LM",
        "CUDA_DEVICE_MAX_CONNECTIONS": "1",
        "NCCL_ALGO": "Ring",
        "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
        "CUBLAS_WORKSPACE_CONFIG": ":4096:8"
     }
   }' \
   -- python3 train.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node 8 \
   --colocate \
   --calculate-per-token-loss \
   --use-slime-router \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${DISTRIBUTED_ARGS[@]}" \
   "${WANDB_ARGS[@]}" \
   "${PERF_ARGS[@]}" \
   "${EVAL_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${MISC_ARGS[@]}"
ccevolve/baselines/thetaevolve/examples/retool/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ jinja2>=3.0.0
2
+ psutil>=5.8.0
3
+ pytest>=7.0.0
ccevolve/baselines/thetaevolve/examples/retool/retool_qwen3_4b_rl.sh ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
# Kill leftovers from a previous run so the script can be re-run.
# NOTE: `pkill -9 python` kills every matching Python process this user may
# signal, not only the ones started by this script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
# Second pass (presumably to catch processes that were still shutting down or
# were respawned during the first pass).
pkill -9 ray
pkill -9 python

# From here on, abort on the first failing command and echo every command.
set -ex

# Prevent Python (and therefore the ray workers) from buffering stdout/stderr.
# The variable the interpreter reads is PYTHONUNBUFFERED; the previously
# exported PYTHONBUFFERED is not recognised by Python and had no effect.
export PYTHONUNBUFFERED=1

# Count NVLink entries ("NV<n>") in the GPU topology matrix. HAS_NVLINK is
# later exported to the job as NCCL_NVLS_ENABLE. If nvidia-smi is missing,
# the count is 0 and HAS_NVLINK stays 0.
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

# SCRIPT_DIR is put on the job's PYTHONPATH further down; the model config is
# loaded from the fixed /root/slime checkout.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "/root/slime/scripts/models/qwen3-4B.sh"
28
+
29
+ CKPT_ARGS=(
30
+ --hf-checkpoint /root/font-info/qwen3-4b-sft
31
+ --ref-load /root/font-info/qwen3-4b-sft_torch_dist
32
+ # --load /root/Qwen3-4B_slime/
33
+ --save /root/font-info/qwen3-4b-sft/qwen3-4b-sft-multi-turn/
34
+ --save-interval 20
35
+ --rotary-base 5000000
36
+ )
37
+
38
+ ROLLOUT_ARGS=(
39
+ --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
40
+ --input-key prompt
41
+ --label-key label
42
+ --apply-chat-template
43
+ --rollout-shuffle
44
+ --reward-key score
45
+ --num-rollout 3000
46
+ --rollout-batch-size 32
47
+ --n-samples-per-prompt 8
48
+ --rollout-max-response-len 8192
49
+ --rollout-temperature 0.8
50
+
51
+ --global-batch-size 256
52
+ --balance-data
53
+ )
54
+
55
+ EVAL_ARGS=(
56
+ --eval-interval 20
57
+ --eval-prompt-data aime /root/aime-2024/aime-2024.jsonl
58
+ --n-samples-per-eval-prompt 16
59
+ --eval-max-response-len 16384
60
+ --eval-top-p 0.7
61
+ )
62
+
63
+ PERF_ARGS=(
64
+ --tensor-model-parallel-size 2
65
+ --sequence-parallel
66
+ --pipeline-model-parallel-size 1
67
+ --context-parallel-size 1
68
+ --expert-model-parallel-size 1
69
+ --expert-tensor-parallel-size 1
70
+
71
+ --recompute-granularity full
72
+ --recompute-method uniform
73
+ --recompute-num-layers 1
74
+
75
+ # --micro-batch-size 1
76
+ --use-dynamic-batch-size
77
+ --max-tokens-per-gpu 9216
78
+ )
79
+
80
+ GRPO_ARGS=(
81
+ --advantage-estimator grpo
82
+ --use-kl-loss
83
+ --kl-loss-coef 0.00
84
+ --kl-loss-type low_var_kl
85
+ --entropy-coef 0.00
86
+ --eps-clip 0.2
87
+ --eps-clip-high 0.28
88
+ )
89
+
90
+ OPTIMIZER_ARGS=(
91
+ --optimizer adam
92
+ --lr 1e-6
93
+ --lr-decay-style constant
94
+ --weight-decay 0.1
95
+ --adam-beta1 0.9
96
+ --adam-beta2 0.98
97
+ )
98
+
99
+ WANDB_ARGS=(
100
+ --use-wandb
101
+ --wandb-project slime-dapo
102
+ --wandb-group qwen3-4B-test-multi-turn
103
+ --wandb-key ${WANDB_KEY}
104
+ )
105
+
106
+ SGLANG_ARGS=(
107
+ --rollout-num-gpus-per-engine 2
108
+ --sglang-mem-fraction-static 0.7
109
+ )
110
+
111
+ MISC_ARGS=(
112
+ # default dropout in megatron is 0.1
113
+ --attention-dropout 0.0
114
+ --hidden-dropout 0.0
115
+ # should be good for model performance
116
+ --accumulate-allreduce-grads-in-fp32
117
+ --attention-softmax-in-fp32
118
+ # need to comment this when using model with MLA
119
+ --attention-backend flash
120
+ )
121
+
122
+ CUSTOM_ARGS=(
123
+ --custom-generate-function-path generate_with_retool.generate
124
+ --custom-rm-path generate_with_retool.reward_func
125
+ )
126
+
127
# Launch the ray head node inside the container.
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 4 --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265

# Build the runtime environment JSON with proper variable substitution.
# SCRIPT_DIR is on PYTHONPATH so that the custom generate / reward module
# named in CUSTOM_ARGS can be imported by the job.
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}:/root/slime\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Every array is expanded as "${ARR[@]}" so that each element reaches train.py
# as exactly one argument, without word splitting or glob expansion. An unset
# or empty array (e.g. DISTRIBUTED_ARGS, which this script does not define)
# expands to zero arguments.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json="${RUNTIME_ENV_JSON}" \
   -- python3 train.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node 4 \
   --colocate \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${DISTRIBUTED_ARGS[@]}" \
   "${WANDB_ARGS[@]}" \
   "${PERF_ARGS[@]}" \
   "${EVAL_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${MISC_ARGS[@]}" \
   "${CUSTOM_ARGS[@]}"
ccevolve/baselines/thetaevolve/examples/retool/sft_data_processing.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+
3
+ ds = load_dataset("JoeYing/ReTool-SFT")["train"]
4
+
5
+
6
def convert(sample):
    """Reduce one dataset record to a clean ``messages`` list.

    Every turn in ``sample["messages"]`` is rebuilt with only its ``role`` and
    ``content`` keys; any other key on the turn is dropped.

    Returns:
        ``{"messages": [...]}`` with one ``{"role", "content"}`` dict per turn.

    Raises:
        ValueError: if a turn's role is not "user", "assistant" or "system".
    """
    known_roles = ("user", "assistant", "system")

    cleaned = []
    for turn in sample["messages"]:
        role = turn["role"]
        if role not in known_roles:
            raise ValueError(f"Unknown role: {role}")
        cleaned.append({"role": role, "content": turn["content"]})

    return {"messages": cleaned}
28
+
29
+
30
+ ds = ds.map(convert)
31
+ ds.to_parquet("./data/retool/ReTool-SFT.parquet")
ccevolve/baselines/thetaevolve/examples/search-r1/README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Search-R1 lite
2
+
3
+ This is a minimal reproduction of [Search-R1](https://github.com/PeterGriffinJin/Search-R1) and an example of using multi-turn conversation and tool-calling in slime.
4
+
5
+ ## Environment Setup
6
+
7
+ Use the `slimerl/slime:latest` image and initialize the environment required for Search-R1:
8
+
9
+ ```bash
10
+ cd /root/
11
+ git clone https://github.com/THUDM/slime.git
12
+ cd slime/ && pip install -e .
13
+ # for Search R1
14
+ pip install chardet
15
+ ```
16
+
17
+ Please refer to the script provided in Search-R1 to download the data:
18
+
19
+ ```bash
20
+ git clone https://github.com/PeterGriffinJin/Search-R1.git
21
+ cd Search-R1/
22
+ python scripts/data_process/nq_search.py --local_dir /root/nq_search/
23
+ ```
24
+
25
+ Initialize the Qwen2.5-3B model:
26
+
27
+ ```bash
28
+ # hf checkpoint
29
+ huggingface-cli download Qwen/Qwen2.5-3B --local-dir /root/Qwen2.5-3B
30
+
31
+ # mcore checkpoint
32
+ cd /root/slime
33
+ source scripts/models/qwen2.5-3B.sh
34
+ PYTHONPATH=/root/Megatron-LM python tools/convert_hf_to_torch_dist.py \
35
+ ${MODEL_ARGS[@]} \
36
+ --hf-checkpoint /root/Qwen2.5-3B \
37
+ --save /root/Qwen2.5-3B_torch_dist
38
+ ```
39
+
40
+ ## Running the Script
41
+
42
+ You need to configure your serper.dev API key in `generate_with_search.py`:
43
+
44
+ ```python
45
+ SEARCH_R1_CONFIGS = {
46
+ "max_turns": 3,
47
+ "topk": 3,
48
+ "google_api_key": "YOUR_API_KEY", # Replace with your actual API key
49
+ "snippet_only": True, # Set to True to only return snippets
50
+ "proxy": None, # Set to your proxy if needed
51
+ "search_concurrency": 256,
52
+ # rm
53
+ "format_score": 0.2,
54
+ }
55
+ ```
56
+
57
+ And run:
58
+
59
+ ```bash
60
+ cd slime/
61
+ bash examples/search-r1/run_qwen2.5_3B.sh
62
+ ```
63
+
64
+ ## Code Structure
65
+
66
+ To implement multi-turn conversation + tool-calling in slime, you only need to implement a custom data generation function and a reward model for the task. These correspond to the following 2 configuration items in the startup script:
67
+
68
+ ```bash
69
+ CUSTOM_ARGS=(
70
+ --custom-generate-function-path generate_with_search.generate
71
+ --custom-rm-path generate_with_search.reward_func
72
+ )
73
+ ```
74
+
75
+ These are the `generate` and `reward_func` functions in `generate_with_search.py`.
ccevolve/baselines/thetaevolve/examples/search-r1/README_zh.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Search-R1 lite
2
+
3
+ [English](./README.md)
4
+
5
+ 这里是对 [Search-R1](https://github.com/PeterGriffinJin/Search-R1) 的一个简单复现,同时也是一个在 slime 中使用多轮对话和工具调用的样例。
6
+
7
+ ## 配置环境
8
+
9
+ 使用 `slimerl/slime:latest` 镜像,并初始化 Search-R1 需要的环境:
10
+
11
+ ```bash
12
+ cd /root/
13
+ git clone https://github.com/THUDM/slime.git
14
+ cd slime/ && pip install -e .
15
+ # for Search R1
16
+ pip install chardet
17
+ ```
18
+
19
+ 请参照 Search-R1 中提供的脚本下载数据:
20
+
21
+ ```bash
22
+ git clone https://github.com/PeterGriffinJin/Search-R1.git
23
+ cd Search-R1/
24
+ python scripts/data_process/nq_search.py --local_dir /root/nq_search/
25
+ ```
26
+
27
+ 初始化 Qwen2.5-3B 模型:
28
+
29
+ ```bash
30
+ # hf checkpoint
31
+ huggingface-cli download Qwen/Qwen2.5-3B --local-dir /root/Qwen2.5-3B
32
+
33
+ # mcore checkpoint
34
+ cd /root/slime
35
+ source scripts/models/qwen2.5-3B.sh
36
+ PYTHONPATH=/root/Megatron-LM python tools/convert_hf_to_torch_dist.py \
37
+ ${MODEL_ARGS[@]} \
38
+ --hf-checkpoint /root/Qwen2.5-3B \
39
+ --save /root/Qwen2.5-3B_torch_dist
40
+ ```
41
+
42
+ ## 运行脚本
43
+
44
+ 需要将你的 serper.dev API 配置在 `generate_with_search.py` 中:
45
+
46
+ ```python
47
+ SEARCH_R1_CONFIGS = {
48
+ "max_turns": 3,
49
+ "topk": 3,
50
+ "google_api_key": "YOUR_API_KEY", # Replace with your actual API key
51
+ "snippet_only": True, # Set to True to only return snippets
52
+ "proxy": None, # Set to your proxy if needed
53
+ "search_concurrency": 256,
54
+ # rm
55
+ "format_score": 0.2,
56
+ }
57
+ ```
58
+
59
+ 并运行:
60
+
61
+ ```bash
62
+ cd slime/
63
+ bash examples/search-r1/run_qwen2.5_3B.sh
64
+ ```
65
+
66
+ ## 代码结构
67
+
68
+ 为了实现多轮 + 工具调用,在 slime 中只需要实现一个自定义的数据生成函数,以及一个任务所需的 reward model,对应启动脚本中的这 2 个配置项:
69
+
70
+ ```bash
71
+ CUSTOM_ARGS=(
72
+ --custom-generate-function-path generate_with_search.generate
73
+ --custom-rm-path generate_with_search.reward_func
74
+ )
75
+ ```
76
+
77
+ 也就是 `generate_with_search.py` 中的 `generate` 和 `reward_func` 两个函数。
ccevolve/baselines/thetaevolve/examples/search-r1/generate_with_search.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapted from https://github.com/PeterGriffinJin/Search-R1/blob/ceee7b89655ed52f205b9beb98e1190c3eedcfb0/search_r1/llm_agent/generation.py
2
+ import asyncio
3
+ import re
4
+
5
+ from google_search_server import google_search
6
+ from qa_em_format import compute_score_em
7
+
8
+ from slime.rollout.sglang_rollout import GenerateState
9
+ from slime.utils.http_utils import post
10
+ from slime.utils.types import Sample
11
+
12
+ SEARCH_R1_CONFIGS = {
13
+ "max_turns": 3,
14
+ "topk": 3,
15
+ "google_api_key": "YOUR_API_KEY", # Replace with your actual API key
16
+ "snippet_only": True, # Set to True to only return snippets
17
+ "proxy": None, # Set to your proxy if needed
18
+ "search_concurrency": 256,
19
+ # rm
20
+ "format_score": 0.2,
21
+ }
22
+
23
+
24
+ SEMAPHORE = asyncio.Semaphore(SEARCH_R1_CONFIGS["search_concurrency"])
25
+
26
+
27
+ def _passages2string(retrieval_result):
28
+ format_reference = ""
29
+ for idx, doc_item in enumerate(retrieval_result):
30
+
31
+ content = doc_item["document"]["contents"]
32
+ title = content.split("\n")[0]
33
+ text = "\n".join(content.split("\n")[1:])
34
+ format_reference += f"Doc {idx+1}(Title: {title}) {text}\n"
35
+
36
+ return format_reference
37
+
38
+
39
async def search(query: str) -> str:
    """Run one web search for ``query`` and return the hits as formatted text.

    The API key, result count, snippet mode and proxy are all read from
    ``SEARCH_R1_CONFIGS``; the raw hits are rendered by ``_passages2string``.
    """
    cfg = SEARCH_R1_CONFIGS
    hits = await google_search(
        cfg["google_api_key"],
        query,
        cfg["topk"],
        snippet_only=cfg["snippet_only"],
        proxy=cfg["proxy"],
    )
    return _passages2string(hits)
48
+
49
+
50
def postprocess_responses(resp: str) -> str:
    """Truncate a model response right after its first closing action tag.

    ``</search>`` takes priority: if it occurs anywhere in ``resp`` the text is
    cut after its first occurrence, even when ``</answer>`` appears earlier.
    Otherwise the text is cut after the first ``</answer>``. A response with
    neither tag is returned unchanged.
    """
    for closing_tag in ("</search>", "</answer>"):
        if closing_tag in resp:
            return resp.split(closing_tag)[0] + closing_tag
    return resp
56
+
57
+
58
def postprocess_predictions(prediction: str):
    """Extract the first ``<search>`` or ``<answer>`` action from ``prediction``.

    Returns:
        ``(action, content)`` where ``action`` is ``"search"`` or ``"answer"``
        and ``content`` is the stripped text between the matching tags, or
        ``(None, "")`` when no well-formed tag pair is present.
    """
    found = re.search(r"<(search|answer)>(.*?)</\1>", prediction, re.DOTALL)
    if found is None:
        return None, ""

    action, body = found.groups()
    return action, body.strip()  # only the text inside the tags
69
+
70
+
71
async def execute_predictions(prediction: str) -> tuple[str, bool]:
    """Execute the action contained in one model turn.

    Args:
        prediction: the (post-processed) text the model produced for this turn.

    Returns:
        ``(next_obs, done)``:
        * search action: the search results wrapped in ``<information>`` tags,
          ``done`` is False;
        * answer action: an empty observation, ``done`` is True;
        * anything else: a corrective message asking the model to retry,
          ``done`` is False.
    """
    action, content = postprocess_predictions(prediction)

    if action == "search":
        # SEMAPHORE bounds the number of searches in flight at the same time.
        async with SEMAPHORE:
            search_results = await search(content)
        next_obs = f"\n\n<information>{search_results.strip()}</information>\n\n"
        return next_obs, False

    if action == "answer":
        return "", True

    # No valid action: remind the model of the expected format. The
    # backslash-newline pairs join the three source lines into one sentence run.
    next_obs = "\nMy previous action is invalid. \
If I want to search, I should put the query between <search> and </search>. \
If I want to give the final answer, I should put the answer between <answer> and </answer>. Let me try again.\n"
    return next_obs, False
90
+
91
+
92
+ async def generate(args, sample: Sample, sampling_params) -> Sample:
93
+ assert not args.partial_rollout, f"Partial rollout is not supported for this function at the moment."
94
+
95
+ state = GenerateState(args)
96
+
97
+ url = f"http://{args.sglang_router_ip}:{args.sglang_router_port}/generate"
98
+
99
+ # Handle partial rollout samples: continue generation from existing response
100
+ prompt = sample.prompt
101
+ prompt_tokens_ids = state.tokenizer(sample.prompt, add_special_tokens=False)["input_ids"]
102
+ response = ""
103
+ response_token_ids = []
104
+ loss_mask = []
105
+ for _ in range(SEARCH_R1_CONFIGS["max_turns"]):
106
+ payload = {
107
+ "text": prompt + response,
108
+ "sampling_params": sampling_params,
109
+ }
110
+ output = await post(url, payload)
111
+
112
+ # abort
113
+ if output["meta_info"]["finish_reason"]["type"] == "abort":
114
+ sample.status = Sample.Status.ABORTED
115
+ return sample
116
+
117
+ cur_response = output["text"]
118
+ cur_response = postprocess_responses(cur_response)
119
+
120
+ cur_response_token_ids = state.tokenizer(cur_response, add_special_tokens=False)["input_ids"]
121
+ response += cur_response
122
+ response_token_ids += cur_response_token_ids
123
+ loss_mask += [1] * len(cur_response_token_ids)
124
+
125
+ if output["meta_info"]["finish_reason"]["type"] == "length":
126
+ break
127
+
128
+ next_obs, done = await execute_predictions(cur_response)
129
+ if done:
130
+ break
131
+
132
+ assert next_obs != "", "Next observation should not be empty."
133
+ obs_tokens_ids = state.tokenizer(next_obs, add_special_tokens=False)["input_ids"]
134
+ response += next_obs
135
+ response_token_ids += obs_tokens_ids
136
+ loss_mask += [0] * len(obs_tokens_ids)
137
+
138
+ sample.tokens = prompt_tokens_ids + response_token_ids
139
+ sample.response_length = len(response_token_ids)
140
+ sample.response = response
141
+ sample.loss_mask = loss_mask
142
+ match output["meta_info"]["finish_reason"]["type"]:
143
+ case "length":
144
+ sample.status = Sample.Status.TRUNCATED
145
+ case "abort":
146
+ sample.status = Sample.Status.ABORTED
147
+ case "stop":
148
+ sample.status = Sample.Status.COMPLETED
149
+
150
+ return sample
151
+
152
+
153
async def reward_func(args, sample, **kwargs):
    """Score a finished rollout with the exact-match QA reward.

    Args:
        args: the run arguments (not used here).
        sample: the sample to evaluate; its prompt and response are
            concatenated and scored against ``sample.label["ground_truth"]``.
        **kwargs: accepted and ignored.

    Returns:
        The value returned by ``compute_score_em``.

    Raises:
        TypeError: if ``sample`` is not a ``Sample``.
    """
    if not isinstance(sample, Sample):
        raise TypeError("Sample must be an instance of Sample class.")

    full_text = sample.prompt + sample.response
    gold = sample.label["ground_truth"]
    return compute_score_em(
        solution_str=full_text,
        ground_truth=gold,
        format_score=SEARCH_R1_CONFIGS["format_score"],
    )
ccevolve/baselines/thetaevolve/examples/search-r1/google_search_server.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import random
4
+ import re
5
+ from typing import Dict, List
6
+
7
+ import aiohttp
8
+ import chardet
9
+
10
+
11
+ # --- Utilities ---
12
def parse_snippet(snippet: str) -> List[str]:
    """Split a search snippet on ``...`` and keep the substantial segments.

    A segment is kept, stripped of surrounding whitespace, only when it
    contains more than five whitespace-separated words.
    """
    kept = []
    for piece in snippet.split("..."):
        piece = piece.strip()
        if len(piece.split()) > 5:
            kept.append(piece)
    return kept
15
+
16
+
17
def sanitize_search_query(query: str) -> str:
    """Return ``query`` reduced to word characters separated by single spaces.

    Three substitutions are applied in order, each replacing its match with a
    space, and the result is stripped:
      1. every character that is neither a word character nor whitespace;
      2. tab, carriage return, form feed, vertical tab and newline;
      3. every run of whitespace (collapsing duplicates).
    This is a basic sanitiser; more characters or patterns may be needed.
    """
    cleaned = query
    for pattern in (r"[^\w\s]", r"[\t\r\f\v\n]", r"\s+"):
        cleaned = re.sub(pattern, " ", cleaned)
    return cleaned.strip()
29
+
30
+
31
def filter_links(search_results: List[Dict]) -> List[str]:
    """Collect the links that look like HTML pages from raw search results.

    Each result may carry an ``"items"`` list. An item is skipped when it has
    a ``"mime"`` key; otherwise its ``"link"`` is kept when the extension
    reported by ``os.path.splitext`` on the whole link is empty, ``.html``,
    ``.htm`` or ``.shtml``.
    """
    page_exts = ["", ".html", ".htm", ".shtml"]
    return [
        item["link"]
        for result in search_results
        for item in result.get("items", [])
        if "mime" not in item and os.path.splitext(item["link"])[1] in page_exts
    ]
41
+
42
+
43
async def fetch(session: aiohttp.ClientSession, url: str, semaphore: asyncio.Semaphore) -> str:
    """Download ``url`` and return its body decoded to text.

    An empty ``url`` yields ``""`` without any request. The request is sent
    with a randomly chosen User-Agent while holding ``semaphore``. The body is
    decoded with the encoding chardet detects (UTF-8 when none is detected),
    ignoring undecodable bytes. Client errors and timeouts yield ``""``.
    """
    if url == "":
        return ""

    agent = random.choice(
        [
            "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P)...",
            "Mozilla/5.0 AppleWebKit/537.36...",
            "Mozilla/5.0 (compatible; Googlebot/2.1; +https://www.google.com/bot.html)",
        ]
    )
    request_headers = {"User-Agent": agent}

    async with semaphore:
        try:
            async with session.get(url, headers=request_headers) as response:
                payload = await response.read()
                charset = chardet.detect(payload)["encoding"] or "utf-8"
                return payload.decode(charset, errors="ignore")
        except (aiohttp.ClientError, asyncio.TimeoutError):
            return ""
62
+
63
+
64
async def fetch_all(urls: List[str], limit: int = 8) -> List[str]:
    """Fetch all ``urls`` concurrently and return their bodies in input order.

    ``limit`` bounds both the number of fetches in flight (via a semaphore)
    and the connections per host. The session uses a 5 second total timeout.
    """
    gate = asyncio.Semaphore(limit)
    session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=5),
        connector=aiohttp.TCPConnector(limit_per_host=limit, force_close=True),
    )
    async with session:
        pending = [fetch(session, url, gate) for url in urls]
        return await asyncio.gather(*pending)
72
+
73
+
74
def collect_context(snippet: str, doc: str) -> str:
    """Return the paragraphs of ``doc`` that contain segments of ``snippet``.

    The snippet is split into segments by ``parse_snippet``. Each segment is
    searched for in ``doc`` with newlines treated as spaces; on a hit, the
    surrounding text up to the nearest newline on either side is taken as the
    paragraph. Paragraphs are de-duplicated, kept in first-seen order and
    joined with newlines. Segments that are not found are skipped.
    """
    # Replacing "\n" with " " keeps the length unchanged, so an offset found in
    # the flattened text is also a valid offset into ``doc``. Computed once
    # instead of once per segment.
    flat_doc = doc.replace("\n", " ")
    ctx_paras = []

    for s in parse_snippet(snippet):
        pos = flat_doc.find(s)
        if pos == -1:
            continue
        # Walk left to the previous newline (or the start of the document).
        sta = pos
        while sta > 0 and doc[sta] != "\n":
            sta -= 1
        # Walk right to the next newline (or the end of the document).
        end = pos + len(s)
        while end < len(doc) and doc[end] != "\n":
            end += 1
        para = doc[sta:end].strip()
        if para not in ctx_paras:
            ctx_paras.append(para)

    return "\n".join(ctx_paras)
93
+
94
+
95
async def google_search(api_key, query, top_k=5, timeout: int = 60, proxy=None, snippet_only=False) -> List[Dict]:
    """Query the serper.dev search endpoint and return one context per hit.

    Args:
        api_key: sent as the ``X-API-KEY`` header.
        query: the search query.
        top_k: number of results requested.
        timeout: total timeout in seconds for the search request.
        proxy: optional proxy URL for the search request; falsy means none.
        snippet_only: when True, contexts are built from the result snippets
            only; otherwise every result page is downloaded and the paragraphs
            containing the snippet are extracted.

    Returns:
        A list of ``{"document": {"contents": '"<title>"\\n<context>'}}``
        dicts. Hits with neither a title nor a context are dropped; a missing
        title or context is replaced by a placeholder sentence.

    Raises:
        aiohttp.ClientResponseError: if the search endpoint answers with an
            error status.
    """
    timeout_obj = aiohttp.ClientTimeout(total=timeout)
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://google.serper.dev/search",
            json={
                "q": query,
                "num": top_k,
                "gl": "us",
                "hl": "en",
            },
            headers={
                "Content-Type": "application/json",
                "X-API-KEY": api_key,
            },
            timeout=timeout_obj,
            # Passed per request rather than to the ClientSession constructor,
            # which is presumably not accepted by every aiohttp release.
            proxy=proxy or None,
        ) as resp:
            resp.raise_for_status()
            response = await resp.json()
            items = response.get("organic", [])

    if snippet_only:
        bodies = [" ".join(parse_snippet(item.get("snippet", ""))) for item in items]
    else:
        # Exactly one URL per item (empty when the hit has no link, which
        # fetch() turns into an empty page) so that pages stay aligned with
        # items. Filtering link-less hits out here would shift every later page.
        pages = await fetch_all([item.get("link") or "" for item in items])
        bodies = [collect_context(item.get("snippet", ""), page) for item, page in zip(items, pages)]

    contexts = []
    for item, context in zip(items, bodies):
        title = item.get("title", "")
        if title != "" or context != "":
            title = "No title." if not title else title
            context = "No snippet available." if not context else context
            contexts.append(
                {
                    "document": {"contents": f'"{title}"\n{context}'},
                }
            )

    return contexts
ccevolve/baselines/thetaevolve/examples/search-r1/qa_em_format.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapted from https://github.com/PeterGriffinJin/Search-R1/blob/ceee7b89655ed52f205b9beb98e1190c3eedcfb0/verl/utils/reward_score/qa_em_format.py
2
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import random
17
+ import re
18
+ import string
19
+
20
+
21
def normalize_answer(s):
    """Normalise an answer string for exact-match comparison.

    Steps, in order: lower-case, delete ASCII punctuation, replace the
    stand-alone articles "a", "an" and "the" with a space, and collapse all
    whitespace runs to single spaces (also trimming both ends).
    """
    lowered = s.lower()
    punctuation = set(string.punctuation)
    unpunctuated = "".join(ch for ch in lowered if ch not in punctuation)
    without_articles = re.sub(r"\b(a|an|the)\b", " ", unpunctuated)
    return " ".join(without_articles.split())
36
+
37
+
38
def em_check(prediction, golden_answers):
    """Return 1 if ``prediction`` exactly matches any golden answer, else 0.

    Both sides are compared after ``normalize_answer``. A single string passed
    as ``golden_answers`` is treated as a one-element list.
    """
    candidates = [golden_answers] if isinstance(golden_answers, str) else golden_answers
    target = normalize_answer(prediction)
    return int(any(normalize_answer(gold) == target for gold in candidates))
49
+
50
+
51
def is_valid_sequence(text):
    """Check that the assistant part of ``text`` follows the expected tag grammar.

    Only the text after the first ``<|im_start|>assistant`` marker (and any
    whitespace following it) is inspected. It must consist of
    ``<think>...</think>`` blocks, each followed either by a
    ``<search>...</search>`` + ``<information>...</information>`` pair (after
    which another think block must follow) or by the final
    ``<answer>...</answer>``. Nothing but whitespace may appear between blocks
    or after the answer.

    Returns:
        ``(True, "Valid sequence format")`` on success, otherwise
        ``(False, reason)``.
    """
    marker = re.search(r"<\|im_start\|>assistant\s*", text)
    if marker is None:
        return False, "Missing assistant marker"

    content = text[marker.end():]

    # Cheap pre-check: every tag must be opened as often as it is closed.
    for tag in ("think", "search", "information", "answer"):
        opening_count = content.count(f"<{tag}>")
        closing_count = content.count(f"</{tag}>")
        if opening_count != closing_count:
            return False, f"Mismatch in {tag} tags: {opening_count} opening vs {closing_count} closing tags"

    # tag -> (states the tag may appear in, state entered by the tag)
    # start -> think -> search -> information -> think -> ... -> answer -> end
    transitions = {
        "<think>": (("start", "information"), "in_think"),
        "</think>": (("in_think",), "after_think"),
        "<search>": (("after_think",), "in_search"),
        "</search>": (("in_search",), "after_search"),
        "<information>": (("after_search",), "in_information"),
        "</information>": (("in_information",), "information"),
        "<answer>": (("after_think",), "in_answer"),
        "</answer>": (("in_answer",), "end"),
    }
    inside_tag = ("in_think", "in_search", "in_information", "in_answer")
    between_tags = ("start", "after_think", "after_search", "information")

    state = "start"
    # The capturing group makes re.split return the tags themselves as parts.
    for part in re.split(r"(</?(?:think|search|information|answer)>)", content):
        if not part.strip():
            continue  # empty or whitespace-only parts never matter

        rule = transitions.get(part)
        if rule is not None:
            allowed_from, next_state = rule
            if state not in allowed_from:
                return False, f"Unexpected tag {part} in state {state}"
            state = next_state
        elif state in inside_tag:
            continue  # free text is allowed inside any tag pair
        elif state in between_tags:
            return False, f"Unexpected content '{part.strip()}' between tags (state: {state})"
        else:
            return False, f"Unexpected content in state {state}"

    if state != "end":
        return False, f"Incomplete sequence, ended in state {state}"

    return True, "Valid sequence format"
124
+
125
+
126
def extract_solution(solution_str):
    """Return the stripped content of the last ``<answer>`` block, if trusted.

    The content is only returned when ``solution_str`` contains at least two
    ``<answer>...</answer>`` blocks; with zero or one block the result is
    ``None``. (Presumably the first block is expected to come from an example
    in the prompt; confirm against the prompt template.)
    """
    answers = re.findall(r"<answer>(.*?)</answer>", solution_str, re.DOTALL)
    if len(answers) < 2:
        return None
    return answers[-1].strip()
139
+
140
+
141
def extract_information_blocks(text: str) -> list[str]:
    """Return the stripped content of every ``<information>`` block in ``text``.

    Blocks are returned in order of appearance; content may span lines.
    """
    blocks = []
    for found in re.finditer(r"<information>(.*?)</information>", text, re.DOTALL):
        blocks.append(found.group(1).strip())
    return blocks
145
+
146
+
147
def is_retrieval_correct(text: str, golden_answers: list[str]) -> bool:
    """Tell whether any retrieved ``<information>`` block contains a golden answer.

    Both the block and the answer are passed through ``normalize_answer``
    before the substring test.

    Returns:
        True as soon as one normalised golden answer occurs inside one
        normalised information block, False otherwise (including when there
        are no blocks or no golden answers).
    """
    # Normalise each block once instead of once per golden answer.
    normalized_blocks = [normalize_answer(block) for block in extract_information_blocks(text)]
    return any(
        normalize_answer(golden_answer) in block
        for block in normalized_blocks
        for golden_answer in golden_answers
    )
154
+
155
+
156
def compute_score_em(
    solution_str,
    ground_truth,
    method="strict",
    structure_format_score=0,
    final_format_score=0,
    retrieval_score=0,
    format_score=0,
    score=1.0,
):
    """The scoring function for exact match (EM).

    Args:
        solution_str: the solution text
        ground_truth: the ground truth; its ``"target"`` entry holds the
            golden answers
        method: accepted but not used by this function
        structure_format_score: credit for a valid tag structure
        final_format_score: credit when an answer was extracted but is wrong
            and the structure is invalid
        retrieval_score: extra credit when a retrieved block contains a
            golden answer
        format_score: accepted but not used by this function
        score: the score for the correct answer

    Returns:
        * correct answer: ``score`` (valid structure) or
          ``score - structure_format_score`` (invalid structure);
        * otherwise, valid structure: ``structure_format_score`` plus
          ``retrieval_score`` when retrieval was correct;
        * otherwise: 0 when no answer was extracted, else
          ``final_format_score``.
    """
    is_valid_format, _ = is_valid_sequence(solution_str)
    # Retrieval is only credited for well-formed sequences.
    retrieval_correct = is_valid_format and is_retrieval_correct(solution_str, ground_truth["target"])
    answer = extract_solution(solution_str=solution_str)

    # Log roughly one in 64 samples for spot checks.
    if random.randint(1, 64) == 1:
        print(f"--------------------------------")
        print(f"Golden answers: {ground_truth['target']}")
        print(f"Extracted answer: {answer}")
        print(f"Solution string: {solution_str}")

    if answer is not None and em_check(answer, ground_truth["target"]):
        return score if is_valid_format else score - structure_format_score

    if is_valid_format:
        if retrieval_correct:
            return structure_format_score + retrieval_score
        return structure_format_score

    return 0 if answer is None else final_format_score
ccevolve/baselines/thetaevolve/examples/search-r1/run_qwen2.5_3B.sh ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Launch script for the search-r1 example on Qwen2.5-3B.
#
# Steps: kill leftovers from a previous run, assemble the argument groups,
# start a local Ray head node, and submit train.py as a Ray job.

# Clean up processes left over from a previous run so the task can be rerun.
# These commands come before `set -e` on purpose: pkill exits non-zero when
# nothing matches, and that must not abort the script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

# Prevent Python (and so the Ray-launched processes) from buffering
# stdout/stderr. CPython reads PYTHONUNBUFFERED; the previously exported
# PYTHONBUFFERED is not a recognised variable and had no effect.
export PYTHONUNBUFFERED=1

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# NOTE(review): MODEL_ARGS is presumably defined by this sourced file - confirm.
source "${SCRIPT_DIR}/../../scripts/models/qwen2.5-3B.sh"

# Checkpoint locations and save cadence.
CKPT_ARGS=(
   --hf-checkpoint /root/Qwen2.5-3B/
   --ref-load /root/Qwen2.5-3B_torch_dist/
   --load /root/Qwen2.5-3B_slime/
   --save /root/Qwen2.5-3B_slime/
   --save-interval 20
)

# Prompt data and rollout sampling settings.
ROLLOUT_ARGS=(
   --prompt-data /root/nq_search/train.parquet
   --input-key prompt
   --label-key reward_model
   --apply-chat-template
   --rollout-shuffle
   --num-rollout 3000
   --rollout-batch-size 32
   --n-samples-per-prompt 8
   --rollout-max-response-len 512
   --rollout-temperature 0.8

   --global-batch-size 256
   --balance-data
)

# Parallelism, activation recomputation and batching.
PERF_ARGS=(
   --tensor-model-parallel-size 2
   --sequence-parallel
   --pipeline-model-parallel-size 1
   --context-parallel-size 1
   --expert-model-parallel-size 1
   --expert-tensor-parallel-size 1

   --recompute-granularity full
   --recompute-method uniform
   --recompute-num-layers 1

   # --micro-batch-size 1
   --use-dynamic-batch-size
   --max-tokens-per-gpu 9216
)

# GRPO algorithm settings.
GRPO_ARGS=(
   --advantage-estimator grpo
   --use-kl-loss
   --kl-loss-coef 0.00
   --kl-loss-type low_var_kl
   --entropy-coef 0.00
   --eps-clip 0.2
   --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
   --optimizer adam
   --lr 1e-6
   --lr-decay-style constant
   --weight-decay 0.1
   --adam-beta1 0.9
   --adam-beta2 0.98
)

# Weights & Biases logging is disabled; uncomment to enable.
WANDB_ARGS=(
   # --use-wandb
   # --wandb-project slime-dev
   # --wandb-group search-r1_qwen2.5-3B-test
   # --wandb-key ${WANDB_KEY}
)

SGLANG_ARGS=(
   --rollout-num-gpus-per-engine 2
   --sglang-mem-fraction-static 0.7
)

MISC_ARGS=(
   # default dropout in megatron is 0.1
   --attention-dropout 0.0
   --hidden-dropout 0.0
   # should be good for model performance
   --accumulate-allreduce-grads-in-fp32
   --attention-softmax-in-fp32
   # need to comment this when using model with MLA
   --attention-backend flash
)

# Custom generation and reward hooks, resolved through PYTHONPATH below.
CUSTOM_ARGS=(
   --custom-generate-function-path generate_with_search.generate
   --custom-rm-path generate_with_search.reward_func
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats

RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/:${SCRIPT_DIR}\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\"
  }
}"

# Array expansions are quoted so each element is passed as exactly one
# argument, with no word splitting or globbing. Empty or unset arrays expand
# to nothing (`set -u` is not enabled).
# NOTE(review): DISTRIBUTED_ARGS is not defined in this script - confirm
# whether the sourced model file is expected to provide it.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json="${RUNTIME_ENV_JSON}" \
   -- python3 train.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node 4 \
   --rollout-num-gpus 4 \
   --colocate \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${DISTRIBUTED_ARGS[@]}" \
   "${WANDB_ARGS[@]}" \
   "${PERF_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${MISC_ARGS[@]}" \
   "${CUSTOM_ARGS[@]}"
ccevolve/baselines/thetaevolve/openevolve_adapted/.gitignore ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ results/
2
+ examples/lm_eval/prompts/system_message.txt
3
+ examples/lm_eval/prompts/evaluator_system_message.txt
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+ **/.ipynb_checkpoints/
28
+ # Virtual environments
29
+ venv/
30
+ env/
31
+ ENV/
32
+
33
+ # IDE
34
+ .idea/
35
+ .vscode/
36
+ *.swp
37
+ *.swo
38
+
39
+ # Output files
40
+ examples/*/output/
41
+ openevolve_output*/
42
+ *.log
43
+
44
+ # Test cache
45
+ .pytest_cache/
46
+ .coverage
47
+ htmlcov/
48
+
49
+ # Misc
50
+ .DS_Store
51
+ .venv
52
+
53
+ # For SR
54
+ secrets.yaml
55
+ problems
56
+
57
+ # Artifacts from running the evaluation
58
+ artifacts/
59
+
60
+ # Ignore all directories named like stress_test_output_xxx/
61
+ stress_test_output_*/