diff --git a/.gemini/config.yaml b/.gemini/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66015ad30ed6768e06dceb91f11004be8a74bb04
--- /dev/null
+++ b/.gemini/config.yaml
@@ -0,0 +1,10 @@
+have_fun: false
+code_review:
+ disable: false
+ comment_severity_threshold: HIGH
+ max_review_comments: -1
+ pull_request_opened:
+ help: false
+ summary: false
+ code_review: true
+ignore_patterns: []
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000000000000000000000000000000000000..649ba3ca862e8e47a92b932b337fe189fbd14e7c
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,13 @@
+# Local usage: git config blame.ignoreRevsFile .git-blame-ignore-revs
+
+# [dev] feat: immigrate from yapf & pylint to ruff based on pre-commit
+# Changed 268 files, +10k/-9k lines. This is the biggest formatter change.
+b00f77d8559b48d57a33c0132a5ba1c81891a536
+
+# [ci] refactor: reduce ruff line-length from 300 to 120
+# Changed 238 files, +6k/-1k lines. Global formatting change.
+00a10a8ef389556f957a2f36132b2358fd6a109f
+
+# [Lint] fix: linting errors in all files
+# Changed 179 files, +1k/-3k lines. Global lint fix.
+8e5ad4688a13de81727c014a3c2e2fb26324bc20
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000000000000000000000000000000000000..2ab905806c461d3fc4cca460aaff7c0c93cb472d
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,27 @@
+/docs @eric-haibin-lin @zhaochenyang20 @hongpeng-guo
+/docs/amd_tutorial @yushengsu-thu
+/docs/slang_multiturn @zhaochenyang20 @SwordFaith
+/docs/ascend_tutorial @FightingZhen
+
+/third_party/sglang @zhaochenyang20 @SwordFaith
+/third_party/vllm @PeterSH6 @wuxibin89
+
+/examples/grpo_trainer @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
+
+/verl/single_controller @zw0610 @wuxibin89 @hongpeng-guo
+/verl/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
+/verl/models/mcore @ISEEKYAN @vermouth1992
+/verl/models/transformers @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
+/verl/workers/engine @eric-haibin-lin @vermouth1992 @ZihengJiang
+/verl/workers/roles @eric-haibin-lin @vermouth1992 @ZihengJiang
+/verl/workers/engine/fsdp @eric-haibin-lin @vermouth1992 @ZihengJiang
+/verl/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq
+/verl/workers/rollout/sglang_rollout @zhaochenyang20 @SwordFaith @chenhaiq
+/verl/workers/actor/megatron_actor.py @ISEEKYAN @vermouth1992
+/verl/workers/critic/megatron_critic.py @ISEEKYAN @vermouth1992
+/verl/workers/megatron_workers.py @ISEEKYAN @vermouth1992
+/verl/experimental @wuxibin89 @ArronHZG
+
+/tests/single_controller @zw0610 @wuxibin89
+/tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
+/tests/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 0000000000000000000000000000000000000000..67341f4139d9a5348f3887242b7c6accf10abc7b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,65 @@
+# modified from https://github.com/huggingface/transformers/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml?plain=1
+name: "\U0001F41B Bug Report"
+description: Submit a bug report to help us improve verl
+labels: [ "bug" ]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to fill out this bug report! 🤗
+
+ - type: textarea
+ id: system-info
+ attributes:
+ label: System Info
+ description: Please share your system info with us. You can run the command `python scripts/diagnose.py` and copy-paste its output below.
+ placeholder: verl version, platform, python version, ...
+ validations:
+ required: true
+
+ - type: checkboxes
+ id: information-scripts-examples
+ attributes:
+ label: Information
+ description: 'The problem arises when using:'
+ options:
+ - label: "The official example scripts"
+ - label: "My own modified scripts"
+
+ - type: checkboxes
+ id: information-tasks
+ attributes:
+ label: Tasks
+ description: "The tasks I am working on are:"
+ options:
+ - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
+ - label: "My own task or dataset (give details below)"
+
+ - type: textarea
+ id: reproduction
+ validations:
+ required: true
+ attributes:
+ label: Reproduction
+ description: |
+ Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
+ Please include relevant config information with your code.
+ If you have code snippets, error messages, stack traces please provide them here as well.
+ Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
+ Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
+
+ placeholder: |
+ Steps to reproduce the behavior:
+
+ 1.
+ 2.
+ 3.
+
+
+ - type: textarea
+ id: expected-behavior
+ validations:
+ required: true
+ attributes:
+ label: Expected behavior
+ description: "A clear and concise description of what you would expect to happen."
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ac09e8636d7a7577999a55ffdf7095dd0b656e52
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,2 @@
+blank_issues_enabled: true
+version: 0.1
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
new file mode 100644
index 0000000000000000000000000000000000000000..18e0615b9ecd01bf1cead1155ff8124865e07d24
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,32 @@
+# modified from https://github.com/huggingface/transformers/blob/main/.github/ISSUE_TEMPLATE/feature-request.yml?plain=1
+name: "\U0001F680 Feature request"
+description: Submit a proposal/request for a new verl feature
+labels: [ "Feature request" ]
+body:
+ - type: textarea
+ id: feature-request
+ validations:
+ required: true
+ attributes:
+ label: Feature request
+ description: |
+ A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist.
+
+ - type: textarea
+ id: motivation
+ validations:
+ required: true
+ attributes:
+ label: Motivation
+ description: |
+ Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
+
+
+ - type: textarea
+ id: contribution
+ validations:
+ required: true
+ attributes:
+ label: Your contribution
+ description: |
+ Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.md [readme](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md)
\ No newline at end of file
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000000000000000000000000000000000000..91c0d21f2a32b285170008e7d411edc9843e80b1
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,41 @@
+### What does this PR do?
+
+> Add **concise** overview of what this PR aims to achieve or accomplish. Reference related GitHub issues and PRs that help with the review.
+
+### Checklist Before Starting
+
+- [ ] Search for similar PRs. Paste at least one query link here: ...
+- [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
+ - `{modules}` include `fsdp`, `megatron`, `veomni`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`, `fully_async`, `one_step_off`
+ - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]`
+ - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
+ - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
+ - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`
+
+### Test
+
+> For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.
+
+### API and Usage Example
+
+> Demonstrate how the API changes if any, and provide usage example(s) if possible.
+
+```python
+# Add code snippet or script demonstrating how to use this
+```
+
+### Design & Code Changes
+
+> Demonstrate the high-level design if this PR is complex, and list the specific changes.
+
+### Checklist Before Submitting
+
+> [!IMPORTANT]
+> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.
+
+- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
+- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
+- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
+- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
+- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
+- [ ] If your PR is related to the `recipe` submodule, please also update the reference to the submodule commit via `git submodule update --remote` or `cd recipe && git pull origin main`.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000000000000000000000000000000000..24a3571c620c9c1e5ef7b4011851124c7a020626
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,9 @@
+## Enabled the dependabot to check the dependencies of the project
+## Dependabot will open pull requests to update dependencies automatically
+
+version: 2
+updates:
+ - package-ecosystem: pip
+ directory: "/"
+ schedule:
+ interval: weekly
\ No newline at end of file
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d83c87b2e710db4f362171d1a6ee1ccf5db7d829
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,73 @@
+### Adding a New Workflow
+
+When adding a new workflow for continuous integration (CI), you have two runner options: a fixed runner or a machine from vemlp.
+
+- **Fixed Runner**: To use a fixed runner, specify it in your workflow using the `runs-on` keyword, like `runs-on: [L20x8]`.
+- **Vemlp Runner**: Opting for a Vemlp machine allows you to launch tasks elastically.
+
+Here is a template to assist you. This template is designed for using Vemlp machines. Currently, for each workflow, you need to create a `setup` and a `cleanup` job. When using this template, the main parts you need to modify are the `IMAGE` environment variable and the specific `job steps`.
+
+```yaml
+name: Your Default Workflow
+
+on:
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - ".github/workflows/template.yml"
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+permissions:
+ contents: read
+
+env:
+ IMAGE: "your vemlp image" # e.g. "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_URL: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner" # public veFaas api
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ task-id: ${{ steps.create-runner.outputs.task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_URL }}"
+ image: "${{ env.IMAGE }}"
+
+ your_job:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'default-runner' }}"]
+ steps:
+ xxxx # your jobs
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, your_job]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_URL }}"
+ task-id: "${{ needs.setup.outputs.task-id }}"
+```
+
+### Model and Dataset
+To avoid CI relying on the network, we pre-download datasets to an NFS on the CI machine. The path for models is \${HOME}/models and the path for datasets is \${HOME}/models/hf_data.
\ No newline at end of file
diff --git a/.github/workflows/check-pr-title.yml b/.github/workflows/check-pr-title.yml
new file mode 100644
index 0000000000000000000000000000000000000000..948ce5e3f01498ce4f569230cdb2dd384fc0cbfd
--- /dev/null
+++ b/.github/workflows/check-pr-title.yml
@@ -0,0 +1,58 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+
+on:
+ pull_request:
+ types: [opened, edited, synchronize]
+
+jobs:
+ check-title:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+
+ - name: Run PR title checker
+ run: python3 tests/special_sanity/check_pr_title.py
+ env:
+ PR_TITLE: ${{ github.event.pull_request.title }}
+
+ - name: Run PR description checker
+ run: python3 tests/special_sanity/check_pr_description.py
+ env:
+ PR_TITLE: ${{ github.event.pull_request.title }}
+ GITHUB_EVENT_PATH: ${{ github.event_path }}
diff --git a/.github/workflows/cpu_unit_tests.yml b/.github/workflows/cpu_unit_tests.yml
new file mode 100644
index 0000000000000000000000000000000000000000..48ce123bc07b3726398230aba4a7edb89a8caa14
--- /dev/null
+++ b/.github/workflows/cpu_unit_tests.yml
@@ -0,0 +1,118 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: cpu_unit_tests
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - .github/workflows/cpu_unit_tests.yml
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ cpu_unit_tests:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 20 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ TORCH_COMPILE_DISABLE: 1
+ TORCHINDUCTOR_DISABLE: 1
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install --upgrade "transformers>=5.0.0"
+ - name: Download datasets
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k
+ - name: Running CPU unit tests
+ run: |
+ echo '[pytest]' > pytest.ini
+ echo 'python_files = *_on_cpu.py' >> pytest.ini
+ pytest -s -x --asyncio-mode=auto tests/
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, cpu_unit_tests]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aa4a713deac84bcc021481f20dc887db5132cf8e
--- /dev/null
+++ b/.github/workflows/doc.yml
@@ -0,0 +1,101 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+
+name: doc_test
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "docs/**"
+ - .github/workflows/doc.yml
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read # for checkout
+ pages: write # for deploy-pages
+ id-token: write # for deploy-pages
+
+jobs:
+ doc_test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 5 # Increase this timeout value as needed
+ strategy:
+ matrix:
+ python-version: ["3.10"]
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip install -r docs/requirements-docs.txt
+
+ - name: Run doc make html
+ run: |
+ cd docs
+ make clean
+ make html SPHINXOPTS="--keep-going -w _build/sphinx.log"
+ if grep -q ": ERROR:" _build/sphinx.log; then
+ echo "🚨 Sphinx doc build contained ERRORs - see _build/sphinx.log"
+ exit 1
+ fi
+ if grep -q "WARNING: document isn't included in any toctree" _build/sphinx.log; then
+ echo "🚨 Sphinx doc build contained WARNING. Please include newly added docs in index.rst. See _build/sphinx.log for details"
+ exit 1
+ fi
+ if grep -q "WARNING: Inline emphasis" _build/sphinx.log; then
+ echo "🚨 Sphinx doc build contained WARNING. Please check inline emphasis is correct. See _build/sphinx.log for details"
+ exit 1
+ fi
+ if grep -q "WARNING: Definition list ends without a blank line" _build/sphinx.log; then
+ echo "🚨 Sphinx doc build contained WARNING. Please check if the indentation is correct. See _build/sphinx.log for details"
+ exit 1
+ fi
diff --git a/.github/workflows/docker-build-ascend-a2.yml b/.github/workflows/docker-build-ascend-a2.yml
new file mode 100644
index 0000000000000000000000000000000000000000..76540a53c320a0080748cae2c7c10346ced6ef7e
--- /dev/null
+++ b/.github/workflows/docker-build-ascend-a2.yml
@@ -0,0 +1,84 @@
+name: docker-build-ascend-a2
+
+on:
+ workflow_dispatch:
+ push:
+ branches: ["main"]
+ paths:
+ - "docker/ascend/Dockerfile.ascend_8.5.0_a2"
+ - ".github/workflows/docker-build-ascend-a2.yml"
+ release:
+ types: [published]
+ schedule:
+ - cron: "0 16 * * *"
+
+jobs:
+ build-ascend-image-a2:
+ if: ${{ github.event_name != 'pull_request' && github.repository_owner == 'verl-project' }}
+ runs-on: ubuntu-latest
+ concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-build-ascend-image-a2
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+ steps:
+ - name: Remove unnecessary parts in github actions runners to free up disk space
+ uses: jlumbroso/free-disk-space@v1.3.1
+ with:
+ tool-cache: true
+
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ - name: Get base image name and tag
+ id: base_image
+ run: |
+ BASE_IMAGE_FULL=$(grep '^FROM' ./docker/ascend/Dockerfile.ascend_8.5.0_a2 | head -1 | cut -d' ' -f2)
+ echo "Base image full: $BASE_IMAGE_FULL"
+ BASE_IMAGE_TAG=$(echo "$BASE_IMAGE_FULL" | cut -d':' -f2)
+ echo "Base image tag: $BASE_IMAGE_TAG"
+ NEW_IMAGE_NAME="verl-$BASE_IMAGE_TAG"
+ echo "New image name: $NEW_IMAGE_NAME"
+ echo "base_image_tag=$BASE_IMAGE_TAG" >> "$GITHUB_OUTPUT"
+ echo "new_image_name=$NEW_IMAGE_NAME" >> "$GITHUB_OUTPUT"
+
+ - name: Get image tag
+ id: version
+ run: |
+ BRANCH_NAME=$(echo "${{ github.ref }}" | sed 's/refs\/heads\///g' | sed 's/[^a-zA-Z0-9._-]/_/g')
+ if [ "${{ github.event_name }}" = "release" ]; then
+ echo "tag=${{ steps.base_image.outputs.new_image_name }}-${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
+ elif [ "$BRANCH_NAME" = "main" ]; then
+ echo "tag=${{ steps.base_image.outputs.new_image_name }}-latest" >> "$GITHUB_OUTPUT"
+ fi
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Quay.io
+ uses: docker/login-action@v3
+ with:
+ registry: quay.io
+ username: ${{ secrets.QUAY_USERNAME }}
+ password: ${{ secrets.QUAY_PASSWORD }}
+
+ - name: Clean Docker cache before build
+ run: |
+ docker system prune -a -f --volumes || true
+
+ - name: Build and push images Quay
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ platforms: linux/amd64,linux/arm64
+ file: ./docker/ascend/Dockerfile.ascend_8.5.0_a2
+ push: true
+ tags: |
+ quay.io/ascend/verl:${{ steps.version.outputs.tag }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ build-args: |
+ BUILDKIT_INLINE_CACHE=1
diff --git a/.github/workflows/docker-build-ascend-a3.yml b/.github/workflows/docker-build-ascend-a3.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6549387fddcb9e419bebf46903370323a03d6963
--- /dev/null
+++ b/.github/workflows/docker-build-ascend-a3.yml
@@ -0,0 +1,84 @@
+name: docker-build-ascend-a3
+
+on:
+ workflow_dispatch:
+ push:
+ branches: ["main"]
+ paths:
+ - "docker/ascend/Dockerfile.ascend_8.5.0_a3"
+ - ".github/workflows/docker-build-ascend-a3.yml"
+ release:
+ types: [published]
+ schedule:
+ - cron: "0 19 * * *"
+
+jobs:
+ build-ascend-image-a3:
+ if: ${{ github.event_name != 'pull_request' && github.repository_owner == 'verl-project' }}
+ runs-on: ubuntu-latest
+ concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-build-ascend-image-a3
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+ steps:
+ - name: Remove unnecessary parts in github actions runners to free up disk space
+ uses: jlumbroso/free-disk-space@v1.3.1
+ with:
+ tool-cache: true
+
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ - name: Get base image name and tag
+ id: base_image
+ run: |
+ BASE_IMAGE_FULL=$(grep '^FROM' ./docker/ascend/Dockerfile.ascend_8.5.0_a3 | head -1 | cut -d' ' -f2)
+ echo "Base image full: $BASE_IMAGE_FULL"
+ BASE_IMAGE_TAG=$(echo "$BASE_IMAGE_FULL" | cut -d':' -f2)
+ echo "Base image tag: $BASE_IMAGE_TAG"
+ NEW_IMAGE_NAME="verl-$BASE_IMAGE_TAG"
+ echo "New image name: $NEW_IMAGE_NAME"
+ echo "base_image_tag=$BASE_IMAGE_TAG" >> "$GITHUB_OUTPUT"
+ echo "new_image_name=$NEW_IMAGE_NAME" >> "$GITHUB_OUTPUT"
+
+ - name: Get image tag
+ id: version
+ run: |
+ BRANCH_NAME=$(echo "${{ github.ref }}" | sed 's/refs\/heads\///g' | sed 's/[^a-zA-Z0-9._-]/_/g')
+ if [ "${{ github.event_name }}" = "release" ]; then
+ echo "tag=${{ steps.base_image.outputs.new_image_name }}-${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
+ elif [ "$BRANCH_NAME" = "main" ]; then
+ echo "tag=${{ steps.base_image.outputs.new_image_name }}-latest" >> "$GITHUB_OUTPUT"
+ fi
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Quay.io
+ uses: docker/login-action@v3
+ with:
+ registry: quay.io
+ username: ${{ secrets.QUAY_USERNAME }}
+ password: ${{ secrets.QUAY_PASSWORD }}
+
+ - name: Clean Docker cache before build
+ run: |
+ docker system prune -a -f --volumes || true
+
+ - name: Build and push images Quay
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ platforms: linux/amd64,linux/arm64
+ file: ./docker/ascend/Dockerfile.ascend_8.5.0_a3
+ push: true
+ tags: |
+ quay.io/ascend/verl:${{ steps.version.outputs.tag }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ build-args: |
+ BUILDKIT_INLINE_CACHE=1
diff --git a/.github/workflows/e2e_ascend.yml b/.github/workflows/e2e_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d4ea77ad143ce44ff2312d98a6c0aff68c8b382c
--- /dev/null
+++ b/.github/workflows/e2e_ascend.yml
@@ -0,0 +1,166 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ paths:
+ - ".github/workflows/e2e_ascend.yml"
+ - "examples/data_preprocess/**"
+ - "examples/grpo_trainer/**"
+ - "examples/ppo_trainer/**"
+ - "examples/sft/**"
+ - "verl/experimental/one_step_off_policy/**"
+ - "tests/special_npu/**"
+ - "tests/special_sanity/check_device_api_usage.py"
+ - "verl/**"
+ - "pyproject.toml"
+ - "requirements-npu.txt"
+ - "setup.py"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+permissions:
+ contents: read
+
+jobs:
+ llm_rl_job:
+ if: github.repository_owner == 'verl-project'
+ name: E2E Ascend testing for RL training scenarios of LLM models
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 120
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout volcengine/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Preprocess gsm8k dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running gsm8k e2e training tests with PPO on ASCEND NPU (FSDP backend)
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_qwen3_06b_ppo.sh
+ rm -rf $HOME/ckpts
+ - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (FSDP backend)
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_qwen2_5_05b_grpo.sh
+ rm -rf $HOME/ckpts
+ - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend)
+ run: |
+ ray stop --force
+ USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh
+ rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed
+ rm -rf $HOME/ckpts
+ - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU (MindSpeed backend, MoE Model)
+ run: |
+ ray stop --force
+ USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh
+ - name: Running the E2E test with fully_async_policy algorithm (FSDP2)
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_fully_async_policy.sh
+
+ vlm_rl_job:
+ if: github.repository_owner == 'verl-project'
+ name: E2E Ascend testing for RL training scenarios of VLM models
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 120
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout volcengine/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Preprocess geo3k dataset
+ run: |
+ python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
+ - name: Running geo3k e2e training tests with GRPO on ASCEND NPU
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh
+ rm -rf $HOME/ckpts
diff --git a/.github/workflows/e2e_fully_async_policy.yml b/.github/workflows/e2e_fully_async_policy.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a46be3048143c876a7a702b2564b48780aeaba2e
--- /dev/null
+++ b/.github/workflows/e2e_fully_async_policy.yml
@@ -0,0 +1,170 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_fully_async_policy
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/*trainer*"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ - "verl/experimental/fully_async_policy/**"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Home
+ - "verl/experimental/fully_async_policy/**"
+ # Entrypoints
+ - ".github/workflows/e2e_fully_async_policy.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_e2e/run_fully_async_policy.sh"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ # Test FSDP2 strategy
+ e2e_fully_async_policy_fsdp2:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 10 # Increase timeout for async training
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "fsdp2"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install cupy-cuda12x==13.6.0
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running the E2E test with fully_async_policy algorithm (FSDP2)
+ run: |
+ ray stop --force
+ bash tests/special_e2e/run_fully_async_policy.sh
+
+ # Test Megatron strategy
+ e2e_fully_async_policy_megatron:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 10 # Increase timeout for async training
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "megatron"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install cupy-cuda12x==13.6.0
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running the E2E test with fully_async_policy algorithm (Megatron)
+ run: |
+ ray stop --force
+ bash tests/special_e2e/run_fully_async_policy.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, e2e_fully_async_policy_fsdp2, e2e_fully_async_policy_megatron]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_one_step_off_policy.yml b/.github/workflows/e2e_one_step_off_policy.yml
new file mode 100644
index 0000000000000000000000000000000000000000..de3f8df5c1eb4ffafee91d7954ba71f6e32a69f2
--- /dev/null
+++ b/.github/workflows/e2e_one_step_off_policy.yml
@@ -0,0 +1,171 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_one_step_off_policy
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/*trainer*"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ - "verl/experimental/one_step_off_policy/**"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Home
+ - "verl/experimental/one_step_off_policy/**"
+ # Entrypoints
+ - ".github/workflows/e2e_one_step_off_policy.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_e2e/run_one_step_off_policy.sh"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ # Test FSDP2 strategy
+ e2e_one_step_off_policy_fsdp2:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 10 # Increase timeout for async training
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "fsdp2"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install cupy-cuda12x==13.6.0
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running the E2E test with one_step_off_policy algorithm (FSDP2)
+ run: |
+ ray stop --force
+ bash tests/special_e2e/run_one_step_off_policy.sh
+
+ # Test Megatron strategy
+ e2e_one_step_off_policy_megatron:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 10 # Increase timeout for async training
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "megatron"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install cupy-cuda12x==13.6.0
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running the E2E test with one_step_off_policy algorithm (Megatron)
+ run: |
+ ray stop --force
+ bash tests/special_e2e/run_one_step_off_policy.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [setup, e2e_one_step_off_policy_fsdp2, e2e_one_step_off_policy_megatron]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_one_step_off_policy_ascend.yml b/.github/workflows/e2e_one_step_off_policy_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..77ed29b4e03f153a6548bff85aee284e0419eba5
--- /dev/null
+++ b/.github/workflows/e2e_one_step_off_policy_ascend.yml
@@ -0,0 +1,169 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_one_step_off_policy_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/*trainer*"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ - "verl/experimental/one_step_off_policy/**"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - "!**/*.md"
+ - "!**/*.sh"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Home
+ - "verl/experimental/one_step_off_policy/**"
+ # Entrypoints
+ - ".github/workflows/e2e_one_step_off_policy_ascend.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_npu/run_one_step_off_policy.sh"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ # Test FSDP2 strategy
+ e2e_one_step_off_policy_fsdp2_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "fsdp2"
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare GSM8K dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running the E2E test with one_step_off_policy algorithm (FSDP2)
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_one_step_off_policy.sh
+
+ # Test Megatron strategy
+ e2e_one_step_off_policy_megatron_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ACTOR_STRATEGY: "megatron"
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare GSM8K dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running the E2E test with one_step_off_policy algorithm (Megatron)
+ run: |
+ ray stop --force
+ bash tests/special_npu/run_one_step_off_policy.sh
diff --git a/.github/workflows/e2e_ppo_grpo_trainer_trtllm.yml b/.github/workflows/e2e_ppo_grpo_trainer_trtllm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7f7314d6a1d9339e32a5590c651c0dc5268165b8
--- /dev/null
+++ b/.github/workflows/e2e_ppo_grpo_trainer_trtllm.yml
@@ -0,0 +1,285 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_trtllm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Recipes
+ - "!recipe/**"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Recipes
+ - "!recipe/**"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ # Entrypoints
+ - "verl/workers/rollout/trtllm_rollout/**"
+ - "tests/workers/rollout/rollout_trtllm/**"
+ - ".github/workflows/e2e_ppo_grpo_trainer_trtllm.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "examples/data_preprocess/dapo_multiturn_w_tool.py"
+ - "examples/data_preprocess/aime2024_multiturn_w_tool.py"
+ - "examples/grpo_trainer/run_qwen2-7b_math_trtllm.sh"
+ - "examples/grpo_trainer/run_qwen2-7b_math_megatron_trtllm.sh"
+ - "examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh"
+ # add back when ppo flow is ready
+ # - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ # - "verl/trainer/main_ppo.py"
+ # - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:trtllm1.3.0rc4"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ trtllm_unit_tests:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install pytest-asyncio
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Run TRTLLM unit tests
+ run: |
+ export TRTLLM_TEST_MODEL_PATH_ROOT="${HOME}/models"
+ ray stop --force
+ pytest -v -s \
+ tests/workers/rollout/rollout_trtllm/test_adapter.py \
+ tests/workers/rollout/rollout_trtllm/test_async_server.py \
+ tests/workers/rollout/rollout_trtllm/test_trtllm_rollout_utils.py
+
+ e2e_grpo_trainer_fsdp-qwen2:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k --local_save_dir ${PWD}/data/gsm8k
+ - name: Running GSM8K E2E training tests with FSDP on 8 L20 GPUs (Qwen)
+ run: |
+ ray stop --force
+ DATADIR=${HOME}/data \
+ bash examples/grpo_trainer/run_qwen2-7b_math_trtllm.sh 2 \
+ trainer.total_training_steps=1 \
+ data.train_files="['${PWD}/data/gsm8k/train.parquet']" \
+ data.val_files="['${PWD}/data/gsm8k/test.parquet']" \
+ trainer.logger='["console"]' \
+ actor_rollout_ref.model.path="${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct"
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ e2e_grpo_trainer_megatron-qwen2:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k --local_save_dir ${PWD}/data/gsm8k
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
+ run: |
+ ray stop --force
+ DATADIR=${HOME}/data \
+ ACTOR_TP=2 \
+ bash examples/grpo_trainer/run_qwen2-7b_math_megatron_trtllm.sh 2 \
+ trainer.total_training_steps=1 \
+ data.train_files="['${PWD}/data/gsm8k/train.parquet']" \
+ data.val_files="['${PWD}/data/gsm8k/test.parquet']" \
+ trainer.logger='["console"]' \
+ actor_rollout_ref.model.path="${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct"
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+ e2e_grpo_trainer_fsdp-vlm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GEO3K dataset
+ run: |
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/geo3k --local_save_dir ${PWD}/data/geo3k
+ - name: Running GEO3K E2E training tests with FSDP on 8 L20 GPUs (VLM)
+ run: |
+ ray stop --force
+ DATADIR=${HOME}/data \
+ bash examples/grpo_trainer/run_qwen2_5_vl_3b_trtllm.sh 2 \
+ trainer.total_training_steps=1 \
+ data.train_files="['${PWD}/data/geo3k/train.parquet']" \
+ data.val_files="['${PWD}/data/geo3k/test.parquet']" \
+ trainer.logger='["console"]' \
+ actor_rollout_ref.model.path="${HOME}/models/Qwen/Qwen3-VL-2B-Instruct"
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+ - name: Prepare DAPO-Math-17k and AIME-2024 datasets (data_preprocess)
+ run: |
+ python3 examples/data_preprocess/dapo_multiturn_w_tool.py --local_save_dir ${PWD}/data/dapo-math-17k
+ python3 examples/data_preprocess/aime2024_multiturn_w_tool.py --local_save_dir ${PWD}/data/aime-2024
+ - name: Running DAPO E2E with FP8 TRT-LLM rollout (Qwen3-0.6B)
+ run: |
+ ray stop --force
+ export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2 REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2 GEN_MOE_TP=null GEN_MOE_EP=null
+ export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS
+ export DATA_DIR=${PWD} DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet AIME_VAL=${PWD}/data/aime-2024/train.parquet MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B
+ bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \
+ reward_model.reward_kwargs.overlong_buffer_cfg.len=258 \
+ reward_model.reward_kwargs.max_resp_len=512 \
+ data.max_prompt_length=512 \
+ data.max_response_length=512 \
+ data.train_batch_size=32 \
+ actor_rollout_ref.rollout.n=4 \
+ actor_rollout_ref.rollout.max_num_seqs=16 \
+ actor_rollout_ref.rollout.max_num_batched_tokens=1024 \
+ actor_rollout_ref.rollout.max_model_len=1024 \
+ actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \
+ actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \
+ trainer.total_training_steps=1 \
+ trainer.logger='["console"]'
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2, e2e_grpo_trainer_fsdp-vlm]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_ppo_trainer.yml b/.github/workflows/e2e_ppo_trainer.yml
new file mode 100644
index 0000000000000000000000000000000000000000..357f0aa6bb6d819ae3098f73aa945dd1bb46123f
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer.yml
@@ -0,0 +1,78 @@
+name: e2e_ppo_trainer
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!**/*.md"
+ - "!docker/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Docs
+ - "!docs/**"
+
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # Entrypoints
+ - ".github/workflows/e2e_ppo_trainer.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/ppo_trainer"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ pre_commit_for_ppo:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.12"]
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install the current repository
+ run: |
+ pip install pre-commit hydra-core
+ pip3 install --no-deps -e .
+ - name: Set ruff --output-format=github
+ run: |
+ sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
+ git add .pre-commit-config.yaml
+ - uses: pre-commit/action@v3.0.1
+ with:
+ extra_args: "" # Overriding default "--all-files"
+
diff --git a/.github/workflows/e2e_ppo_trainer_megatron_sglang.yml b/.github/workflows/e2e_ppo_trainer_megatron_sglang.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5a8ef80432e48d1089c35b0f26f489a1571ef43f
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_megatron_sglang.yml
@@ -0,0 +1,201 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_sglang
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py" # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py" # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ # Entrypoints
+ - "verl/worksers/rollout/sglang_rollout/*"
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ e2e_ppo_trainer_megatron-deepseek:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ENGINE: sglang
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
+ run: |
+ ray stop --force
+ OPTIM_MEMORY_EFFICIENT=True ENGINE=sglang SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
+      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, async rollout (DeepSeek)
+ run: |
+ ray stop --force
+ export VLLM_USE_V1=1
+ ray start --head
+ ENGINE=sglang MODE=async RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: Profiling GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Deepseek)
+ run: |
+ ray stop --force
+ PROFILE_ENABLE=True ENGINE=sglang ADV_ESTIMATOR=grpo USE_DYNAMIC_BSZ=False MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ if [ -z "$( ls -A '/tmp/ray/session_latest/logs/nsight/' )" ]; then
+ echo "[ERROR] not found any profiling files"
+ exit 1
+ else
+ echo "[SUCCESS] profile success"
+ fi
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ # Qwen3-0.6B: dense, tie_word_embeddings=True
+ e2e_ppo_trainer_megatron-qwen3:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ ENGINE: sglang
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) testing learning rate scheduler
+ run: |
+ ray stop --force
+ ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with FP8 rollout
+ run: |
+ ray stop --force
+ export VLLM_USE_V1=1
+ ROLLOUT_QUANTIZATION=fp8 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [setup, e2e_ppo_trainer_megatron-deepseek, e2e_ppo_trainer_megatron-qwen3]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml b/.github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ba9d3b23545a69afb6286bb1ea4d373bc51a14b0
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml
@@ -0,0 +1,201 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_sglang_2
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py" # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py" # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ # Entrypoints
+ - "verl/worksers/rollout/sglang_rollout/*"
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ e2e_ppo_trainer_fsdp_sglang:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 40 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt
+ run: |
+ ray stop --force
+ ENGINE=sglang bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ e2e_ppo_trainer_fsdp-qwen2_5vl-3b:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ # Geo3k
+ - name: Prepare GEO3K dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
+ - name: Running GEO3K VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ ENGINE=sglang ROLLOUT_MODE=async GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GEO3K VLM E2E with rmpad using torch fused kernel (Qwen2.5-VL)
+ run: |
+ ray stop --force
+ FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ ENGINE=sglang ROLLOUT_MODE=async GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GEO3K VLM E2E with rmpad using triton fused kernel (Qwen2.5-VL)
+ run: |
+ ray stop --force
+ FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ ENGINE=sglang ROLLOUT_MODE=async GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [setup, e2e_ppo_trainer_fsdp-qwen2_5vl-3b, e2e_ppo_trainer_fsdp_sglang]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d9fa832a56b9027480f834da7f26ac291f8f207b
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_megatron_vllm.yml
@@ -0,0 +1,212 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts whose names end with the `on_cpu.py` suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_vllm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ # Entrypoints
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ # deepseek-ai/deepseek-coder-1.3b-instruct: dense, tie_word_embeddings=False
+ e2e_ppo_trainer_megatron-deepseek:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps --force-reinstall .
+ pip3 install mbridge
+ pip3 install math-verify
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ # Full training save&load
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use mbridge e2e to pre-load and save (Deepseek)
+ run: |
+ ray stop --force
+ ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True USE_DIST_CKPT=False \
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
+      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use mbridge e2e to resume from saved checkpoint (Deepseek)
+ run: |
+ ray stop --force
+ RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True USE_DIST_CKPT=False \
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ # LoRA training save&load
+ - name: clean up and install Megatron-Bridge
+ run: |
+ rm -rf checkpoints
+ pip3 install git+https://github.com/NVIDIA-NeMo/Megatron-Bridge.git@83a7c11 --no-deps --no-build-isolation
+ pip3 install git+https://github.com/NVIDIA/Megatron-LM.git@5455f0a --no-deps --no-build-isolation
+ pip3 install "nvidia-modelopt[torch]>=0.37.0" transformers==4.57.1
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use Megatron-Bridge LoRA e2e to pre-load and save (Deepseek)
+ run: |
+ ray stop --force
+ ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=4 LORA_RANK=8 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False USE_DIST_CKPT=False \
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
+      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use Megatron-Bridge LoRA e2e to resume from saved checkpoint (Deepseek)
+ run: |
+ ray stop --force
+ RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 SAVE_FREQ=1 COMMON_PP=4 LORA_RANK=8 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False USE_DIST_CKPT=False \
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ # Qwen3-0.6B: dense, tie_word_embeddings=True
+ e2e_ppo_trainer_megatron-qwen3:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install math-verify
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) testing learning rate scheduler
+ run: |
+ ray stop --force
+ ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with FP8 rollout
+ run: |
+ ray stop --force
+ export VLLM_USE_V1=1
+ ROLLOUT_QUANTIZATION=fp8 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [setup, e2e_ppo_trainer_megatron-deepseek, e2e_ppo_trainer_megatron-qwen3]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml
new file mode 100644
index 0000000000000000000000000000000000000000..83a1faf88326fad3bea04afff2591c0fb1a9a73b
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml
@@ -0,0 +1,318 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_vllm_2
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ # Entrypoints
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ e2e_ppo_trainer_megatron-moe-expert-parallel:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps --force-reinstall .
+ pip3 install git+https://github.com/NVIDIA-NeMo/Megatron-Bridge.git@83a7c11 --no-deps --no-build-isolation
+ pip3 install git+https://github.com/NVIDIA/Megatron-LM.git@5455f0a --no-deps --no-build-isolation
+ pip3 install "nvidia-modelopt[torch]>=0.37.0" transformers==4.57.1
+ - name: Prepare GSM8K dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron-Bridge (Qwen3-30B-A3B-Instruct-2507)
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
+ PPO_MAX_TOKEN_LEN=1024 FWD_MAX_TOKEN_LEN=1024 \
+ MAX_PROMPT_LENGTH=512 MAX_RESPONSE_LENGTH=512 \
+ MODEL_ID=Qwen/Qwen3-30B-A3B-Instruct-2507 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False \
+ COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=4 COMMON_ETP=1 INFER_TP=8 \
+ USE_DIST_CKPT=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: Running GSM8K E2E training tests with 3D parallelism with FP8 rollout on 8 L20 GPUs with Megatron-Bridge (Qwen3-30B-A3B-Instruct-2507)
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
+ PPO_MAX_TOKEN_LEN=1024 FWD_MAX_TOKEN_LEN=1024 \
+ MAX_PROMPT_LENGTH=512 MAX_RESPONSE_LENGTH=512 \
+ MODEL_ID=Qwen/Qwen3-30B-A3B-Instruct-2507 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False \
+ COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=4 COMMON_ETP=1 INFER_TP=2 \
+ USE_DIST_CKPT=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 ROLLOUT_QUANTIZATION=fp8 bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron-Bridge LoRA (Qwen3-30B-A3B-Instruct-2507)
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
+ PPO_MAX_TOKEN_LEN=1024 FWD_MAX_TOKEN_LEN=1024 \
+ MAX_PROMPT_LENGTH=512 MAX_RESPONSE_LENGTH=512 LORA_RANK=8 CRITIC_LORA_RANK=8 \
+ MODEL_ID=Qwen/Qwen3-30B-A3B-Instruct-2507 USE_MBRIDGE=True VANILLA_MBRIDGE=False VALUE_VANILLA_MBRIDGE=False \
+ COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=2 COMMON_ETP=1 INFER_TP=8 \
+ USE_DIST_CKPT=False LORA_MERGE=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
+ - name: clean up
+ run: |
+ rm -rf checkpoints
+
+ e2e_ppo_trainer_fsdp_vllm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare GSM8K dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ # Function RM
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP_SIZE=8)
+ run: |
+ ray stop --force
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
+ run: |
+ ray stop --force
+ RESUME_MODE=auto VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test merging FSDP checkpoints (Qwen Actor)
+ run: |
+ exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp-size8"
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
+ run: |
+ ray stop --force
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 USE_KL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test merging DDP+FSDP checkpoints (Qwen Actor)
+ run: |
+ exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
+ run: |
+ ray stop --force
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test merging FSDP2 checkpoints (Qwen Actor)
+ run: |
+ exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E without rmpad using function rm
+ run: |
+ ray stop --force
+ RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
+ run: |
+ ray stop --force
+ CUSTOM_REWARD_FN=True ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ # - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
+ # run: |
+ # ray stop --force
+ # ADV_ESTIMATOR=remax USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ # LoRA tests
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test GRPO LoRA checkpoints merging function
+ run: |
+ export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
+ ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
+ cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
+ python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ e2e_ppo_trainer_fsdp-qwen2_5vl-3b:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 40 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ # Geo3k
+ - name: Prepare GEO3K dataset
+ run: |
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
+ - name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ - name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_ppo_trainer_megatron-moe-expert-parallel,
+ e2e_ppo_trainer_fsdp-qwen2_5vl-3b,
+ e2e_ppo_trainer_fsdp_vllm,
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d0abdcc60e33367f8f3861d8bfeb8393ff20b5ad
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml
@@ -0,0 +1,233 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_megatron_vllm_2_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ - "!verl/utils/fsdp_utils.py"
+ - "!verl/utils/checkpoint/fsdp_checkpoint_manager.py"
+ - "!verl/model_merger/fsdp_model_merger.py"
+ # Entrypoints
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm_2_ascend.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ e2e_ppo_trainer_fsdp_vllm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 90 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare GSM8K dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ # Function RM
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
+ run: |
+ ray stop --force
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 USE_KL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test merging DDP+FSDP checkpoints (Qwen Actor)
+ run: |
+ exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
+ run: |
+ ray stop --force
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test merging FSDP2 checkpoints (Qwen Actor)
+ run: |
+ exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E without rmpad using function rm
+ run: |
+ ray stop --force
+ RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
+ run: |
+ ray stop --force
+ CUSTOM_REWARD_FN=True ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Test GRPO LoRA checkpoints merging function
+ run: |
+ export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
+ ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
+ cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
+ python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
+ run: |
+ ray stop --force
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+
+ e2e_ppo_trainer_fsdp-qwen2_5vl-3b_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ pip install trl==0.26.0
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ # Geo3k
+ - name: Prepare GEO3K dataset
+ run: |
+ python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
+ - name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
+ - name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
+ SP_SIZE=2 \
+ LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
diff --git a/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml b/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0accafd58e8a131458a4729f0d42b87b71f077c7
--- /dev/null
+++ b/.github/workflows/e2e_ppo_trainer_veomni_vllm.yml
@@ -0,0 +1,153 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_ppo_trainer_veomni_vllm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch.
+ # For push, for now only anti-patterns are specified so it is more conservative
+ # and achieves higher coverage.
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!docker/**"
+ # Docs
+ - "!**/*.md"
+ - "!docs/**"
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # Entrypoints
+ - ".github/workflows/e2e_ppo_trainer_veomni_vllm.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "examples/data_preprocess/geo3k.py"
+ - "tests/special_e2e/run_ppo_trainer_veomni.sh"
+ - "verl/trainer/main_ppo.py"
+ - "verl/trainer/config/ppo_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ e2e_ppo_trainer_veomni_vllm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install git+https://github.com/ByteDance-Seed/VeOmni.git@v0.1.4
+ - name: Prepare GSM8K dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Prepare GEO3K dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=4, USP=2)
+ run: |
+ ray stop --force
+ FSDP_SIZE=4 SP_SIZE=2 bash tests/special_e2e/run_ppo_trainer_veomni.sh
+ - name: Running GEO3K E2E training tests on 8 L20 GPUs with veomni engine (FSDP_SIZE=8, USP=1)
+ run: |
+ ray stop --force
+ MODEL_ID=Qwen/Qwen3-VL-2B-Instruct TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/gsm8k/test.parquet FSDP_SIZE=8 SP_SIZE=1 bash tests/special_e2e/run_ppo_trainer_veomni.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_ppo_trainer_veomni_vllm,
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_sft_llm.yml b/.github/workflows/e2e_sft_llm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..435a0a626db83085ca08a65995406c7c494f1802
--- /dev/null
+++ b/.github/workflows/e2e_sft_llm.yml
@@ -0,0 +1,153 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_sft_llm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # Entrypoints
+ - ".github/workflows/e2e_sft_llm.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_e2e/sft/**"
+ - "verl/trainer/fsdp_sft_trainer.py"
+ - "verl/trainer/config/sft_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+ e2e_sft_llm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install peft
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install git+https://github.com/ByteDance-Seed/VeOmni.git@v0.1.4
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs w/o rmpad using function rm
+ run: |
+ ray stop --force
+ RM_PAD=False bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallelism
+ run: |
+ ray stop --force
+ SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallelism and liger
+ run: |
+ ray stop --force
+ SP_SIZE=2 LIGER=True bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests with LoRA
+ run: |
+ ray stop --force
+ LORA_RANK=32 bash tests/special_e2e/sft/run_sft.sh
+ - name: Run GSM8K E2E training and resume tests resuming from the checkpoint manager
+ run: |
+ ray stop --force
+ LORA_RANK=32 RESUME_MODE=auto TOTAL_TRAIN_STEP=2 bash tests/special_e2e/sft/run_sft.sh
+ # TODO: multiturn
+ - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
+ run: |
+ bash tests/special_e2e/sft/test_sft_engine_all.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, e2e_sft_llm]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/e2e_sft_llm_ascend.yml b/.github/workflows/e2e_sft_llm_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3919da747a9393857b7c26a54aaa1ff7c84ba603
--- /dev/null
+++ b/.github/workflows/e2e_sft_llm_ascend.yml
@@ -0,0 +1,160 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_sft_llm_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # Entrypoints
+ - ".github/workflows/e2e_sft_llm_ascend.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_e2e/sft/**"
+ - "verl/trainer/fsdp_sft_trainer.py"
+ - "verl/trainer/config/sft_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ e2e_sft_llm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 90 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install -e .
+ pip install git+https://github.com/ByteDance-Seed/VeOmni.git@v0.1.4
+ pip install pandas==2.3.3
+ pip uninstall -y mbridge
+ pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare gsm8k dataset
+ run: |
+ python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running GSM8K E2E training tests on 8 NPUs with rmpad using function rm
+ run: |
+ ray stop --force
+ bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests on 8 NPUs w/o rmpad using function rm
+ run: |
+ ray stop --force
+ RM_PAD=False bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests on 8 NPUs with sequence parallelism
+ run: |
+ ray stop --force
+ SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests with LoRA
+ run: |
+ ray stop --force
+ LORA_RANK=32 bash tests/special_e2e/sft/run_sft.sh
+ - name: Run GSM8K E2E training and resume tests resuming from the checkpoint manager
+ run: |
+ ray stop --force
+ LORA_RANK=32 RESUME_MODE=auto TOTAL_TRAIN_STEP=2 bash tests/special_e2e/sft/run_sft.sh
+ - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
+ run: |
+ ray stop --force
+ rm -rf ~/verl/test/log
+ mkdir -p ~/verl/test/log
+ export VERL_FILE_LOGGER_ROOT=~/verl/test/log
+ # test with single gpu as golden
+ echo "run with single gpu as golden"
+ BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=1 NUM_GPUS=1 FSDP_STRATEGY=fsdp VERL_FILE_LOGGER_PATH=~/verl/test/log/golden.jsonl bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with fsdp 1
+ echo "run with sp2 fsdp_size2 num_gpus8 fsdp_strategy fsdp pad_mode no_padding"
+ BACKEND=fsdp SP_SIZE=2 FSDP_SIZE=2 NUM_GPUS=8 FSDP_STRATEGY=fsdp PAD_MODE=no_padding bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with fsdp 1 use_remove_padding and pad_mode no_padding
+ echo "run with sp1 fsdp_size-1 num_gpus8 fsdp_strategy fsdp pad_mode no_padding use_remove_padding False"
+ BACKEND=fsdp SP_SIZE=1 FSDP_SIZE=-1 NUM_GPUS=8 FSDP_STRATEGY=fsdp PAD_MODE=no_padding USE_REMOVE_PADDING=False bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with fsdp 2
+ echo "run with sp2 fsdp_size2 num_gpus8 fsdp_strategy fsdp2"
+ BACKEND=fsdp SP_SIZE=2 FSDP_SIZE=2 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with veomni
+ echo "run with sp2 fsdp_size4 num_gpus8 fsdp_strategy fsdp2"
+ BACKEND=veomni SP_SIZE=2 FSDP_SIZE=4 NUM_GPUS=8 FSDP_STRATEGY=fsdp2 bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with megatron
+ echo "run with tp2 pp2 vpp null cp2 num_gpus8"
+ BACKEND=megatron TP_SIZE=2 PP_SIZE=2 VPP_SIZE=NULL CP_SIZE=2 NUM_GPUS=8 bash tests/special_e2e/sft/run_sft_engine.sh
+ # test with cp in ray
+ echo "run with tp2 pp2 vpp null cp2 num_gpus8 mode=ray"
+ BACKEND=megatron TP_SIZE=2 PP_SIZE=2 VPP_SIZE=NULL CP_SIZE=2 NUM_GPUS=8 mode=ray bash tests/special_e2e/sft/run_sft_engine.sh
+ rm -rf ~/verl/test/log
diff --git a/.github/workflows/e2e_sft_vlm.yml b/.github/workflows/e2e_sft_vlm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..93d02c83c8c52ed6384c3235144ab574b88d591b
--- /dev/null
+++ b/.github/workflows/e2e_sft_vlm.yml
@@ -0,0 +1,128 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: e2e_sft_vlm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # Entrypoints
+ - ".github/workflows/e2e_sft_vlm.yml"
+ - "examples/data_preprocess/gsm8k.py"
+ - "tests/special_e2e/sft/**"
+ - "verl/trainer/fsdp_sft_trainer.py"
+ - "verl/trainer/config/sft_trainer.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+ e2e_sft_vlm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install peft
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install git+https://github.com/ByteDance-Seed/VeOmni.git@v0.1.4
+ - name: Prepare pokemon-gpt4o-captions dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/pokemon.py --local_dataset_path ${HOME}/models/hf_data/pokemon-gpt4o-captions
+ - name: Running Pokemon E2E training tests with multiturn and various configs and compare results
+ run: |
+ MODEL_ID=Qwen/Qwen3-VL-2B-Instruct DATASET_DIR=~/data/pokemon-gpt4o-captions VPP_SIZE=null bash tests/special_e2e/sft/test_sft_engine_all.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, e2e_sft_vlm]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/gpu_unit_tests.yml b/.github/workflows/gpu_unit_tests.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6c16b950afd5f5c19d45925d140d86d29087a528
--- /dev/null
+++ b/.github/workflows/gpu_unit_tests.yml
@@ -0,0 +1,137 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: GPU unit tests
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.4.x
+ paths:
+ - "**/*.py"
+ - .github/workflows/gpu_unit_tests.yml
+ pull_request:
+ branches:
+ - main
+ - v0.4.x
+ paths:
+ # The order that you define paths patterns matters:
+ # A matching negative pattern (prefixed with !) after a positive match will exclude the path.
+ # A matching positive pattern after a negative match will include the path again.
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # Entrypoints
+ - .github/workflows/gpu_unit_tests.yml
+ - "tests/**test_*.py"
+ # Ignore CPU tests
+ - "!tests/**_on_cpu.py"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ gpu_unit_tests:
+ if: github.repository_owner == 'verl-project'
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 60 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1"
+ HF_HUB_ENABLE_HF_TRANSFER: "1"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install hf_transfer
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
+ pip3 install --ignore-installed blinker
+ pip3 install --ignore-installed mlflow "numpy<2.0"
+ - name: Run all GPU unit tests
+ run: |
+ pytest -s -x --ignore-glob="*on_npu.py" --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_bucketed_weight_transfer*" tests/
+ - name: Testing LinearCrossEntropyTP Correctness, Computation Time and Memory Consumption
+ run: |
+ LOW_MEMORY=True torchrun --standalone --nnodes=1 --nproc-per-node=8 tests/utils/test_special_linear_cross_entropy_tp.py
+ - name: Testing FSDP2 actor functionality
+ run: |
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py
+ - name: Testing FSDP2 critic functionality
+ run: |
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/critic/test_special_dp_critic.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, gpu_unit_tests]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5522ba714669e26b543d3169847be162ccda6e06
--- /dev/null
+++ b/.github/workflows/model.yml
@@ -0,0 +1,184 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all scripts without the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+# name: Check PR Title
+
+name: model
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "verl/**/*.py"
+ # Entrypoints
+ - ".github/workflows/model.yml"
+ - "tests/special_distributed/test_fsdp_ckpt.py"
+ - "tests/special_distributed/test_tensor_dict.py"
+ - "tests/models/**"
+ - "tests/special_distributed/run_all.sh"
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ model_rmpad:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 20 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository and upgrade to latest transformers(4.54.0)/flash_attn, transformers 4.55.0 has strange behavior with model backward
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install --upgrade "transformers<5.0.0"
+ - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
+ run: |
+ pytest -s tests/models/test_transformer.py
+ - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn
+ run: |
+ pytest -s tests/models/test_transformer.py
+ - name: Running FSDP rmpad model tests on 8 L20 GPUs + latest flash_attn
+ run: |
+ STRATEGY=fsdp torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
+ - name: Running transformers ulysses tests on 8 L20 GPUs + latest transformers
+ run: |
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
+ - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.54.1
+ run: |
+ pip3 install transformers==4.54.1
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
+ - name: Run distributed test
+ run: |
+ bash tests/special_distributed/run_all.sh
+
+ # TODO: Move this back to model_rmpad once FSDP2 is stable.
+ # NOTE: List as an independent job to make rerun easier.
+ model_rmpad_fsdp2_unstable:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 20 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository and upgrade to latest transformers/flash_attn
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Running FSDP2 rmpad model tests on 8 L20 GPUs + latest flash_attn
+ run: |
+ STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
+
+ model_engine:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 20 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Download model config files
+ run: |
+ hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct
+
+ - name: Running mcore engine tests on 8 L20 GPUs
+ run: |
+ ray stop --force
+ pytest -s -x tests/models/test_engine.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, model_rmpad, model_rmpad_fsdp2_unstable, model_engine]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/model_ascend.yml b/.github/workflows/model_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a5ab7620ee3c4d5a6cbc8e1f8188f7a614191650
--- /dev/null
+++ b/.github/workflows/model_ascend.yml
@@ -0,0 +1,137 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+
+name: model_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "verl/**/*.py"
+ # Entrypoints
+ - ".github/workflows/model_ascend.yml"
+ - "tests/special_distributed/test_fsdp_ckpt.py"
+ - "tests/special_distributed/test_tensor_dict.py"
+ - "tests/models/**"
+ - "tests/special_distributed/run_all.sh"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+permissions:
+ contents: read
+
+jobs:
+ model_rmpad_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .[test]
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Running rmpad model tests on 8 NPUs
+ run: |
+ pytest -s tests/models/test_transformer.py
+ - name: Running FSDP rmpad model tests on 8 NPUs
+ run: |
+ STRATEGY=fsdp torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
+ - name: Running transformers ulysses tests on 8 NPUs
+ run: |
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
+ - name: Run distributed test
+ run: |
+ bash tests/special_distributed/run_all.sh
+
+  # TODO: Move this back to model_rmpad_ascend once FSDP2 is stable.
+ # NOTE: List as an independent job to make rerun easier.
+ model_rmpad_fsdp2_unstable_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .[test]
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Running FSDP2 rmpad model tests on 8 NPUs
+ run: |
+ STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
diff --git a/.github/workflows/nightly_ascend.yml b/.github/workflows/nightly_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c74ea4ba7d97e90862d789f40c906872af01597f
--- /dev/null
+++ b/.github/workflows/nightly_ascend.yml
@@ -0,0 +1,174 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: nightly_ci_ascend
+
+on:
+  # This workflow has no push or pull_request triggers; it only runs
+  # on the nightly cron schedule below (17:00 UTC daily), so regular
+  # PRs do not occupy the Ascend NPU runners while nightly coverage
+  # is still achieved.
+ schedule:
+ - cron: "0 17 * * *"
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ # Test ppo qwen3-8b fsdp+vllm
+ nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 180 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare GSM8K dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running nightlyCI_ppo-qwen3-8b-fsdp-vllm_ascend
+ run: |
+ ray stop --force
+ bash tests/special_npu/nightly_ci_ascend/run_ppo_qwen3-8b_fsdp_npu.sh
+
+ # Test grpo qwen25-7b-Instruct fsdp+vllm
+ nightlyCI_grpo-qwen25-7b-Instruct-fsdp-vllm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 180 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare GSM8K dataset
+ run: |
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+ - name: Running nightlyCI_grpo-qwen25-7b-Instruct-fsdp-vllm_ascend
+ run: |
+ ray stop --force
+ bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-7b-instruct_fsdp_npu.sh
+
+ # Test grpo qwen25-vl-3b-Instruct fsdp+vllm
+ nightlyCI_grpo-qwen25-vl-3b-Instruct-fsdp-vllm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 180 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Preprocess geo3k dataset
+ run: |
+ python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/.cache/datasets/hiyouga/geometry3k
+ - name: Running nightlyCI_grpo-qwen25-vl-3b-Instruct-fsdp-vllm_ascend
+ run: |
+ ray stop --force
+ bash tests/special_npu/nightly_ci_ascend/run_grpo_qwen25-vl-3b-instruct_fsdp_npu.sh
diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7f678409da0cd7977ff339da7fb81e3e75df2091
--- /dev/null
+++ b/.github/workflows/npu_unit_tests.yml
@@ -0,0 +1,126 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - `npu_unit_tests.yml`, run pytest on the same set of test scripts on an Ascend (NPU) device.
+# - Since cpu/gpu/npu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: NPU unit tests
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - .github/workflows/npu_unit_tests.yml
+ pull_request:
+ branches:
+ - main
+ paths:
+ # The order that you define paths patterns matters:
+ # A matching negative pattern (prefixed with !) after a positive match will exclude the path.
+ # A matching positive pattern after a negative match will include the path again.
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ - "!recipe/**"
+ # Entrypoints
+ - .github/workflows/npu_unit_tests.yml
+ - "tests/**test_*.py"
+ # Ignore CPU tests
+ - "!tests/*_on_cpu.py"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ npu_unit_tests:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+      - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .[test]
+ pip install mlflow pytest-asyncio
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Run all NPU unit tests
+ run: |
+ pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" --ignore-glob="tests/workers/rollout/rollout_trtllm" --ignore-glob="*test_fsdp_lora_merge*" --ignore-glob="*test_activation_offload*" --ignore-glob="*test_normalize_peft_param_name.py*" tests/
+ - name: Testing activation offload
+ run: |
+ pytest -s -x tests/utils/test_activation_offload.py
+ - name: Testing normalize peft param name
+ run: |
+ pytest -s -x tests/utils/test_normalize_peft_param_name.py
+ - name: Testing FSDP2 actor functionality
+ run: |
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py
+ - name: Testing FSDP2 critic functionality
+ run: |
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/critic/test_special_dp_critic.py
+ - name: Running NPU profiling unit tests
+ run: |
+ pytest -s -x tests/utils/test_special_mstx_profile.py
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4f6aa4bdf0d10048057777269df9507188b3a264
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,41 @@
+# c.f. https://github.com/pre-commit/action?tab=readme-ov-file#using-this-action
+name: pre-commit
+
+# No need to avoid / cancel lightweight pre-commit jobs
+on:
+ schedule:
+ - cron: "0 0 * * 0"
+ pull_request:
+ push:
+ branches:
+ - main
+ - v0.*
+ # Allow manual triggering
+ workflow_dispatch:
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ pre-commit:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.12"]
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install the current repository
+ run: |
+ pip install pre-commit hydra-core
+ pip install --no-deps -e .
+ - name: Set ruff --output-format=github
+ run: |
+ sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
+ git add .pre-commit-config.yaml
+ # Check "--all-files" by default
+ - uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/precommit-autofix.yml b/.github/workflows/precommit-autofix.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d235da90cd21ce429559eccf962fdb4eee7a5252
--- /dev/null
+++ b/.github/workflows/precommit-autofix.yml
@@ -0,0 +1,52 @@
+name: scheduled pre-commit autofix
+
+on:
+ schedule:
+ # Every hour
+ - cron: "0 * * * *"
+ workflow_dispatch:
+
+permissions:
+ contents: write
+ pull-requests: write
+
+jobs:
+ precommit:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: Install pre-commit
+ run: |
+ python -m pip install --upgrade pip
+ pip install pre-commit hydra-core
+
+ - name: Run pre-commit
+ run: |
+ pre-commit run --all-files || true
+
+ - name: Create or update PR
+ uses: peter-evans/create-pull-request@v6
+ with:
+ branch: bot/precommit-autofix
+ delete-branch: true
+ title: "[ci] chore: scheduled pre-commit autofix"
+ commit-message: "chore: auto-fix pre-commit issues"
+ body: |
+ This PR was created automatically by a scheduled GitHub Action.
+
+ - Runs `pre-commit run --all-files`
+ - Triggered hourly
+ labels: |
+ automated
+ pre-commit
diff --git a/.github/workflows/reward_model_sglang.yml b/.github/workflows/reward_model_sglang.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c9a4e9804a0f77b43a3898730093bee32adda906
--- /dev/null
+++ b/.github/workflows/reward_model_sglang.yml
@@ -0,0 +1,134 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+
+name: reward_model_sglang
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "verl/**/*.py"
+ # Entrypoints
+ - ".github/workflows/reward_model_sglang.yml"
+ - "tests/experimental/reward_loop/**"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ reward_model_sglang:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
+ NCCL_SHM_DISABLE: "1"
+ NCCL_P2P_DISABLE: "1"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install sglang-router==0.2.2
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k --local_dir ${HOME}/data/gsm8k
+ - name: Running sglang generative reward model tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_reward_model_genrm.py
+ - name: Running sglang discriminative reward model tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_reward_model_disrm.py
+ - name: Running sglang agent loop with reward manager tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
+ - name: Running sglang agent loop with reward model colocate tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_colocate.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, reward_model_sglang]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/reward_model_vllm.yml b/.github/workflows/reward_model_vllm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aebde06984f3226256cbe791f8389660879fd3a4
--- /dev/null
+++ b/.github/workflows/reward_model_vllm.yml
@@ -0,0 +1,134 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+
+name: reward_model_vllm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "verl/**/*.py"
+ # Entrypoints
+ - ".github/workflows/reward_model_vllm.yml"
+ - "tests/experimental/reward_loop/**"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ reward_model_vllm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 30 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
+ NCCL_SHM_DISABLE: "1"
+ NCCL_P2P_DISABLE: "1"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k --local_dir ${HOME}/data/gsm8k
+ - name: Running vllm generative reward model tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_genrm.py
+ - name: Running vllm discriminative reward model tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_disrm.py
+
+ - name: Running vllm agent loop with reward manager tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
+ - name: Running vllm agent loop with reward model colocate tests on 8 L20 GPUs
+ run: |
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_colocate.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, reward_model_vllm]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/reward_model_vllm_ascend.yml b/.github/workflows/reward_model_vllm_ascend.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b57aa97c73bb3d1feb456d118e3866bca9f5265d
--- /dev/null
+++ b/.github/workflows/reward_model_vllm_ascend.yml
@@ -0,0 +1,113 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file name does not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+# name: Check PR Title
+
+name: reward_model_vllm_ascend
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "verl/**/*.py"
+ # Entrypoints
+ - ".github/workflows/reward_model_vllm_ascend.yml"
+ - "tests/experimental/reward_loop/**"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ reward_model_vllm_ascend:
+ if: github.repository_owner == 'verl-project'
+ runs-on: linux-aarch64-a2b3-8
+ timeout-minutes: 60 # Increase this timeout value as needed
+ container:
+ image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
+ options: >-
+ --shm-size 16g
+ env:
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - name: Check npu and CANN info
+ run: |
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+ npu-smi info
+ - name: Check initial pip list from image
+ run: |
+ pip list
+ - name: Checkout verl-project/verl repo
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ clean: true
+ - name: Install the current repository
+ run: |
+ pip install -r requirements-npu.txt
+ pip install --no-deps -e .[test]
+ - name: Check final pip list
+ run: |
+ pip list
+ - name: Prepare weights
+ run: |
+ ln -s /root/.cache/models ~/models
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k --local_dir ${HOME}/data/gsm8k
+ - name: Running vllm generative reward model tests on 8 NPUs
+ run: |
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_genrm.py
+ - name: Running vllm discriminative reward model tests on 8 NPUs
+ run: |
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_disrm.py
+ - name: Running vllm agent loop with reward manager tests on 8 NPUs
+ run: |
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
+ - name: Running vllm agent loop with reward model colocate tests on 8 NPUs
+ run: |
+ export HCCL_HOST_SOCKET_PORT_RANGE=auto
+ export HCCL_NPU_SOCKET_PORT_RANGE=auto
+ ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_colocate.py
\ No newline at end of file
diff --git a/.github/workflows/sanity.yml b/.github/workflows/sanity.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ac7532d2f04ee239139ad60b499b6857443941f1
--- /dev/null
+++ b/.github/workflows/sanity.yml
@@ -0,0 +1,108 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file name does not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+# name: Check PR Title
+
+name: sanity
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - .github/workflows/sanity.yml
+ - "tests/special_sanity/**"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+jobs:
+ sanity:
+ runs-on: ubuntu-latest
+ timeout-minutes: 5 # Increase this timeout value as needed
+ strategy:
+ matrix:
+ python-version: ["3.10"]
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install the current repository
+ run: |
+ pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+ pip3 install -r requirements.txt
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Run sanity test
+ run: |
+ pytest -s -x tests/special_sanity
+ - name: Run license test
+ run: |
+ python3 tests/special_sanity/check_license.py --directories .
+ - name: Assert naming convention
+ run: |
+ if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ 'veRL' .; then
+ echo "Please use verl instead of veRL in the codebase"
+ exit 1
+ fi
+ - name: Assert SGLang naming convention
+ run: |
+ if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ --exclude=ascend_sglang_best_practices.rst -E 'Sglang|sgLang|sglAng|sglaNg|sglanG' .; then
+ echo "Please use SGLang or sglang as the formal name of SGLang rollout engine"
+ exit 1
+ fi
+ - name: Validate test folder structure
+ run: python3 tests/special_sanity/validate_structure.py
+ - name: Assert documentation requirement for functions
+ run: python3 tests/special_sanity/validate_imported_docs.py
+ - name: Assert device api usage in verl/verl
+ run: python3 tests/special_sanity/check_device_api_usage.py --directory ./verl
+ - name: Assert documentation time info
+ run: python3 tests/special_sanity/check_docs_time_info.py
+ - name: Check docstrings for specified files
+ run: python3 tests/special_sanity/check_docstrings.py
+ - name: Check DataProto for specified folders
+ run: python3 tests/special_sanity/check_dataproto_usage.py -d ./verl/workers/engine
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
new file mode 100644
index 0000000000000000000000000000000000000000..176d15ae2bd470752daaf138fa5aaa90641738e9
--- /dev/null
+++ b/.github/workflows/scorecard.yml
@@ -0,0 +1,66 @@
+# This workflow uses actions that are not certified by GitHub. They are provided
+# by a third-party and are governed by separate terms of service, privacy
+# policy, and support documentation.
+
+name: Scorecard supply-chain security
+on:
+ # For Branch-Protection check. Only the default branch is supported. See
+ # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
+ branch_protection_rule:
+ # To guarantee Maintained check is occasionally updated. See
+ # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
+ schedule:
+ - cron: "27 7 * * 1"
+ push:
+ branches:
+ - main
+ - v0.*
+
+# Declare default permissions as read only.
+permissions: read-all
+
+jobs:
+ analysis:
+ name: Scorecard analysis
+ runs-on: ubuntu-latest
+ permissions:
+ # Needed to upload the results to code-scanning dashboard.
+ security-events: write
+ # Needed to publish results and get a badge (see publish_results below).
+ id-token: write
+ # Uncomment the permissions below if installing in a private repository.
+ # contents: read
+ # actions: read
+
+ steps:
+ - name: "Checkout code"
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ with:
+ persist-credentials: false
+
+ - name: "Run analysis"
+ uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
+ with:
+ results_file: results.sarif
+ results_format: sarif
+ # (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
+ # - you want to enable the Branch-Protection check on a *public* repository, or
+ # - you are installing Scorecard on a *private* repository
+ # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional.
+ # repo_token: ${{ secrets.SCORECARD_TOKEN }}
+
+ # Public repositories:
+ # - Publish results to OpenSSF REST API for easy access by consumers
+ # - Allows the repository to include the Scorecard badge.
+ # - See https://github.com/ossf/scorecard-action#publishing-results.
+ # For private repositories:
+ # - `publish_results` will always be set to `false`, regardless
+ # of the value entered here.
+ publish_results: true
+
+ # Upload the results to GitHub's code scanning dashboard (optional).
+ # Commenting out will disable upload of results to your repo's Code Scanning dashboard
+ - name: "Upload to code-scanning"
+ uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 #v3.28.9
+ with:
+ sarif_file: results.sarif
diff --git a/.github/workflows/secrets_scan.yml b/.github/workflows/secrets_scan.yml
new file mode 100644
index 0000000000000000000000000000000000000000..298ed16c668c67facdb6af2119878da576f5bdf5
--- /dev/null
+++ b/.github/workflows/secrets_scan.yml
@@ -0,0 +1,22 @@
+on:
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+
+permissions:
+ contents: read
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ with:
+ fetch-depth: 0
+ - name: Secret Scanning
+ uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14
+ with:
+ extra_args: --results=verified,unknown
diff --git a/.github/workflows/sgl.yml b/.github/workflows/sgl.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bc0c0bb7f4acfdae3455bba8353457ccf3200435
--- /dev/null
+++ b/.github/workflows/sgl.yml
@@ -0,0 +1,165 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file name does not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: sgl
+
+on:
+ # workflow_dispatch: # Manual
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ - .github/workflows/sgl.yml
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py" # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # vLLM
+ - "!**/*vllm*"
+
+ # Entrypoints
+ - ".github/workflows/sgl.yml"
+ - "tests/rollout/*sglang*"
+ - "tests/rollout/async_rollout_utils.py"
+ - "tests/workers/rollout/*interaction*"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:sgl059.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ sgl:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 35 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: 1
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
+ NCCL_SHM_DISABLE: "1"
+ NCCL_P2P_DISABLE: "1"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
+ pip3 install hf_transfer fastmcp pytest-asyncio
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Test the latest SGLang Rollout async with agent loop
+ run: |
+ ROLLOUT_NAME=sglang pytest -svvv tests/experimental/agent_loop
+
+ sgl_checkpoint_engine:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 35 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: 1
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
+ NCCL_SHM_DISABLE: "1"
+ NCCL_P2P_DISABLE: "1"
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
+ pip3 install hf_transfer fastmcp pytest-asyncio
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ - name: Test SGLang ServerAdapter with Checkpoint Engine (NCCL)
+ run: |
+ ROLLOUT_NAME=sglang pytest -svvv tests/checkpoint_engine/test_special_server_adapter.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, sgl, sgl_checkpoint_engine]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.github/workflows/type-coverage-check.yml b/.github/workflows/type-coverage-check.yml
new file mode 100644
index 0000000000000000000000000000000000000000..268f0c672f0f87e8437d7dc964ee464922ec5d4e
--- /dev/null
+++ b/.github/workflows/type-coverage-check.yml
@@ -0,0 +1,31 @@
+name: Type Annotation and Docstring Coverage
+
+on:
+ pull_request:
+ paths:
+ - '**/*.py'
+ - '.github/workflows/type-coverage-check.yml'
+
+jobs:
+ type-coverage-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # 🚨 Important: fetch full history so `origin/main` is available
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install dependencies
+ run: |
+ pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+ pip3 install -r requirements.txt
+ pip3 install --no-deps -e .
+ - name: Run type annotation coverage check
+ run: |
+ python3 tests/special_sanity/type_coverage_check.py
+ - name: Run docstring coverage check
+ run: |
+ python3 tests/special_sanity/check_api_docs.py verl
diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d358349f72cf7adaeee1b96a59ed79cf3d4789de
--- /dev/null
+++ b/.github/workflows/vllm.yml
@@ -0,0 +1,169 @@
+# # Tests layout
+
+# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
+# - `tests/trainer` for testing functionality related to `verl/trainer`
+# - `tests/models` for testing functionality related to `verl/models`
+# - ...
+
+# There are a few folders with `special_` prefix, created for special purposes:
+# - `special_distributed`: unit tests that must run with multiple GPUs
+# - `special_e2e`: end-to-end tests with training/generation scripts
+# - `special_npu`: tests for NPUs
+# - `special_sanity`: a suite of quick sanity tests
+# - `special_standalone`: a set of tests that are designed to run in dedicated environments
+
+# Accelerators for tests
+# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
+# - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a linux environment.
+
+# # Workflow layout
+
+# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
+# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
+# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
+# 3. End-to-end tests: `e2e_*.yml`
+# 4. Unit tests
+# - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
+# - `gpu_unit_tests.yml`, run pytest on all test scripts whose file name does not end with the `on_cpu.py` suffix.
+# - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
+# - new workflow yaml is added to `.github/workflows`
+# - new tests are added to workflow mentioned in 2.
+
+name: vllm
+
+on:
+ # Trigger the workflow on push or pull request,
+ # but only for the main branch
+ push:
+ branches:
+ - main
+ - v0.*
+ pull_request:
+ branches:
+ - main
+ - v0.*
+ paths:
+ - "**/*.py"
+ # Other entrypoints
+ - "!examples/**"
+ - "!tests/**"
+ - "!verl/trainer/main_*.py"
+ - "!verl/trainer/fsdp_sft_trainer.py"
+ # FSDP
+ - "!verl/workers/**/*dp_*.py"
+ # Megatron
+ - "!verl/workers/**/megatron_*.py"
+ # SGLang
+ - "!**/*sglang*"
+ # Entrypoints
+ - ".github/workflows/vllm.yml"
+ - "tests/special_e2e/generation"
+ - "tests/workers/rollout"
+ - "verl/trainer/main_generation.py"
+ - "verl/trainer/config/generation.yaml"
+
+# Cancel jobs on the same ref if a new one is triggered
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+# Declare permissions just read content.
+permissions:
+ contents: read
+
+env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm017.dev2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
+jobs:
+ setup:
+ if: github.repository_owner == 'verl-project'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
+ vllm:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 35 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install --upgrade "transformers<5.0"
+ # - name: Download Model to Use
+ # run: |
+ # hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
+ # hf download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
+ # hf download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
+ # hf download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
+ # export HF_HUB_OFFLINE=1
+ - name: Prepare gsm8k dataset
+ run: |
+ ray stop --force
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+ - name: Test the latest vLLM Rollout async with agent loop
+ run: |
+ ROLLOUT_NAME=vllm pytest -svvv tests/experimental/agent_loop
+ - name: Test vllm server abort functionality
+ run: |
+ pytest tests/workers/rollout/rollout_vllm/test_vllm_abort.py -v -s
+
+ vllm_checkpoint_engine:
+ needs: setup
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
+ timeout-minutes: 35 # Increase this timeout value as needed
+ env:
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
+ HF_ENDPOINT: "https://hf-mirror.com"
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+ with:
+ fetch-depth: 0
+ - name: Install the current repository
+ run: |
+ pip3 install pytest-asyncio
+ pip3 install -r requirements-test.txt
+ pip3 install --no-deps -e .
+ pip3 install --upgrade "transformers<5.0"
+ pip3 install cupy-cuda12x==13.6.0
+ - name: Test vLLM ServerAdapter with Checkpoint Engine (NCCL)
+ run: |
+ ROLLOUT_NAME=vllm pytest -svvv tests/checkpoint_engine/test_special_server_adapter.py
+ - name: Test bucketed weight transfer
+ run: |
+ pytest -svvv tests/utils/test_bucketed_weight_transfer.py
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs: [setup, vllm, vllm_checkpoint_engine]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e32a3d2b895482f39a54af592b8e0c7761d85052
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,139 @@
+**/*.pt
+**/checkpoints
+**/wget-log
+**/_build/
+**/*.ckpt
+**/outputs
+**/*.tar.gz
+**/playground
+**/wandb
+
+/pyrightconfig.json
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+dataset/*
+tensorflow/my_graph/*
+.idea/
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+# env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+tmp/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+pytest.ini
+output.txt
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+venv/
+.venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+# vscode
+.vscode
+
+# Mac
+.DS_Store
+
+# vim
+*.swp
+
+# emacs
+*~
+
+# ckpt
+*.lock
+
+# data
+*.parquet
+/eval/data/
+
+
+# local logs
+logs
+log
+outputs
+.history
+/checkpoints/
+/outputs/
+
+eval/data/
+
+eval/data/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..d5dd7a6aa577ccb64650ca389b699e04fd7af259
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "recipe"]
+ path = recipe
+ url = https://github.com/verl-project/verl-recipe.git
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7f836f50ff9a2a06c9031119ad9290391b69028d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,45 @@
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: "v0.12.2"
+ hooks:
+ - id: ruff
+ args: ["--fix", "--show-fixes", "--output-format=full"]
+ exclude: ^.*\.(ipynb)$
+ - id: ruff-format
+
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: "v1.17.0"
+ hooks:
+ - id: mypy
+
+ - repo: local
+ hooks:
+ - id: autogen-trainer-cfg
+ name: Generate and verify verl/trainer/config/_generated_*.yaml
+ entry: scripts/generate_trainer_config.sh
+ language: script
+ pass_filenames: false
+
+ - repo: local
+ hooks:
+ - id: check-docstrings
+ name: Check doc string coverage
+ entry: python3 tests/special_sanity/check_docstrings.py
+ language: python
+ pass_filenames: false
+
+ - repo: local
+ hooks:
+ - id: check-license
+ name: Check license
+ entry: python3 tests/special_sanity/check_license.py --directories examples scripts tests verl setup.py
+ language: python
+ pass_filenames: false
+
+ - repo: local
+ hooks:
+ - id: compileall
+ name: Compile all python files
+ entry: sh -c 'PYTHONWARNINGS=error python3 -m compileall -q . -x "(^|[\\/])(\.venv|venv|\.git)([\\/]|$)"'
+ language: python
+ pass_filenames: false
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0016868541a2a0667ef40ae6a9d861bcd26b9316
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,19 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.11"
+ rust: "1.70"
+
+sphinx:
+ configuration: docs/conf.py
+
+python:
+ install:
+ - requirements: docs/requirements-docs.txt
+ - method: pip
+ path: .
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..6fd3023a0859f533951476fac6e8e06fe1e8aa3f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,90 @@
+# Contributing to verl
+
+Thank you for considering a contribution to verl! We welcome contributions of any kind - bug fixes, enhancements, documentation improvements, or even just feedback. Whether you're an experienced developer or this is your first open-source project, your help is invaluable.
+
+Your support can take many forms:
+- Report issues or unexpected behaviors.
+- Suggest or implement new features.
+- Improve or expand documentation.
+- Review pull requests and assist other contributors.
+- Spread the word: share verl in blog posts, social media, or give the repo a ⭐.
+
+## Finding Issues to Contribute
+
+Looking for ways to dive in? Check out these issues:
+- [Good first issues](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22)
+- [Call for contribution](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22call%20for%20contribution%22)
+Furthermore, you can learn the development plan and roadmap via [RFC](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3ARFC) and [Roadmap](https://github.com/volcengine/verl/issues?q=state%3Aopen%20label%3A%22roadmap%22).
+
+
+## Developing
+
+- **Python-only**: install verl via `pip install -e .[test,vllm]` or `pip install -e .[test,sglang]` and iterate quickly. For full dependency setup, check out the verl [installation doc](https://verl.readthedocs.io/en/latest/start/install.html).
+
+## Code Linting and Formatting
+
+We rely on pre-commit to keep our code consistent. To set it up:
+
+```bash
+pip install pre-commit
+pre-commit install
+# for staged changes
+pre-commit run
+# for all files in the repo
+pre-commit run --all-files
+# run a specific hook with pre-commit
+# pre-commit run --all-files --show-diff-on-failure --color=always
+
+
+[](https://github.com/volcengine/verl/stargazers)
+[](https://twitter.com/verl_project)
+
+
+[](https://verl.readthedocs.io/en/latest/)
+
+
+
verl: Volcano Engine Reinforcement Learning for LLMs
+
+verl is a flexible, efficient and production-ready RL training library for large language models (LLMs).
+
+verl is the open-source version of **[HybridFlow: A Flexible and Efficient RLHF Framework](https://arxiv.org/abs/2409.19256v2)** paper.
+
+verl is flexible and easy to use with:
+
+- **Easy extension of diverse RL algorithms**: The hybrid-controller programming model enables flexible representation and efficient execution of complex post-training dataflows. Build RL dataflows such as GRPO, PPO in a few lines of code.
+
+- **Seamless integration of existing LLM infra with modular APIs**: Decouples computation and data dependencies, enabling seamless integration with existing LLM frameworks, such as FSDP, Megatron-LM, vLLM, SGLang, etc
+
+- **Flexible device mapping**: Supports various placement of models onto different sets of GPUs for efficient resource utilization and scalability across different cluster sizes.
+
+- Ready integration with popular HuggingFace models
+
+verl is fast with:
+
+- **State-of-the-art throughput**: SOTA LLM training and inference engine integrations and SOTA RL throughput.
+
+- **Efficient actor model resharding with 3D-HybridEngine**: Eliminates memory redundancy and significantly reduces communication overhead during transitions between training and generation phases.
+
+
+
+
+
+
++ 🎉 News • + ✨ Getting Started • + 📖 Introduction +
++ 🎈 Citation • + 🌻 Acknowledgement • + 📬 Contact • + 📈 Star History +
+
+
+
+
+
+- 软件栈工作在hostcpu,通信算法展开一个个task
+- 每个task调用runtime接口,下发到device的rtsqueue
+- STARS从rstqueue上顺序拿取task
+- 根据task类型分别调用掉SDMA和RDMA引擎。
+ **单算子瓶颈**:hostbound 每个task提交是2~5us,一个通信算子有几百个task,单算子场景不会在device上缓存,下发一个执行一个
+
+##### AICpu机制展开
+
+
+
+- host侧不下发一个个task,把通信算子作为一个个kernel,放在通信算子kernel的队列上去。
+- STARS调度kernel队列流上的kernel,把kernel放到AiCPU上去执行。
+- AICPU调用函数(kernel),用一个线程执行kernel 函数,在函数内把通信task展开,把task放到rstqueue上,STARS调用。
+- 降低host和aicpu交互,由几百次降低为一次。
+- task的提交在AICPU上提交,做了提交的部分合并。
+
+#### TASK_QUEUE_ENABLE
+
+**使用方式:**`export TASK_QUEUE_ENABLE=2`
+
+TASK_QUEUE_ENABLE,下发优化,图模式设置为1(即开启图模式的时候这个要设置为1),非图模式设置为2
+
+示意图:
+
+
+
+##### 绑核优化
+
+**使用方式:**`export CPU_AFFINITY_CONF=1`
+
+详细设置原理可看:https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0059.html
+
+### 其他
+
+以下内容汇总了若干全局环境变量的调优配置。由于这些参数在训练阶段与推理阶段往往都能带来正向收益,且目前尚缺乏足够精细的消融实验来严格区分它们各自对训练或推理的贡献占比,故统一归拢在此,供后续持续监控与进一步拆解分析。
+
+#### 使能jemalloc
+
+使用方式(注意需要先安装jemalloc库):`export LD_PRELOAD=/usr/local/lib/libjemalloc.so.2`
+
+**安装使用教程:**[MindSpeed-RL/docs/install_guide.md · Ascend/MindSpeed-RL - AtomGit | GitCode](https://gitcode.com/Ascend/MindSpeed-RL/blob/master/docs/install_guide.md#高性能内存库-jemalloc-安装)
+
+#### 多流复用
+
+内存方面有优化
+
+使能方式:`export MULTI_STREAM_MEMORY_REUSE=1`
+
+原理介绍:https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0040.html
+
+#### VLLM_ASCEND_ENABLE_FLASHCOMM
+
+使用方式:`export VLLM_ASCEND_ENABLE_FLASHCOMM=1`
+
+启用昇腾 NPU 特有的FLASHCOMM高速通信优化技术
+
+地址:https://vllm-ascend.readthedocs.io/zh-cn/latest/user_guide/release_notes.html
+
+#### VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE
+
+使用方式:`export VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE=1`
+
+启用昇腾 NPU针对大模型推理的稠密计算优化
+
+地址:https://vllm-ascend.readthedocs.io/zh-cn/latest/user_guide/release_notes.html
+
+#### VLLM_ASCEND_ENABLE_PREFETCH_MLP
+
+使用方式:`export VLLM_ASCEND_ENABLE_PREFETCH_MLP=1`
+
+启用 MLP 层的权重预取机制
+
+
+
+### verl框架参数设置
+
+主要是内存方面的一些设置开关(注意,这个里面的优化都或多或少会导致吞吐量有一定程度的劣化)
+
+~~~bash
+# 梯度检查点 (Gradient Checkpointing)
+# 作用: 通过重新计算激活值来节省显存,以计算换内存。在前向传播时不保存中间激活值,反向传播时重新计算,可以显著降低显存占用,允许使用更大的batch size。
+actor_rollout_ref.model.enable_gradient_checkpointing=True
+
+# 参数卸载 (Parameter Offload)
+# 作用: 将模型参数卸载到CPU内存,训练时再加载回GPU。
+actor_rollout_ref.actor.fsdp_config.param_offload=${offload} # True
+actor_rollout_ref.ref.fsdp_config.param_offload=${offload} # True
+
+# 优化器状态卸载 (Optimizer Offload)
+# 作用: 将优化器状态(如Adam的动量)卸载到CPU。优化器状态通常占用大量显存(对于Adam,每个参数需要额外8字节),卸载可以节省显存。
+actor_rollout_ref.actor.fsdp_config.optimizer_offload=${offload} # True
+
+# 释放推理引擎缓存 (Free Cache Engine)
+# 作用: 在训练阶段释放推理引擎的KV cache和权重。这是3D-HybridEngine的核心优化,允许在同一GPU上交替进行推理和训练,显著降低显存需求。
+actor_rollout_ref.rollout.free_cache_engine=True
+
+# 熵计算优化
+# entropy_checkpointing: 在训练时对熵计算启用重计算,降低显存峰值
+# entropy_from_logits_with_chunking: 分块处理logits张量(如2048 tokens一组),避免一次性加载整个[bsz*seq_len, vocab]张量
+actor_rollout_ref.actor.entropy_checkpointing=True
+actor_rollout_ref.ref.entropy_checkpointing=True
+actor_rollout_ref.actor.entropy_from_logits_with_chunking=True
+actor_rollout_ref.ref.entropy_from_logits_with_chunking=True
+
+# 推理引擎显存配置
+# gpu_memory_utilization: 控制vLLM使用的GPU显存比例(0.90 = 90%)
+# enforce_eager=False: 启用CUDA graphs加速推理,但会占用额外显存
+actor_rollout_ref.rollout.gpu_memory_utilization=0.90
+actor_rollout_ref.rollout.enforce_eager=False
+~~~
+
+## NPU调优参考文章
+
+环境变量相关:[环境变量列表-Ascend Extension for PyTorch6.0.0-昇腾社区](https://www.hiascend.com/document/detail/zh/Pytorch/600/apiref/Envvariables/Envir_001.html)
+
+社区性能调优教程:[性能调优流程-Ascend Extension for PyTorch6.0.0-昇腾社区](https://www.hiascend.com/document/detail/zh/Pytorch/600/ptmoddevg/trainingmigrguide/performance_tuning_0001.html)
+
+
+
diff --git a/docs/ascend_tutorial/examples/run_qwen3_32B_megatron_1k_256k_npu.md b/docs/ascend_tutorial/examples/run_qwen3_32B_megatron_1k_256k_npu.md
new file mode 100644
index 0000000000000000000000000000000000000000..18f029b99c8c59d02a2000be1c8505bf447340fb
--- /dev/null
+++ b/docs/ascend_tutorial/examples/run_qwen3_32B_megatron_1k_256k_npu.md
@@ -0,0 +1,155 @@
+# Long Sequence Qwen3-32B 1k-to-256k Example
+
+Last updated: 6/3/2026.
+
+本章对Qwen3-32B进行了长序列开发。Qwen3-32B模型支持的最长推理长度为40k。
+
+## 全层实验
+
+对Qwen3-32B进行了长序列开发,脚本如下:
+
+```bash
+set -x
+
+export USE_OPTIMIZED_MODEL=0
+export VLLM_USE_V1=1
+export VLLM_ASCEND_ENABLE_NZ=0
+export VLLM_VERSION="0.13.0"
+export LD_PRELOAD=/usr/local/lib/libjemalloc.so.2
+export PYTORCH_NPU_ALLOC_CONF="max_split_size_mb:2048"
+
+PROJECT_NAME="GRPO-Qwen3-32B"
+EXPERIMENT_NAME="GRPO-Qwen3-32B-megatron-gsm8k"
+
+SAVE_CHECKPOINT_DIR=$HOME/verl_checkpoints
+math_train_path=$HOME/datasets/gsm8k/train.parquet
+math_test_path=$HOME/datasets/gsm8k/test.parquet
+train_files="['$math_train_path']"
+test_files="['$math_test_path']"
+
+use_dynamic_bsz=False
+enable_chunked_prefill=True
+tp_size=8
+max_prompt_length=1024
+max_response_length=$((1024*256))
+actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) / tp_size))
+infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) / tp_size))
+cp_size=4
+
+python3 -m verl.trainer.main_ppo \
+ --config-path=config \
+ --config-name='ppo_megatron_trainer.yaml' \
+ algorithm.adv_estimator=grpo \
+ data.train_files="$train_files" \
+ data.val_files="$test_files" \
+ data.shuffle=False \
+ data.validation_shuffle=False \
+ data.train_batch_size=64 \
+ data.max_prompt_length=${max_prompt_length} \
+ data.max_response_length=${max_response_length} \
+ data.filter_overlong_prompts=False \
+ data.truncation='error' \
+ actor_rollout_ref.model.enable_gradient_checkpointing=True \
+ actor_rollout_ref.model.path=$HOME/hf_weights/Qwen3-32B \
+ actor_rollout_ref.actor.optim.lr=1e-6 \
+ actor_rollout_ref.actor.ppo_mini_batch_size=16 \
+ actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
+ actor_rollout_ref.actor.use_kl_loss=True \
+ actor_rollout_ref.actor.kl_loss_coef=0.001 \
+ actor_rollout_ref.actor.kl_loss_type=low_var_kl \
+ actor_rollout_ref.actor.entropy_coeff=0 \
+ actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
+ actor_rollout_ref.actor.megatron.tensor_model_parallel_size=8 \
+ actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=1 \
+ actor_rollout_ref.actor.use_dynamic_bsz=${use_dynamic_bsz} \
+ actor_rollout_ref.actor.megatron.context_parallel_size=${cp_size} \
+ +actor_rollout_ref.actor.megatron.override_transformer_config.context_parallel_size=${cp_size} \
+ +actor_rollout_ref.actor.megatron.override_transformer_config.use_flash_attn=True \
+ +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_offload_fraction=1 \
+ +actor_rollout_ref.actor.optim.override_optimizer_config.use_precision_aware_optimizer=True \
+ +actor_rollout_ref.actor.optim.override_optimizer_config.optimizer_cpu_offload=True \
+ actor_rollout_ref.actor.megatron.param_offload=True \
+ actor_rollout_ref.actor.megatron.optimizer_offload=True \
+ actor_rollout_ref.actor.megatron.grad_offload=True \
+ actor_rollout_ref.actor.use_torch_compile=False \
+ actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \
+ actor_rollout_ref.rollout.tensor_model_parallel_size=8 \
+ actor_rollout_ref.rollout.name=vllm \
+ actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
+ actor_rollout_ref.rollout.n=5 \
+ actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+ actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+ actor_rollout_ref.rollout.enable_chunked_prefill=${enable_chunked_prefill} \
+ actor_rollout_ref.rollout.enable_prefix_caching=True \
+ actor_rollout_ref.rollout.enforce_eager=False \
+ actor_rollout_ref.rollout.free_cache_engine=True \
+ actor_rollout_ref.ref.megatron.param_offload=True \
+ actor_rollout_ref.ref.use_torch_compile=False \
+ actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
+ actor_rollout_ref.ref.log_prob_use_dynamic_bsz=${use_dynamic_bsz} \
+ actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
+ actor_rollout_ref.ref.megatron.use_dist_checkpointing=False \
+ actor_rollout_ref.ref.megatron.dist_checkpointing_path=${SAVE_CHECKPOINT_DIR} \
+ actor_rollout_ref.actor.megatron.use_dist_checkpointing=False \
+ actor_rollout_ref.actor.megatron.dist_checkpointing_path=${SAVE_CHECKPOINT_DIR} \
+ algorithm.use_kl_in_reward=False \
+ trainer.critic_warmup=0 \
+ trainer.logger=console \
+ trainer.n_gpus_per_node=16 \
+ trainer.nnodes=2 \
+ trainer.save_freq=100 \
+ trainer.test_freq=-1 \
+ trainer.total_training_steps=100 \
+ trainer.device=npu \
+ trainer.project_name=${PROJECT_NAME} \
+ trainer.experiment_name=${EXPERIMENT_NAME} \
+ trainer.total_epochs=30
+```
+
+- 相关实验结果
+
+
+
+## 减层实验
+
+在实际推理中,我们发现其response长度最大在20k左右,因此对其进行减层实验,使其response能达到40k。
+
+在权重的`config.json`文件中,我们将`num_hidden_layers`从64减层到16
+
+```
+{
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 25600,
+ "max_position_embeddings": 40960,
+ "max_window_layers": 64,
+ "model_type": "qwen3",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 16,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.0",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
+
+```
+
+- 其实验结果如下:
+
+
\ No newline at end of file
diff --git a/docs/ascend_tutorial/features/ascend_backend_features.md b/docs/ascend_tutorial/features/ascend_backend_features.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe31696ce78f29ef8d23da41fd25efcfa42ba729
--- /dev/null
+++ b/docs/ascend_tutorial/features/ascend_backend_features.md
@@ -0,0 +1,274 @@
+# Ascend Backend Features Guide
+
+
+Last updated: 03/03/2026.
+
+昇腾全面支持verl生态建设,本文将介绍NPU上对于verl的适配工作及后端特性支持供开发者进行参考
+
+---
+
+## 推理后端
+
+当前verl支持vllm/sglang这两种主流推理后端,均可在昇腾NPU上运行。
+
+### 1. vllm:
+
+昇腾通过vllm-ascend插件来支持vllm推理后端,该插件是 vLLM 社区支持 Ascend 后端的推荐方法。它遵循[[RFC]](https://github.com/vllm-project/vllm/issues/11162),提供了一个可插拔接口,将 Ascend NPU 与 vLLM 解耦。
+
+##### 参数特性支持
+
+| vllm参数| verl对应通用参数 | 简介|
+| --- | --- | --- |
+| `model_path` | `actor_rollout_ref.model.path` |模型权重文件的路径|
+| `gpu_memory_utilization` | `actor_rollout_ref.rollout.gpu_memory_utilization` |用于控制每个阶段可使用的 GPU 内存量。它被指定为一个介于 0.0 和 1.0 之间的分数,其中:- 0.8 表示 GPU 总内存的 80%- 1.0 表示 GPU 总内存的 100%(不推荐,没有预留缓冲)|
+| `enforce_eager`| `actor_rollout_ref.rollout.enforce_eager` |禁用图模式,verl默认为False|
+| `enable_chunked_prefill`| `actor_rollout_ref.rollout.enable_chunked_prefill` | 分块预填充允许将大预填充分块成更小的块,并将它们与解码请求一起批处理。|
+| `free_cache_engine`| `actor_rollout_ref.rollout.free_cache_engine` |在部署生成阶段之后卸载 KVCache,默认值为 True。|
+| `max_model_len` | `actor_rollout_ref.rollout.max_model_len` | 模型能够处理的最大序列长度。它限制了单个输入序列的最大长度 |
+| `tp_size`| `actor_rollout_ref.rollout.tensor_model_parallel_size * data_parallel_size`|TP并行度|
+| `dp_size`| `actor_rollout_ref.rollout.data_parallel_size`|DP并行度|
+| `ep_size`| `actor_rollout_ref.rollout.expert_parallel_size`|EP并行度|
+| `node_rank`| `无,根据实际实例和卡数自动计算` |实例中的节点排序|
+| `load_format`| `actor_rollout_ref.rollout.load_format` |要加载的模型权重格式|
+| `disable_log_stats`| `actor_rollout_ref.rollout.disable_log_stats`|记录抢占请求的累积数量 |
+| `nnodes` | `无,根据实际实例和卡数自动计算` | 每个实例包含的节点数量 |
+| `trust_remote_code`| `actor_rollout_ref.model.trust_remote_code`|是否允许在 Hub 上定义自定义模型,并将其写入自己的建模文件中|
+| `max_num_seqs` | `actor_rollout_ref.rollout.max_num_seqs` |正在运行的请求的最大数量|
+| `max_num_batched_tokens`| `actor_rollout_ref.rollout.max_num_batched_tokens` |在一次批处理(batch)中可以处理的最大总Token数|
+| `skip_tokenizer_init`| `actor_rollout_ref.rollout.skip_tokenizer_init` |跳过初始化分词器并将 input_ids 传递到推理请求中|
+| `enable_prefix_caching` | `actor_rollout_ref.rollout.enable_prefix_caching`|用于启用自动前缀缓存 |
+| `quantization`| `actor_rollout_ref.rollout.quantization`,默认为None|量化方法|
+| `enforce_eager`|`actor_rollout_ref.rollout.enforce_eager`|标志用于强制使用PyTorch的eager执行模式,而非默认的图执行模式|
+
+### 2. sglang:
+
+对于sglang推理后端,昇腾通过直接向sglang社区进行持续建设与维护来支持相关功能。
+此外在verl中使用sglang还涉及以下组件, 我们在[quick start](https://github.com/verl-project/verl/blob/main/docs/ascend_tutorial/ascend_sglang_quick_start.rst)中提供详细说明与一键安装脚本。
+
+| 组件| 描述|
+| --- | --- |
+| [sgl_kernel_npu](https://github.com/sgl-project/sgl-kernel-npu/blob/main/python/sgl_kernel_npu/README.md) | Ascend NPU SGL 优化推理内核集合,包括注意力机制、归一化、激活函数、LoRA 适配器等。 |
+| [deepep](https://github.com/sgl-project/sgl-kernel-npu/blob/main/python/deep_ep/README.md) | DeepEP的 Ascend 实现,为MoE模型提供高度优化的专家并行 (EP) 通信内核 |
+
+##### 参数特性支持
+
+verl中通过rollout config管理推理后端参数使能,包含通用参数和engine_kwargs自定义传参。
+以下列举在verl中常见设置的sglang特性参数,更多参数介绍请参考 [sglang社区NPU特性支持](https://docs.sglang.io/platforms/ascend_npu_support_features.html)
+
+| sglang参数| verl对应通用参数 | 简介|
+| --- | --- | --- |
+| model_path | actor_rollout_ref.model.path|模型权重文件的路径|
+| mem_fraction_static| actor_rollout_ref.rollout.gpu_memory_utilization |用于静态分配(模型权重和键值缓存内存池)的内存比例|
+| disable_cuda_graph| actor_rollout_ref.rollout.enforce_eager|禁用图模式,verl默认为False|
+| enable_memory_saver| 无,verl中默认设置为True | 允许使用 release_memory_occupation 和 resume_memory_occupation 来节省内存|
+| base_gpu_id| 无,根据实际实例和卡数自动计算 |用于分配每个实例上计算卡资源时的初始ID|
+| gpu_id_step| 无,默认设置为1| 使用的连续计算卡ID 之间的差值|
+| tp_size| actor_rollout_ref.rollout.tensor_model_parallel_size * data_parallel_size|TP并行度|
+| dp_size| actor_rollout_ref.rollout.data_parallel_size|DP并行度|
+| ep_size| actor_rollout_ref.rollout.expert_parallel_size|EP并行度|
+| node_rank| 无,根据实际实例和卡数自动计算 |实例中的节点排序|
+| load_format| actor_rollout_ref.rollout.load_format|要加载的模型权重格式|
+| dist_init_addr| 无,自动计算|用于初始化分布式后端的主机地址|
+| nnodes| 无,根据实际实例和卡数自动计算|每个实例包含的节点数量|
+| trust_remote_code| actor_rollout_ref.model.trust_remote_code|是否允许在 Hub 上定义自定义模型,并将其写入自己的建模文件中|
+| max_running_requests| actor_rollout_ref.rollout.max_num_seqs |正在运行的请求的最大数量|
+| log_level| 无,默认设置为error |日志记录器的日志级别|
+| skip_tokenizer_init| actor_rollout_ref.rollout.skip_tokenizer_init |跳过初始化分词器并将 input_ids 传递到推理请求中|
+| skip_server_warmup| 无,默认设置为True |跳过预热|
+| quantization| actor_rollout_ref.rollout.quantization,默认为None|量化方法|
+| attention_backend|actor_rollout_ref.rollout.engine_kwargs.sglang.attention_backend|attention内核,NPU应该设置为ascend|
+
+---
+
+## 训练后端
+
+### 1. FSDP
+
+昇腾通过torch_npu提供FSDP相关支持能力,当前pytorch api支持度参照[版本说明](https://www.hiascend.com/document/detail/zh/Pytorch/730/apiref/PyTorchNativeapi/docs/zh/native_apis/pytorch_2-7-1/torch-distributed-fsdp.md)。
+
+#### FSDP1
+##### 参数特性支持
+| verl参数 | 简介|
+| --- | --- |
+| `actor_rollout_ref.actor.fsdp_config.param_offload` |是否卸载模型权重到CPU,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.optimizer_offload` |是否卸载优化器状态到CPU,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.reshard_after_forward` |控制前向计算后的参数行为,平衡内存与通信。默认值为True:前向后重新分片参数,反向时重新全收集|
+| `actor_rollout_ref.actor.fsdp_config.fsdp_size` | 每个FSDP分片组中的NPU数量;默认值-1表示自动。|
+| `actor_rollout_ref.actor.fsdp_config.forward_prefetch` |在前向计算完成前预取下一次前向传播的 all-gather,仅用于FSDP1,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.use_orig_params` | FSDP是否会使用module的原始参数来初始化,仅用于FSDP1,默认值为False|
+| `actor_rollout_ref.actor.ulysses_sequence_parallel_size`|Ulysses序列并行大小|
+| `actor_rollout_ref.actor.entropy_from_logits_with_chunking`|通过分块计算熵以减少显存峰值,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.entropy_checkpointing`|在训练时对熵计算启用重计算,降低显存峰值,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.forward_only` |是否只进行前向计算,默认值为False|
+
+#### FSDP2
+##### 参数特性支持
+| verl参数 | 简介|
+| --- | --- |
+| `actor_rollout_ref.actor.fsdp_config.param_offload` |是否卸载模型权重到CPU,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.optimizer_offload` |是否卸载优化器状态到CPU,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.reshard_after_forward` |控制前向计算后的参数行为,平衡内存与通信。默认值为True:前向后重新分片参数,反向时重新全收集|
+| `actor_rollout_ref.actor.fsdp_config.fsdp_size` | 每个FSDP分片组中的NPU数量;默认值-1表示自动。|
+| `actor_rollout_ref.actor.ulysses_sequence_parallel_size`|Ulysses序列并行大小|
+| `actor_rollout_ref.actor.entropy_from_logits_with_chunking`|通过分块计算熵以减少显存峰值,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.entropy_checkpointing`|在训练时对熵计算启用重计算,降低显存峰值,默认值为False|
+| `actor_rollout_ref.actor.fsdp_config.forward_only` |是否只进行前向计算,默认值为False|
+
+
+
+### 2. Megatron
+
+Megatron 是 NVIDIA 推出的一个专注于模型并行的训练框架仓库。如果一个仓库(例如 Verl)的训练后端使用了 Megatron,同时又希望在 NPU 上运行该仓库,那么就需要额外安装 MindSpeed 来提供底层支持。下文将介绍 MindSpeed 是如何实现无感替换 Megatron 中的关键组件,从而使其能够适配 NPU 的。
+
+MindSpeed 底层的替换原理采用了 Monkey Patch 技术
+
+* MindSpeed Monkey Patch框架
+
+在verl里面通过`from mindspeed.megatron_adaptor import repatch`触发patch,调用栈如下:
+
+~~~
+from mindspeed.megatron_adaptor import repatch
+├── 执行 megatron_adaptor.py 模块导入
+├── 导入 features_manager 模块
+├── 执行 mindspeed/features_manager/__init__.py
+├── @AutoExecuteFunction 装饰器触发
+├── patch_features() 自动执行
+└── 进行`apply_features_pre_patches`和`apply_features_patches`操作
+~~~
+
+`Patch`类是整个patch系统的核心,实现了函数/类的动态替换
+
+~~~python
+class Patch
+~~~
+
+`parse_path`方法实现了动态模块导入和创建
+
+~~~python
+def parse_path(module_path, function_name, create_dummy)
+~~~
+
+patch系统支持多层装饰器叠加
+
+~~~
+def apply_patch(self):
+ final_patch_func = self.orig_func
+ if self.patch_func is not None:
+ final_patch_func = self.patch_func
+
+ # 应用所有装饰器
+ for wrapper in self.wrappers:
+ final_patch_func = wrapper(final_patch_func)
+~~~
+
+* MindSpeedPatchesManager类
+
+`MindSpeedPatchesManager`作为全局单例管理所有patch
+
+~~~python
+class MindSpeedPatchesManager:
+ patches_info: Dict[str, Patch] = {}
+~~~
+
+* Feature集成模式
+
+各个Feature通过继承`MindSpeedFeature`基类集成patch系统
+
+~~~python
+class MindSpeedFeature:
+ """Base class for mindspeed features."""
+
+ def __init__(self, feature_name: str, optimization_level: int = 2):
+ self.feature_name = feature_name.lower().strip().replace('-', '_')
+ self.optimization_level = optimization_level
+ self.default_patches = self.optimization_level == 0
+
+ def is_need_apply(self, args):
+ """Check the feature is need to apply."""
+ return (self.optimization_level <= args.optimization_level and getattr(args, self.feature_name, None)) \
+ or self.default_patches
+
+ def register_args(self, parser: ArgumentParser):
+ """Register cli arguments to enable the feature."""
+ pass
+
+ def pre_validate_args(self, args: Namespace):
+ """Validate the arguments of mindspeed before megatron args validation
+ and store some arguments of the mindspeed temporarily,
+ incase that megatron validate faile.
+ for example:
+ ```python
+ origin_context_parallel_size = args.context_parallel_size
+ args.context_parallel_size = 1
+ ```
+ """
+ pass
+
+ def validate_args(self, args: Namespace):
+ """Restore the arguments of the mindspeed.
+
+ for example:
+ ```python
+ args.context_parallel_size = origin_context_parallel_size
+ ```
+ """
+ pass
+
+ def post_validate_args(self, args: Namespace):
+ """validate mindspeed arguments after megatron arguments validation."""
+ pass
+
+ def pre_register_patches(self, patch_manager: MindSpeedPatchesManager, args: Namespace):
+ """Register all patch functions before import megatron"""
+ pass
+
+ def register_patches(self, patch_manager: MindSpeedPatchesManager, args: Namespace):
+ """Register all patch functions the feature is related."""
+ pass
+
+ def incompatible_check(self, global_args, check_args):
+ """Register all incompatible functions the feature is related."""
+ if getattr(global_args, self.feature_name, None) and getattr(global_args, check_args, None):
+ raise AssertionError('{} and {} are incompatible.'.format(self.feature_name, check_args))
+
+ def dependency_check(self, global_args, check_args):
+ """Register all dependency functions the feature is related."""
+ if getattr(global_args, self.feature_name, None) and not getattr(global_args, check_args, None):
+ raise AssertionError('{} requires {}.'.format(self.feature_name, check_args))
+
+ @staticmethod
+ def add_parser_argument_choices_value(parser, argument_name, new_choice):
+ """Add a new choice value to the existing choices of a parser argument."""
+ for action in parser._actions:
+ exist_arg = isinstance(action, argparse.Action) and argument_name in action.option_strings
+ if exist_arg and action.choices is not None and new_choice not in action.choices:
+ action.choices.append(new_choice)
+~~~
+
+##### 参数特性支持
+| verl参数 | 简介|
+| --- | --- |
+| `actor_rollout_ref.actor.megatron.optimizer_offload` |是否卸载模型优化器到CPU,默认值为False|
+| `actor_rollout_ref.actor.megatron.use_mbridge` |是否使用mbridge进行权重转换|
+| `actor_rollout_ref.actor.megatron.param_offload` |是否卸载模型权重到CPU,默认值为False|
+| `actor_rollout_ref.actor.megatron.tensor_model_parallel_size` | 张量并行大小;默认值为1。|
+| `actor_rollout_ref.actor.megatron.pipeline_model_parallel_size` |流水并行大小,默认值为1|
+| `actor_rollout_ref.actor.megatron.expert_model_parallel_size` | 专家并行大小,默认值为1|
+| `actor_rollout_ref.actor.megatron.expert_tensor_parallel_size`|TP拓展EP大小,默认值为null|
+| `actor_rollout_ref.actor.megatron.context_parallel_size`|上下文并行大小,默认值为1|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.deallocate_pipeline_outputs`|张量在发送到下一个pp stage后,输出数据被释放,降低显存峰值,默认值为False|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.persist_layer_norm` |是否使用持久化 LayerNorm,默认值为False|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm` |是否使用Group GEMM,默认值为False|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.moe_router_dtype` |用于路由和专家输出加权平均的数据类型。使用 fp32 或 fp64 可以提高稳定性,尤其是在专家数量较多时,默认值为fp32|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.account_for_loss_in_pipeline_split` |如果设置为 True,在流水线并行的划分和放置策略中,loss 层会被视为一个标准的 Transformer 层来处理。默认为False。|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.account_for_embedding_in_pipeline_split` |如果设置为 True,在流水线并行的划分和放置策略中,输入embedding 层会被视为一个标准的 Transformer 层来处理。默认为False。|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.recompute_granularity` |重新计算激活的粒度,可选项为'full', 'selective' and 'none'。其中full代表重新计算整个transformer layer,selective代表只计算transformer layer中的核心注意力部分。默认为'none'。|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.recompute_method` |该参数需将recompute_granularity设置为'full'才生效,可选项为'uniform', 'block'。默认为None。|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.recompute_num_layers` |该参数需将recompute_granularity设置为'full'才生效,默认为None。若recompute_method设置为uniform,该参数含义为每个均匀划分的重新计算单元的transformer layers数量。例如你可以指定为--recompute_granularity full --recompute_method uniform --recompute_num_layers 4。recompute_num_layers越大,显存占用越小,计算成本越大。注意:当前进程中的模型层数需能被recompute_num_layers整除。默认为None。|
+| `actor_rollout_ref.actor.megatron.use_dist_checkpointing` |是否使用分布式权重,默认值为False|
+| `actor_rollout_ref.actor.megatron.dist_checkpointing_path` |分布式权重路径,默认值为null|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.use_flash_attn` |是否使用fa,默认值为true|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.use_fused_rotary_pos_emb` |是否使用融合旋转位置编码,默认值为False|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.use_fused_swiglu` |是否使用融合swiglu,默认值为False|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_first_pipeline_stage` |第一个pipeline stage 的层数,默认值为none|
+| `actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_last_pipeline_stage` |最后一个pipeline stage 的层数,默认值为none|
diff --git a/docs/blog/v0.7.md b/docs/blog/v0.7.md
new file mode 100644
index 0000000000000000000000000000000000000000..0bf3c31c3e9cd771451546a825cf9a74504c1cb7
--- /dev/null
+++ b/docs/blog/v0.7.md
@@ -0,0 +1,274 @@
+# verl 0.7 release blog
+
+**Author:** verl team
+
+Last updated: 01/03/2026.
+
+## Overview
+verl adopts a Hybrid-Controller architecture (also known as HybridFlow). Sharing design principles with asynchronous sharded dataflow systems like Google Pathways, verl models Reinforcement Learning (RL) algorithms, such as PPO, GRPO, DAPO, and others, as a multi-stage, multi-model and parallelizable dataflow graph.
+
+To balance flexibility with performance, verl unifies two distinct programming models:
+
+**High-Level Single-Controller (MPMD)**: At the orchestration level, a single process `RLTrainer` manages the global computation graph. It handles macro-tasks such as scheduling rollout generation, triggering reward scoring, and dispatching distributed training jobs.
+
+**Internal Multi-Controller (SPMD)**: Internally, the Model Engine operates in standard distributed training mode. Workers execute identical programs, via trainer backends like FSDP, Megatron, or VeOmni, or rollout executors (not rollout server) like vLLM/SGLang/TensorRT-LLM, to perform heavy distributed computation, synchronizing via collective communication.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+