| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Workflow identity and triggers.
name: model_ascend

on:
  # Pushes to main and to any branch matching v0.* start the workflow.
  push:
    branches:
      - 'main'
      - 'v0.*'
  # Pull requests against the same branches start it only when at least one
  # changed file matches an entry in `paths`.
  pull_request:
    branches:
      - 'main'
      - 'v0.*'
    paths:
      # Library sources.
      - 'verl/**/*.py'
      # This workflow file and the test files / scripts its jobs invoke.
      - '.github/workflows/model_ascend.yml'
      - 'tests/special_distributed/test_fsdp_ckpt.py'
      - 'tests/special_distributed/test_tensor_dict.py'
      - 'tests/models/**'
      - 'tests/special_distributed/run_all.sh'
|
|
| |
# Runs are grouped per workflow + ref. A newer run in the same group cancels
# the in-progress one, except on refs/heads/main where the expression below
# evaluates to false and running jobs are left to finish.
concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
|
|
jobs:
  # Model-level tests: transformer rmpad tests, the FSDP checkpoint test,
  # the transformers Ulysses tests and the special_distributed suite.
  # The job is skipped unless the repository owner is `verl-project`.
  model_rmpad_ascend:
    if: github.repository_owner == 'verl-project'
    runs-on: linux-aarch64-a2b3-8
    timeout-minutes: 60
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
      options: >-
        --shm-size 16g
    # NOTE(review): `env` is placed at job level (sibling of `container`);
    # confirm against the original file's indentation.
    env:
      HF_ENDPOINT: "https://hf-mirror.com"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
    steps:
      # Print the installed CANN toolkit info and the NPU status for the log.
      - name: Check npu and CANN info
        run: |
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
          npu-smi info
      # Record the packages shipped in the image before anything is installed.
      - name: Check initial pip list from image
        run: |
          pip list
      # Pinned to a full commit SHA, matching the FSDP2 job below, so a moved
      # `v4` tag cannot change what runs here.
      - name: Checkout verl-project/verl repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          clean: true
      - name: Install the current repository
        run: |
          pip install -r requirements-npu.txt
          # Quoted so the shell never treats `[test]` as a glob character class.
          pip install --no-deps -e ".[test]"
      # Record the package set the tests actually run against.
      - name: Check final pip list
        run: |
          pip list
      # Expose /root/.cache/models as ~/models.
      - name: Prepare weights
        run: |
          ln -s /root/.cache/models ~/models
      - name: Running rmpad model tests on 8 NPUs
        run: |
          pytest -s tests/models/test_transformer.py
      - name: Running FSDP rmpad model tests on 8 NPUs
        run: |
          STRATEGY=fsdp torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
      - name: Running transformers ulysses tests on 8 NPUs
        run: |
          torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
      - name: Run distributed test
        run: |
          bash tests/special_distributed/run_all.sh

  # Same checkpoint test as above with STRATEGY=fsdp2, kept in its own job.
  # The job is skipped unless the repository owner is `verl-project`.
  model_rmpad_fsdp2_unstable_ascend:
    if: github.repository_owner == 'verl-project'
    runs-on: linux-aarch64-a2b3-8
    timeout-minutes: 60
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
      options: >-
        --shm-size 16g
    # NOTE(review): `env` is placed at job level (sibling of `container`);
    # confirm against the original file's indentation.
    env:
      HF_ENDPOINT: "https://hf-mirror.com"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
    steps:
      - name: Checkout verl-project/verl repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip install -r requirements-npu.txt
          # Quoted so the shell never treats `[test]` as a glob character class.
          pip install --no-deps -e ".[test]"
      # Expose /root/.cache/models as ~/models.
      - name: Prepare weights
        run: |
          ln -s /root/.cache/models ~/models
      - name: Running FSDP2 rmpad model tests on 8 NPUs
        run: |
          STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
| |