| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Display name of this workflow in the GitHub Actions UI.
name: NPU unit tests
|
|
# Events that trigger this workflow.
on:
  # Pushes to main or a v0.* branch that touch any Python file or this
  # workflow definition.
  push:
    branches:
      - main
      - "v0.*"
    paths:
      - "**/*.py"
      - .github/workflows/npu_unit_tests.yml
  # Pull requests targeting main, filtered by the path patterns below.
  pull_request:
    branches:
      - main
    paths:
      # Pattern order matters: a negative pattern (prefixed with "!") after
      # a positive match excludes the path, and a positive pattern after a
      # negative match includes it again.
      - "**/*.py"
      # Excluded paths: changes only here do not trigger this workflow.
      - "!examples/**"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      - "!recipe/**"
      # Always trigger on changes to this workflow file and to test files.
      - .github/workflows/npu_unit_tests.yml
      - "tests/**test_*.py"
      # Excluded: files directly under tests/ whose name ends in _on_cpu.py.
      - "!tests/*_on_cpu.py"
|
|
| |
# At most one active run per workflow + ref. A newer run cancels the
# in-progress one, except on refs/heads/main where runs are never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
|
| |
# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read
|
|
jobs:
  # Runs the unit-test suite inside the verl Ascend CI container.
  npu_unit_tests:
    # Only run when the repository owner is verl-project; the job is skipped
    # in repositories under any other owner.
    if: github.repository_owner == 'verl-project'
    # NOTE(review): presumably a self-hosted aarch64 runner label with NPUs
    # attached -- confirm against the runner pool configuration.
    runs-on: linux-aarch64-a2b3-8
    timeout-minutes: 60
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/modelfoundry/ascend-ci/verl/verl:verl-8.5.0-910b-ubuntu22.04-py3.11-latest
      options: >-
        --shm-size 16g
    env:
      # Point Hugging Face downloads at the hf-mirror.com endpoint.
      HF_ENDPOINT: "https://hf-mirror.com"
      # Quoted so the value stays the string "0" rather than an integer.
      HF_HUB_ENABLE_HF_TRANSFER: "0"
    steps:
      # Print the installed CANN toolkit info file and the NPU status.
      - name: Check npu and CANN info
        run: |
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
          npu-smi info
      # Record the packages shipped in the image before anything is installed.
      - name: Check initial pip list from image
        run: |
          pip list
      # NOTE(review): the step name still says "volcengine" while the job is
      # gated on owner "verl-project" -- name may be stale; confirm.
      - name: Checkout volcengine/verl repo
        uses: actions/checkout@v4
        with:
          # 0 fetches the full history for all branches and tags.
          fetch-depth: 0
          clean: true
      - name: Install the current repository
        # ".[test]" is quoted so the shell cannot treat the brackets as a
        # glob character class and expand it against files in the checkout.
        run: |
          pip install -r requirements-npu.txt
          pip install --no-deps -e ".[test]"
          pip install mlflow pytest-asyncio
      # Record the package set after installation.
      - name: Check final pip list
        run: |
          pip list
      - name: Prepare weights
        # Expose the model cache as ~/models. -f/-n replace an existing link
        # instead of failing or nesting a new link inside its target, so the
        # step can be re-run safely.
        run: |
          ln -sfn /root/.cache/models ~/models
      - name: Run all NPU unit tests
        # Runs everything under tests/ except the ignored globs. Some of the
        # ignored files are run by the dedicated steps below:
        # test_activation_offload, test_normalize_peft_param_name, and the
        # test_special_* actor/critic/mstx tests.
        run: |
          pytest -s -x \
            --ignore-glob="*test_special_*.py" \
            --ignore-glob="*on_cpu.py" \
            --ignore-glob="*test_vllm*" \
            --ignore-glob="*_sglang*" \
            --ignore-glob="*_hf_rollout*" \
            --ignore-glob="tests/models/" \
            --ignore-glob="tests/special*" \
            --ignore-glob="tests/experimental" \
            --ignore-glob="tests/workers/reward_model" \
            --ignore-glob="*test_rvdz*" \
            --ignore-glob="*test_ray_collectives*" \
            --ignore-glob="*test_nvtx_profile*" \
            --ignore-glob="tests/checkpoint_engine" \
            --ignore-glob="*test_shared_memory*" \
            --ignore-glob="tests/workers/rollout/rollout_trtllm" \
            --ignore-glob="*test_fsdp_lora_merge*" \
            --ignore-glob="*test_activation_offload*" \
            --ignore-glob="*test_normalize_peft_param_name.py*" \
            tests/
      # Excluded from the main run above; executed in its own pytest process.
      - name: Testing activation offload
        run: |
          pytest -s -x tests/utils/test_activation_offload.py
      # Excluded from the main run above; executed in its own pytest process.
      - name: Testing normalize peft param name
        run: |
          pytest -s -x tests/utils/test_normalize_peft_param_name.py
      # Launched through torchrun as a single-node run with 2 processes.
      - name: Testing FSDP2 actor functionality
        run: |
          torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py
      # Launched through torchrun as a single-node run with 2 processes.
      - name: Testing FSDP2 critic functionality
        run: |
          torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/critic/test_special_dp_critic.py
      # test_special_* files are ignored by the main run; run this one here.
      - name: Running NPU profiling unit tests
        run: |
          pytest -s -x tests/utils/test_special_mstx_profile.py
| |