# Regression workflow: builds the package under test, prepares the conda
# environments on the self-hosted runners and runs the regression suites.
name: daily_run_test

# Triggers: manual dispatch with the inputs below, plus a daily cron schedule.
# The jobs repeat every default as an `|| '<literal>'` fallback for runs that
# carry no inputs.
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        default: 'open-compass/opencompass'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      build_lmdeploy:
        required: false
        description: 'whether to build lmdeploy'
        type: boolean
        default: false
      repo_org_lmdeploy:
        required: false
        description: 'Tested repository organization name. Default is internlm/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref_lmdeploy:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The three list-valued inputs below are fed to fromJSON() to build job
      # matrices, so their defaults are written as strict JSON arrays
      # (double-quoted items), matching the fallback literals used in the jobs.
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: '["chat_models","base_models","chat_obj_fullbench","chat_sub_fullbench","base_fullbench","cmd","api"]'
      cuda_env:
        required: true
        description: 'regression conda env, eg. ["dsw_cu11","dsw_cu12"]'
        type: string
        default: '["dsw_cu12"]'
      fullbench_eval:
        required: true
        description: 'fullbench volc functions'
        type: string
        default: '["base_long_context","base_objective","chat_long_context","chat_objective","chat_subjective"]'
  schedule:
    - cron: '15 14 * * *'
|
|
# Workflow-level environment, visible to every job.
env:
  # *_OFFLINE switches, all set to 1.
  HF_HUB_OFFLINE: '1'
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  # ModelScope toggles for vLLM and lmdeploy, both set to false.
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  # Per-run folder name: build-pypi-lmdeploy hands it to build_wheel.sh and
  # uploads builder/manywheel/<OUTPUT_FOLDER> as its artifact.
  OUTPUT_FOLDER: 'cuda12.1_dist_${{ github.run_id }}'
|
|
| jobs: |
# Builds the sdist and wheel of the repository under test and uploads dist/*
# as the artifact `my-artifact-<run_id>`, which prepare_env and
# fullbench_run_test download.
build-pypi:
  runs-on: ubuntu-latest
  steps:
    # checkout / setup-python moved off the retired Node 12-based v2 releases;
    # this job already depends on the Node 20-based upload-artifact@v4.
    - uses: actions/checkout@v4
      with:
        repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
        ref: ${{ github.event.inputs.repo_ref || 'main' }}
    - name: Set up Python 3.x
      uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    # Previously labelled "Build lagent"; the step builds the repository that
    # was just checked out (opencompass by default).
    - name: Build opencompass
      run: |
        pip install wheel setuptools
        python setup.py sdist bdist_wheel
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        # Fail the job instead of uploading an empty artifact.
        if-no-files-found: error
        path: dist/*
        retention-days: 1
        name: my-artifact-${{ github.run_id }}
|
|
# Optional lmdeploy wheel build: skipped on scheduled runs and whenever the
# `build_lmdeploy` input is not true. The wheel is uploaded as the artifact
# `my-artifact-<run_id>-<pyver>`.
build-pypi-lmdeploy:
  if: ${{ !cancelled() && github.event_name != 'schedule' && inputs.build_lmdeploy }}
  runs-on: ubuntu-latest
  strategy:
    matrix:
      pyver:
        - py310
  env:
    DOCKER_TAG: cuda12.1
    PLAT_NAME: manylinux2014_x86_64
    PYTHON_VERSION: ${{ matrix.pyver }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.inputs.repo_ref_lmdeploy || 'main' }}
        repository: ${{ github.event.inputs.repo_org_lmdeploy || 'InternLM/lmdeploy' }}
    - name: Build
      run: |
        echo ${PYTHON_VERSION}
        echo ${PLAT_NAME}
        echo ${DOCKER_TAG}
        echo ${OUTPUT_FOLDER}
        echo ${GITHUB_RUN_ID}
        # drop the interactive "-it" flags from the docker run call in build_wheel.sh
        sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
        bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
        path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
        retention-days: 1
        if-no-files-found: error
|
|
|
|
# Recreates the per-runner conda environment
# `opencompass_regression_<cuda_env>` and installs the freshly built
# opencompass wheel plus the CUDA-specific dependency stack into it.
# One matrix leg per runner label listed in the `cuda_env` input.
prepare_env:
  needs:
    - build-pypi
    - build-pypi-lmdeploy
  # Runs unless the workflow was cancelled, so a skipped lmdeploy build does
  # not skip this job.
  if: ${{ !cancelled() }}
  strategy:
    fail-fast: false
    matrix:
      cuda_env: ${{ fromJSON(inputs.cuda_env || '["dsw_cu12"]') }}
  runs-on: ${{ matrix.cuda_env }}
  environment: prod
  timeout-minutes: 240
  env:
    CONDA_ENV: opencompass_regression
    PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
      with:
        ref: ${{ github.event.inputs.repo_ref || 'main' }}
        repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
    # Fetches the wheel/sdist uploaded by build-pypi into the workspace.
    - name: Download Artifacts
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}
    # Removes the conda env of the same name before it is created again below.
    - name: Remove Conda Env
      if: always()
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
    # CUDA 11 leg: lmdeploy, vllm, flash-attn and xformers come from local
    # wheels; torch is reinstalled from the cu118 index.
    - name: Prepare - create conda env and install torch - cu11
      id: retry1
      if: matrix.cuda_env == 'dsw_cu11'
      uses: nick-fields/retry@v3
      with:
        timeout_minutes: 40
        max_attempts: 3
        command: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu11.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.1+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu11torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list
    # CUDA 12 leg: lmdeploy and vllm are pulled in through the opencompass
    # extras; flash-attn and xformers come from local wheels.
    - name: Prepare - create conda env and install torch - cu12
      id: retry2
      if: matrix.cuda_env == 'dsw_cu12'
      uses: nick-fields/retry@v3
      with:
        timeout_minutes: 40
        max_attempts: 3
        command: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu12.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}}
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list
    # The next two steps share one name: the first downloads the lmdeploy wheel
    # built by build-pypi-lmdeploy, the second installs it over the env's copy.
    - name: Prepare - reinstall lmdeploy - cu12
      if: matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    - name: Prepare - reinstall lmdeploy - cu12
      if: matrix.cuda_env == 'dsw_cu12' && inputs.build_lmdeploy
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        pip install lmdeploy-*.whl --no-deps
| |
# Regression matrix: one leg per (runner label, regression function). Each leg
# activates the conda env built by prepare_env, runs one opencompass
# evaluation into $REPORT_ROOT/<run_id>/<suite>_<cuda_env>, links the newest
# summary folder to ./regression_result_daily and asserts the scores with
# pytest (.github/scripts/oc_score_assert.py).
daily_run_test:
  if: ${{!cancelled()}}
  needs: prepare_env
  strategy:
    fail-fast: false
    matrix:
      cuda_env: ${{ fromJSON(inputs.cuda_env || '["dsw_cu12"]')}}
      regression_func: ${{fromJSON(github.event.inputs.regression_func || '["chat_models","base_models","chat_obj_fullbench","chat_sub_fullbench","base_fullbench","cmd","api"]')}}
  runs-on: ${{ matrix.cuda_env }}
  env:
    CONDA_ENV: opencompass_regression
    PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
    HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
    HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
    HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
    COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache
    REPORT_ROOT: /cpfs01/shared/public/qa-llm-cicd/report
  environment: 'prod'
  timeout-minutes: 240
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
      with:
        repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    # Replace ~/.cache/huggingface/hub with a symlink to the shared model cache.
    - name: Prepare - prepare data and hf model
      run: |
        rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
        ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
    # Four CLI invocations (cmd1..cmd4), each followed by its pytest marker
    # (case1..case4).
    - name: Run command testcase
      if: matrix.regression_func == 'cmd'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        export from_tf=TRUE
        python tools/list_configs.py internlm2_5 mmlu
        opencompass --models hf_internlm2_5_7b hf_internlm2_1_8b --datasets race_ppl demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
        opencompass --models hf_internlm2_5_7b_chat hf_internlm2_chat_1_8b --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
        opencompass --datasets race_ppl demo_gsm8k_chat_gen --hf-type base --hf-path internlm/internlm2_5-7b --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
        opencompass --datasets race_gen demo_gsm8k_chat_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
    - name: Run chat model test
      if: matrix.regression_func == 'chat_models'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        opencompass .github/scripts/eval_regression_chat.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
    - name: Run base model test
      if: matrix.regression_func == 'base_models'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        opencompass .github/scripts/eval_regression_base.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
    # Objective fullbench (same step name as the subjective one below).
    - name: Run chat model test - fullbench
      if: matrix.regression_func == 'chat_obj_fullbench'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        opencompass .github/scripts/eval_regression_chat_objective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_obj_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m chat_obj_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
    # Subjective fullbench; overrides COMPASS_DATA_CACHE for this step only.
    - name: Run chat model test - fullbench
      if: matrix.regression_func == 'chat_sub_fullbench'
      env:
        COMPASS_DATA_CACHE: /cpfs01/shared/public/llmeval/compass_data_cache_subset
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        opencompass .github/scripts/eval_regression_chat_subjective_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/chat_sub_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m chat_sub_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
    - name: Run base model test - fullbench
      if: matrix.regression_func == 'base_fullbench'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        opencompass .github/scripts/eval_regression_base_fullbench.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/base_full_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m base_fullbench -s -v --color=yes .github/scripts/oc_score_assert.py
    # Starts an lmdeploy api_server in the background, waits 120s, then
    # evaluates against it. The server PID is exported through $GITHUB_ENV so
    # the following step can stop it.
    - name: Run model test - api
      if: matrix.regression_func == 'api'
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
        conda info --envs
        # The server log is written straight into the per-run report folder,
        # which no earlier step of this leg creates. Create it first so the
        # redirect below cannot fail and leave the server unstarted.
        mkdir -p ${{env.REPORT_ROOT}}/${{ github.run_id }}
        lmdeploy serve api_server internlm/internlm2_5-7b-chat --max-batch-size 256 --model-name internlm2 > ${{env.REPORT_ROOT}}/${{ github.run_id }}/restful.log 2>&1  &
        echo "restful_pid=$!" >> "$GITHUB_ENV"
        sleep 120s
        opencompass .github/scripts/eval_regression_api.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }} --reuse --max-num-workers 2 --dump-eval-details
        rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/api_${{ matrix.cuda_env }}/*/summary regression_result_daily
        python -m pytest -m api -s -v --color=yes .github/scripts/oc_score_assert.py
    - name: Run model test - api kill
      if: always() && matrix.regression_func == 'api'
      run: |
        # Best-effort cleanup: restful_pid only exists once the server was
        # launched, and the server may already have exited by now.
        if [ -n "${restful_pid:-}" ]; then
          kill -15 "$restful_pid" || true
        fi
| |
# Full-benchmark regression on the volc_cu12 runner: one leg per entry of the
# `fullbench_eval` input. Each leg runs the matching eval_<function_type>.py
# template inside the pre-existing `regression_test` conda env and asserts the
# scores with pytest.
fullbench_run_test:
  if: ${{!cancelled()}}
  needs: ['build-pypi', 'build-pypi-lmdeploy']
  env:
    FULLBENCH_CONDA_ENV: regression_test
    FULLBENCH_REPORT_ROOT: /fs-computility/llm/qa-llm-cicd/eval_report/regression
    COMPASS_DATA_CACHE: /fs-computility/llm/shared/llmeval/datasets/compass_data_cache
  strategy:
    fail-fast: false
    matrix:
      function_type: ${{fromJSON(github.event.inputs.fullbench_eval || '["base_long_context","base_objective","chat_long_context","chat_objective","chat_subjective"]')}}
  runs-on: volc_cu12
  environment: 'prod'
  timeout-minutes: 360
  steps:
    - name: Clone repository
      uses: actions/checkout@v2
      with:
        repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
        ref: ${{github.event.inputs.repo_ref || 'main'}}
    - name: Download Artifacts
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}
    # The four conditional steps below used to also require
    # `matrix.cuda_env == 'dsw_cu12'`. This job's matrix only defines
    # `function_type`, so that comparison was always false and the steps could
    # never run. The runner is fixed to volc_cu12, so only the input gate stays.
    # NOTE(review): the opencompass reinstall is still gated on build_lmdeploy,
    # as originally written - confirm whether it should run unconditionally.
    - name: Prepare - reinstall opencompass - cu12
      if: ${{ inputs.build_lmdeploy }}
      run: |
        . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.FULLBENCH_CONDA_ENV}}
        pip install opencompass*.whl --no-deps
    # Download, then install, the lmdeploy wheel built by build-pypi-lmdeploy.
    - name: Prepare - reinstall lmdeploy - cu12
      if: ${{ inputs.build_lmdeploy }}
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}-py310
    - name: Prepare - reinstall lmdeploy - cu12
      if: ${{ inputs.build_lmdeploy }}
      run: |
        . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.FULLBENCH_CONDA_ENV}}
        pip install lmdeploy-*.whl --no-deps
    # Log the resulting environment after the reinstalls.
    - name: Conda env
      if: ${{ inputs.build_lmdeploy }}
      run: |
        . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.FULLBENCH_CONDA_ENV}}
        conda info --envs
        pip list
    - name: Run command testcase
      run: |
        . /fs-computility/llm/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.FULLBENCH_CONDA_ENV}}
        conda info --envs
        export from_tf=TRUE
        opencompass /fs-computility/llm/qa-llm-cicd/ocplayground/template/regression/eval_${{ matrix.function_type }}.py --work-dir ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }} --reuse
        rm regression_result_daily -f && ln -s ${{env.FULLBENCH_REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }}/*/summary regression_result_daily
        python -m pytest -m ${{ matrix.function_type }} -s -v --color=yes .github/scripts/oc_score_assert.py
| |
|
|
# Posts a failure message to the webhook stored in secrets.WEBHOOK_URL when
# either test job reports `failure`; only for runs on `develop` or `main`,
# and never when the workflow was cancelled.
notify_to_feishu:
  needs:
    - daily_run_test
    - fullbench_run_test
  if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
  runs-on: self-hosted
  environment: prod
  timeout-minutes: 5
  steps:
    - name: notify
      # The JSON payload is single-quoted for the shell; the repository name,
      # run id and user id are spliced in by closing and reopening that quote.
      run: |
        curl -X POST \
          -H "Content-Type: application/json" \
          -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' \
          ${{ secrets.WEBHOOK_URL }}
| |