# Regression workflow for opencompass.
# Triggers: manual dispatch (with the inputs below) or the cron schedule.
name: daily_run_test

on:
  workflow_dispatch:
    inputs:
      # Repository to check out and test, in `owner/repo` form.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        default: 'open-compass/opencompass'
      # Git ref (branch, tag or commit id) of that repository.
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # List literal parsed with fromJSON() in the jobs' `if:` conditions;
      # selects which of the cmd / api / bench job groups run.
      regression_type:
        required: true
        description: 'regression types'
        type: string
        default: "['cmd', 'api', 'bench']"
      # List literal parsed with fromJSON() to build the daily_run_test matrix.
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['chat_models','base_models','chat_obj_fullbench_v5', 'chat_obj_fullbench_v6', 'chat_obj_fullbench_v7', 'chat_obj_fullbench_other','chat_sub_fullbench','base_fullbench','base_longtext_fullbench','chat_longtext_fullbench']"
      # Name of the baseline directory; exposed to the jobs as BASELINE_DIR.
      baseline_result:
        required: true
        description: 'baseline result'
        type: string
        default: "0.5.0-baseline"
  schedule:
    # Minute 15, hour 14, day-of-week 2 (Tuesday).
    - cron: '15 14 * * 2'
# Workflow-level environment shared by every job.
# Numeric / boolean-looking values are quoted so they stay plain strings
# regardless of which YAML loader reads this file.
env:
  # Offline switches for the Hugging Face libraries.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'
  OUTPUT_FOLDER: cuda12.1_dist_${{ github.run_id }}
  # Shared-storage locations used by the self-hosted runner.
  CONDA_PATH: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/miniconda3
  REPORT_ROOT: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/eval_report/regression_test
  COMPASS_DATA_CACHE: /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache
  HF_DATASETS_CACHE: /mnt/shared-storage-user/auto-eval-pipeline/qa-llm-cicd/hf_cache
  HF_HUB_CACHE: /mnt/shared-storage-user/large-model-center-share-weights/hf_hub
  HF_ENDPOINT: https://hf-mirror.com
  TMPDIR: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/tmpdir
  PIP_CACHE_DIR: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/pip_cache
  # Name of the conda environment that prepare_env recreates on every run.
  CONDA_ENV: regression_test
  VLLM_WORKER_MULTIPROC_METHOD: spawn
  KUBEBRAIN_CLUSTER_ENTRY: https://h.pjlab.org.cn
  KUBEBRAIN_NAMESPACE: ailab-opencompass
  # Suffix for rjob names; unique per run and per re-run attempt.
  JOB_NAME: daily-test-${{ github.run_id }}-${{ github.run_attempt }}
  # Falls back to the default baseline when no dispatch input is present
  # (e.g. on scheduled runs).
  BASELINE_DIR: ${{github.event.inputs.baseline_result || '0.5.0-baseline' }}
  WORK_PATH: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/daily_workdir/opencompass/opencompass
# Job graph:
#   build-pypi -> prepare_env -> { daily_run_test (matrix), daily_run_cmd, daily_run_api }
jobs:
  # Builds the sdist/wheel of the tested repository on a hosted runner and
  # publishes it as a short-lived artifact for prepare_env.
  build-pypi:
    runs-on: ubuntu-latest
    env:
      http_proxy: ''
      https_proxy: ''
    steps:
      - uses: actions/checkout@v5
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Build opencompass
        run: |
          pip install wheel setuptools
          python setup.py sdist bdist_wheel
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: dist/*
          retention-days: 1
          name: my-artifact-${{ github.run_id }}

  # Recreates the conda environment on the self-hosted runner and installs the
  # freshly built wheel plus the pinned inference stack.
  prepare_env:
    if: ${{!cancelled()}}
    needs: ['build-pypi']
    runs-on: yidian_cu12_daily
    timeout-minutes: 180
    steps:
      - name: Clone repository
        uses: actions/checkout@v5
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Fetches the wheel uploaded by build-pypi into the workspace.
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}
      - name: Remove Conda Env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda env remove -y --name ${{env.CONDA_ENV}}
          conda info --envs
      # The whole install script is retried (up to 3 attempts, 120 minutes each).
      - name: Prepare - create conda env and install torch - cu12
        uses: nick-fields/retry@v3
        with:
          max_attempts: 3
          timeout_minutes: 120
          command: |
            . ${{env.CONDA_PATH}}/bin/activate
            conda create -y --name ${{env.CONDA_ENV}} python=3.10
            conda activate ${{env.CONDA_ENV}}
            export PIP_CACHE_DIR=${{env.PIP_CACHE_DIR}}
            rm -rf ${{env.TMPDIR}} && mkdir -p ${{env.TMPDIR}}
            export TMPDIR=${{env.TMPDIR}}
            pip install -r /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/config/requirements.txt -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install opencompass*.whl -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install opencompass[lmdeploy] -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install opencompass[vllm] -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install opencompass[full] -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install opencompass[api] -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install xformers -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            pip install transformers==4.57.0 vllm==0.11.0 lmdeploy==0.10.1 -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
            FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/packages/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
            cp -r /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/packages/nltk_data ${{env.CONDA_PATH}}/envs/${{env.CONDA_ENV}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list

  # Benchmark regression: one matrix entry per regression function.
  # Runs on schedule, or on dispatch when regression_type contains 'bench'.
  daily_run_test:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_type), 'bench'))}}
    needs: prepare_env
    strategy:
      fail-fast: false
      matrix:
        regression_func: ${{fromJSON(github.event.inputs.regression_func || '["chat_models","base_models","chat_obj_fullbench_v5", "chat_obj_fullbench_v6", "chat_obj_fullbench_v7", "chat_obj_fullbench_other", "chat_sub_fullbench", "base_fullbench","base_longtext_fullbench","chat_longtext_fullbench"]')}}
    runs-on: yidian_cu12_daily
    timeout-minutes: 240
    steps:
      - name: Clone repository
        uses: actions/checkout@v5
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      # Copies the shared rjob.py into the checkout and stamps this run's id
      # into its TASK_ID.
      - name: change rjob.py
        run: |
          cp /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/config/rjob.py .
          sed -i "s/TASK_ID='none'/TASK_ID='${{ github.run_id }}'/g" rjob.py
      # Exactly one of the three "Run test" steps runs for each matrix entry.
      - name: Run test
        if: matrix.regression_func != 'chat_obj_fullbench_other' && matrix.regression_func != 'chat_sub_fullbench'
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          opencompass .github/scripts/eval_regression_${{matrix.regression_func}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} --reuse
      # Variant for chat_obj_fullbench_other: sets DATASET_SOURCE=HF both in
      # the step environment and inside rjob.py.
      - name: Run test - other
        if: matrix.regression_func == 'chat_obj_fullbench_other'
        env:
          DATASET_SOURCE: HF
        run: |
          sed -i "s/DATASET_SOURCE=/DATASET_SOURCE=HF/g" rjob.py
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          opencompass .github/scripts/eval_regression_${{matrix.regression_func}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} --reuse
      # Variant for chat_sub_fullbench: points the data cache at
      # compass_data_cache_sub both in the step environment and inside rjob.py.
      - name: Run test - sub
        if: matrix.regression_func == 'chat_sub_fullbench'
        env:
          COMPASS_DATA_CACHE: /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache_sub
        run: |
          sed -i "s/compass_data_cache/compass_data_cache_sub/g" rjob.py
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          opencompass .github/scripts/eval_regression_${{matrix.regression_func}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} --reuse
      # pytest failures are tolerated here (`|| true`); the step's outcome is
      # decided by compare_results.py, which runs last.
      - name: Assert result
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}}/*/summary regression_result_daily
          python -m pytest -m ${{matrix.regression_func}} -s -v --color=yes .github/scripts/oc_score_assert.py || true
          python .github/scripts/compare_results.py compare_results ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} ${{env.REPORT_ROOT}}/${{env.BASELINE_DIR}}/${{matrix.regression_func}}
      # Runs even when earlier steps failed.
      - name: Change code permission
        if: always()
        run: |
          sudo chmod -R 777 .

  # Command-line regression: submits daily_cmd_test.sh as an rjob, waits for it
  # and asserts the five result sets (cmd1..cmd5).
  # Runs on schedule, or on dispatch when regression_type contains 'cmd'.
  daily_run_cmd:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_type), 'cmd'))}}
    needs: prepare_env
    runs-on: yidian_cu12_daily
    timeout-minutes: 240
    steps:
      - name: Clone repository
        uses: actions/checkout@v5
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      - name: Run test
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rjob submit --name=cmd-${{ env.JOB_NAME }} --charged-group=opencompass_gpu --private-machine=group --group=opencompass_gpu --gpu=2 --cpu=32 --memory=32568 --private-machine=group --image=registry.h.pjlab.org.cn/ailab-puyu/xpuyu:torch-2.6.0-45d96d5f-0607 --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken --env=HF_ENDPOINT=https://hf-mirror.com --env=HF_DATASETS_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/qa-llm-cicd/hf_cache --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=CUDA_MODULE_LOADING=EAGER --env=HF_DATASETS_OFFLINE=1 --env=TRANSFORMERS_OFFLINE=1 --env=HF_EVALUATE_OFFLINE=1 --env=HF_HUB_OFFLINE=1 --env=VLLM_USE_MODELSCOPE=false --env=VLLM_WORKER_MULTIPROC_METHOD=spawn --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline --mount=gpfs://gpfs1/large-model-center-share-weights:/mnt/shared-storage-user/large-model-center-share-weights --host-network=True -- bash -exc '/mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/daily_cmd_test.sh ${{env.REPORT_ROOT}}/${{ github.run_id }}'

          # Poll the job status up to 300 times, 6 seconds apart, until it
          # reaches a terminal state; dump the job logs once it does.
          current_status=''
          for i in {1..300}; do
            current_status=$(rjob get cmd-${{ env.JOB_NAME }} | grep -oP 'rjob [^:]+: \K[^ ]+')
            if [[ $current_status == "Succeeded" || $current_status == "Failed" || $current_status == "Stopped" ]]; then
              echo "Current status: $current_status, stop checking"
              rjob logs job cmd-${{ env.JOB_NAME }}
              break
            fi
            sleep 6
          done
          # Fail the step explicitly when the job failed, was stopped, or never
          # reached a terminal state within the polling window.
          if [[ $current_status != "Succeeded" ]]; then
            echo "rjob cmd-${{ env.JOB_NAME }} did not succeed (last status: ${current_status:-unknown})"
            exit 1
          fi
      # Each case re-points the regression_result_daily symlink at the matching
      # cmdN summary directory before running its pytest marker.
      - name: Assert result
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd5/*/summary regression_result_daily
          python -m pytest -m case5 -s -v --color=yes .github/scripts/oc_score_assert.py
      # Runs even when earlier steps failed.
      - name: Change code permission
        if: always()
        run: |
          sudo chmod -R 777 .

  # API regression: submits daily_api_test.sh as an rjob, waits for it and
  # asserts the `api` result set.
  # Runs on schedule, or on dispatch when regression_type contains 'api'.
  daily_run_api:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_type), 'api'))}}
    needs: prepare_env
    runs-on: yidian_cu12_daily
    timeout-minutes: 240
    steps:
      - name: Clone repository
        uses: actions/checkout@v5
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      - name: Run test
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rjob submit --name=api-${{ env.JOB_NAME }} --charged-group=opencompass_gpu --private-machine=group --group=opencompass_gpu --gpu=2 --cpu=32 --memory=32568 --private-machine=group --image=registry.h.pjlab.org.cn/ailab-puyu/xpuyu:torch-2.6.0-45d96d5f-0607 --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken --env=HF_ENDPOINT=https://hf-mirror.com --env=HF_DATASETS_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/qa-llm-cicd/hf_cache --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=CUDA_MODULE_LOADING=EAGER --env=HF_DATASETS_OFFLINE=1 --env=TRANSFORMERS_OFFLINE=1 --env=HF_EVALUATE_OFFLINE=1 --env=HF_HUB_OFFLINE=1 --env=VLLM_USE_MODELSCOPE=false --env=VLLM_WORKER_MULTIPROC_METHOD=spawn --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline --mount=gpfs://gpfs1/large-model-center-share-weights:/mnt/shared-storage-user/large-model-center-share-weights --host-network=True -- bash -exc '/mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/daily_api_test.sh ${{env.REPORT_ROOT}}/${{ github.run_id }} ${{env.WORK_PATH}}'

          # Poll the job status up to 300 times, 6 seconds apart, until it
          # reaches a terminal state; dump the job logs once it does.
          current_status=''
          for i in {1..300}; do
            current_status=$(rjob get api-${{ env.JOB_NAME }} | grep -oP 'rjob [^:]+: \K[^ ]+')
            if [[ $current_status == "Succeeded" || $current_status == "Failed" || $current_status == "Stopped" ]]; then
              echo "Current status: $current_status, stop checking"
              rjob logs job api-${{ env.JOB_NAME }}
              break
            fi
            sleep 6
          done
          # Fail the step explicitly when the job failed, was stopped, or never
          # reached a terminal state within the polling window.
          if [[ $current_status != "Succeeded" ]]; then
            echo "rjob api-${{ env.JOB_NAME }} did not succeed (last status: ${current_status:-unknown})"
            exit 1
          fi
      - name: Assert result
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/api/*/summary regression_result_daily
          python -m pytest -m api -s -v --color=yes .github/scripts/oc_score_assert.py
      # Runs even when earlier steps failed.
      - name: Change code permission
        if: always()
        run: |
          sudo chmod -R 777 .