# Push CI for AMD GPU self-hosted runners.
name: "Self-hosted runner AMD GPU (push)"

# This workflow has no trigger of its own: it is only started by another
# workflow through `workflow_call`.
on:
  workflow_call:
    inputs:
      # Used below as one of the `runs-on` labels of the self-hosted jobs and
      # in the CI_EVENT title passed to the notification step.
      gpu_flavor:
        type: string
        required: true
|
|
|
# Workflow-level environment, visible to every job below.
# Boolean-looking and numeric values are quoted so that any YAML loader hands
# them over as the exact strings written here (an unquoted `yes` / `true` / `8`
# is a bool or int under YAML 1.1 implicit typing).
env:
  # Same path as the in-container target of the `-v ...:/mnt/cache/` mount
  # used in the container options of the GPU jobs.
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  PYTEST_TIMEOUT: "60"
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
|
|
jobs:
  # Runs utils/check_self_hosted_runner.py on a GitHub-hosted machine for the
  # named target runner; every other job depends (directly or transitively)
  # on this one.
  check_runner_status:
    name: Check Runner Status
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check Runner Status
        run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

  # Prints ROCm device information from inside the CI container, once per
  # machine type.
  check_runners:
    name: Check Runners
    needs: check_runner_status
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

  # Moves the /transformers clone inside the container to the triggering
  # commit, runs utils/tests_fetcher.py, and publishes the resulting test
  # matrix as job outputs for `run_models_gpu`.
  setup_gpu:
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
    env:
      # Branch and SHA as reported by a `push` event and by a `workflow_run`
      # event; the first step below picks whichever pair is non-empty.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports CI_BRANCH / CI_SHA through $GITHUB_ENV for the later steps.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV

      # CI_BRANCH / CI_SHA are read as shell variables (not `${{ env.* }}`)
      # so the ref name is never spliced into the script text itself.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Fetch the tests to run
        working-directory: /transformers
        run: |
          pip install --upgrade git-python
          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

      - name: Report fetched tests
        uses: actions/upload-artifact@v4
        with:
          name: test_fetched
          path: /transformers/test_preparation.txt
          # Both matrix legs of this job upload under the same artifact name;
          # upload-artifact@v4 fails on an already-existing name unless
          # `overwrite` is enabled.
          overwrite: true

      # Emits `matrix` (the keys of test_map.json) and `test_map` (its full
      # content). When test_map.json does not exist, a single placeholder key
      # "dummy" is emitted instead.
      - id: set-matrix
        name: Organize tests into models
        working-directory: /transformers
        run: |
          if [ -f test_map.json ]; then
              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
          else
              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
          fi
          echo $keys
          echo $test_map
          echo "matrix=$keys" >> $GITHUB_OUTPUT
          echo "test_map=$test_map" >> $GITHUB_OUTPUT

  # Runs pytest for every (folder, machine type) pair produced by `setup_gpu`.
  run_models_gpu:
    name: Model tests
    needs: setup_gpu
    # Skipped when setup_gpu emitted the placeholder "dummy" key.
    if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # Branch and SHA as reported by a `push` event and by a `workflow_run`
      # event; the first step below picks whichever pair is non-empty.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports CI_BRANCH / CI_SHA through $GITHUB_ENV for the later steps.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV

      # CI_BRANCH / CI_SHA are read as shell variables (not `${{ env.* }}`)
      # so the ref name is never spliced into the script text itself.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      # Exports `matrix_folders`: the folder name with "models/" replaced by
      # "models_". It is used below in the uploaded artifact's name.
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  # Downloads the artifacts of this run and calls utils/notification_service.py.
  # `if: always()` keeps this job running even when a needed job did not succeed.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs:
      - check_runner_status
      - check_runners
      - setup_gpu
      - run_models_gpu
    env:
      # Branch and SHA as reported by a `push` event and by a `workflow_run`
      # event; the "Prepare custom environment variables" step picks whichever
      # pair is non-empty.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
        run: |
          echo "Runner availability: ${{ needs.check_runner_status.result }}"
          echo "Setup status: ${{ needs.setup_gpu.result }}"
          echo "Runner status: ${{ needs.check_runners.result }}"

      # Exports CI_BRANCH / CI_SHA through $GITHUB_ENV for the later steps.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo $CI_BRANCH_PUSH
          echo $CI_BRANCH_WORKFLOW_RUN
          echo $CI_SHA_PUSH
          echo $CI_SHA_WORKFLOW_RUN
          [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
          [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV

      # CI_BRANCH / CI_SHA are read as shell variables (not `${{ env.* }}`)
      # so the ref name is never spliced into the script text itself.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - uses: actions/checkout@v4
        with:
          fetch-depth: 20

      - name: Update clone using environment variables
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout "$CI_BRANCH"
          echo "updated branch = $(git branch --show-current)"
          git checkout "$CI_SHA"
          echo "log = $(git log -n 1)"

      - uses: actions/download-artifact@v4
      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
          CI_SHA: ${{ env.CI_SHA }}
          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
          SETUP_STATUS: ${{ needs.setup_gpu.result }}
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
|
|
|