| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
# Workflow header: display name, triggers and concurrency policy.
name: CICD NeMo

on:
  # Nightly run at midnight (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

  # Pull requests only trigger this workflow when a label is added, and only
  # for PRs targeting the branches matched below.
  pull_request:
    types:
      - labeled
    branches:
      - main
      - 'r**'
      - 'weekly-bump*'

  # Manual runs may narrow the suite through `test_to_run`.
  workflow_dispatch:
    inputs:
      test_to_run:
        description: 'Comma-separated list of tests to run. Use "all" to run the full test suite.'
        type: string
        required: false
        default: all

# Runs are grouped by workflow name, PR number (or ref), the label that
# triggered the run (or 'main') and the event name. A newer run in the same
# group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true

jobs:
  # Computes the knobs every downstream job keys off:
  #   test_to_run       - JSON array of test names ('[]' means "run nothing")
  #   components_to_run - JSON array of component names
  #   is_ci_workload    - 'true' for scheduled runs and bump-ci-container* branches
  #   no_fail_fast      - 'true' for scheduled runs or PRs labeled 'no-fail-fast'
  pre-flight:
    runs-on: ubuntu-latest
    outputs:
      test_to_run: ${{ steps.test_to_run.outputs.main }}
      is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
      no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
      components_to_run: ${{ steps.components_to_run.outputs.main }}
    env:
      TESTS_TO_RUN: ${{ inputs.test_to_run }}
      EVENT_NAME: ${{ github.event_name }}
      HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          # Full history so the head and base commits can be compared.
          fetch-depth: 0

      - name: Select components to run
        id: components_to_run
        env:
          # Passed through the environment instead of being interpolated
          # into the script text.
          SOURCE_SHA: ${{ github.event.pull_request.head.sha }}
          TARGET_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          pip install -U pip
          pip install git-python

          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # The script is expected to write test_modules.json (read below).
            python .github/scripts/components_to_run.py --source-sha "$SOURCE_SHA" --target-sha "$TARGET_SHA"
          else
            # Every other event runs all components.
            echo '["nemo2", "automodel", "export-deploy", "speech"]' | tee -a test_modules.json
          fi

          components_to_run=$(cat test_modules.json)

          echo "main=${components_to_run}" | tee -a "$GITHUB_OUTPUT"

      - name: Select tests to run
        id: test_to_run
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            # Manual dispatch: keep the caller-supplied list (input default is "all").
            :
          elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then
            # The "Run CICD" label was just added: run everything.
            TESTS_TO_RUN=all
          elif [[ "$EVENT_NAME" == "pull_request" ]]; then
            # Any other label: run nothing (becomes '[]' below).
            TESTS_TO_RUN=""
          elif [[ "$EVENT_NAME" == "push" || "$EVENT_NAME" == "schedule" ]]; then
            TESTS_TO_RUN=all
          else
            echo "Unsupported event_name $EVENT_NAME provided."
            exit 1
          fi

          # Comma-separated string -> compact JSON array; "" becomes [].
          parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
          echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"

      - name: Check if this is a CI workload
        shell: bash
        id: is_ci_workload
        run: |
          # PR head branch when available, otherwise the ref without refs/heads/.
          branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}

          if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
            is_ci_workload=true
          else
            is_ci_workload=false
          fi

          # Single write of the output; both branches end up here.
          echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"

      - name: Check if no-fail-fast is set
        shell: bash
        id: no_fail_fast
        env:
          HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
        run: |
          if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
            no_fail_fast=true
          else
            no_fail_fast=false
          fi

          echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"

  # Reusable linting workflow; skipped when there are no tests to run.
  code-linting:
    if: needs.pre-flight.outputs.test_to_run != '[]'
    needs: [pre-flight]
    uses: ./.github/workflows/code-linting.yml

  # Gate job bound to the `test` environment. Skipped for CI workloads.
  # NOTE(review): presumably the environment's protection rules provide the
  # actual "queue" behaviour - confirm in the repository settings.
  cicd-wait-in-queue:
    needs: [pre-flight, code-linting]
    runs-on: ubuntu-latest
    environment: test
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && needs.pre-flight.outputs.is_ci_workload == 'false'
    steps:
      - name: Running CI tests
        run: |
          echo "Running CI tests"

  # Builds the CI container. Runs after the queue gate succeeded, or when the
  # gate was skipped because this is a CI workload.
  cicd-test-container-build:
    uses: ./.github/workflows/_build_container.yml
    needs: [pre-flight, code-linting, cicd-wait-in-queue]
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      image-name: nemo_container
      dockerfile: docker/Dockerfile.ci

  # Import / rank sanity checks executed inside the freshly built container.
  cicd-import-tests:
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    # cicd-wait-in-queue must be a direct dependency: the `needs` context only
    # exposes direct dependencies, and the condition above reads its result.
    needs: [cicd-test-container-build, pre-flight, cicd-wait-in-queue]
    runs-on: self-hosted-azure-gpus-1
    steps:
      - name: Create UUID
        id: uuid
        run: |
          echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"

      - name: Checkout NeMo
        uses: actions/checkout@v4
        with:
          repository: NVIDIA/NeMo
          # Unique per-run, per-job directory on the self-hosted runner.
          path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo

      # NOTE(review): the inline script does not use `set -e`, so only the exit
      # status of the last command decides whether this step fails - confirm
      # that is intended.
      - name: Run some checks
        run: |
          docker run \
            --rm \
            --device=/dev/nvidia0 \
            --gpus all \
            --shm-size=8g \
            --volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
            --env TRANSFORMERS_OFFLINE=0 \
            --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
              # PyTorch Lightning version
              python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"

              CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"

              python tests/core_ptl/check_imports.py --domain asr
              python tests/core_ptl/check_imports.py --domain nlp
              python tests/core_ptl/check_imports.py --domain tts
            '

  # Runs the shared test-template action with the
  # L0_Setup_Test_Data_And_Models script.
  L0_Setup_Test_Data_And_Models:
    needs: [pre-flight, cicd-test-container-build, cicd-wait-in-queue]
    runs-on: self-hosted-azure
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: ${{ github.run_id }}

      - name: main
        uses: NVIDIA/NeMo/.github/actions/test-template@main
        with:
          runner: ${{ runner.name }}
          script: L0_Setup_Test_Data_And_Models
          tests_to_run: '["L0_Setup_Test_Data_And_Models"]'

  # Unit tests (reusable workflow). The component suites below wait for it.
  cicd-main-unit-tests:
    # cicd-wait-in-queue is listed so `needs.cicd-wait-in-queue.result`
    # resolves in the condition below.
    needs: [pre-flight, cicd-test-container-build, cicd-wait-in-queue]
    uses: ./.github/workflows/cicd-main-unit-tests.yml
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}

  # Export/deploy suite; only when 'export-deploy' is among the selected components.
  cicd-main-export-deploy:
    needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests, cicd-wait-in-queue]
    uses: ./.github/workflows/cicd-main-export-deploy.yml
    if: |
      (
        needs.pre-flight.outputs.test_to_run != '[]'
        && (
          contains(fromJson(needs.pre-flight.outputs.components_to_run), 'export-deploy')
        )
      )
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}

  # Speech suite; only when 'speech' is among the selected components.
  cicd-main-speech:
    needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests, cicd-wait-in-queue]
    uses: ./.github/workflows/cicd-main-speech.yml
    if: |
      (
        needs.pre-flight.outputs.test_to_run != '[]'
        && (
          contains(fromJson(needs.pre-flight.outputs.components_to_run), 'speech')
        )
      )
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}

  # Automodel suite; only when 'automodel' is among the selected components.
  cicd-main-automodel:
    needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests, cicd-wait-in-queue]
    uses: ./.github/workflows/cicd-main-automodel.yml
    if: |
      (
        needs.pre-flight.outputs.test_to_run != '[]'
        && (
          contains(fromJson(needs.pre-flight.outputs.components_to_run), 'automodel')
        )
      )
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}

  # NeMo 2 suite; runs when 'nemo2' is selected or the component list is
  # exactly ["all"].
  cicd-main-nemo2:
    needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests, cicd-wait-in-queue]
    uses: ./.github/workflows/cicd-main-nemo2.yml
    if: |
      (
        needs.pre-flight.outputs.test_to_run != '[]'
        && (
          contains(fromJson(needs.pre-flight.outputs.components_to_run), 'nemo2')
          || needs.pre-flight.outputs.components_to_run == '["all"]'
        )
      )
      && (
        success()
        || (
          needs.cicd-wait-in-queue.result == 'skipped'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
        )
      )
      && !cancelled()
    with:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}

  # Aggregates the outcome of the whole run, cleans up the trigger label,
  # notifies, and exits non-zero unless the run counts as a success.
  Nemo_CICD_Test:
    needs:
      - pre-flight
      - cicd-test-container-build
      - cicd-import-tests
      - L0_Setup_Test_Data_And_Models
      - cicd-main-unit-tests
      - cicd-main-nemo2
      - cicd-main-export-deploy
      - cicd-main-automodel
      - cicd-main-speech
    if: always()
    runs-on: ubuntu-latest
    # NOTE(review): write-all is broader than the label edit and PR comment
    # below appear to need; narrowing it requires checking what
    # .github/scripts/notify.py does with the token.
    permissions: write-all
    steps:
      # One checkout serves both the gh CLI calls and notify.py.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Get workflow result
        id: result
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.run_id }}
          HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
          IS_SCHEDULED: ${{ github.event_name == 'schedule' }}
        run: |
          # Count failed / cancelled jobs of this run.
          NUM_FAILED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
          NUM_CANCELLED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')

          # Success additionally requires the "Run CICD" label or a scheduled
          # run; every other clean run is still reported as "failure".
          if [[ $NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0 && ("$HAS_LABEL" == "true" || "$IS_SCHEDULED" == "true") ]]; then
            RESULT="success"
          elif [[ $NUM_CANCELLED -gt 0 ]]; then
            RESULT="cancelled"
          else
            RESULT="failure"
          fi

          echo "code=$RESULT" | tee -a "$GITHUB_OUTPUT"

      # Only for PRs whose head branch lives in this repository.
      - name: Remove label if not cancelled
        if: |
          steps.result.outputs.code != 'cancelled'
          && github.event.label.name == 'Run CICD'
          && github.event.pull_request.head.repo.full_name == github.repository
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.number }}
        run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"

      - name: Pipeline successful, add PR comment
        if: |
          steps.result.outputs.code == 'success'
          && github.event_name == 'pull_request'
          && env.SLACK_WEBHOOK != ''
        uses: peter-evans/create-or-update-comment@v4
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
        with:
          issue-number: ${{ github.event.number }}
          body: |
            [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,

            We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.

            So it might be time to merge this PR or get some approvals.

            //cc @chtruong814 @ko3n1g @pablo-garay @thomasdhc

      - name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
        if: |
          steps.result.outputs.code == 'failure'
          && github.event.label.name == 'Run CICD'
          && env.SLACK_WEBHOOK != ''
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
          PR_NUMBER: ${{ github.event.number }}
          SERVER_URL: ${{ github.server_url }}
        run: |
          set -x
          pip install PyGithub
          export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}

          python .github/scripts/notify.py

      # Propagates the aggregated result as this job's exit status.
      - name: Exit
        if: ${{ always() }}
        env:
          RESULT: ${{ steps.result.outputs.code }}
        run: |
          # Quoted so an empty RESULT takes the failure branch cleanly instead
          # of producing a `[` syntax error.
          if [ "$RESULT" = "success" ]; then
            exit 0
          else
            exit 1
          fi

  # Combines the per-job coverage artifacts for each flag, uploads them to
  # Codecov and stores the aggregated .coverage file as an artifact.
  Coverage:
    runs-on: ubuntu-latest
    needs: [pre-flight, Nemo_CICD_Test]
    if: |
      needs.pre-flight.outputs.test_to_run != '[]'
      && needs.pre-flight.outputs.components_to_run != '[]'
      && (
        success()
        || needs.Nemo_CICD_Test.result == 'success'
      )
      && !cancelled()
    strategy:
      matrix:
        flag: [unit-test, e2e]
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download coverage reports of current branch
        uses: actions/download-artifact@v4
        with:
          pattern: coverage-${{ matrix.flag }}-*

      - name: Get total coverage of current branch
        shell: bash -x -e -u -o pipefail {0}
        if: always()
        run: |
          pip install coverage

          ls -al .
          ls -al coverage-*/
          # Merge every downloaded .coverage file into ./.coverage.
          coverage combine --keep $(ls coverage-*/.coverage)
          coverage report -i
          rm -rf coverage-*
          ls -al

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true
          flags: ${{ matrix.flag }}

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: coverage-${{ matrix.flag }}-aggregated
          path: |
            .coverage
          include-hidden-files: true