# Builds the NVIDIA driver-toolkit / builder container image for the training
# bootc images on a self-hosted EC2 runner and pushes the result to the
# registry configured in `env` below.
name: Training Bootc image builds

on:
  push:
    branches:
      - main
    paths:
      - 'training/**'
      - '.github/workflows/training_bootc.yaml'
  workflow_dispatch:

# One concurrency group per workflow; a newer run queues behind a run that is
# already in progress instead of cancelling it.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

env:
  REGISTRY: quay.io
  REGISTRY_ORG: ai-lab
  REGION: us-east-1

jobs:
  # Starts the EC2 instance that the build jobs use as their runner and
  # exposes its runner label and instance id as job outputs.
  start-runner:
    name: Start self-hosted EC2 runner
    # NOTE(review): the visible triggers are `push` and `workflow_dispatch`, so
    # `github.event.pull_request` is presumably never populated and this guard
    # always passes - confirm whether the `hold-tests` label check is needed.
    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      # NOTE(review): `@v1` is an old major version of this action - consider
      # upgrading after checking its release notes.
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ami-0154957ba4ce98784
          ec2-instance-type: m7i.12xlarge
          subnet-id: subnet-0b1e1d94240813658
          security-group-id: sg-055105753f5e8bd83

  # Builds the driver-toolkit image with `make`, tags it additionally as
  # `nvidia-builder`, and pushes both names on pushes to `main`.
  nvidia-bootc-builder-image:
    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
    strategy:
      matrix:
        include:
          - image_name: nvidia-builder
            context: training/nvidia-bootc
            arch: amd64
    runs-on: ${{ needs.start-runner.outputs.label }}
    needs: start-runner
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4.1.7

      - name: mkdir root/.docker directory
        run: |
          mkdir -p ~/.docker

      # Credentials are handed to the shell through environment variables and
      # the password is piped on stdin, so the secret is never expanded into
      # the script text or exposed on the podman command line.
      - name: Login to Container Registry
        env:
          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          printf '%s' "${REGISTRY_PASSWORD}" | podman login -u "${REGISTRY_USER}" --password-stdin "${REGISTRY}"

      - name: generate a ssh key - USER SHOULD INJECT THEIR OWN AND REBUILD IF THEY USE THIS IMAGE
        run: |
          ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N ""

      - name: Build Image
        id: build_image
        run: make driver-toolkit ARCH=${{ matrix.arch }}
        working-directory: ${{ matrix.context }}

      # Folded scalar: the three lines below are joined with single spaces
      # into one `podman tag <source> <target>` command.
      - name: tag image as nvidia-builder
        run: >-
          podman tag
          ${{ env.REGISTRY }}/${{ env.REGISTRY_ORG }}/driver-toolkit:latest
          ${{ env.REGISTRY }}/${{ env.REGISTRY_ORG }}/${{ matrix.image_name }}:latest

      - name: Push image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
        with:
          registry: ${{ env.REGISTRY }}/${{ env.REGISTRY_ORG }}
          image: driver-toolkit
          tags: latest

      - name: push the nvidia-builder image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: redhat-actions/push-to-registry@v2.8
        with:
          image: ${{ matrix.image_name }}
          tags: latest
          registry: ${{ env.REGISTRY }}/${{ env.REGISTRY_ORG }}

      # `always()` makes this step run even when an earlier step failed, so
      # the job status is reported either way.
      - name: Publish Job Results to Slack
        id: slack
        if: always()
        uses: slackapi/slack-github-action@v1.26.0
        with:
          payload: |
            {
              "text": "${{ github.workflow }} workflow status: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # NOTE(review): this job continues beyond the portion of the file that was
  # reviewed; the content below is re-indented only and otherwise unchanged.
  nvidia-bootc-image:
    strategy:
      matrix:
        include:
          - image_name: nvidia-bootc
            driver_version: "550.54.15"
            context: training/nvidia-bootc
            arch: amd64
    runs-on: ${{ needs.start-runner.outputs.label }}
    if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
    needs: [ nvidia-bootc-builder-image, start-runner ]
    steps:
      - uses: actions/checkout@v4.1.7

      - name: mkdir root/.docker directory
        run: |
          mkdir -p ~/.docker

      - name: generate a ssh key - USER SHOULD INJECT THEIR OWN AND REBUILD IF THEY USE THIS IMAGE and overwrite the existing one
        run: |
          ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N "" <<