name: SSH into our runners

# Manually triggered workflow: the caller picks the GPU family, the container
# image to start, and whether a single-GPU or multi-GPU machine is wanted.
on:
  workflow_dispatch:
    inputs:
      # Only the values listed under `options` are mapped to a runner group by
      # the `get_runner` job, so anything else is rejected at dispatch time.
      runner_type:
        description: 'Type of runner to test (a10 or t4)'
        required: true
        type: choice
        options:
          - a10
          - t4
      # Used as the job container image of the `ssh_runner` job.
      docker_image:
        description: 'Name of the Docker image'
        required: true
        type: string
      num_gpus:
        description: 'Type of the number of gpus to use (`single` or `multi`)'
        required: true
        type: choice
        options:
          - single
          - multi
|
|
# Environment variables shared by every job and step of this workflow.
# Values that look like booleans or numbers are quoted so that every YAML
# parser yields the exact string that ends up in the process environment.
env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  CUDA_VISIBLE_DEVICES: "0,1"
|
|
jobs:
  # Translates the (num_gpus, runner_type) inputs into a runner group name and
  # exposes it to downstream jobs as the `RUNNER` output.
  get_runner:
    name: "Get runner to use"
    runs-on: ubuntu-22.04
    outputs:
      RUNNER: ${{ steps.set_runner.outputs.RUNNER }}
    steps:
      - name: Get runner to use
        id: set_runner
        shell: bash
        # The inputs are handed over as environment variables instead of being
        # interpolated into the script text, so their content is never parsed
        # as shell code.
        env:
          NUM_GPUS: ${{ github.event.inputs.num_gpus }}
          RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
        run: |
          case "${NUM_GPUS}:${RUNNER_TYPE}" in
            single:t4)  runner="aws-g4dn-2xlarge-cache" ;;
            multi:t4)   runner="aws-g4dn-12xlarge-cache" ;;
            single:a10) runner="aws-g5-4xlarge-cache" ;;
            multi:a10)  runner="aws-g5-12xlarge-cache" ;;
            *)
              # Fail here with a clear message rather than handing an empty
              # runner group to the next job.
              echo "::error::Unsupported combination: num_gpus='${NUM_GPUS}', runner_type='${RUNNER_TYPE}'"
              exit 1
              ;;
          esac
          echo "$runner"
          echo "RUNNER=$runner" >> "$GITHUB_OUTPUT"

  # Starts the requested container on the selected runner group, prepares the
  # checkout inside it, then runs the Tailscale action with `waitForSSH: true`.
  ssh_runner:
    name: "SSH"
    needs: get_runner
    runs-on:
      group: ${{ needs.get_runner.outputs.RUNNER }}
    container:
      image: ${{ github.event.inputs.docker_image }}
      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/

    steps:
      # The steps below use /transformers as working directory, i.e. they
      # expect the image to already contain a clone at that path.
      - name: Update clone
        working-directory: /transformers
        run: |
          git fetch && git checkout ${{ github.sha }}

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      # Stores a sanitized copy of the triggering user's login in the job
      # environment as `github_actor`; the next step uses it to build a secret
      # name.
      - name: Store Slack infos (GitHub actor)
        shell: bash
        env:
          ACTOR: ${{ github.actor }}
        run: |
          echo "$ACTOR"
          # Secret names cannot contain hyphens, so replace every hyphen of the
          # login (not only the first one) with an underscore.
          github_actor="${ACTOR//-/_}"
          echo "$github_actor"
          echo "github_actor=$github_actor" >> "$GITHUB_ENV"

      # Picks the Slack destination: the `<github_actor>_SLACK_ID` secret when
      # it is set, otherwise the shared CI feedback channel.
      - name: Store Slack infos (channel)
        shell: bash
        env:
          USER_SLACK_ID: ${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}
          DEFAULT_SLACK_CHANNEL: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
        run: |
          echo "$github_actor"
          if [ -n "$USER_SLACK_ID" ]; then
            echo "SLACKCHANNEL=$USER_SLACK_ID" >> "$GITHUB_ENV"
          else
            echo "SLACKCHANNEL=$DEFAULT_SLACK_CHANNEL" >> "$GITHUB_ENV"
          fi

      # NOTE(review): the action is referenced by a moving branch (`@main`);
      # consider pinning it to a commit SHA.
      - name: Tailscale
        uses: huggingface/tailscale-action@main
        with:
          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
          slackChannel: ${{ env.SLACKCHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
          waitForSSH: true
          sshTimeout: 15m
|
|