# Source file: transformers/.github/workflows/check_failed_tests.yml
# Hub page metadata: "Upload folder using huggingface_hub"
# Revision a9bd396, verified about 1 month ago
# File size: 12.4 kB

	name: Process failed tests

	on:
	workflow_call:
	inputs:
	docker:
	required: true
	type: string
	job:
	required: true
	type: string
	slack_report_channel:
	required: true
	type: string
	ci_event:
	required: true
	type: string
	report_repo_id:
	required: true
	type: string
	commit_sha:
	required: false
	type: string
	pr_number:
	required: false
	type: string
	outputs:
	report:
	description: "Content of the report of new failures"
	value: ${{ jobs.process_new_failures_with_commit_info.outputs.report }}

	env:
	HF_HOME: /mnt/cache
	TRANSFORMERS_IS_CI: yes
	OMP_NUM_THREADS: 8
	MKL_NUM_THREADS: 8
	RUN_SLOW: yes
	# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
	# This token is created under the bot `hf-transformers-bot`.
	HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
	TF_FORCE_GPU_ALLOW_GROWTH: true
	CUDA_VISIBLE_DEVICES: 0,1


	jobs:
	check_new_failures:
	name: "Find commits for new failing tests"
	strategy:
	matrix:
	run_idx: [1]
	runs-on:
	group: aws-g5-4xlarge-cache
	outputs:
	process: ${{ steps.check_file.outputs.process }}
	container:
	image: ${{ inputs.docker }}
	options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
	steps:
	- uses: actions/download-artifact@v4
	with:
	name: ci_results_${{ inputs.job }}
	path: /transformers/ci_results_${{ inputs.job }}

	- name: Check file
	id: check_file
	working-directory: /transformers
	env:
	job: ${{ inputs.job }}
	run: \|
	if [ -f "ci_results_${job}/new_failures.json" ]; then
	echo "\`ci_results_${job}/new_failures.json\` exists, continue ..."
	echo "process=true" >> $GITHUB_ENV
	echo "process=true" >> $GITHUB_OUTPUT
	else
	echo "\`ci_results_${job}/new_failures.json\` doesn't exist, abort."
	echo "process=false" >> $GITHUB_ENV
	echo "process=false" >> $GITHUB_OUTPUT
	fi

	- uses: actions/download-artifact@v4
	if: ${{ env.process == 'true' }}
	with:
	pattern: setup_values*
	path: setup_values
	merge-multiple: true

	- name: Prepare some setup values
	if: ${{ env.process == 'true' }}
	run: \|
	if [ -f setup_values/prev_workflow_run_id.txt ]; then
	echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
	else
	echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
	fi

	- name: Update clone
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	env:
	commit_sha: ${{ inputs.commit_sha \|\| github.sha }}
	run: \|
	git fetch origin "$commit_sha" && git checkout "$commit_sha"

	- name: Get `START_SHA`
	working-directory: /transformers/utils
	if: ${{ env.process == 'true' }}
	env:
	commit_sha: ${{ inputs.commit_sha \|\| github.sha }}
	run: \|
	echo "START_SHA=$commit_sha" >> $GITHUB_ENV

	# This is used if the CI is triggered from a pull request `self-comment-ci.yml` (after security check is verified)
	- name: Extract the base commit on `main` (of the merge commit created by Github) if it is a PR
	id: pr_info
	if: ${{ env.process == 'true' && inputs.pr_number != '' }}
	uses: actions/github-script@v6
	with:
	script: \|
	const { data: pr } = await github.rest.pulls.get({
	owner: context.repo.owner,
	repo: context.repo.repo,
	pull_number: ${{ inputs.pr_number }}
	});

	const { data: merge_commit } = await github.rest.repos.getCommit({
	owner: pr.base.repo.owner.login,
	repo: pr.base.repo.name,
	ref: '${{ inputs.commit_sha }}',
	});

	core.setOutput('merge_commit_base_sha', merge_commit.parents[0].sha);

	# Usually, `END_SHA` should be the commit of the last previous workflow run of the SAME (scheduled) workflow.
	# (This is why we don't need to specify `workflow_id` which would be fetched automatically in the python script.)
	- name: Get `END_SHA` from previous CI runs of the same workflow
	working-directory: /transformers/utils
	if: ${{ env.process == 'true' && inputs.pr_number == '' }}
	env:
	ACCESS_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
	run: \|
	echo "END_SHA=$(TOKEN="$ACCESS_TOKEN" python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV

	# However, for workflow runs triggered by `issue_comment` (for pull requests), we want to check against the
	# parent commit (on `main`) of the `merge_commit` (dynamically created by GitHub). In this case, the goal is to
	# see if a reported failing test is actually ONLY failing on the `merge_commit`.
	- name: Set `END_SHA`
	if: ${{ env.process == 'true' && inputs.pr_number != '' }}
	env:
	merge_commit_base_sha: ${{ steps.pr_info.outputs.merge_commit_base_sha }}
	run: \|
	echo "END_SHA=$merge_commit_base_sha" >> $GITHUB_ENV

	- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

	- name: NVIDIA-SMI
	if: ${{ env.process == 'true' }}
	run: \|
	nvidia-smi

	- name: Environment
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	run: \|
	python3 utils/print_env.py

	- name: Install pytest-flakefinder
	if: ${{ env.process == 'true' }}
	run: python3 -m pip install pytest-flakefinder

	- name: Show installed libraries and their versions
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	run: pip freeze

	- name: Check failed tests
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	env:
	job: ${{ inputs.job }}
	run_idx: ${{ matrix.run_idx }}
	run: python3 utils/check_bad_commit.py --start_commit "$START_SHA" --end_commit "$END_SHA" --file "ci_results_${job}/new_failures.json" --output_file "new_failures_with_bad_commit_${job}_${run_idx}.json"

	- name: Show results
	working-directory: /transformers
	if: ${{ env.process == 'true' }}
	env:
	job: ${{ inputs.job }}
	run_idx: ${{ matrix.run_idx }}
	run: \|
	ls -l "new_failures_with_bad_commit_${job}_${run_idx}.json"
	cat "new_failures_with_bad_commit_${job}_${run_idx}.json"

	- name: Upload artifacts
	uses: actions/upload-artifact@v4
	with:
	name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
	path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json

	process_new_failures_with_commit_info:
	name: "process bad commit reports"
	needs: check_new_failures
	if: needs.check_new_failures.outputs.process == 'true'
	runs-on:
	group: aws-g5-4xlarge-cache
	outputs:
	report: ${{ steps.set_output.outputs.report }}
	container:
	image: ${{ inputs.docker }}
	options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
	steps:
	- uses: actions/download-artifact@v4
	with:
	name: ci_results_${{ inputs.job }}
	path: /transformers/ci_results_${{ inputs.job }}

	- uses: actions/download-artifact@v4
	with:
	pattern: new_failures_with_bad_commit_${{ inputs.job }}*
	path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
	merge-multiple: true

	- name: Check files
	working-directory: /transformers
	env:
	job: ${{ inputs.job }}
	run: \|
	ls -la /transformers
	ls -la "/transformers/new_failures_with_bad_commit_${job}"

	# Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
	# to further reduce the false positive caused by flaky tests, which requires further processing to merge reports.
	- name: Merge files
	shell: bash
	working-directory: /transformers
	env:
	job: ${{ inputs.job }}
	run: \|
	cp "/transformers/new_failures_with_bad_commit_${job}/new_failures_with_bad_commit_${job}_1.json" new_failures_with_bad_commit.json

	- name: Update clone
	working-directory: /transformers
	env:
	commit_sha: ${{ inputs.commit_sha \|\| github.sha }}
	run: \|
	git fetch origin "$commit_sha" && git checkout "$commit_sha"

	- name: Process report
	shell: bash
	working-directory: /transformers
	env:
	ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
	TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
	JOB_NAME: ${{ inputs.job }}
	REPORT_REPO_ID: ${{ inputs.report_repo_id }}
	run: \|
	{
	echo 'REPORT_TEXT<<EOF'
	python3 utils/process_bad_commit_report.py
	echo EOF
	} >> "$GITHUB_ENV"

	# The output is useful if a caller needs more processing, for example, we have a chain
	# self-comment-ci.yml -> self-scheduled.yml -> this one (check_failed_tests.yml),
	# and `self-comment-ci.yml` needs further processing before sending a GitHub comment to the pull request page.
	- name: Show results & Set outputs
	id: set_output
	working-directory: /transformers
	run: \|
	ls -l new_failures_with_bad_commit.json
	cat new_failures_with_bad_commit.json

	{
	echo 'report<<EOF'
	cat new_failures_with_bad_commit.json
	echo '' # Force a newline
	echo EOF
	} >> "$GITHUB_OUTPUT"

	- name: Upload artifacts
	uses: actions/upload-artifact@v4
	with:
	name: new_failures_with_bad_commit_${{ inputs.job }}
	path: /transformers/new_failures_with_bad_commit.json

	- name: Prepare Slack report title
	working-directory: /transformers
	env:
	ci_event: ${{ inputs.ci_event }}
	job: ${{ inputs.job }}
	run: \|
	pip install slack_sdk
	echo "title=$(python3 -c 'import sys; import os; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = os.environ["ci_event"]; job = os.environ["job"]; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV

	- name: Send processed report
	if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
	uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
	with:
	# Slack channel id, channel name, or user id to post message.
	# See also: https://api.slack.com/methods/chat.postMessage#channels
	channel-id: '#${{ inputs.slack_report_channel }}'
	# For posting a rich message using Block Kit
	payload: \|
	{
	"blocks": [
	{
	"type": "header",
	"text": {
	"type": "plain_text",
	"text": "${{ env.title }}"
	}
	},
	{
	"type": "section",
	"text": {
	"type": "mrkdwn",
	"text": "${{ env.REPORT_TEXT }}"
	}
	}
	]
	}
	env:
	SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}