#!/usr/bin/env python
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import os
import re
import subprocess
import git
import requests
def create_script(target_test):
    """Write `target_script.py`, the script run by `git bisect run` to determine if `target_test` passes or fails.

    The generated script installs the checked-out commit (`pip install -e .`), runs `target_test` with `pytest`
    and exits with code `1` if the test fails or if `pytest` reports an unknown error. If a test is not found in a
    commit, the script exits with code `0` (i.e. `Success`).

    Args:
        target_test (`str`): The test to check.

    Returns:
        `None`: The script is not returned; it is written to `target_script.py` in the current working directory.
    """
    # `target_test` is embedded as a Python literal (`!r`) and stored in a variable of the generated script instead
    # of being pasted between the quotes of an f-string: a test id containing quotes, backslashes or braces
    # (e.g. a parametrized id) would otherwise produce a script that does not compile or checks the wrong name.
    script = f"""
import subprocess
import sys

target_test = {target_test!r}

_ = subprocess.run(
    ["python3", "-m", "pip", "install", "-e", "."],
    capture_output=True,
    text=True,
)

result = subprocess.run(
    ["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", target_test],
    capture_output=True,
    text=True,
)
print(result.stdout)

if f"FAILED {{target_test}}" in result.stdout:
    print("test failed")
    sys.exit(1)
elif result.returncode != 0:
    if "ERROR: file or directory not found: " in result.stderr:
        print("test file or directory not found in this commit")
        # git bisect treats exit code 125 as `test not found`. But this causes it not be able to make the conclusion
        # if a test is added between the `good commit` (exclusive) and `bad commit` (inclusive) (in git bisect terminology).
        # So we return 0 here in order to allow the process being able to identify the first commit that fails the test.
        sys.exit(0)
    elif "ERROR: not found: " in result.stderr:
        print("test not found in this commit")
        sys.exit(0)
    else:
        print(f"pytest gets unknown error: {{result.stderr}}")
        sys.exit(1)

print("pytest runs successfully.")
sys.exit(0)
"""

    with open("target_script.py", "w", encoding="utf-8") as fp:
        fp.write(script.strip())
def is_bad_commit(target_test, commit):
    """Check out `commit`, run `target_test` on it and report whether the check failed.

    The check is done by (re)generating `target_script.py` with `create_script` and running it with `python3`.
    The commit that `HEAD` pointed to before the call is checked out again afterwards, even if generating or
    running the script raises.

    Args:
        target_test (`str`): The test to check.
        commit (`str`): The commit to check out before running the test.

    Returns:
        `tuple`: `(failed, n_failed, n_passed)` where `failed` is `True` if the script exited with a non-zero code,
        and `n_failed` / `n_passed` are the counts parsed from the first `==== ... N failed` / `==== ... N passed`
        line in the script's output (`0` when there is no such line).
    """
    repo = git.Repo(".")  # or specify path to your repo

    # Save the current HEAD reference
    # NOTE(review): this is a commit object, so restoring it presumably leaves `HEAD` detached even if it pointed
    # to a branch before the call - confirm this is acceptable for the callers.
    original_head = repo.head.commit

    # Checkout to the commit
    repo.git.checkout(commit)
    try:
        create_script(target_test=target_test)

        result = subprocess.run(
            ["python3", "target_script.py"],
            capture_output=True,
            text=True,
        )
    finally:
        # Restore to original commit, whatever happened above, so the repository is not left on `commit`.
        repo.git.checkout(original_head)

    passed_match = re.search(r"====.* (\d+) passed", result.stdout)
    n_passed = int(passed_match.group(1)) if passed_match else 0

    failed_match = re.search(r"====.* (\d+) failed", result.stdout)
    n_failed = int(failed_match.group(1)) if failed_match else 0

    return result.returncode != 0, n_failed, n_passed
def find_bad_commit(target_test, start_commit, end_commit):
    """Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.

    Before bisecting, the test is run on `end_commit` and `start_commit` to filter out flaky failures: if it fails
    on `end_commit`, if both commits are the same, or if it passes at least once on `start_commit`, no bisection is
    done and `None` is returned as the commit.

    Args:
        target_test (`str`): The test to check.
        start_commit (`str`): The latest commit (inclusive).
        end_commit (`str`): The earliest commit (exclusive).

    Returns:
        `tuple`: `(bad_commit, status)`. `bad_commit` is the earliest commit (`str`) at which `target_test` fails,
        or `None` if the failure is considered flaky, `git bisect` failed or no bad commit was reported. `status`
        is a `str` explaining the outcome.
    """
    # check if `end_commit` fails the test
    # (we only need one failure to conclude the test is flaky on the previous run with `end_commit`)
    failed_before, _, _ = is_bad_commit(target_test, end_commit)
    if failed_before:
        return (
            None,
            f"flaky: test passed in the previous run (commit: {end_commit}) but failed (on the same commit) during the check of the current run.",
        )

    # if there is no new commit (e.g. 2 different CI runs on the same commit):
    #   - failed once on `start_commit` but passed on `end_commit`, which are the same commit --> flaky (or something
    #     changed externally) --> don't report
    if start_commit == end_commit:
        return (
            None,
            f"flaky: test fails on the current CI run but passed in the previous run which is running on the same commit {end_commit}.",
        )

    # Now, we are (almost) sure `target_test` is not failing at `end_commit`
    # check if `start_commit` fails the test
    # **IMPORTANT** we only need one pass to conclude the test is flaky on the current run with `start_commit`!
    _, n_failed, n_passed = is_bad_commit(target_test, start_commit)
    if n_passed > 0:
        # failed on CI run, but not reproducible here --> don't report
        return None, f"flaky: test fails on the current CI run (commit: {start_commit}) but passes during the check."

    create_script(target_test=target_test)

    bash = f"""
git bisect reset
git bisect start --first-parent {start_commit} {end_commit}
git bisect run python3 target_script.py
"""

    with open("run_git_bisect.sh", "w") as fp:
        fp.write(bash.strip())

    result = subprocess.run(
        ["bash", "run_git_bisect.sh"],
        check=False,
        capture_output=True,
        text=True,
    )
    print(result.stdout)

    # This happens if running the script gives exit code < 0 or other issues
    if "error: bisect run failed" in result.stderr:
        error_msg = f"Error when running git bisect:\nbash error: {result.stderr}\nbash output:\n{result.stdout}\nset `bad_commit` to `None`."
        print(error_msg)
        return None, "git bisect failed"

    pattern = r"(.+) is the first bad commit"
    commits = re.findall(pattern, result.stdout)
    bad_commit = commits[0] if commits else None

    print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
    print(f"bad_commit: {bad_commit}\n")

    if bad_commit is None:
        # Don't claim success when the bisect output contains no `... is the first bad commit` line.
        return None, "git bisect did not report a bad commit."

    return bad_commit, "git bisect found the bad commit."
def get_commit_info(commit):
    """Get information for a commit via `api.github.com`.

    Args:
        commit (`str` or `None`): The commit hash to look up in `huggingface/transformers`.

    Returns:
        `dict`: A dictionary with the keys `commit`, `pr_number`, `author`, `merged_by` and `parent`. All values are
        `None` if `commit` is `None`. `pr_number` and `merged_by` are `None` if no pull request is returned for the
        commit; `author` falls back to the commit's GitHub author login (or `None` if there is none); `parent` is
        the SHA of the first parent (or `None` if the commit has no parent).

    Raises:
        `requests.HTTPError`: If `api.github.com` answers with an error status (e.g. when the rate limit is hit).
    """
    if commit is None:
        # Same keys as the regular return value below, so consumers can rely on a single schema.
        return {"commit": None, "pr_number": None, "author": None, "merged_by": None, "parent": None}

    def _get_json(url):
        # The timeout avoids hanging forever on a stalled connection. `raise_for_status` turns an error response
        # (whose JSON body is a dict with an error message) into an explicit exception instead of an obscure
        # `KeyError` later on.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()

    pr_number = None
    author = None
    merged_author = None

    pr_info_for_commit = _get_json(f"https://api.github.com/repos/huggingface/transformers/commits/{commit}/pulls")

    if len(pr_info_for_commit) > 0:
        pr_number = pr_info_for_commit[0]["number"]

        pr_for_commit = _get_json(f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}")
        author = pr_for_commit["user"]["login"]
        if pr_for_commit["merged_by"] is not None:
            merged_author = pr_for_commit["merged_by"]["login"]

    commit_info = _get_json(f"https://api.github.com/repos/huggingface/transformers/commits/{commit}")

    # A commit without parent has an empty `parents` list.
    parents = commit_info.get("parents") or []
    parent = parents[0]["sha"] if parents else None

    if author is None:
        # `author` can be `null` in the API response; keep `None` in that case instead of failing.
        author = (commit_info.get("author") or {}).get("login")

    return {"commit": commit, "pr_number": pr_number, "author": author, "merged_by": merged_author, "parent": parent}
if __name__ == "__main__":
    # Command line entry point: find the bad commit either for a single test (`--test`) or for every failed
    # `single-gpu` test listed in a JSON report (`--file`), and write the result to `--output_file`.
    parser = argparse.ArgumentParser()
    parser.add_argument("--start_commit", type=str, required=True, help="The latest commit hash to check.")
    parser.add_argument("--end_commit", type=str, required=True, help="The earliest commit hash to check.")
    parser.add_argument("--test", type=str, help="The test to check.")
    parser.add_argument("--file", type=str, help="The report file.")
    parser.add_argument("--output_file", type=str, required=True, help="The path of the output file.")
    args = parser.parse_args()

    print(f"start_commit: {args.start_commit}")
    print(f"end_commit: {args.end_commit}")

    # `get_commit_info` uses `requests.get()` to request info. via `api.github.com` without using token.
    # If there are many new failed tests in a workflow run, this script may fail at some point due to the rate limit.
    # Let's cache the commit info. and reuse them whenever possible.
    commit_info_cache = {}

    # Both given or both missing is an error: exactly one of them must be provided.
    if (args.test is None) == (args.file is None):
        raise ValueError("Exactly one argument `test` or `file` must be specified.")

    if args.test is not None:
        commit, status = find_bad_commit(
            target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit
        )
        with open(args.output_file, "w", encoding="UTF-8") as fp:
            fp.write(f"{args.test}\n{commit}\n{status}")
    elif os.path.isfile(args.file):
        # NOTE: if `--file` does not point to an existing file, nothing is done and no output file is written.
        with open(args.file, "r", encoding="UTF-8") as fp:
            reports = json.load(fp)

        for model_report in reports.values():
            # TODO: make this script able to deal with both `single-gpu` and `multi-gpu` via a new argument.
            model_report.pop("multi-gpu", None)
            # A model may only have `multi-gpu` failures: don't fail with `KeyError` in that case.
            failed_tests = model_report.get("single-gpu", [])

            failed_tests_with_bad_commits = []
            for test in failed_tests:
                commit, status = find_bad_commit(
                    target_test=test, start_commit=args.start_commit, end_commit=args.end_commit
                )
                info = {"test": test, "commit": commit, "status": status}

                if commit not in commit_info_cache:
                    commit_info_cache[commit] = get_commit_info(commit)
                info.update(commit_info_cache[commit])

                failed_tests_with_bad_commits.append(info)

            # If no single-gpu test failures, remove the key
            if len(failed_tests_with_bad_commits) > 0:
                model_report["single-gpu"] = failed_tests_with_bad_commits
            else:
                model_report.pop("single-gpu", None)

        # remove the models without any test failure
        reports = {k: v for k, v in reports.items() if len(v) > 0}

        with open(args.output_file, "w", encoding="UTF-8") as fp:
            json.dump(reports, fp, ensure_ascii=False, indent=4)