| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | import argparse |
| | import json |
| | import os |
| | import re |
| | import subprocess |
| |
|
| | import git |
| | import requests |
| |
|
| |
|
def create_script(target_test):
    """Write `target_script.py`, the script run by `git bisect run` to determine if `target_test` passes or fails.

    The generated script installs the checked-out repository in editable mode, runs `target_test` with pytest
    (with `--flake-finder --flake-runs=4`) and exits with code `1` if the test fails or pytest reports an
    unknown error. If the test (or its file) is not found in a commit, the script exits with code `0`
    (i.e. `Success`).

    Args:
        target_test (`str`): The test to check.

    Returns:
        `None`: The script is written to `target_script.py` in the current working directory.
    """

    # `{target_test!r}` embeds the test id as a Python literal, so ids containing quotes, braces or
    # backslashes (e.g. parametrized ids) cannot break the syntax of the generated script. The generated
    # script then refers to the id only through its own `target_test` variable.
    script = f"""
import os
import subprocess

target_test = {target_test!r}

_ = subprocess.run(
    ["python3", "-m", "pip", "install", "-e", "."],
    capture_output = True,
    text=True,
)

result = subprocess.run(
    ["python3", "-m", "pytest", "-v", "--flake-finder", "--flake-runs=4", "-rfEp", target_test],
    capture_output = True,
    text=True,
)
print(result.stdout)

if f"FAILED {{target_test}}" in result.stdout:
    print("test failed")
    exit(1)
elif result.returncode != 0:
    if "ERROR: file or directory not found: " in result.stderr:
        print("test file or directory not found in this commit")
        # git bisect treats exit code 125 as `test not found`. But this causes it not be able to make the conclusion
        # if a test is added between the `good commit` (exclusive) and `bad commit` (inclusive) (in git bisect terminology).
        # So we return 0 here in order to allow the process being able to identify the first commit that fails the test.
        exit(0)
    elif "ERROR: not found: " in result.stderr:
        print("test not found in this commit")
        exit(0)
    else:
        print(f"pytest gets unknown error: {{result.stderr}}")
        exit(1)

print("pytest runs successfully.")
exit(0)
"""

    with open("target_script.py", "w", encoding="utf-8") as fp:
        fp.write(script.strip())
| |
|
| |
|
def _count_outcome(output, outcome):
    """Return the number reported for `outcome` (e.g. `passed`) in the first pytest summary line of `output`, or 0."""
    matches = re.findall(rf"====.* (\d+) {outcome}", output)
    return int(matches[0]) if matches else 0


def is_bad_commit(target_test, commit):
    """Check out `commit`, run `target_test` there through `target_script.py`, then restore the previous HEAD commit.

    Args:
        target_test (`str`): The test to check.
        commit (`str`): The commit to check out and run the test on.

    Returns:
        `tuple`: `(is_bad, n_failed, n_passed)` where `is_bad` (`bool`) is `True` when the script exits with a
        non-zero code, and `n_failed` / `n_passed` (`int`) are the counts parsed from the pytest summary line
        in the script output (`0` when the summary does not mention them).
    """
    repo = git.Repo(".")

    # Remember the current commit so the checkout below can be undone.
    original_head = repo.head.commit

    repo.git.checkout(commit)
    try:
        create_script(target_test=target_test)

        result = subprocess.run(
            ["python3", "target_script.py"],
            capture_output=True,
            text=True,
        )
    finally:
        # Restore the original commit even if writing or launching the script raised, so that a failure here
        # never leaves the working tree parked on `commit`.
        repo.git.checkout(original_head)

    n_passed = _count_outcome(result.stdout, "passed")
    n_failed = _count_outcome(result.stdout, "failed")

    return result.returncode != 0, n_failed, n_passed
| |
|
| |
|
def find_bad_commit(target_test, start_commit, end_commit):
    """Find (backward) the earliest commit between `start_commit` (inclusive) and `end_commit` (exclusive) at which `target_test` fails.

    Args:
        target_test (`str`): The test to check.
        start_commit (`str`): The latest commit (inclusive).
        end_commit (`str`): The earliest commit (exclusive).

    Returns:
        `tuple`: `(bad_commit, status)`. `bad_commit` (`str` or `None`) is the earliest commit at which
        `target_test` fails, or `None` when the test is considered flaky or `git bisect` could not identify a
        commit. `status` (`str`) explains the outcome.
    """

    # `end_commit` is used as the "good" end of the bisect range. If the test fails there during this check,
    # it cannot serve as a good commit, so the test is reported as flaky.
    failed_before, _, _ = is_bad_commit(target_test, end_commit)
    if failed_before:
        return (
            None,
            f"flaky: test passed in the previous run (commit: {end_commit}) but failed (on the same commit) during the check of the current run.",
        )

    # Both runs are on the same commit: there is no range to bisect.
    if start_commit == end_commit:
        return (
            None,
            f"flaky: test fails on the current CI run but passed in the previous run which is running on the same commit {end_commit}.",
        )

    # `start_commit` is used as the "bad" end of the bisect range. If any of the repeated runs passes there,
    # the failure is not reproducible and the test is reported as flaky.
    _, _, n_passed = is_bad_commit(target_test, start_commit)
    if n_passed > 0:
        return None, f"flaky: test fails on the current CI run (commit: {start_commit}) but passes during the check."

    create_script(target_test=target_test)

    bash = f"""
git bisect reset
git bisect start --first-parent {start_commit} {end_commit}
git bisect run python3 target_script.py
"""

    with open("run_git_bisect.sh", "w") as fp:
        fp.write(bash.strip())

    result = subprocess.run(
        ["bash", "run_git_bisect.sh"],
        check=False,
        capture_output=True,
        text=True,
    )
    print(result.stdout)

    if "error: bisect run failed" in result.stderr:
        error_msg = f"Error when running git bisect:\nbash error: {result.stderr}\nbash output:\n{result.stdout}\nset `bad_commit` to `None`."
        print(error_msg)
        return None, "git bisect failed"

    pattern = r"(.+) is the first bad commit"
    commits = re.findall(pattern, result.stdout)

    bad_commit = commits[0] if commits else None

    print(f"Between `start_commit` {start_commit} and `end_commit` {end_commit}")
    print(f"bad_commit: {bad_commit}\n")

    if bad_commit is None:
        # The bisect output never named a first bad commit: do not claim success with a `None` commit.
        return None, "git bisect failed"

    return bad_commit, "git bisect found the bad commit."
| |
|
| |
|
def _get_github_json(url):
    """Fetch `url` and return its decoded JSON body, with a timeout so a stalled connection cannot hang the run."""
    return requests.get(url, timeout=30).json()


def get_commit_info(commit):
    """Get information for a commit via `api.github.com`.

    Args:
        commit (`str` or `None`): The commit hash to look up. If `None`, no request is made.

    Returns:
        `dict`: A dictionary with the keys `commit`, `pr_number`, `author`, `merged_by` and `parent`. Every
        value is `None` when `commit` is `None`; individual values are `None` when the corresponding
        information is not available in the API responses.
    """
    if commit is None:
        # Same keys as the regular return value, so every entry built from this has the same schema.
        return {"commit": None, "pr_number": None, "author": None, "merged_by": None, "parent": None}

    pr_number = None
    author = None
    merged_author = None

    url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}/pulls"
    pr_info_for_commit = _get_github_json(url)

    if len(pr_info_for_commit) > 0:
        # Use the first pull request associated with the commit.
        pr_number = pr_info_for_commit[0]["number"]

        url = f"https://api.github.com/repos/huggingface/transformers/pulls/{pr_number}"
        pr_for_commit = _get_github_json(url)
        author = pr_for_commit["user"]["login"]
        if pr_for_commit["merged_by"] is not None:
            merged_author = pr_for_commit["merged_by"]["login"]

    url = f"https://api.github.com/repos/huggingface/transformers/commits/{commit}"
    commit_info = _get_github_json(url)

    # A commit without parents (e.g. a root commit) has an empty `parents` list.
    parents = commit_info["parents"]
    parent = parents[0]["sha"] if parents else None

    # Fall back to the commit author when no pull request was found. The `author` field of the commit
    # payload is nullable, so guard against `None` before reading the login.
    if author is None and commit_info["author"] is not None:
        author = commit_info["author"]["login"]

    return {"commit": commit, "pr_number": pr_number, "author": author, "merged_by": merged_author, "parent": parent}
| |
|
| |
|
if __name__ == "__main__":
    # Command-line entry point: bisect either a single test (`--test`) or every failed test listed in a
    # JSON report (`--file`), and write the result to `--output_file`.
    parser = argparse.ArgumentParser()
    parser.add_argument("--start_commit", type=str, required=True, help="The latest commit hash to check.")
    parser.add_argument("--end_commit", type=str, required=True, help="The earliest commit hash to check.")
    parser.add_argument("--test", type=str, help="The test to check.")
    parser.add_argument("--file", type=str, help="The report file.")
    parser.add_argument("--output_file", type=str, required=True, help="The path of the output file.")
    args = parser.parse_args()

    print(f"start_commit: {args.start_commit}")
    print(f"end_commit: {args.end_commit}")

    # Commit information keyed by commit hash, so each commit is looked up through `get_commit_info`
    # only once even when several tests share the same bad commit.
    commit_info_cache = {}

    # Both given or both missing is an error.
    if (args.test is None) == (args.file is None):
        raise ValueError("Exactly one argument `test` or `file` must be specified.")

    if args.test is not None:
        commit, status = find_bad_commit(
            target_test=args.test, start_commit=args.start_commit, end_commit=args.end_commit
        )
        with open(args.output_file, "w", encoding="UTF-8") as fp:
            fp.write(f"{args.test}\n{commit}\n{status}")
    elif os.path.isfile(args.file):
        # NOTE: when `--file` does not point to an existing file, nothing is checked and no output is written.
        with open(args.file, "r", encoding="UTF-8") as fp:
            reports = json.load(fp)

        for model in reports:
            # Only the `single-gpu` failures are checked; `multi-gpu` entries are dropped from the output.
            reports[model].pop("multi-gpu", None)
            # A model may have had only `multi-gpu` entries: treat a missing key as "no failed tests".
            failed_tests = reports[model].get("single-gpu", [])

            failed_tests_with_bad_commits = []
            for test in failed_tests:
                commit, status = find_bad_commit(
                    target_test=test, start_commit=args.start_commit, end_commit=args.end_commit
                )
                info = {"test": test, "commit": commit, "status": status}

                if commit in commit_info_cache:
                    commit_info = commit_info_cache[commit]
                else:
                    commit_info = get_commit_info(commit)
                    commit_info_cache[commit] = commit_info

                info.update(commit_info)
                failed_tests_with_bad_commits.append(info)

            # Replace the plain test list with the enriched entries, or drop the key when nothing is left.
            if len(failed_tests_with_bad_commits) > 0:
                reports[model]["single-gpu"] = failed_tests_with_bad_commits
            else:
                reports[model].pop("single-gpu", None)

        # Remove models that no longer have any entry.
        reports = {k: v for k, v in reports.items() if len(v) > 0}

        with open(args.output_file, "w", encoding="UTF-8") as fp:
            json.dump(reports, fp, ensure_ascii=False, indent=4)
| |
|