File size: 5,179 Bytes
a9bd396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""An internal script to process `new_failures_with_bad_commit.json` produced by `utils/check_bad_commit.py`.

This is used by `.github/workflows/check_failed_model_tests.yml` to produce a slack report of the following form

```
<{url}|New failed tests>
{
   "GH_ydshieh": {
       "vit": 1
   }
}
```
"""

import json
import os
from collections import Counter
from copy import deepcopy

from get_previous_daily_ci import get_last_daily_ci_run
from huggingface_hub import HfApi


if __name__ == "__main__":
    api = HfApi()

    job_name = os.environ.get("JOB_NAME")

    # Upload to Hub and get the url
    # if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
    report_repo_subfolder = ""
    if os.getenv("GITHUB_EVENT_NAME") != "schedule":
        report_repo_subfolder = f"{os.getenv('GITHUB_RUN_NUMBER')}-{os.getenv('GITHUB_RUN_ID')}"
        report_repo_subfolder = f"runs/{report_repo_subfolder}"

    workflow_run = get_last_daily_ci_run(
        token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_run_id=os.getenv("GITHUB_RUN_ID")
    )
    workflow_run_created_time = workflow_run["created_at"]

    report_repo_folder = workflow_run_created_time.split("T")[0]

    if report_repo_subfolder:
        report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"

    report_repo_id = os.getenv("REPORT_REPO_ID")

    with open("new_failures_with_bad_commit.json") as fp:
        data = json.load(fp)

    with open(f"ci_results_{job_name}/job_links.json") as fp:
        job_links = json.load(fp)

    # Update `new_failures_with_bad_commit.json` with job links information before uploading to Hub repository
    #   - need to change `single-gpu` to `single` and same for `multi-gpu` to match the keys in `job_link`.
    for model, model_result in data.items():
        for device, failed_tests in model_result.items():
            for failed_test in failed_tests:
                key = model
                if list(job_links.keys()) == [job_name]:
                    key = job_name
                failed_test["job_link"] = job_links[key][device.replace("-gpu", "")]

    with open("new_failures_with_bad_commit.json", "w") as fp:
        json.dump(data, fp, indent=4, ensure_ascii=False)

    commit_info = api.upload_file(
        path_or_fileobj="new_failures_with_bad_commit.json",
        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit.json",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )

    # TODO: extend
    team_members = [
        "ArthurZucker",
        "Cyrilvallez",
        "LysandreJik",
        "MekkCyber",
        "Rocketknight1",
        "SunMarc",
        "ebezzam",
        "eustlb",
        "gante",
        "itazap",
        "ivarflakstad",
        "molbap",
        "remi-or",
        "stevhliu",
        "vasqu",
        "ydshieh",
        "zucchini-nlp",
    ]

    # Counting the number of failures grouped by authors
    new_data = {}
    for model, model_result in data.items():
        for device, failed_tests in model_result.items():
            for failed_test in failed_tests:
                author = failed_test["author"]

                if author not in team_members:
                    author = failed_test["merged_by"]

                if author not in new_data:
                    new_data[author] = Counter()
                new_data[author].update([model])
    for author in new_data:
        new_data[author] = dict(new_data[author])

    # Group by author
    new_data_full = {author: deepcopy(data) for author in new_data}
    for author, _data in new_data_full.items():
        for model, model_result in _data.items():
            for device, failed_tests in model_result.items():
                failed_tests = [x for x in failed_tests if x["author"] == author or x["merged_by"] == author]
                model_result[device] = failed_tests
            _data[model] = {k: v for k, v in model_result.items() if len(v) > 0}
        new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}

    with open("new_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
        json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
    commit_info = api.upload_file(
        path_or_fileobj="new_failures_with_bad_commit_grouped_by_authors.json",
        path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit_grouped_by_authors.json",
        repo_id=report_repo_id,
        repo_type="dataset",
        token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
    )
    url = f"https://huggingface.co/datasets/{report_repo_id}/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit_grouped_by_authors.json"

    # Add `GH_` prefix as keyword mention
    output = {}
    for author, item in new_data.items():
        author = f"GH_{author}"
        output[author] = item

    report = f"<{url}|New failed tests>\\n\\n"
    report += json.dumps(output, indent=4).replace('"', '\\"').replace("\n", "\\n")
    print(report)