Spaces:
Build error
Build error
Amy Roberts
committed on
Commit
·
b42fea9
1 Parent(s): 6b0b6fd
Ruff
Browse files
- app.py +33 -7
- defaults.py +1 -0
- fetch.py +8 -12
- find_similar_issues.py +8 -11
- get_topic.py +0 -57
- retrieval.py +0 -1
- update_stored_issues.py +8 -26
app.py
CHANGED
|
@@ -31,20 +31,39 @@ def get_query_issue_information(issue_no, token):
|
|
| 31 |
return request.json()
|
| 32 |
|
| 33 |
|
| 34 |
-
def run_find_similar_issues(token, n_issues, issue_no, query):
|
| 35 |
if issue_no == "":
|
| 36 |
issue_no = None
|
| 37 |
if query == "":
|
| 38 |
query = None
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
|
| 43 |
issues_html = "<br>".join(issues_html)
|
| 44 |
-
return issues_html
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
|
|
|
| 48 |
# Archive the stored issues
|
| 49 |
if os.path.exists("issues.json"):
|
| 50 |
date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
|
@@ -63,6 +82,12 @@ def update_issues():
|
|
| 63 |
model_id="all-mpnet-base-v2",
|
| 64 |
update=True
|
| 65 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
with gr.Blocks(title="Github Bot") as demo:
|
|
@@ -76,8 +101,9 @@ with gr.Blocks(title="Github Bot") as demo:
|
|
| 76 |
with gr.Column():
|
| 77 |
token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
|
| 78 |
n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
|
| 79 |
-
|
| 80 |
-
update_button.
|
|
|
|
| 81 |
|
| 82 |
with gr.Row():
|
| 83 |
submit_button = gr.Button(value="Submit")
|
|
@@ -85,7 +111,7 @@ with gr.Blocks(title="Github Bot") as demo:
|
|
| 85 |
with gr.Row():
|
| 86 |
with gr.Row():
|
| 87 |
issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
|
| 88 |
-
submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query])
|
| 89 |
|
| 90 |
with gr.Tab("Find maintainers to ping"):
|
| 91 |
with gr.Row():
|
|
|
|
| 31 |
return request.json()
|
| 32 |
|
| 33 |
|
| 34 |
+
def run_find_similar_issues(token, n_issues, issue_no, query, issue_types):
|
| 35 |
if issue_no == "":
|
| 36 |
issue_no = None
|
| 37 |
if query == "":
|
| 38 |
query = None
|
| 39 |
|
| 40 |
+
if len(issue_types) == 0:
|
| 41 |
+
raise ValueError("At least one issue type must be selected")
|
| 42 |
+
|
| 43 |
+
similar_issues = []
|
| 44 |
+
similar_pulls = []
|
| 45 |
+
if "Issue" in issue_types:
|
| 46 |
+
similar_issues = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="issue")
|
| 47 |
+
if "Pull Request" in issue_types:
|
| 48 |
+
similar_pulls = get_similar_issues(issue_no=issue_no, query=query, token=token, top_k=n_issues, issue_type="pull")
|
| 49 |
|
| 50 |
issues_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_issues]
|
| 51 |
issues_html = "<br>".join(issues_html)
|
|
|
|
| 52 |
|
| 53 |
+
pulls_html = [f"<a href='{issue['html_url']}' target='_blank'>#{issue['number']} - {issue['title']}</a>" for issue in similar_pulls]
|
| 54 |
+
pulls_html = "<br>".join(pulls_html)
|
| 55 |
+
|
| 56 |
+
final = ""
|
| 57 |
+
if len(issues_html) > 0:
|
| 58 |
+
final += f"<h2>Issues</h2>{issues_html}"
|
| 59 |
+
if len(pulls_html) > 0:
|
| 60 |
+
final += f"<h2>Pull Requests</h2>{pulls_html}"
|
| 61 |
+
|
| 62 |
+
# return issues_html
|
| 63 |
+
return final
|
| 64 |
|
| 65 |
+
|
| 66 |
+
def update():
|
| 67 |
# Archive the stored issues
|
| 68 |
if os.path.exists("issues.json"):
|
| 69 |
date_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
|
|
|
| 82 |
model_id="all-mpnet-base-v2",
|
| 83 |
update=True
|
| 84 |
)
|
| 85 |
+
build_embeddings.embed_issues(
|
| 86 |
+
input_filename="issues_dict.json",
|
| 87 |
+
issue_type="pull",
|
| 88 |
+
model_id="all-mpnet-base-v2",
|
| 89 |
+
update=True
|
| 90 |
+
)
|
| 91 |
|
| 92 |
|
| 93 |
with gr.Blocks(title="Github Bot") as demo:
|
|
|
|
| 101 |
with gr.Column():
|
| 102 |
token = gr.Textbox(label="Github Token", placeholder="Your github token for authentication. This is not stored anywhere.")
|
| 103 |
n_issues = gr.Slider(1, 50, value=5, step=1, label="Number of similar issues", info="Choose between 1 and 50")
|
| 104 |
+
issue_types = gr.CheckboxGroup(["Issue", "Pull Request"], label="Issue types")
|
| 105 |
+
update_button = gr.Button(value="Update issues", trigger_mode="once")
|
| 106 |
+
update_button.click(update, outputs=[], inputs=[])
|
| 107 |
|
| 108 |
with gr.Row():
|
| 109 |
submit_button = gr.Button(value="Submit")
|
|
|
|
| 111 |
with gr.Row():
|
| 112 |
with gr.Row():
|
| 113 |
issues_html = gr.HTML(label="Issue text", elem_id="issue_html")
|
| 114 |
+
submit_button.click(run_find_similar_issues, outputs=[issues_html], inputs=[token, n_issues, issue_no, query, issue_types])
|
| 115 |
|
| 116 |
with gr.Tab("Find maintainers to ping"):
|
| 117 |
with gr.Row():
|
defaults.py
CHANGED
|
@@ -3,3 +3,4 @@ import os
|
|
| 3 |
OWNER = "huggingface"
|
| 4 |
REPO = "transformers"
|
| 5 |
TOKEN = os.environ.get("GITHUB_TOKEN")
|
|
|
|
|
|
| 3 |
OWNER = "huggingface"
|
| 4 |
REPO = "transformers"
|
| 5 |
TOKEN = os.environ.get("GITHUB_TOKEN")
|
| 6 |
+
GITHUB_API_VERSION = "2022-11-28"
|
fetch.py
CHANGED
|
@@ -2,16 +2,16 @@
|
|
| 2 |
Script to fetch issues from the transformers repo and save them to a json file
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
import json
|
| 6 |
-
|
| 7 |
import argparse
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
import requests
|
| 10 |
-
import os
|
| 11 |
import numpy as np
|
| 12 |
-
|
| 13 |
-
import
|
| 14 |
-
import logging
|
| 15 |
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
|
|
@@ -19,11 +19,7 @@ logger = logging.getLogger(__name__)
|
|
| 19 |
|
| 20 |
today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
| 21 |
|
| 22 |
-
|
| 23 |
-
REPO = "transformers"
|
| 24 |
-
GITHUB_API_VERSION = "2022-11-28"
|
| 25 |
-
TOKEN = os.environ.get("GITHUB_TOKEN")
|
| 26 |
-
JSON_FILE = f"issues.json"
|
| 27 |
UPDATE_FILE = False
|
| 28 |
OVERWRITE_FILE = True
|
| 29 |
|
|
@@ -65,7 +61,7 @@ def get_issues(
|
|
| 65 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
| 66 |
headers = {
|
| 67 |
"Accept": "application/vnd.github+json",
|
| 68 |
-
|
| 69 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
| 70 |
"User-Agent": "amyeroberts",
|
| 71 |
}
|
|
|
|
| 2 |
Script to fetch issues from the transformers repo and save them to a json file
|
| 3 |
"""
|
| 4 |
|
|
|
|
|
|
|
| 5 |
import argparse
|
| 6 |
+
import datetime
|
| 7 |
+
import logging
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
|
| 11 |
import requests
|
|
|
|
| 12 |
import numpy as np
|
| 13 |
+
|
| 14 |
+
from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN
|
|
|
|
| 15 |
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
|
|
|
|
| 19 |
|
| 20 |
today = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
| 21 |
|
| 22 |
+
JSON_FILE = "issues.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
UPDATE_FILE = False
|
| 24 |
OVERWRITE_FILE = True
|
| 25 |
|
|
|
|
| 61 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
| 62 |
headers = {
|
| 63 |
"Accept": "application/vnd.github+json",
|
| 64 |
+
"Authorization": f"{token}",
|
| 65 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
| 66 |
"User-Agent": "amyeroberts",
|
| 67 |
}
|
find_similar_issues.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
import pprint
|
| 2 |
import json
|
| 3 |
import argparse
|
| 4 |
import requests
|
|
@@ -17,11 +16,12 @@ def load_embeddings():
|
|
| 17 |
embeddings = np.load("issue_embeddings.npy")
|
| 18 |
return embeddings
|
| 19 |
|
| 20 |
-
|
|
|
|
| 21 |
"""
|
| 22 |
Function to load issue information from file
|
| 23 |
"""
|
| 24 |
-
with open("
|
| 25 |
embedding_index_to_issue = json.load(f)
|
| 26 |
|
| 27 |
with open("issues_dict.json", "r") as f:
|
|
@@ -45,23 +45,20 @@ def get_issue(issue_no, token=TOKEN, owner=OWNER, repo=REPO):
|
|
| 45 |
"""
|
| 46 |
Function to get issue from GitHub
|
| 47 |
"""
|
| 48 |
-
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
| 49 |
headers = {
|
| 50 |
"Accept": "application/vnd.github+json",
|
| 51 |
-
|
| 52 |
"X-GitHub-Api-Version": "2022-11-28",
|
| 53 |
"User-Agent": "amyeroberts",
|
| 54 |
}
|
| 55 |
-
request = requests.get(
|
| 56 |
-
f"https://api.github.com/repos/{OWNER}/{REPO}/issues/{issue_no}",
|
| 57 |
-
headers=headers,
|
| 58 |
-
)
|
| 59 |
if request.status_code != 200:
|
| 60 |
raise ValueError(f"Request failed with status code {request.status_code}")
|
| 61 |
return request.json()
|
| 62 |
|
| 63 |
|
| 64 |
-
def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO):
|
| 65 |
"""
|
| 66 |
Function to find similar issues
|
| 67 |
"""
|
|
@@ -86,7 +83,7 @@ def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=
|
|
| 86 |
most_similar_indices = np.argsort(cosine_similarities)
|
| 87 |
most_similar_indices = most_similar_indices[0][::-1]
|
| 88 |
|
| 89 |
-
embedding_index_to_issue, issues = load_issue_information()
|
| 90 |
|
| 91 |
similar_issues = []
|
| 92 |
for i in most_similar_indices[:top_k]:
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import argparse
|
| 3 |
import requests
|
|
|
|
| 16 |
embeddings = np.load("issue_embeddings.npy")
|
| 17 |
return embeddings
|
| 18 |
|
| 19 |
+
|
| 20 |
+
def load_issue_information(issue_type="issue"):
|
| 21 |
"""
|
| 22 |
Function to load issue information from file
|
| 23 |
"""
|
| 24 |
+
with open(f"embedding_index_to_{issue_type}.json", "r") as f:
|
| 25 |
embedding_index_to_issue = json.load(f)
|
| 26 |
|
| 27 |
with open("issues_dict.json", "r") as f:
|
|
|
|
| 45 |
"""
|
| 46 |
Function to get issue from GitHub
|
| 47 |
"""
|
| 48 |
+
url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_no}"
|
| 49 |
headers = {
|
| 50 |
"Accept": "application/vnd.github+json",
|
| 51 |
+
"Authorization": f"{token}",
|
| 52 |
"X-GitHub-Api-Version": "2022-11-28",
|
| 53 |
"User-Agent": "amyeroberts",
|
| 54 |
}
|
| 55 |
+
request = requests.get(url, headers=headers)
|
|
|
|
|
|
|
|
|
|
| 56 |
if request.status_code != 200:
|
| 57 |
raise ValueError(f"Request failed with status code {request.status_code}")
|
| 58 |
return request.json()
|
| 59 |
|
| 60 |
|
| 61 |
+
def get_similar_issues(issue_no, query, top_k=5, token=TOKEN, owner=OWNER, repo=REPO, issue_type="issue"):
|
| 62 |
"""
|
| 63 |
Function to find similar issues
|
| 64 |
"""
|
|
|
|
| 83 |
most_similar_indices = np.argsort(cosine_similarities)
|
| 84 |
most_similar_indices = most_similar_indices[0][::-1]
|
| 85 |
|
| 86 |
+
embedding_index_to_issue, issues = load_issue_information(issue_type=issue_type)
|
| 87 |
|
| 88 |
similar_issues = []
|
| 89 |
for i in most_similar_indices[:top_k]:
|
get_topic.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
import json
|
| 3 |
-
|
| 4 |
-
with open("issues_dict.json", "r") as f:
|
| 5 |
-
issues = json.load(f)
|
| 6 |
-
|
| 7 |
-
topic_maintainers_map ={
|
| 8 |
-
"text models": ["@ArthurZucker", "@younesbelkada"],
|
| 9 |
-
"vision models": "@amyeroberts",
|
| 10 |
-
"speech models": "@sanchit-gandhi",
|
| 11 |
-
"graph models": "@clefourrier",
|
| 12 |
-
"flax": "@sanchit-gandhi",
|
| 13 |
-
"generate": "@gante",
|
| 14 |
-
"pipelines": "@Narsil",
|
| 15 |
-
"tensorflow": ["@gante", "@Rocketknight1"],
|
| 16 |
-
"tokenizers": "@ArthurZucker",
|
| 17 |
-
"trainer": ["@muellerzr", "@pacman100"],
|
| 18 |
-
"deepspeed": "@pacman100",
|
| 19 |
-
"ray/raytune": ["@richardliaw", "@amogkam"],
|
| 20 |
-
"Big Model Inference": "@SunMarc",
|
| 21 |
-
"quantization (bitsandbytes, autogpt)": ["@SunMarc", "@younesbelkada"],
|
| 22 |
-
"Documentation": ["@stevhliu", "@MKhalusova"],
|
| 23 |
-
"accelerate": "different repo",
|
| 24 |
-
"datasets": "different repo",
|
| 25 |
-
"diffusers": "different repo",
|
| 26 |
-
"rust tokenizers": "different repo",
|
| 27 |
-
"Flax examples": "@sanchit-gandhi",
|
| 28 |
-
"PyTorch vision examples": "@amyeroberts",
|
| 29 |
-
"PyTorch text examples": "@ArthurZucker",
|
| 30 |
-
"PyTorch speech examples": "@sanchit-gandhi",
|
| 31 |
-
"PyTorch generate examples": "@gante",
|
| 32 |
-
"TensorFlow": "@Rocketknight1",
|
| 33 |
-
"Research projects and examples": "not maintained",
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
issue_no = 2781
|
| 38 |
-
issue = issues[str(issue_no)]
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
from transformers import AutoTokenizer, LlamaForCausalLM
|
| 42 |
-
|
| 43 |
-
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
|
| 44 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
|
| 45 |
-
|
| 46 |
-
# prompt = f"Which of the following topics {list(topic_maintainers_map.keys())} is this issue about:\n{issue['body']}"
|
| 47 |
-
prompt = f"QUESTION: What is the provided issue about? Pick up to 3 topics from the following list: {list(topic_maintainers_map.keys())} \nISSUE START:\n{issue['body']} \n ISSUE END. \n ANSWER:"
|
| 48 |
-
inputs = tokenizer(prompt, return_tensors="pt")
|
| 49 |
-
|
| 50 |
-
prefix_len = inputs.input_ids.shape[1]
|
| 51 |
-
|
| 52 |
-
# Generate
|
| 53 |
-
generate_ids = model.generate(inputs.input_ids, max_length=30 + prefix_len)
|
| 54 |
-
outputs = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
| 55 |
-
print(outputs[prefix_len:])
|
| 56 |
-
|
| 57 |
-
print("TITLE", issue["number"] + " " + issue["title"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
retrieval.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import argparse
|
| 2 |
import json
|
| 3 |
-
import pprint
|
| 4 |
|
| 5 |
import numpy as np
|
| 6 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 1 |
import argparse
|
| 2 |
import json
|
|
|
|
| 3 |
|
| 4 |
import numpy as np
|
| 5 |
from sentence_transformers import SentenceTransformer
|
update_stored_issues.py
CHANGED
|
@@ -1,38 +1,20 @@
|
|
| 1 |
"""
|
| 2 |
Module which updates any of the issues to reflect changes in the issue state
|
| 3 |
"""
|
| 4 |
-
import json
|
| 5 |
-
import datetime
|
| 6 |
-
from defaults import TOKEN, OWNER, REPO
|
| 7 |
-
|
| 8 |
-
GITHUB_API_VERSION = "2022-11-28"
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# Get the issues that have been updated since the last update
|
| 13 |
-
|
| 14 |
-
import json
|
| 15 |
-
|
| 16 |
import argparse
|
| 17 |
-
|
| 18 |
-
import requests
|
| 19 |
-
import os
|
| 20 |
-
import numpy as np
|
| 21 |
import json
|
| 22 |
-
import datetime
|
| 23 |
import logging
|
|
|
|
| 24 |
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
|
| 29 |
-
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
REPO = "transformers"
|
| 33 |
-
GITHUB_API_VERSION = "2022-11-28"
|
| 34 |
-
TOKEN = os.environ.get("GITHUB_TOKEN")
|
| 35 |
-
JSON_FILE = f"issues.json"
|
| 36 |
|
| 37 |
|
| 38 |
def update_issues(
|
|
@@ -62,7 +44,7 @@ def update_issues(
|
|
| 62 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
| 63 |
headers = {
|
| 64 |
"Accept": "application/vnd.github+json",
|
| 65 |
-
|
| 66 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
| 67 |
"User-Agent": "amyeroberts",
|
| 68 |
}
|
|
|
|
| 1 |
"""
|
| 2 |
Module which updates any of the issues to reflect changes in the issue state
|
| 3 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import argparse
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import json
|
|
|
|
| 6 |
import logging
|
| 7 |
+
import os
|
| 8 |
|
| 9 |
+
import numpy as np
|
| 10 |
+
import requests
|
| 11 |
|
| 12 |
+
from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION
|
| 13 |
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
+
JSON_FILE = "issues.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def update_issues(
|
|
|
|
| 44 |
url = f"https://api.github.com/repos/{owner}/{repo}/issues"
|
| 45 |
headers = {
|
| 46 |
"Accept": "application/vnd.github+json",
|
| 47 |
+
"Authorization": f"{token}",
|
| 48 |
"X-GitHub-Api-Version": f"{github_api_version}",
|
| 49 |
"User-Agent": "amyeroberts",
|
| 50 |
}
|