Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,18 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
| 5 |
import torch
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Load BERT model & tokenizer
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
|
|
@@ -29,7 +41,67 @@ category_mappings = {
|
|
| 29 |
"kmeans_labels": pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int32')
|
| 30 |
}
|
| 31 |
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# BERT embedding
|
| 34 |
combined_text = f"{component} [SEP] {title} [SEP] {description}"
|
| 35 |
inputs = tokenizer(combined_text, return_tensors="pt", truncation=True,max_length=512)
|
|
@@ -57,8 +129,10 @@ iface = gr.Interface(
|
|
| 57 |
inputs=[
|
| 58 |
gr.Textbox(label="Component"),
|
| 59 |
gr.Textbox(label="Title"),
|
| 60 |
-
gr.Textbox(label="Description")
|
|
|
|
|
|
|
| 61 |
],
|
| 62 |
outputs="text"
|
| 63 |
)
|
| 64 |
-
iface.launch()
|
|
|
|
| 5 |
import torch
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
+
from supabase import create_client, Client
|
| 9 |
+
from google import genai
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
apikey = os.getenv('gemini_secret')
|
| 14 |
+
client = genai.Client(api_key=apikey)
|
| 15 |
+
|
| 16 |
+
supabase_url = os.getenv("NEXT_PUBLIC_SUPABASE_URL")
|
| 17 |
+
supabase_anon_key = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
|
| 18 |
+
|
| 19 |
+
supabase: Client = create_client(supabase_url, supabase_anon_key)
|
| 20 |
|
| 21 |
# Load BERT model & tokenizer
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
|
|
|
|
| 41 |
"kmeans_labels": pd.Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int32')
|
| 42 |
}
|
| 43 |
|
| 44 |
+
|
| 45 |
+
def combine_document_features(component, description):
    """Join a bug's component and description into one text snippet for embedding."""
    return "Component is {} with Description {}".format(component, description)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def retrieve_similar_documents(component, description, pid):
    """Embed a new bug report and fetch similar bugs for a project via Supabase.

    Parameters:
        component: bug component name (str).
        description: bug description text (str).
        pid: project identifier forwarded to the ``get_similar_bugs`` RPC.

    Returns:
        Tuple ``(matches, new_doc_text)`` where ``matches`` is a list of
        similar-bug rows from the RPC (``None`` when nothing was returned)
        and ``new_doc_text`` is the combined text that was embedded.
    """
    new_doc_text = combine_document_features(component, description)
    inputs = tokenizer(new_doc_text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only — no gradients needed.
    with torch.no_grad():
        model_outputs = model(**inputs)
    # [CLS]-token embedding from the last hidden layer, converted to a plain
    # Python list so the RPC payload is JSON-serializable.
    # NOTE(review): assumes the model was loaded with output_hidden_states
    # enabled — confirm against the model setup above.
    new_embedding = model_outputs.hidden_states[-1][:, 0, :][0].tolist()

    # Ask the database for nearest-neighbour bugs within the same project.
    response = supabase.rpc("get_similar_bugs", {
        "query_embedding": new_embedding,
        "pid": pid,
    }).execute()

    if not response.data:
        return None, new_doc_text
    # BUGFIX: the original reused the name `outputs` for this list, shadowing
    # the model forward-pass result above, and copied rows with a manual
    # append loop; list() is the idiomatic shallow copy.
    return list(response.data), new_doc_text
|
| 68 |
+
|
| 69 |
+
def generate_title(soft_prompt, new_document):
    """Ask Gemini for a bug-report title, few-shot prompted with similar bugs.

    Parameters:
        soft_prompt: dict mapping an example title -> [component, description];
            these become the few-shot examples in the prompt.
        new_document: combined component/description text of the new bug.

    Returns:
        The model's response text (the suggested title).
    """
    # Render each example as a Component/Description/title stanza, separated
    # by blank lines.
    soft_prompt_examples = '\n\n'.join(
        f'Component: {value[0]}\n Description: {value[1]}\n title: {key}' for key, value in soft_prompt.items()
    )

    # The new document is wrapped in | delimiters so the model can tell the
    # input apart from the examples.
    input_prompt = f'''Generate a Bug report title based on description and Component. Do not generate anything else. Try to keep it in a limit of 30 characters.
Look at the below examples to understand the task.
{soft_prompt_examples}
Now using the examples above, try giving the output for the following input enclosed within the | delimiter.
| {new_document} |.
Give the title as instructed above. Give the title only'''

    # `client` is the module-level google-genai client configured from the
    # gemini_secret environment variable.
    response = client.models.generate_content(model="gemini-2.0-flash", contents=input_prompt)
    return response.text
|
| 83 |
+
|
| 84 |
+
def suggest_title(component, description, pid):
    """Suggest a title for a new bug report.

    Retrieves similar bugs for project ``pid``, builds a few-shot prompt from
    them (topped up to three examples with two hard-coded Bugzilla samples
    when too few similar bugs are found), and asks Gemini for a title.

    Parameters:
        component: bug component name (str).
        description: bug description text (str).
        pid: project identifier used to scope the similarity search.

    Returns:
        The generated title as a string.
    """
    similar_docs, new_doc_text = retrieve_similar_documents(component, description, pid)

    # Hard-coded fallback examples (historical Mozilla bug reports) used to
    # pad the few-shot prompt when the database yields fewer than 3 matches.
    examples={'''Concatenation of string variables slow compared to strings themselves''':['''JavaScript Engine''','''Ill be uploading a test case with various tests of string concatenation. ; Mozilla (build 2000040308) shows good performance with all the ones that uses ; strings directly; e.g. string1 + string2. its the last three it has ; problems with; they use string variables (e.g. var1 + var 2) in the ; concatenation.; ; try it out for yourselves. all numberical values shown in the form fields is ; the execution time in millseconds. the four tests on the left hand side; and ; the 2 at the top on the right hand side finished in around 1650ms on my P3/450. ; this is just the same speed as Netscape Comm 4.72. On the last three tests on ; the right hand side NC4.72 uses 7000ms; 10000ms and around 4500ms respectively; ; while Mozilla suddenly uses 10000ms; 14750ms and 5500ms on the same three tests. ; Im slightly surprised by this sudden large increase in execution time.; ; the test results are very positive compared to IE5.01 though; except for the ; three tests with variables in them. the 4 tests on the left hand side; from top ; to bottom; finish in around 5.5s; 9s; 12.5s and 16s in IE5.01. in other words; ; a nearly linear increase in usage for each string that gets added. the two top ; tests on the right hand side finish in around 9.3s and 20s; a _huge_ difference ; from both Mozilla and Communicator. 
the last three tests; with variables; ; execute at just about the same speed as Communicator though (the last one ; actually about a second faster).'''],'''Linux/Slackware: undefined iostream symbols; app wont start''':['''HTML: Parser''','''johnny:~/mozilla/package# ./mozilla-apprunner.sh; MOZILLA_FIVE_HOME=/root/mozilla/package; LD_LIBRARY_PATH=/root/mozilla/package:/usr/local/rvplayer5.0; MOZ_PROGRAM=./apprunner; moz_debug=0; moz_debugger=; ./apprunner: error in loading shared libraries; /root/mozilla/package/libraptorhtmlpars.so: undefined symbol:; __vt_8iostream.3ios; ; I am running Slackware 4.0 and never have had any luck running any; of these milestone releases. This was the M7 attempt.; Just thought you should know.; Thanks; Johnny O''']}

    # Keep only retrieved bugs with positive similarity; None when nothing
    # was retrieved at all.
    # NOTE(review): assumes each RPC row carries "title", "component",
    # "description" and "similarity" keys — verify against get_similar_bugs.
    soft_prompt = {i["title"]: [i["component"], i["description"]] for i in similar_docs if i["similarity"] > 0} if similar_docs else None
    if soft_prompt is None:
        soft_prompt = {}

    # Top the prompt up to at most 3 examples with the fallbacks above.
    for k, v in examples.items():
        if len(soft_prompt) >= 3:
            break
        if k not in soft_prompt:
            soft_prompt[k] = v

    generated_title = generate_title(soft_prompt, new_doc_text)
    return f"{generated_title}"
|
| 101 |
+
|
| 102 |
+
def infer(component,title,description,pid=None,mode="priority"):
|
| 103 |
+
if mode=="title":
|
| 104 |
+
return suggest_title(component, description,pid)
|
| 105 |
# BERT embedding
|
| 106 |
combined_text = f"{component} [SEP] {title} [SEP] {description}"
|
| 107 |
inputs = tokenizer(combined_text, return_tensors="pt", truncation=True,max_length=512)
|
|
|
|
| 129 |
inputs=[
|
| 130 |
gr.Textbox(label="Component"),
|
| 131 |
gr.Textbox(label="Title"),
|
| 132 |
+
gr.Textbox(label="Description"),
|
| 133 |
+
gr.Textbox(label="PID (optional)", value="default_pid"), # Or Hidden
|
| 134 |
+
gr.Radio(["priority", "title"], label="Mode", value="priority")
|
| 135 |
],
|
| 136 |
outputs="text"
|
| 137 |
)
|
| 138 |
+
iface.launch()
|