Amii2410 committed on
Commit
ebbae50
·
verified ·
1 Parent(s): e558fc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -40
app.py CHANGED
@@ -1,58 +1,63 @@
1
  import gradio as gr
2
  from sentence_transformers import SentenceTransformer, util
3
- import networkx as nx
4
 
5
- # Load the SentenceTransformer model once at startup
6
  model = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")
7
 
8
def group_duplicates_api(complaints, threshold=0.7):
    """Group similar or duplicate complaints into clusters.

    Two complaints are linked when the cosine similarity of their sentence
    embeddings is at least ``threshold``. Every connected component of the
    resulting similarity graph becomes one group, so membership is
    transitive: A and C land in the same group when both are linked to B,
    even if A and C are not directly similar enough.

    Args:
        complaints: Either a multiline string (one complaint per line; blank
            lines are dropped and each line is stripped) or an iterable of
            complaint strings, which is used as given.
        threshold: Minimum cosine similarity for two complaints to be linked.

    Returns:
        A list of groups, each group being a list of complaint strings in
        their input order. Empty input returns ``[]``; a single complaint
        returns one singleton group.
    """
    if not complaints:
        return []

    # The Gradio textbox delivers one string; split it into one complaint per line.
    if isinstance(complaints, str):
        complaints = [line.strip() for line in complaints.split("\n") if line.strip()]
    else:
        complaints = list(complaints)

    # Fewer than two complaints: nothing to compare, and no model call needed.
    if len(complaints) < 2:
        return [[text] for text in complaints]

    embeddings = model.encode(complaints, convert_to_tensor=True)
    # Convert the whole score matrix once instead of calling .item() per pair.
    scores = util.pytorch_cos_sim(embeddings, embeddings).tolist()

    graph = nx.Graph()
    graph.add_nodes_from(range(len(complaints)))
    # Only the upper triangle is scanned: similarity is symmetric and the
    # diagonal (self-similarity) must not create edges.
    graph.add_edges_from(
        (i, j)
        for i, row in enumerate(scores)
        for j in range(i + 1, len(row))
        if row[j] >= threshold
    )

    # Components are sets of indices; sort them so each group lists its
    # complaints in input order rather than in set-iteration order.
    return [
        [complaints[idx] for idx in sorted(component)]
        for component in nx.connected_components(graph)
    ]
43
-
44
# Build the Gradio UI: a multiline textbox for complaints plus a threshold
# slider, wired to group_duplicates_api and rendered as JSON.
_complaints_box = gr.Textbox(
    lines=10,
    placeholder="Enter complaints separated by newline",
    label="Complaints",
)
_threshold_slider = gr.Slider(
    0.5,
    0.95,
    value=0.7,
    step=0.01,
    label="Similarity Threshold",
)

demo = gr.Interface(
    fn=group_duplicates_api,
    inputs=[_complaints_box, _threshold_slider],
    outputs=gr.JSON(label="Duplicate Groups"),
    title="Duplicate Complaint Grouping API",
    description="Paste multiple complaints (one per line) and get grouped duplicates based on semantic similarity.",
)

# Start the server only when the file is executed directly.
if __name__ == "__main__":
    demo.launch()
58
-
 
1
  import gradio as gr
2
  from sentence_transformers import SentenceTransformer, util
 
3
 
4
+ # Load model once at startup
5
  model = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")
6
 
7
def find_matching_issues(newIssue, nearbyIssues, threshold=0.75):
    """Find the nearby issues that are semantically similar to a new complaint.

    The new issue and every nearby issue are embedded with the module-level
    sentence-transformer ``model``; a nearby issue matches when its cosine
    similarity to the new issue is at least ``threshold``.

    Args:
        newIssue: Text of the new complaint.
        nearbyIssues: List of dicts, each with an ``"ID"`` key and an
            ``"issue"`` key holding the issue text.
        threshold: Minimum cosine similarity for a nearby issue to match.

    Returns:
        A list of dicts with keys ``"ID"``, ``"issue"`` and ``"similarity"``
        (rounded to 3 decimals), in the same order as ``nearbyIssues``.
        Returns ``[]`` when there are no nearby issues or when ``newIssue``
        is empty or blank.

    Raises:
        KeyError: If a nearby issue lacks the ``"ID"`` or ``"issue"`` key.
    """
    if not nearbyIssues or not newIssue:
        return []
    # A blank query has no meaning to compare; skip the model call entirely.
    if isinstance(newIssue, str) and not newIssue.strip():
        return []

    new_emb = model.encode(newIssue, convert_to_tensor=True)
    issue_texts = [item["issue"] for item in nearbyIssues]
    issue_embs = model.encode(issue_texts, convert_to_tensor=True)

    # The similarity matrix has a single row (one query); convert it to
    # plain floats once instead of calling .item() repeatedly per score.
    scores = util.pytorch_cos_sim(new_emb, issue_embs)[0].tolist()

    return [
        {
            "ID": item["ID"],
            "issue": item["issue"],
            "similarity": round(score, 3),
        }
        for item, score in zip(nearbyIssues, scores)
        if score >= threshold
    ]
 
 
 
34
 
35
# Wrapper for Gradio interface
def match_api(newIssue, issues_text, threshold):
    """Parse the textbox form of the nearby issues and return the matches.

    Args:
        newIssue: Text of the new complaint.
        issues_text: Semicolon-separated entries of the form ``ID: issue``,
            e.g. ``'123: electricity problem; 124: water problem'``. Only the
            first colon of an entry separates the ID from the text, so the
            issue text may itself contain colons. Entries without a colon or
            with empty issue text are ignored. ``None`` is treated as empty.
        threshold: Minimum cosine similarity, passed to
            ``find_matching_issues``.

    Returns:
        The result of ``find_matching_issues`` for the parsed entries, or
        ``[]`` when no usable entry was parsed.
    """
    nearby_issues = []
    for part in (issues_text or "").split(";"):
        issue_id, separator, issue = part.partition(":")
        issue = issue.strip()
        # Skip fragments that are not "ID: text" or that carry no text to embed.
        if separator and issue:
            nearby_issues.append({"ID": issue_id.strip(), "issue": issue})

    # Nothing to compare against: answer directly without touching the model.
    if not nearby_issues:
        return []

    return find_matching_issues(newIssue, nearby_issues, threshold)
49
+
50
# Gradio interface: two textboxes (the new issue and the "ID: issue; ..." list
# of nearby issues) plus a threshold slider, wired to match_api with JSON output.
iface = gr.Interface(
    fn=match_api,
    inputs=[
        gr.Textbox(label="New Issue"),
        gr.Textbox(label="Nearby Issues (format: ID: issue; ID: issue)"),
        gr.Slider(0, 1, value=0.7, step=0.05, label="Threshold"),
    ],
    outputs="json",
    title="Issue Matcher API",
    description="Finds similar issues based on semantic similarity.",
)

# Launch only when executed as a script, so importing this module (for tests
# or reuse of find_matching_issues) does not start a web server.
if __name__ == "__main__":
    iface.launch()