Duplicate_API / app.py
Amii2410's picture
Update app.py
ebbae50 verified
import gradio as gr
from sentence_transformers import SentenceTransformer, util
# The embedding model is created a single time when the module is loaded, so
# the functions below reuse the same instance instead of reloading it.
MODEL_NAME = "sentence-transformers/paraphrase-mpnet-base-v2"
model = SentenceTransformer(MODEL_NAME)
def find_matching_issues(newIssue, nearbyIssues, threshold=0.75):
    """Return the nearby issues that are semantically similar to a new complaint.

    The new complaint and every nearby issue text are embedded with the
    module-level ``model``; an issue counts as a match when its cosine
    similarity to the new complaint is at least ``threshold``.

    Args:
        newIssue: Text of the new complaint.
        nearbyIssues: List of dicts, each providing an ``"ID"`` key and an
            ``"issue"`` (text) key.
        threshold: Minimum cosine similarity (inclusive) for a match.

    Returns:
        A list of ``{"ID", "issue", "similarity"}`` dicts in the same order as
        ``nearbyIssues``, with ``similarity`` rounded to three decimals.
        The list is empty when there is nothing to compare: no nearby issues,
        or a missing/blank ``newIssue``.
    """
    # Nothing to compare: return before doing any encoding work.  A missing
    # or whitespace-only complaint carries no meaning to match against.
    if not nearbyIssues or not newIssue or not newIssue.strip():
        return []

    # Encode the new issue and all nearby issue texts.
    new_emb = model.encode(newIssue, convert_to_tensor=True)
    issue_texts = [item["issue"] for item in nearbyIssues]
    issue_embs = model.encode(issue_texts, convert_to_tensor=True)

    # Index 0 selects the scores of the (single) new issue against each
    # nearby issue.  tolist() turns all of them into Python floats in one
    # call, rather than calling .item() twice per score.
    scores = util.pytorch_cos_sim(new_emb, issue_embs)[0].tolist()

    return [
        {
            "ID": item["ID"],
            "issue": item["issue"],
            "similarity": round(score, 3),
        }
        for item, score in zip(nearbyIssues, scores)
        if score >= threshold
    ]
# Wrapper for Gradio interface
def match_api(newIssue, issues_text, threshold):
    """Parse the nearby-issues text and return the issues matching ``newIssue``.

    Args:
        newIssue: Text of the new complaint.
        issues_text: Nearby issues written as ``ID: text`` entries separated
            by semicolons, e.g.
            ``'123: electricity problem; 124: water problem'``.  Only the
            first colon of an entry separates the ID from the text, so the
            text itself may contain colons.  ``None`` is treated as empty.
        threshold: Minimum similarity, passed through to
            ``find_matching_issues``.

    Returns:
        The list produced by ``find_matching_issues``, or ``[]`` when no
        usable entry could be parsed from ``issues_text``.
    """
    nearbyIssues = []
    # ``or ""`` keeps a missing value (None) from crashing on .split().
    for part in (issues_text or "").split(";"):
        issue_id, separator, issue = part.partition(":")
        issue = issue.strip()
        # Skip fragments that have no colon, and entries whose text is blank:
        # there is nothing in them to compare against the new issue.
        if not separator or not issue:
            continue
        nearbyIssues.append({"ID": issue_id.strip(), "issue": issue})

    # No usable entries: answer directly instead of invoking the matcher.
    if not nearbyIssues:
        return []
    return find_matching_issues(newIssue, nearbyIssues, threshold)
# Gradio interface
# The three input components are listed in the same order as the parameters
# of match_api: new issue text, nearby-issues text, similarity threshold.
input_components = [
    gr.Textbox(label="New Issue"),
    gr.Textbox(label="Nearby Issues (format: ID: issue; ID: issue)"),
    gr.Slider(minimum=0, maximum=1, value=0.7, step=0.05, label="Threshold"),
]

iface = gr.Interface(
    fn=match_api,
    inputs=input_components,
    outputs="json",
    title="Issue Matcher API",
    description="Finds similar issues based on semantic similarity.",
)

# Start serving the interface as soon as the module is executed.
iface.launch()