# app.py — Hugging Face Space by pujithapsx
# Last commit: "Update app.py" (602ebb2, verified)
import gradio as gr
from sentence_transformers import CrossEncoder
import torch
# Load the fine-tuned model from the Hugging Face Hub.
MODEL_NAME = "pujithapsx/address-crossencoder-bge-reranker-v2-m3-finetuned"

# Prefer the GPU when the host provides one; otherwise fall back to CPU so the
# app still starts on CPU-only hardware.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading model...")
# Use CrossEncoder directly - it handles the model format correctly.
# NOTE(review): trust_remote_code=True allows Python code shipped with the
# model repository to run locally; keep it only while MODEL_NAME points at a
# repository you trust.
model = CrossEncoder(
    MODEL_NAME,
    trust_remote_code=True,
    device=DEVICE,
)
print("Model loaded successfully!")

# Static decision threshold: a score at or above this value is reported as a
# match by predict_similarity.
THRESHOLD = 0.75
# Score cut-offs used only to pick the "High" vs "Medium" confidence label.
_HIGH_CONFIDENCE_MATCH = 0.85
_HIGH_CONFIDENCE_NO_MATCH = 0.40


def predict_similarity(input1, input2):
    """Score two addresses with the cross-encoder and classify the pair.

    Args:
        input1: First address as free text. ``None`` is treated as empty.
        input2: Second address as free text. ``None`` is treated as empty.

    Returns:
        A 3-tuple ``(similarity, verdict, bar_value)``:

        * ``similarity`` - the score as a percentage string with two
          decimals (``"—"`` when an address is missing).
        * ``verdict`` - MATCH / NO MATCH plus a High/Medium confidence
          label, or a prompt asking for both addresses.
        * ``bar_value`` - the percentage as a number limited to 0-100 for
          the score slider (``0`` when an address is missing).
    """
    # Guard against None as well as blank strings so a missing value yields
    # the prompt below instead of an AttributeError from ``.strip()``.
    if not (input1 or "").strip() or not (input2 or "").strip():
        return "—", "⚠️ Please provide both addresses", 0

    # Convert to a plain Python float once so the comparisons and formatting
    # below do not depend on the scalar type returned by the model.
    score = float(model.predict([[input1, input2]])[0])
    similarity_pct = score * 100

    if score >= THRESHOLD:
        result = "✅ MATCH"
        is_high = score > _HIGH_CONFIDENCE_MATCH
    else:
        result = "❌ NO MATCH"
        is_high = score < _HIGH_CONFIDENCE_NO_MATCH
    confidence_label = "High" if is_high else "Medium"

    similarity_str = f"{similarity_pct:.2f}%"
    result_str = f"{result} • Confidence: {confidence_label}"

    # The score slider is declared with minimum=0 and maximum=100; keep the
    # value inside that range even if the model emits an out-of-range score.
    bar_value = min(max(similarity_pct, 0.0), 100.0)
    return similarity_str, result_str, bar_value
# ── Custom CSS ──────────────────────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=custom_css) below. It declares the colour
# palette as CSS variables in :root and then styles the pieces of the page by
# the ids/classes the UI code assigns: #header-box and .badge (header HTML),
# .input-card / .output-card (textbox columns), #run-btn (the button),
# .score-slider (the score bar) and #footer-info (footer HTML).
# NOTE(review): .gr-samples-table is not assigned anywhere in this file; it is
# presumably the class Gradio puts on the examples table - confirm against the
# installed Gradio version.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap');
:root {
--bg: #0b0f1a;
--surface: #111827;
--border: #1f2d45;
--accent: #38bdf8;
--accent2: #818cf8;
--green: #34d399;
--red: #f87171;
--yellow: #fbbf24;
--text: #e2e8f0;
--muted: #64748b;
--radius: 12px;
}
body, .gradio-container {
background: var(--bg) !important;
font-family: 'DM Sans', sans-serif !important;
color: var(--text) !important;
}
/* ── header ── */
#header-box {
background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 36px 40px 28px;
margin-bottom: 24px;
position: relative;
overflow: hidden;
}
#header-box::before {
content: '';
position: absolute;
top: -60px; right: -60px;
width: 200px; height: 200px;
background: radial-gradient(circle, rgba(56,189,248,0.12) 0%, transparent 70%);
pointer-events: none;
}
#header-box h1 {
font-family: 'Space Mono', monospace !important;
font-size: 1.9rem !important;
font-weight: 700 !important;
color: var(--accent) !important;
margin: 0 0 8px !important;
letter-spacing: -0.5px;
}
#header-box p {
color: var(--muted) !important;
font-size: 0.95rem !important;
margin: 0 !important;
line-height: 1.6;
}
.badge {
display: inline-block;
background: rgba(56,189,248,0.12);
border: 1px solid rgba(56,189,248,0.3);
color: var(--accent);
font-family: 'Space Mono', monospace;
font-size: 0.7rem;
padding: 3px 10px;
border-radius: 20px;
margin-right: 8px;
margin-top: 12px;
}
/* ── input cards ── */
.input-card textarea,
.input-card input {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
color: var(--text) !important;
font-family: 'DM Sans', sans-serif !important;
font-size: 0.95rem !important;
padding: 14px 16px !important;
transition: border-color 0.2s;
}
.input-card textarea:focus,
.input-card input:focus {
border-color: var(--accent) !important;
box-shadow: 0 0 0 3px rgba(56,189,248,0.1) !important;
}
label span {
font-family: 'Space Mono', monospace !important;
font-size: 0.75rem !important;
color: var(--accent2) !important;
letter-spacing: 0.5px;
text-transform: uppercase;
}
/* ── button ── */
#run-btn {
background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
border: none !important;
border-radius: var(--radius) !important;
color: #0b0f1a !important;
font-family: 'Space Mono', monospace !important;
font-size: 0.9rem !important;
font-weight: 700 !important;
letter-spacing: 0.5px;
padding: 14px 32px !important;
cursor: pointer;
transition: opacity 0.2s, transform 0.15s;
width: 100%;
margin-top: 8px;
}
#run-btn:hover { opacity: 0.9; transform: translateY(-1px); }
#run-btn:active { transform: translateY(0); }
/* ── output cards ── */
.output-card textarea,
.output-card input {
background: #0d1424 !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
color: var(--text) !important;
font-family: 'Space Mono', monospace !important;
font-size: 1.1rem !important;
font-weight: 700 !important;
text-align: center;
}
/* ── slider (score bar) ── */
.score-slider input[type=range] {
accent-color: var(--accent);
}
/* ── examples table ── */
.gr-samples-table {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: var(--radius) !important;
}
.gr-samples-table th {
font-family: 'Space Mono', monospace !important;
font-size: 0.72rem !important;
color: var(--muted) !important;
text-transform: uppercase;
letter-spacing: 0.5px;
background: #0d1424 !important;
}
.gr-samples-table td {
color: var(--text) !important;
font-size: 0.88rem !important;
}
.gr-samples-table tr:hover td {
background: rgba(56,189,248,0.04) !important;
}
/* ── info footer ── */
#footer-info {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 20px 24px;
margin-top: 20px;
}
#footer-info p {
color: var(--muted) !important;
font-family: 'Space Mono', monospace !important;
font-size: 0.78rem !important;
margin: 4px 0 !important;
line-height: 1.7;
}
#footer-info span { color: var(--accent) !important; }
"""
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Builds the page: header, two address inputs, a button, three outputs
# (similarity text, verdict text, score slider), clickable examples and a
# footer. Every place that shows the threshold or the model id reads it from
# THRESHOLD / MODEL_NAME so the page cannot drift from the values the
# prediction code actually uses.

# Threshold as a percentage for the slider label (0.75 -> "75").
_threshold_pct = f"{THRESHOLD * 100:g}"

with gr.Blocks(css=custom_css, title="Address Entity Matcher") as demo:
    # Header
    gr.HTML(f"""
<div id="header-box">
<h1>Address Entity Matcher</h1>
<p>
Enter two addresses to determine whether they refer to the same location.<br>
Powered by a fine-tuned <strong>BGE-Reranker-v2-m3</strong> cross-encoder model.
</p>
<span class="badge">CrossEncoder</span>
<span class="badge">BGE-Reranker-v2-m3</span>
<span class="badge">Threshold: {THRESHOLD}</span>
</div>
""")

    # Inputs
    with gr.Row(equal_height=True):
        with gr.Column(elem_classes="input-card"):
            input1 = gr.Textbox(
                label="Address 1",
                placeholder="e.g., Flat 12-B Sector 5 Noida",
                lines=3,
            )
        with gr.Column(elem_classes="input-card"):
            input2 = gr.Textbox(
                label="Address 2",
                placeholder="e.g., Flat 23-B Sector 5 Noida",
                lines=3,
            )

    btn = gr.Button(" Check Match", elem_id="run-btn", variant="primary")

    # Outputs
    with gr.Row(equal_height=True):
        with gr.Column(elem_classes="output-card"):
            similarity_output = gr.Textbox(
                label="Similarity Score",
                interactive=False,
                placeholder="—",
            )
        with gr.Column(elem_classes="output-card"):
            result_output = gr.Textbox(
                label="Match Result",
                interactive=False,
                placeholder="—",
            )

    # Read-only slider used as a bar showing the score on a 0-100 scale.
    score_bar = gr.Slider(
        minimum=0,
        maximum=100,
        value=0,
        step=0.01,
        label=f"Score Visualisation (threshold line: {_threshold_pct}%)",
        interactive=False,
        elem_classes="score-slider",
    )

    # Examples: clicking a row fills the two address textboxes.
    gr.Examples(
        examples=[
            ["Flat 12-B Sector 5 Noida", "Flat 23-B Sector 5 Noida"],
            ["Phase 4 Whitefield Bangalore", "Whitefield Phase V Bangalore"],
            ["Thirteen I 7th Avenue Adyar Chennai", "13 Seventh Avenue Adyar Chennai"],
            ["Twenty Nine A Second Cross Koramangala Bengaluru", "47 Forty Seven B Third Street Indiranagar Bengaluru"],
            ["Plot 8 Banjara Hills Hyderabad", "Plot 8 Banjara Hills Hyderabad"],
            ["House No 4 Lane 2 DLF Phase 1 Gurugram", "H.No 4/2 DLF Phase One Gurgaon"],
        ],
        inputs=[input1, input2],
        label="Examples",
    )

    # Footer info
    gr.HTML(f"""
<div id="footer-info">
<p> <strong>Model:</strong> <span>{MODEL_NAME}</span></p>
<p><strong>Threshold:</strong> <span>{THRESHOLD}</span> — Score ≥ {THRESHOLD} → MATCH &nbsp;|&nbsp; Score &lt; {THRESHOLD} → NO MATCH</p>
<p> <strong>Confidence:</strong> High (score &gt; 0.85 or &lt; 0.40) &nbsp;|&nbsp; Medium (otherwise)</p>
</div>
""")

    # Wiring: the button runs predict_similarity on both textboxes and fans
    # its three return values out to the two text outputs and the slider.
    btn.click(
        fn=predict_similarity,
        inputs=[input1, input2],
        outputs=[similarity_output, result_output, score_bar],
    )
# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    demo.launch()