# Repository page residue captured with this file (not program code):
#   pujithapsx's picture
#   Update frontend/app.py
#   95469ff verified
import gradio as gr
import json
import re
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from backend.matching_service import perform_match
from backend.models import EntityRecord
# =========================================================
# CONSTANTS
# =========================================================
MAX_FIELDS = 20
# =========================================================
# HELPERS
# =========================================================
def preprocess_text(text):
    """Trim *text* and collapse every run of whitespace into one space.

    Falsy input (``None``, ``""``) yields an empty string.
    """
    if text:
        trimmed = text.strip()
        return re.sub(r"\s+", " ", trimmed)
    return ""
def convert_to_api_payload(record: dict) -> dict:
    """Reshape a flat record dict into the nested payload structure.

    Args:
        record: Flat mapping. Scalar fields use their own key (``"name"``,
            ``"pan"``, ...). Repeated fields use an index suffix:
            ``phone_<i>``, ``email_<i>``, ``addressline_<i>``, ``city_<i>``,
            ``state_<i>``, ``zipcode_<i>`` for ``i`` in ``range(MAX_FIELDS)``.
            Any other key is treated as a custom field.

    Returns:
        A dict with the scalar fields plus ``phones`` (list of str),
        ``emails`` (list of str), ``addresses`` (list of dicts) and
        ``custom_fields`` (str -> str). Blank phones, emails, custom values
        and address slots whose four components are all blank are omitted.
    """
    scalar_keys = (
        "name", "firstname", "middlename", "lastname",
        "mothername", "fathername", "spousename", "othername",
        "dob", "gender",
        "AADHAR", "pan", "licenseid", "passportid", "voterid",
        "companyname", "parentcompanyname",
    )

    def _text(key):
        # A key that is present but None is treated like a missing key, so
        # the payload never carries None where "" is the established default.
        value = record.get(key, "")
        return "" if value is None else value

    payload = {key: _text(key) for key in scalar_keys}
    payload["phones"] = []
    payload["emails"] = []
    payload["addresses"] = []
    payload["custom_fields"] = {}

    for i in range(MAX_FIELDS):
        phone = record.get(f"phone_{i}", "")
        if phone:
            payload["phones"].append(str(phone))

        email = record.get(f"email_{i}", "")
        if email:
            payload["emails"].append(str(email))

        addr = {
            "addressline": _text(f"addressline_{i}"),
            "city": _text(f"city_{i}"),
            "state": _text(f"state_{i}"),
            "zipcode": _text(f"zipcode_{i}"),
        }
        # Keep the slot only when something was actually entered. Checking
        # for key presence alone would emit an empty address for every blank
        # form slot, unlike phones and emails which already skip blanks.
        if any(str(part).strip() for part in addr.values()):
            payload["addresses"].append(addr)

    known_keys = set(scalar_keys)
    known_prefixes = ("addressline_", "city_", "state_", "zipcode_", "phone_", "email_")
    for k, v in record.items():
        k_str = str(k)
        if k_str in known_keys:
            continue
        # str.startswith accepts a tuple, so one call covers every prefix.
        if k_str.startswith(known_prefixes):
            continue
        if v and str(v).strip():
            payload["custom_fields"][k_str] = str(v)
    return payload
# =========================================================
# MATCH FUNCTION
# =========================================================
def run_match(
    # Record 1 personal
    r1_name, r1_firstname, r1_middlename, r1_lastname,
    r1_mothername, r1_fathername, r1_spousename, r1_othername,
    r1_dob, r1_gender,
    # Record 1 identifiers
    r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
    # Record 1 addresses (5 slots)
    r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip,
    r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip,
    r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip,
    r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip,
    r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip,
    # Record 1 phones (5 slots)
    r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4,
    # Record 1 emails (5 slots)
    r1_email0, r1_email1, r1_email2, r1_email3, r1_email4,
    # Record 1 employment
    r1_company, r1_parent_company,
    # Record 1 custom fields (5 slots)
    r1_cf0_name, r1_cf0_val,
    r1_cf1_name, r1_cf1_val,
    r1_cf2_name, r1_cf2_val,
    r1_cf3_name, r1_cf3_val,
    r1_cf4_name, r1_cf4_val,
    # Record 2 personal
    r2_name, r2_firstname, r2_middlename, r2_lastname,
    r2_mothername, r2_fathername, r2_spousename, r2_othername,
    r2_dob, r2_gender,
    # Record 2 identifiers
    r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
    # Record 2 addresses (5 slots)
    r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip,
    r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip,
    r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip,
    r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip,
    r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip,
    # Record 2 phones (5 slots)
    r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4,
    # Record 2 emails (5 slots)
    r2_email0, r2_email1, r2_email2, r2_email3, r2_email4,
    # Record 2 employment
    r2_company, r2_parent_company,
    # Record 2 custom fields (5 slots)
    r2_cf0_name, r2_cf0_val,
    r2_cf1_name, r2_cf1_val,
    r2_cf2_name, r2_cf2_val,
    r2_cf3_name, r2_cf3_val,
    r2_cf4_name, r2_cf4_val,
):
    """Match the two records entered in the form and return JSON text.

    The 114 positional parameters are 57 values for record 1 followed by the
    same 57 for record 2: ten personal fields, five identifiers, five address
    slots of four values each, five phones, five emails, two employment
    fields and five custom name/value pairs.

    Returns:
        A JSON string (``indent=2``). On success it contains
        ``overall_decision``, ``reason`` and ``field_results``. On any
        failure, including a custom field name that clashes with an existing
        key, it contains a single ``error`` key with the exception message.
    """

    def build_record(
        name, firstname, middlename, lastname,
        mothername, fathername, spousename, othername, dob, gender,
        aadhar, pan, licenseid, passportid, voterid,
        addr_lines, phones, emails,
        company, parent_company,
        custom_fields_pairs,
    ):
        """Assemble one flat record dict with index-suffixed repeated keys."""
        rec = {
            "name": name, "firstname": firstname, "middlename": middlename,
            "lastname": lastname, "mothername": mothername, "fathername": fathername,
            "spousename": spousename, "othername": othername, "dob": dob, "gender": gender,
            "AADHAR": aadhar, "pan": pan, "licenseid": licenseid,
            "passportid": passportid, "voterid": voterid,
            "companyname": company, "parentcompanyname": parent_company,
        }
        for i, (line, city, state, zipcode) in enumerate(addr_lines):
            rec[f"addressline_{i}"] = line
            rec[f"city_{i}"] = city
            rec[f"state_{i}"] = state
            rec[f"zipcode_{i}"] = zipcode
        for i, ph in enumerate(phones):
            rec[f"phone_{i}"] = ph
        for i, em in enumerate(emails):
            rec[f"email_{i}"] = em
        for cf_name, cf_val in custom_fields_pairs:
            key = cf_name.strip() if cf_name else ""
            if not key:
                continue
            # A custom name equal to an existing key (e.g. "name", "phone_0")
            # or to an earlier custom name would silently replace that value
            # in the record being matched, so report it instead.
            if key in rec:
                raise ValueError(
                    f"Custom field name {key!r} collides with a built-in "
                    "field or an earlier custom field"
                )
            rec[key] = cf_val
        return rec

    try:
        # Record construction sits inside the try so that a name collision is
        # reported through the same error JSON as every other failure.
        r1 = build_record(
            r1_name, r1_firstname, r1_middlename, r1_lastname,
            r1_mothername, r1_fathername, r1_spousename, r1_othername, r1_dob, r1_gender,
            r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
            [
                (r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip),
                (r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip),
                (r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip),
                (r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip),
                (r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip),
            ],
            [r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4],
            [r1_email0, r1_email1, r1_email2, r1_email3, r1_email4],
            r1_company, r1_parent_company,
            [
                (r1_cf0_name, r1_cf0_val), (r1_cf1_name, r1_cf1_val),
                (r1_cf2_name, r1_cf2_val), (r1_cf3_name, r1_cf3_val),
                (r1_cf4_name, r1_cf4_val),
            ],
        )
        r2 = build_record(
            r2_name, r2_firstname, r2_middlename, r2_lastname,
            r2_mothername, r2_fathername, r2_spousename, r2_othername, r2_dob, r2_gender,
            r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
            [
                (r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip),
                (r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip),
                (r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip),
                (r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip),
                (r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip),
            ],
            [r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4],
            [r2_email0, r2_email1, r2_email2, r2_email3, r2_email4],
            r2_company, r2_parent_company,
            [
                (r2_cf0_name, r2_cf0_val), (r2_cf1_name, r2_cf1_val),
                (r2_cf2_name, r2_cf2_val), (r2_cf3_name, r2_cf3_val),
                (r2_cf4_name, r2_cf4_val),
            ],
        )
        r1_payload = convert_to_api_payload(r1)
        r2_payload = convert_to_api_payload(r2)
        rec1 = EntityRecord(**r1_payload)
        rec2 = EntityRecord(**r2_payload)
        result_data = perform_match(rec1, rec2, mode="embedding")
        result = {
            "overall_decision": result_data["overall_decision"],
            "reason": result_data["reason"],
            "field_results": result_data["field_scores"],
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        # UI boundary: surface every failure as JSON in the output widget
        # rather than letting the exception escape the callback.
        return json.dumps({"error": str(e)}, indent=2)
# =========================================================
# UI BUILDER HELPERS
# =========================================================
def personal_fields():
    """Create the personal-detail textboxes, two per row.

    Returns:
        A list of ten textboxes in this order: full name, first name, middle
        name, last name, mother's name, father's name, spouse's name, other
        name, date of birth, gender.
    """
    # One inner tuple per UI row; each entry is (label, placeholder).
    row_specs = (
        (("Full Name", "Enter full name"), ("First Name", "Enter first name")),
        (("Middle Name", "Enter middle name"), ("Last Name", "Enter last name")),
        (("Mother's Name", "Enter mother's name"), ("Father's Name", "Enter father's name")),
        (("Spouse's Name", "Enter spouse's name"), ("Other Name", "Enter other name")),
        (("Date of Birth", "YYYY-MM-DD"), ("Gender", "Male/Female/Other")),
    )
    widgets = []
    for row in row_specs:
        with gr.Row():
            for label, placeholder in row:
                widgets.append(gr.Textbox(label=label, placeholder=placeholder))
    return widgets  # 10 inputs
def identifier_fields():
    """Create the identifier textboxes.

    Returns:
        A list of five textboxes in this order: Aadhar, PAN, license,
        passport, voter ID.
    """
    widgets = []
    with gr.Row():
        widgets.append(gr.Textbox(label="Aadhar Number", placeholder="Enter Aadhar number"))
        widgets.append(gr.Textbox(label="PAN Number", placeholder="Enter PAN number"))
    with gr.Row():
        widgets.append(gr.Textbox(label="License Number", placeholder="Enter license number"))
        widgets.append(gr.Textbox(label="Passport Number", placeholder="Enter passport number"))
    with gr.Row():
        widgets.append(gr.Textbox(label="Voter ID", placeholder="Enter voter ID"))
        gr.HTML("")  # spacer
    return widgets  # 5 inputs
def address_fields(slot):
    """Create one address accordion and return its four textboxes.

    Slot 0 is titled "Primary Address" and starts expanded; every other slot
    is titled "Address <slot + 1>" and starts collapsed.

    Returns:
        The tuple ``(street, city, state, pincode)`` of textboxes.
    """
    is_primary = slot == 0
    if is_primary:
        title = "Primary Address"
    else:
        title = f"Address {slot + 1}"
    with gr.Accordion(label=title, open=is_primary):
        street = gr.Textbox(label="Street Address", placeholder="Street, Building, Area")
        with gr.Row():
            town = gr.Textbox(label="City", placeholder="Enter city")
            region = gr.Textbox(label="State", placeholder="Enter state")
            pincode = gr.Textbox(label="Pincode", placeholder="6-digit postal code")
    return street, town, region, pincode
def contact_fields():
    """Create five phone and five email textboxes, each group in an accordion.

    Returns:
        ``(phone_inputs, email_inputs)``, two lists of five textboxes each.
    """
    with gr.Accordion("Phone Numbers", open=True):
        phone_inputs = [
            gr.Textbox(label=f"Phone {i + 1}", placeholder="Enter phone number")
            for i in range(5)
        ]
    with gr.Accordion("Email Addresses", open=True):
        email_inputs = [
            gr.Textbox(label=f"Email {i + 1}", placeholder="Enter email address")
            for i in range(5)
        ]
    return phone_inputs, email_inputs
def employment_fields():
    """Create the company and parent-company textboxes side by side.

    Returns:
        ``(company, parent_company)`` textboxes.
    """
    with gr.Row():
        employer_box = gr.Textbox(label="Company Name", placeholder="Enter company name")
        parent_box = gr.Textbox(
            label="Parent Company Name",
            placeholder="Enter parent company name",
        )
    return employer_box, parent_box
def custom_field_slots():
    """Create five name/value textbox pairs inside a collapsed accordion.

    Returns:
        A list of five ``(name_box, value_box)`` tuples.
    """
    slots = []
    with gr.Accordion("Custom Fields (up to 5)", open=False):
        for i in range(5):
            with gr.Row():
                name_box = gr.Textbox(label=f"Field Name {i + 1}", placeholder="e.g. MemberID")
                value_box = gr.Textbox(label=f"Field Value {i + 1}", placeholder="Value")
                slots.append((name_box, value_box))
    return slots  # 5 pairs
# =========================================================
# BUILD THE GRADIO APP
# =========================================================
def build_app():
    """Build the two-column record-matching UI and wire it to ``run_match``.

    Returns:
        The ``gr.Blocks`` app. Clicking the button passes 114 inputs to
        ``run_match`` (57 for record 1, then 57 for record 2) and shows the
        returned JSON text in the result widget.
    """

    def record_column(heading):
        """Lay out one record's widgets; return them in run_match order."""
        with gr.Column():
            gr.Markdown(heading)
            with gr.Accordion("Personal Details", open=True):
                personal = personal_fields()
            with gr.Accordion("Equalities", open=True):
                ids = identifier_fields()
                # NOTE(review): the source lost its indentation; the custom
                # field accordion is assumed to nest under "Equalities" -
                # confirm against the intended layout.
                custom_pairs = custom_field_slots()
            with gr.Accordion("Address Details", open=True):
                addr_widgets = []
                for slot in range(5):
                    addr_widgets.extend(address_fields(slot))
            with gr.Accordion("Contact Information", open=True):
                phones, emails = contact_fields()
            with gr.Accordion("Employment Details", open=True):
                company, pcompany = employment_fields()
        # Custom fields appear early in the layout but come last in the
        # run_match() signature, so they are appended at the end here.
        custom_flat = [w for pair in custom_pairs for w in pair]
        return (
            personal                # 10
            + ids                   # 5
            + addr_widgets          # 20
            + phones                # 5
            + emails                # 5
            + [company, pcompany]   # 2
            + custom_flat           # 10
        )

    with gr.Blocks(title="GEN AI Record Level Matching") as demo:
        gr.Markdown("# GEN AI Record Level Matching")
        gr.Markdown("Enter details for two records below and click **Run Record Match** to see the matching result.")
        with gr.Row(equal_height=False):
            record1_inputs = record_column("## Record 1")
            record2_inputs = record_column("## Record 2")
        run_btn = gr.Button("Run Record Match", variant="primary")
        result_output = gr.Code(label="Matching Result", language="json", lines=20)
        # Total = 114 inputs, in the exact order of run_match()'s parameters.
        run_btn.click(
            fn=run_match,
            inputs=record1_inputs + record2_inputs,
            outputs=result_output,
        )
    return demo
# =========================================================
# ENTRY POINT
# =========================================================
if __name__ == "__main__":
    # Run as a script: build the UI and start the Gradio server with the
    # default launch settings.
    app = build_app()
    app.launch()