"""Gradio front end for comparing two entity records field by field.

The UI collects two records (personal details, identifiers, addresses,
contacts, employment and free-form custom fields), converts each one to the
payload shape accepted by ``EntityRecord`` and shows the JSON result returned
by ``perform_match``.
"""

import json
import os
import re
import sys

import gradio as gr

# Put the parent of this file's directory on the import path so the
# ``backend`` package resolves when this file is run directly as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from backend.matching_service import perform_match
from backend.models import EntityRecord

# =========================================================
# CONSTANTS
# =========================================================

# Highest number of indexed slots (phone_0 .. phone_19, etc.) scanned when a
# flat record is converted to a payload.
MAX_FIELDS = 20

# Single-valued keys copied verbatim from the flat record into the payload.
_SCALAR_KEYS = (
    "name",
    "firstname",
    "middlename",
    "lastname",
    "mothername",
    "fathername",
    "spousename",
    "othername",
    "dob",
    "gender",
    "AADHAR",
    "pan",
    "licenseid",
    "passportid",
    "voterid",
    "companyname",
    "parentcompanyname",
)

# Components of one address; the flat record stores them as "<part>_<index>".
_ADDRESS_PARTS = ("addressline", "city", "state", "zipcode")

# Prefixes of indexed keys, which never count as custom fields.
_INDEXED_PREFIXES = (
    "addressline_",
    "city_",
    "state_",
    "zipcode_",
    "phone_",
    "email_",
)


# =========================================================
# HELPERS
# =========================================================

def preprocess_text(text):
    """Trim *text* and collapse every run of whitespace to a single space.

    Falsy input (``None`` or ``""``) yields ``""``.
    """
    if not text:
        return ""
    return re.sub(r"\s+", " ", text.strip())


def convert_to_api_payload(record: dict) -> dict:
    """Convert a flat record dict into the nested payload for ``EntityRecord``.

    Args:
        record: Flat mapping holding the scalar keys listed in
            ``_SCALAR_KEYS``, indexed keys such as ``phone_0`` or ``city_2``,
            and any number of additional keys.

    Returns:
        A dict with every scalar key (missing or ``None`` values become
        ``""``), plus ``phones``, ``emails``, ``addresses`` and
        ``custom_fields``. Empty phones, emails and address slots are left
        out. Any other key with a non-blank value ends up, stringified, in
        ``custom_fields``.
    """
    payload = {key: record.get(key) or "" for key in _SCALAR_KEYS}
    payload.update(phones=[], emails=[], addresses=[], custom_fields={})

    for i in range(MAX_FIELDS):
        phone = record.get(f"phone_{i}", "")
        if phone:
            payload["phones"].append(str(phone))

        email = record.get(f"email_{i}", "")
        if email:
            payload["emails"].append(str(email))

        address = {
            part: record.get(f"{part}_{i}") or "" for part in _ADDRESS_PARTS
        }
        # The UI always supplies every address slot, filled in or not; only
        # slots with at least one non-blank component are real addresses.
        if any(str(value).strip() for value in address.values()):
            payload["addresses"].append(address)

    known_keys = set(_SCALAR_KEYS)
    for key, value in record.items():
        key_str = str(key)
        if key_str in known_keys or key_str.startswith(_INDEXED_PREFIXES):
            continue
        if value and str(value).strip():
            payload["custom_fields"][key_str] = str(value)

    return payload


def _build_record(
    name, firstname, middlename, lastname,
    mothername, fathername, spousename, othername,
    dob, gender,
    aadhar, pan, licenseid, passportid, voterid,
    addr_lines, phones, emails,
    company, parent_company,
    custom_fields_pairs,
):
    """Assemble the flat record dict consumed by ``convert_to_api_payload``.

    ``addr_lines`` is a sequence of ``(line, city, state, zipcode)`` tuples,
    ``phones`` and ``emails`` are sequences of values, and
    ``custom_fields_pairs`` is a sequence of ``(field_name, field_value)``
    tuples. Sequence positions become the numeric suffix of the indexed keys.
    """
    rec = {
        "name": name,
        "firstname": firstname,
        "middlename": middlename,
        "lastname": lastname,
        "mothername": mothername,
        "fathername": fathername,
        "spousename": spousename,
        "othername": othername,
        "dob": dob,
        "gender": gender,
        "AADHAR": aadhar,
        "pan": pan,
        "licenseid": licenseid,
        "passportid": passportid,
        "voterid": voterid,
        "companyname": company,
        "parentcompanyname": parent_company,
    }

    for i, (line, city, state, zipcode) in enumerate(addr_lines):
        rec[f"addressline_{i}"] = line
        rec[f"city_{i}"] = city
        rec[f"state_{i}"] = state
        rec[f"zipcode_{i}"] = zipcode

    for i, phone in enumerate(phones):
        rec[f"phone_{i}"] = phone

    for i, email in enumerate(emails):
        rec[f"email_{i}"] = email

    # NOTE(review): a custom field named like a standard key (e.g. "pan")
    # overwrites the value entered in that standard field - confirm whether
    # that is intended.
    for cf_name, cf_val in custom_fields_pairs:
        if cf_name and cf_name.strip():
            rec[cf_name.strip()] = cf_val

    return rec


# =========================================================
# MATCH FUNCTION
# =========================================================

def run_match(
    # Record 1 personal
    r1_name, r1_firstname, r1_middlename, r1_lastname,
    r1_mothername, r1_fathername, r1_spousename, r1_othername,
    r1_dob, r1_gender,
    # Record 1 identifiers
    r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
    # Record 1 addresses (5 slots)
    r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip,
    r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip,
    r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip,
    r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip,
    r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip,
    # Record 1 phones (5 slots)
    r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4,
    # Record 1 emails (5 slots)
    r1_email0, r1_email1, r1_email2, r1_email3, r1_email4,
    # Record 1 employment
    r1_company, r1_parent_company,
    # Record 1 custom fields (5 slots)
    r1_cf0_name, r1_cf0_val,
    r1_cf1_name, r1_cf1_val,
    r1_cf2_name, r1_cf2_val,
    r1_cf3_name, r1_cf3_val,
    r1_cf4_name, r1_cf4_val,
    # Record 2 personal
    r2_name, r2_firstname, r2_middlename, r2_lastname,
    r2_mothername, r2_fathername, r2_spousename, r2_othername,
    r2_dob, r2_gender,
    # Record 2 identifiers
    r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
    # Record 2 addresses (5 slots)
    r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip,
    r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip,
    r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip,
    r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip,
    r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip,
    # Record 2 phones (5 slots)
    r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4,
    # Record 2 emails (5 slots)
    r2_email0, r2_email1, r2_email2, r2_email3, r2_email4,
    # Record 2 employment
    r2_company, r2_parent_company,
    # Record 2 custom fields (5 slots)
    r2_cf0_name, r2_cf0_val,
    r2_cf1_name, r2_cf1_val,
    r2_cf2_name, r2_cf2_val,
    r2_cf3_name, r2_cf3_val,
    r2_cf4_name, r2_cf4_val,
):
    """Match two records entered in the UI and return the result as JSON.

    The 114 positional parameters are the widget values of record 1 followed
    by those of record 2, each in the order personal, identifiers, addresses,
    phones, emails, employment, custom fields.

    Returns:
        A JSON string (indent 2) with ``overall_decision``, ``reason`` and
        ``field_results`` taken from ``perform_match``; if anything raises
        while building or matching the records, a JSON string of the form
        ``{"error": "<message>"}``.
    """
    r1 = _build_record(
        r1_name, r1_firstname, r1_middlename, r1_lastname,
        r1_mothername, r1_fathername, r1_spousename, r1_othername,
        r1_dob, r1_gender,
        r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
        [
            (r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip),
            (r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip),
            (r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip),
            (r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip),
            (r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip),
        ],
        [r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4],
        [r1_email0, r1_email1, r1_email2, r1_email3, r1_email4],
        r1_company, r1_parent_company,
        [
            (r1_cf0_name, r1_cf0_val),
            (r1_cf1_name, r1_cf1_val),
            (r1_cf2_name, r1_cf2_val),
            (r1_cf3_name, r1_cf3_val),
            (r1_cf4_name, r1_cf4_val),
        ],
    )

    r2 = _build_record(
        r2_name, r2_firstname, r2_middlename, r2_lastname,
        r2_mothername, r2_fathername, r2_spousename, r2_othername,
        r2_dob, r2_gender,
        r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
        [
            (r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip),
            (r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip),
            (r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip),
            (r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip),
            (r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip),
        ],
        [r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4],
        [r2_email0, r2_email1, r2_email2, r2_email3, r2_email4],
        r2_company, r2_parent_company,
        [
            (r2_cf0_name, r2_cf0_val),
            (r2_cf1_name, r2_cf1_val),
            (r2_cf2_name, r2_cf2_val),
            (r2_cf3_name, r2_cf3_val),
            (r2_cf4_name, r2_cf4_val),
        ],
    )

    # This is the UI boundary: any failure is reported in the result pane
    # instead of propagating out of the click handler.
    try:
        rec1 = EntityRecord(**convert_to_api_payload(r1))
        rec2 = EntityRecord(**convert_to_api_payload(r2))
        result_data = perform_match(rec1, rec2, mode="embedding")
        result = {
            "overall_decision": result_data["overall_decision"],
            "reason": result_data["reason"],
            "field_results": result_data["field_scores"],
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


# =========================================================
# UI BUILDER HELPERS
# =========================================================

def personal_fields():
    """Render the personal-detail textboxes, two per row.

    Returns:
        The 10 textboxes in the order name, first, middle, last, mother,
        father, spouse, other name, date of birth, gender.
    """
    rows = (
        (("Full Name", "Enter full name"),
         ("First Name", "Enter first name")),
        (("Middle Name", "Enter middle name"),
         ("Last Name", "Enter last name")),
        (("Mother's Name", "Enter mother's name"),
         ("Father's Name", "Enter father's name")),
        (("Spouse's Name", "Enter spouse's name"),
         ("Other Name", "Enter other name")),
        (("Date of Birth", "YYYY-MM-DD"),
         ("Gender", "Male/Female/Other")),
    )
    inputs = []
    for row in rows:
        with gr.Row():
            for label, placeholder in row:
                inputs.append(gr.Textbox(label=label, placeholder=placeholder))
    return inputs  # 10 inputs


def identifier_fields():
    """Render the identifier textboxes.

    Returns:
        The 5 textboxes in the order Aadhar, PAN, license, passport, voter ID.
    """
    inputs = []
    with gr.Row():
        aadhar = gr.Textbox(
            label="Aadhar Number", placeholder="Enter Aadhar number"
        )
        pan = gr.Textbox(label="PAN Number", placeholder="Enter PAN number")
        inputs += [aadhar, pan]
    with gr.Row():
        license_ = gr.Textbox(
            label="License Number", placeholder="Enter license number"
        )
        passport = gr.Textbox(
            label="Passport Number", placeholder="Enter passport number"
        )
        inputs += [license_, passport]
    with gr.Row():
        voter_id = gr.Textbox(label="Voter ID", placeholder="Enter voter ID")
        gr.HTML("")  # spacer
        inputs += [voter_id]
    return inputs  # 5 inputs


def address_fields(slot):
    """Render one address accordion; only slot 0 starts expanded.

    Returns:
        The ``(line, city, state, zipcode)`` textboxes of this slot.
    """
    label = "Primary Address" if slot == 0 else f"Address {slot + 1}"
    with gr.Accordion(label=label, open=(slot == 0)):
        line = gr.Textbox(
            label="Street Address", placeholder="Street, Building, Area"
        )
        with gr.Row():
            city = gr.Textbox(label="City", placeholder="Enter city")
            state = gr.Textbox(label="State", placeholder="Enter state")
            zipcode = gr.Textbox(
                label="Pincode", placeholder="6-digit postal code"
            )
    return line, city, state, zipcode


def contact_fields():
    """Render five phone and five email textboxes.

    Returns:
        ``(phone_inputs, email_inputs)``, two lists of 5 textboxes each.
    """
    with gr.Accordion("Phone Numbers", open=True):
        phone_inputs = [
            gr.Textbox(label=f"Phone {i + 1}", placeholder="Enter phone number")
            for i in range(5)
        ]
    with gr.Accordion("Email Addresses", open=True):
        email_inputs = [
            gr.Textbox(label=f"Email {i + 1}", placeholder="Enter email address")
            for i in range(5)
        ]
    return phone_inputs, email_inputs


def employment_fields():
    """Render the company and parent-company textboxes and return them."""
    with gr.Row():
        company = gr.Textbox(
            label="Company Name", placeholder="Enter company name"
        )
        parent_company = gr.Textbox(
            label="Parent Company Name",
            placeholder="Enter parent company name",
        )
    return company, parent_company


def custom_field_slots():
    """Render five name/value rows for free-form custom fields.

    Returns:
        A list of 5 ``(name_textbox, value_textbox)`` pairs.
    """
    pairs = []
    with gr.Accordion("Custom Fields (up to 5)", open=False):
        for i in range(5):
            with gr.Row():
                cf_name = gr.Textbox(
                    label=f"Field Name {i + 1}", placeholder="e.g. MemberID"
                )
                cf_val = gr.Textbox(
                    label=f"Field Value {i + 1}", placeholder="Value"
                )
            pairs.append((cf_name, cf_val))
    return pairs  # 5 pairs


def _record_column(title):
    """Render one record's column and return its inputs in run_match order.

    The returned list has 57 components: personal (10), identifiers (5),
    addresses (20), phones (5), emails (5), employment (2), custom fields (10).
    """
    with gr.Column():
        gr.Markdown(f"## {title}")
        with gr.Accordion("Personal Details", open=True):
            personal = personal_fields()
        with gr.Accordion("Equalities", open=True):
            ids = identifier_fields()
            # NOTE(review): nesting the custom-field accordion inside
            # "Equalities" is reconstructed from a whitespace-collapsed
            # source - confirm the intended layout.
            custom_pairs = custom_field_slots()
        with gr.Accordion("Address Details", open=True):
            addr_fields = []
            for slot in range(5):
                addr_fields.extend(address_fields(slot))
        with gr.Accordion("Contact Information", open=True):
            phones, emails = contact_fields()
        with gr.Accordion("Employment Details", open=True):
            company, parent_company = employment_fields()

    return (
        personal                                        # 10
        + ids                                           # 5
        + addr_fields                                   # 20
        + phones                                        # 5
        + emails                                        # 5
        + [company, parent_company]                     # 2
        + [w for pair in custom_pairs for w in pair]    # 10
    )


# =========================================================
# BUILD THE GRADIO APP
# =========================================================

def build_app():
    """Build the two-column matching UI and wire the button to run_match.

    Returns:
        The ``gr.Blocks`` application, not yet launched.
    """
    with gr.Blocks(title="GEN AI Record Level Matching") as demo:
        gr.Markdown("# GEN AI Record Level Matching")
        gr.Markdown(
            "Enter details for two records below and click "
            "**Run Record Match** to see the matching result."
        )

        with gr.Row(equal_height=False):
            r1_inputs = _record_column("Record 1")
            r2_inputs = _record_column("Record 2")

        run_btn = gr.Button("Run Record Match", variant="primary")
        result_output = gr.Code(
            label="Matching Result", language="json", lines=20
        )

        # Inputs are passed positionally, so their order must match the
        # run_match() signature exactly: record 1 then record 2.
        all_inputs = r1_inputs + r2_inputs  # Total = 114 inputs

        run_btn.click(fn=run_match, inputs=all_inputs, outputs=result_output)

    return demo


# =========================================================
# ENTRY POINT
# =========================================================

if __name__ == "__main__":
    app = build_app()
    app.launch()