| import gradio as gr |
| import json |
| import re |
| import os |
| import sys |
|
|
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| from backend.matching_service import perform_match |
| from backend.models import EntityRecord |
|
|
| |
| |
| |
# Number of numbered slots (indices 0 .. MAX_FIELDS - 1) that
# convert_to_api_payload scans for phone_<i>, email_<i> and address keys.
MAX_FIELDS = 20
|
|
|
|
| |
| |
| |
def preprocess_text(text):
    """Trim *text* and collapse every run of whitespace into a single space.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if text:
        trimmed = text.strip()
        return re.sub(r"\s+", " ", trimmed)
    return ""
|
|
|
|
def convert_to_api_payload(record: dict) -> dict:
    """Convert a flat record dict into the nested payload structure.

    Args:
        record: Flat mapping. Recognised keys are the scalar fields listed in
            ``scalar_fields`` below plus the numbered keys ``phone_<i>``,
            ``email_<i>``, ``addressline_<i>``, ``city_<i>``, ``state_<i>``
            and ``zipcode_<i>`` for ``i`` in ``range(MAX_FIELDS)``.

    Returns:
        A dict holding every scalar field (``""`` when missing) and four
        collections:

        * ``phones`` / ``emails``: non-empty numbered values, as strings.
        * ``addresses``: one dict per slot that has at least one non-empty
          component.
        * ``custom_fields``: every other non-blank entry of ``record``,
          with key and value converted to ``str``.
    """
    scalar_fields = (
        "name", "firstname", "middlename", "lastname",
        "mothername", "fathername", "spousename", "othername",
        "dob", "gender",
        "AADHAR", "pan", "licenseid", "passportid", "voterid",
        "companyname", "parentcompanyname",
    )
    address_parts = ("addressline", "city", "state", "zipcode")

    payload = {field: record.get(field, "") for field in scalar_fields}
    payload.update(phones=[], emails=[], addresses=[], custom_fields={})

    for i in range(MAX_FIELDS):
        phone = record.get(f"phone_{i}", "")
        if phone:
            payload["phones"].append(str(phone))

        email = record.get(f"email_{i}", "")
        if email:
            payload["emails"].append(str(email))

        address = {part: record.get(f"{part}_{i}", "") for part in address_parts}
        # Test the values, not mere key presence: callers that always set
        # every slot's keys (with empty strings for unused slots) would
        # otherwise produce a list full of all-empty address entries.
        if any(address.values()):
            payload["addresses"].append(address)

    known_keys = set(scalar_fields)
    known_prefixes = tuple(f"{part}_" for part in address_parts) + ("phone_", "email_")

    # Anything that is neither a scalar field nor a numbered slot key is
    # passed through as a custom field, provided its value is not blank.
    for key, value in record.items():
        key_str = str(key)
        if key_str in known_keys or key_str.startswith(known_prefixes):
            continue
        if value and str(value).strip():
            payload["custom_fields"][key_str] = str(value)

    return payload
|
|
|
|
| |
| |
| |
def run_match(
    # Record 1: personal details
    r1_name, r1_firstname, r1_middlename, r1_lastname,
    r1_mothername, r1_fathername, r1_spousename, r1_othername,
    r1_dob, r1_gender,
    # Record 1: identifiers
    r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
    # Record 1: addresses (5 slots x line/city/state/zip)
    r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip,
    r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip,
    r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip,
    r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip,
    r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip,
    # Record 1: phones
    r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4,
    # Record 1: emails
    r1_email0, r1_email1, r1_email2, r1_email3, r1_email4,
    # Record 1: employment
    r1_company, r1_parent_company,
    # Record 1: custom fields (name/value pairs)
    r1_cf0_name, r1_cf0_val,
    r1_cf1_name, r1_cf1_val,
    r1_cf2_name, r1_cf2_val,
    r1_cf3_name, r1_cf3_val,
    r1_cf4_name, r1_cf4_val,
    # Record 2: personal details
    r2_name, r2_firstname, r2_middlename, r2_lastname,
    r2_mothername, r2_fathername, r2_spousename, r2_othername,
    r2_dob, r2_gender,
    # Record 2: identifiers
    r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
    # Record 2: addresses (5 slots x line/city/state/zip)
    r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip,
    r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip,
    r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip,
    r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip,
    r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip,
    # Record 2: phones
    r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4,
    # Record 2: emails
    r2_email0, r2_email1, r2_email2, r2_email3, r2_email4,
    # Record 2: employment
    r2_company, r2_parent_company,
    # Record 2: custom fields (name/value pairs)
    r2_cf0_name, r2_cf0_val,
    r2_cf1_name, r2_cf1_val,
    r2_cf2_name, r2_cf2_val,
    r2_cf3_name, r2_cf3_val,
    r2_cf4_name, r2_cf4_val,
):
    """Match the two records entered in the UI and return the result as JSON text.

    The positional parameters carry, for record 1 and then record 2, the
    personal details, identifiers, five address slots, five phones, five
    emails, the two company names and five custom name/value pairs.

    Each record is flattened into a dict, converted with
    ``convert_to_api_payload``, wrapped in an ``EntityRecord`` and the pair is
    passed to ``perform_match`` with ``mode="embedding"``.

    Returns:
        A JSON string (indent 2) with ``overall_decision``, ``reason`` and
        ``field_results``. If conversion, matching or serialisation raises,
        the JSON is ``{"error": <message>}`` instead.

    Note:
        Custom fields are written last and keyed by their stripped name, so a
        custom name equal to an existing key replaces that key's value.
    """
    # Keys for the scalar values, in the order the tuples below supply them.
    scalar_keys = (
        "name", "firstname", "middlename", "lastname",
        "mothername", "fathername", "spousename", "othername",
        "dob", "gender",
        "AADHAR", "pan", "licenseid", "passportid", "voterid",
        "companyname", "parentcompanyname",
    )

    def flatten(scalars, addresses, phones, emails, custom_pairs):
        # Build the flat dict: scalars, then numbered slots, then custom fields.
        flat = dict(zip(scalar_keys, scalars))
        for idx, (line, city, state, zipcode) in enumerate(addresses):
            flat[f"addressline_{idx}"] = line
            flat[f"city_{idx}"] = city
            flat[f"state_{idx}"] = state
            flat[f"zipcode_{idx}"] = zipcode
        flat.update({f"phone_{idx}": number for idx, number in enumerate(phones)})
        flat.update({f"email_{idx}": mailbox for idx, mailbox in enumerate(emails)})
        for label, value in custom_pairs:
            # Pairs without a usable name are ignored.
            if label and label.strip():
                flat[label.strip()] = value
        return flat

    first = flatten(
        (
            r1_name, r1_firstname, r1_middlename, r1_lastname,
            r1_mothername, r1_fathername, r1_spousename, r1_othername,
            r1_dob, r1_gender,
            r1_aadhar, r1_pan, r1_licenseid, r1_passportid, r1_voterid,
            r1_company, r1_parent_company,
        ),
        [
            (r1_addr0_line, r1_addr0_city, r1_addr0_state, r1_addr0_zip),
            (r1_addr1_line, r1_addr1_city, r1_addr1_state, r1_addr1_zip),
            (r1_addr2_line, r1_addr2_city, r1_addr2_state, r1_addr2_zip),
            (r1_addr3_line, r1_addr3_city, r1_addr3_state, r1_addr3_zip),
            (r1_addr4_line, r1_addr4_city, r1_addr4_state, r1_addr4_zip),
        ],
        [r1_phone0, r1_phone1, r1_phone2, r1_phone3, r1_phone4],
        [r1_email0, r1_email1, r1_email2, r1_email3, r1_email4],
        [
            (r1_cf0_name, r1_cf0_val),
            (r1_cf1_name, r1_cf1_val),
            (r1_cf2_name, r1_cf2_val),
            (r1_cf3_name, r1_cf3_val),
            (r1_cf4_name, r1_cf4_val),
        ],
    )

    second = flatten(
        (
            r2_name, r2_firstname, r2_middlename, r2_lastname,
            r2_mothername, r2_fathername, r2_spousename, r2_othername,
            r2_dob, r2_gender,
            r2_aadhar, r2_pan, r2_licenseid, r2_passportid, r2_voterid,
            r2_company, r2_parent_company,
        ),
        [
            (r2_addr0_line, r2_addr0_city, r2_addr0_state, r2_addr0_zip),
            (r2_addr1_line, r2_addr1_city, r2_addr1_state, r2_addr1_zip),
            (r2_addr2_line, r2_addr2_city, r2_addr2_state, r2_addr2_zip),
            (r2_addr3_line, r2_addr3_city, r2_addr3_state, r2_addr3_zip),
            (r2_addr4_line, r2_addr4_city, r2_addr4_state, r2_addr4_zip),
        ],
        [r2_phone0, r2_phone1, r2_phone2, r2_phone3, r2_phone4],
        [r2_email0, r2_email1, r2_email2, r2_email3, r2_email4],
        [
            (r2_cf0_name, r2_cf0_val),
            (r2_cf1_name, r2_cf1_val),
            (r2_cf2_name, r2_cf2_val),
            (r2_cf3_name, r2_cf3_val),
            (r2_cf4_name, r2_cf4_val),
        ],
    )

    try:
        # Convert both records before constructing either entity.
        payloads = [convert_to_api_payload(flat) for flat in (first, second)]
        left, right = [EntityRecord(**payload) for payload in payloads]

        outcome = perform_match(left, right, mode="embedding")

        summary = {
            "overall_decision": outcome["overall_decision"],
            "reason": outcome["reason"],
            "field_results": outcome["field_scores"],
        }
        return json.dumps(summary, indent=2)
    except Exception as exc:
        # UI boundary: show the failure in the result box instead of raising.
        return json.dumps({"error": str(exc)}, indent=2)
|
|
|
|
| |
| |
| |
def personal_fields():
    """Create the personal-detail textboxes, two per row.

    Returns:
        A list of ten ``gr.Textbox`` components in creation order: full name,
        first, middle, last, mother's, father's, spouse's and other name,
        date of birth, gender.
    """
    # Each inner tuple is one row of (label, placeholder) pairs.
    row_specs = (
        (("Full Name", "Enter full name"), ("First Name", "Enter first name")),
        (("Middle Name", "Enter middle name"), ("Last Name", "Enter last name")),
        (("Mother's Name", "Enter mother's name"), ("Father's Name", "Enter father's name")),
        (("Spouse's Name", "Enter spouse's name"), ("Other Name", "Enter other name")),
        (("Date of Birth", "YYYY-MM-DD"), ("Gender", "Male/Female/Other")),
    )
    boxes = []
    for row in row_specs:
        with gr.Row():
            boxes.extend(
                [gr.Textbox(label=label, placeholder=hint) for label, hint in row]
            )
    return boxes
|
|
|
|
def identifier_fields():
    """Create the identifier textboxes.

    Returns:
        A list of five ``gr.Textbox`` components in creation order: Aadhar,
        PAN, license, passport, voter ID.
    """
    paired_rows = (
        (("Aadhar Number", "Enter Aadhar number"), ("PAN Number", "Enter PAN number")),
        (("License Number", "Enter license number"), ("Passport Number", "Enter passport number")),
    )
    boxes = []
    for row in paired_rows:
        with gr.Row():
            boxes.extend(
                [gr.Textbox(label=label, placeholder=hint) for label, hint in row]
            )
    with gr.Row():
        boxes.append(gr.Textbox(label="Voter ID", placeholder="Enter voter ID"))
        # Empty HTML element in the second slot; presumably a spacer so the
        # single textbox does not stretch across the whole row.
        gr.HTML("")
    return boxes
|
|
|
|
def address_fields(slot):
    """Create one collapsible address section.

    Args:
        slot: Zero-based address index. Slot 0 is titled "Primary Address" and
            starts expanded; other slots are titled "Address <slot + 1>" and
            start collapsed.

    Returns:
        A tuple ``(street, city, state, pincode)`` of ``gr.Textbox`` components.
    """
    is_primary = slot == 0
    if is_primary:
        title = "Primary Address"
    else:
        title = f"Address {slot + 1}"

    with gr.Accordion(label=title, open=is_primary):
        street_box = gr.Textbox(label="Street Address", placeholder="Street, Building, Area")
        with gr.Row():
            city_box = gr.Textbox(label="City", placeholder="Enter city")
            state_box = gr.Textbox(label="State", placeholder="Enter state")
            pincode_box = gr.Textbox(label="Pincode", placeholder="6-digit postal code")
    return street_box, city_box, state_box, pincode_box
|
|
|
|
def contact_fields():
    """Create five phone and five email textboxes, each group in its own accordion.

    Returns:
        A tuple ``(phone_boxes, email_boxes)`` of two lists of ``gr.Textbox``
        components.
    """
    with gr.Accordion("Phone Numbers", open=True):
        phone_boxes = [
            gr.Textbox(label=f"Phone {number}", placeholder="Enter phone number")
            for number in range(1, 6)
        ]
    with gr.Accordion("Email Addresses", open=True):
        email_boxes = [
            gr.Textbox(label=f"Email {number}", placeholder="Enter email address")
            for number in range(1, 6)
        ]
    return phone_boxes, email_boxes
|
|
|
|
def employment_fields():
    """Create the company and parent-company textboxes side by side.

    Returns:
        A tuple ``(company, parent_company)`` of ``gr.Textbox`` components.
    """
    specs = (
        ("Company Name", "Enter company name"),
        ("Parent Company Name", "Enter parent company name"),
    )
    with gr.Row():
        company_box, parent_box = [
            gr.Textbox(label=label, placeholder=hint) for label, hint in specs
        ]
    return company_box, parent_box
|
|
|
|
def custom_field_slots():
    """Create five custom name/value textbox pairs inside a collapsed accordion.

    Returns:
        A list of five ``(name_box, value_box)`` tuples of ``gr.Textbox``
        components.
    """
    slots = []
    with gr.Accordion("Custom Fields (up to 5)", open=False):
        for number in range(1, 6):
            with gr.Row():
                name_box = gr.Textbox(label=f"Field Name {number}", placeholder="e.g. MemberID")
                value_box = gr.Textbox(label=f"Field Value {number}", placeholder="Value")
            slots.append((name_box, value_box))
    return slots
|
|
|
|
| |
| |
| |
def build_app():
    """Assemble the two-record matching UI and return the ``gr.Blocks`` app.

    The page shows two side-by-side record columns, a run button and a JSON
    result box. Clicking the button calls ``run_match`` with every input of
    record 1 followed by every input of record 2.
    """

    def record_column(heading):
        # Build one record's column and return its inputs in the order
        # run_match expects: personal, identifiers, addresses, phones,
        # emails, companies, custom name/value pairs.
        with gr.Column():
            gr.Markdown(heading)

            with gr.Accordion("Personal Details", open=True):
                personal = personal_fields()

            with gr.Accordion("Equalities", open=True):
                identifiers = identifier_fields()
                custom_pairs = custom_field_slots()

            with gr.Accordion("Address Details", open=True):
                address_boxes = []
                for slot in range(5):
                    address_boxes += list(address_fields(slot))

            with gr.Accordion("Contact Information", open=True):
                phones, emails = contact_fields()

            with gr.Accordion("Employment Details", open=True):
                company, parent_company = employment_fields()

        # Custom fields are rendered before the addresses but passed last.
        custom_boxes = [box for pair in custom_pairs for box in pair]
        return (
            personal
            + identifiers
            + address_boxes
            + phones
            + emails
            + [company, parent_company]
            + custom_boxes
        )

    with gr.Blocks(title="GEN AI Record Level Matching") as demo:
        gr.Markdown("# GEN AI Record Level Matching")
        gr.Markdown("Enter details for two records below and click **Run Record Match** to see the matching result.")

        with gr.Row(equal_height=False):
            record_one_inputs = record_column("## Record 1")
            record_two_inputs = record_column("## Record 2")

        run_btn = gr.Button("Run Record Match", variant="primary")
        result_output = gr.Code(label="Matching Result", language="json", lines=20)

        run_btn.click(
            fn=run_match,
            inputs=record_one_inputs + record_two_inputs,
            outputs=result_output,
        )

    return demo
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    build_app().launch()