Spaces:

pujithapsx
/

streamlit_test

Sleeping

App Files Files Community

streamlit_test / src /streamlit_app.py

pujithapsx

new added

0ceaa0b 3 months ago

raw

history blame contribute delete

32.1 kB

	import streamlit as st
	import json
	import re
	import difflib
	from PIL import Image
	import base64
	import os

	# =========================================================
	# PAGE CONFIG
	# =========================================================
	# Browser-tab title and icon, wide layout, sidebar collapsed on first load.
	# This is the first Streamlit call in the file; keep it ahead of the others.
	st.set_page_config(
	page_title="GEN AI Record Level Matching",
	page_icon="🔍",
	layout="wide",
	initial_sidebar_state="collapsed"
	)

	# =========================================================
	# CUSTOM CSS
	# =========================================================
	# Inject the app-wide stylesheet as raw HTML (unsafe_allow_html=True).
	# The rules below cover: page background, hiding the Streamlit toolbar/menu,
	# the header and record titles, section cards, text inputs, the three button
	# variants (default icon buttons, primary, secondary), the result box,
	# expanders, JSON/code blocks and radio buttons.
	st.markdown("""
	<style>
	[data-testid="stAppViewContainer"], [data-testid="stApp"], .stApp {
	background-color: #f0f2f5 !important;
	color: #333 !important;
	}
	.main { background-color: #f0f2f5; }
	.stAppDeployButton, .stMainMenu, #MainMenu,
	[data-testid="stToolbarActions"], [data-testid="stStatusWidget"] {
	display: none !important;
	}
	header[data-testid="stHeader"] { background: transparent !important; }
	.block-container { padding-top: 2rem !important; padding-bottom: 2rem !important; }
	.header-title {
	text-align: center; color: #5B4E8B; font-size: 28px;
	font-weight: 600; margin-bottom: 10px;
	}
	.header-subtitle {
	text-align: center; color: #666; font-size: 14px; margin-bottom: 30px;
	}
	.logo-title-container {
	display: flex; align-items: center; justify-content: center;
	gap: 15px; margin-bottom: 10px;
	}
	.record-header {
	color: #612383; font-size: 26px; font-weight: 700;
	margin-bottom: 25px; padding-bottom: 12px;
	border-bottom: 3px solid;
	border-image: linear-gradient(90deg, #612383, #E9592E, #F5A700) 1;
	}
	.section-card {
	background: white; border-radius: 12px;
	box-shadow: 0 2px 8px rgba(0,0,0,0.08);
	margin-bottom: 20px; overflow: hidden;
	}
	.section-header-gradient {
	background: linear-gradient(90deg, #612383 0%, #E9592E 100%);
	color: white; padding: 14px 20px; font-size: 14px;
	font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px;
	display: flex; align-items: center; gap: 10px;
	}
	.section-content { padding: 20px; }
	.stTextInput > div > div > input {
	background-color: #fafbfc !important; color: #333 !important;
	border: 1px solid #e1e4e8 !important; border-radius: 8px !important;
	padding: 10px 14px !important; font-size: 14px !important;
	}
	.stTextInput > div > div > input:focus {
	border-color: #E9592E !important;
	box-shadow: 0 0 0 3px rgba(233,89,46,0.1) !important;
	}
	.stTextInput label { color: #555 !important; font-size: 13px !important; font-weight: 500 !important; }
	.subsection-label { color: #666; font-size: 13px; font-weight: 500; margin-bottom: 12px; }
	div[data-testid="stButton"] button:not([kind="primary"]):not([kind="secondary"]) {
	width: 36px !important; height: 36px !important; min-width: 36px !important;
	padding: 0 !important; border-radius: 6px !important; font-size: 18px !important;
	background-color: white !important; color: #612383 !important;
	border: 1px solid #d0d7de !important;
	}
	button[kind="primary"] {
	background: linear-gradient(90deg, #612383 0%, #E9592E 100%) !important;
	color: white !important; border: none !important; border-radius: 10px !important;
	padding: 16px 32px !important; font-size: 16px !important; font-weight: 600 !important;
	box-shadow: 0 4px 12px rgba(97,35,131,0.25) !important;
	text-transform: uppercase; letter-spacing: 0.5px; height: auto !important;
	}
	button[kind="secondary"] {
	background: linear-gradient(90deg, #612383 0%, #E9592E 100%) !important;
	color: white !important; border: none !important; border-radius: 10px !important;
	padding: 12px 24px !important; font-size: 13px !important; font-weight: 600 !important;
	min-width: 140px !important; height: auto !important;
	box-shadow: 0 4px 12px rgba(97,35,131,0.25) !important;
	text-transform: uppercase; letter-spacing: 0.5px;
	}
	.result-box {
	background: white !important; border-radius: 12px !important;
	padding: 25px !important; margin-top: 30px !important;
	box-shadow: 0 4px 16px rgba(0,0,0,0.1) !important;
	border-top: 4px solid;
	border-image: linear-gradient(90deg, #612383, #E9592E, #F5A700) 1;
	}
	.result-header { color: #612383; font-size: 18px; font-weight: 600; margin-bottom: 15px; }
	.section-divider { border: none; border-top: 1px solid #e1e4e8; margin: 20px 0; }
	div[data-testid="stExpander"] summary { color: #333 !important; font-weight: 600 !important; }
	div[data-testid="stExpander"] summary svg { stroke: #333 !important; }
	.address-title { font-weight: 600; color: #612383; font-size: 14px; }
	::placeholder { color: #666 !important; opacity: 1 !important; }
	[data-testid="stJson"], [data-testid="stCodeBlock"] {
	background-color: #ffffff !important; color: #333333 !important;
	border: 1px solid #e1e4e8 !important; border-radius: 8px !important;
	}
	div[data-testid="stRadio"] label { color: #333 !important; font-size: 14px !important; font-weight: 500 !important; }
	div[data-testid="stRadio"] > label:first-child { color: #222 !important; font-size: 15px !important; font-weight: 600 !important; }
	div[data-testid="stRadio"] div[role="radiogroup"] label[data-baseweb="radio"] div:first-child {
	border-color: #612383 !important;
	}
	div[data-testid="stRadio"] div[role="radiogroup"] label[data-baseweb="radio"] div:first-child div {
	background-color: #612383 !important;
	}
	</style>
	""", unsafe_allow_html=True)

	# =========================================================
	# SESSION STATE
	# =========================================================
	# Upper bound on how many address / phone / email rows one record may hold.
	MAX_FIELDS = 20

	# Initial per-record UI state: the row ids rendered for each repeatable
	# section, plus the (initially empty) list of user-defined custom fields.
	defaults = {
	    'address_ids_r1': [0], 'address_ids_r2': [0],
	    'phone_ids_r1': [0], 'phone_ids_r2': [0],
	    'email_ids_r1': [0], 'email_ids_r2': [0],
	    'custom_fields_r1': [], 'custom_fields_r2': [],
	}
	# Seed only the keys that are absent so values survive Streamlit reruns.
	for state_key, initial_value in defaults.items():
	    if state_key in st.session_state:
	        continue
	    st.session_state[state_key] = initial_value

	# =========================================================
	# PURE PYTHON MATCHING LOGIC (no external ML libs)
	# =========================================================

	def normalize_text(text):
	    """Return ``text`` lower-cased, trimmed, with whitespace runs collapsed.

	    Any falsy input (``None``, ``""``, ``0``) yields an empty string.
	    Non-string input is converted with ``str()`` first.
	    """
	    if text:
	        lowered = str(text).strip().lower()
	        return re.sub(r"\s+", " ", lowered)
	    return ""

	def fuzzy_ratio(a, b):
	    """Return the difflib similarity of ``a`` and ``b`` as an int in 0-100.

	    The ratio is truncated (not rounded) to an integer percentage.
	    Returns 0 when either argument is empty or ``None``.
	    """
	    if a and b:
	        matcher = difflib.SequenceMatcher(None, a, b)
	        return int(matcher.ratio() * 100)
	    return 0

	def token_sort_ratio(a, b):
	    """Return ``fuzzy_ratio`` of the two strings after sorting their words.

	    Sorting the whitespace-separated tokens makes the score insensitive to
	    word order. Returns 0 when either argument is empty or ``None``.
	    """
	    if not (a and b):
	        return 0
	    ordered_a, ordered_b = (" ".join(sorted(text.split())) for text in (a, b))
	    return fuzzy_ratio(ordered_a, ordered_b)

	def name_similarity(a, b):
	    """Score two name strings on a 0-100 scale.

	    Returns -1 when both values are missing, 0 when exactly one is missing,
	    otherwise the better of the plain and the word-order-insensitive fuzzy
	    ratios computed on the normalised strings.
	    """
	    if not (a and b):
	        # At least one side is empty: -1 only when both are.
	        return 0 if (a or b) else -1
	    left = normalize_text(a)
	    right = normalize_text(b)
	    return max(fuzzy_ratio(left, right), token_sort_ratio(left, right))

	def match_names(name1, fn1, ln1, mn1, name2, fn2, ln2, mn2):
	    """Compare the name fields of two records.

	    Note the parameter order per record: full name, first, last, middle.

	    Returns a dict with ``full_name_percent``, ``firstname_percent``,
	    ``middlename_percent`` and ``lastname_percent``; each value is a 0-100
	    score, or -1 when the field is missing on both sides.
	    """

	    def _assemble(name, first, middle, last):
	        # Prefer "first middle last" built from the parts that are present;
	        # fall back to the free-text full name.
	        pieces = [piece for piece in (first, middle, last) if piece and piece.strip()]
	        return " ".join(pieces) if pieces else (name or "")

	    def _component(left, right):
	        if left or right:
	            return name_similarity(normalize_text(left), normalize_text(right))
	        return -1

	    full1 = normalize_text(_assemble(name1, fn1, mn1, ln1))
	    full2 = normalize_text(_assemble(name2, fn2, mn2, ln2))
	    if full1 or full2:
	        full_score = name_similarity(full1, full2)
	    else:
	        full_score = -1

	    return {
	        "full_name_percent": full_score,
	        "firstname_percent": _component(fn1, fn2),
	        "middlename_percent": _component(mn1, mn2),
	        "lastname_percent": _component(ln1, ln2),
	    }

	def match_single(a, b):
	    """Fuzzy-match one free-text field; -1 when both sides are empty."""
	    if a or b:
	        return name_similarity(normalize_text(a), normalize_text(b))
	    return -1

	def match_addresses(addrs1, addrs2):
	    """Return the best fuzzy score over every pair of addresses.

	    Blank entries are ignored. Returns -1 when neither record has an
	    address, 0 when only one does, otherwise the highest 0-100 score found
	    across all (address-from-1, address-from-2) pairs.
	    """
	    left = [normalize_text(addr) for addr in addrs1 if addr and addr.strip()]
	    right = [normalize_text(addr) for addr in addrs2 if addr and addr.strip()]
	    if not (left or right):
	        return -1
	    if not (left and right):
	        return 0
	    return max(
	        max(fuzzy_ratio(one, other), token_sort_ratio(one, other))
	        for one in left
	        for other in right
	    )

	def normalize_phone(p):
	    """Strip everything except digits from ``p``; falsy input gives ``""``."""
	    return re.sub(r"[^\d]", "", str(p)) if p else ""

	def compare_phones(phones1, phones2):
	    """Return 100 if any phone of record 1 matches any phone of record 2.

	    Numbers are reduced to digits and compared on their last 10 digits, so
	    a differing country-code prefix does not prevent a match. Returns -1
	    when neither side has a usable number and 0 when only one side does or
	    nothing matches.
	    """
	    digits1 = [digits for digits in map(normalize_phone, phones1) if digits]
	    digits2 = [digits for digits in map(normalize_phone, phones2) if digits]
	    if not (digits1 or digits2):
	        return -1
	    if not (digits1 and digits2):
	        return 0
	    # Identical numbers necessarily share their last-10-digit tail, so
	    # comparing tails covers the exact-equality case as well.
	    tails1 = {digits[-10:] for digits in digits1}
	    tails2 = {digits[-10:] for digits in digits2}
	    return 100 if tails1 & tails2 else 0

	def compare_emails(emails1, emails2):
	    """Return 100 if the two records share an e-mail address.

	    Addresses are trimmed and lower-cased before comparison; blank entries
	    are ignored. Returns -1 when neither side has an address, and 0 when
	    only one side does or no address is shared.
	    """
	    left = {mail.strip().lower() for mail in emails1 if mail and mail.strip()}
	    right = {mail.strip().lower() for mail in emails2 if mail and mail.strip()}
	    if not (left or right):
	        return -1
	    if not (left and right):
	        return 0
	    return 100 if left & right else 0

	def compare_exact(a, b):
	    """Exact (case- and whitespace-insensitive) comparison of two values.

	    Returns -1 when both are missing, 0 when one is missing or they differ,
	    and 100 when their normalised forms are equal.
	    """
	    if not (a or b):
	        return -1
	    if not (a and b):
	        return 0
	    same = normalize_text(a) == normalize_text(b)
	    return 100 if same else 0

	def standardize_city(city):
	    """Return the city upper-cased, trimmed, with whitespace runs collapsed."""
	    if city:
	        shouted = str(city).strip().upper()
	        return re.sub(r"\s+", " ", shouted)
	    return ""

	def standardize_state(state):
	    """Return the state upper-cased, trimmed, with whitespace runs collapsed."""
	    if state:
	        shouted = str(state).strip().upper()
	        return re.sub(r"\s+", " ", shouted)
	    return ""

	def standardize_dob(dob):
	    """Normalise a date of birth to ``YYYY-MM-DD`` where the shape is recognised.

	    Two layouts are understood, with ``/`` or ``-`` as separator:
	    year-month-day, and day-month-year (a leading 1-2 digit group is taken
	    as the day). One-digit day and month values are zero-padded so that
	    "1990/1/5" and "1990-01-05" compare equal. Anything else is returned
	    trimmed but otherwise unchanged; falsy input yields ``""``.
	    """
	    if not dob:
	        return ""
	    # str() keeps non-string input (e.g. an int) from crashing on .strip().
	    text = str(dob).strip()

	    year_first = re.match(r"(\d{4})[/-](\d{1,2})[/-](\d{1,2})", text)
	    if year_first:
	        year, month, day = year_first.groups()
	    else:
	        day_first = re.match(r"(\d{1,2})[/-](\d{1,2})[/-](\d{4})", text)
	        if not day_first:
	            return text
	        day, month, year = day_first.groups()

	    return f"{year}-{month.zfill(2)}-{day.zfill(2)}"

	def normalize_gender(val):
	    """Map common gender spellings to ``'MALE'`` / ``'FEMALE'``.

	    Returns ``None`` for falsy input; any unrecognised value is returned
	    trimmed and upper-cased.
	    """
	    if not val:
	        return None
	    aliases = {
	        'm': 'MALE', 'male': 'MALE', 'men': 'MALE', 'man': 'MALE',
	        'f': 'FEMALE', 'female': 'FEMALE', 'women': 'FEMALE', 'woman': 'FEMALE',
	    }
	    token = str(val).strip().lower()
	    return aliases.get(token, token.upper())

	def score_to_label(score, field):
	    """Turn a raw score into its display value.

	    The sentinel -1 becomes the string ``"missing value"``; every other
	    score becomes a float rounded to two decimals. ``field`` is accepted
	    but not used.
	    """
	    return "missing value" if score == -1 else round(float(score), 2)

	def get_dynamic_fields(record, prefix):
	    """Return the values of every ``<prefix><index>`` key, ordered by index.

	    Indices do not have to be contiguous or start at 0: the UI hands out
	    ids as ``max(ids) + 1`` and removes ids from the middle, so a record
	    may hold e.g. ``phone_0`` and ``phone_2`` only. Stopping at the first
	    missing index would silently drop the later entries.

	    Args:
	        record: mapping of field name to value.
	        prefix: key prefix including the trailing underscore, e.g. ``"phone_"``.

	    Returns:
	        List of values in ascending index order (may be empty).
	    """
	    indexed = []
	    for key, value in record.items():
	        if not isinstance(key, str) or not key.startswith(prefix):
	            continue
	        suffix = key[len(prefix):]
	        if not (suffix.isascii() and suffix.isdigit()):
	            continue
	        index = int(suffix)
	        # Accept only the canonical spelling ("phone_1", not "phone_01"),
	        # which is the form the index-probing lookup used to match.
	        if str(index) == suffix:
	            indexed.append((index, value))
	    indexed.sort(key=lambda pair: pair[0])
	    return [value for _, value in indexed]

	def is_valid(val):
	    """Return True when ``val`` holds real data rather than a placeholder.

	    Falsy values and the (case-sensitive) placeholders ``-``, ``NA``,
	    ``N/A`` and ``NULL`` count as invalid, as does whitespace-only text.
	    Always returns a real ``bool`` instead of leaking the falsy operand.
	    """
	    if not val:
	        return False
	    return str(val).strip() not in ("", "-", " ", "NA", "N/A", "NULL")

	def evaluate_rules(scores):
	    """Derive an overall decision from the per-field scores.

	    Args:
	        scores: mapping of field label to score. Numeric scores are 0-100;
	            -1 (or a non-numeric value such as "missing value") marks a
	            field that could not be compared and is ignored.

	    Returns:
	        ``(decision, reason)`` where decision is one of "MATCH",
	        "POSSIBLE MATCH", "NO MATCH" or "UNABLE TO DETERMINE".

	    Rules, in order:
	      1. No comparable field at all -> UNABLE TO DETERMINE.
	      2. Any strong identifier scoring exactly 100 -> MATCH.
	      3. Otherwise the share of comparable fields scoring >= 80 decides:
	         >= 70% MATCH, >= 40% POSSIBLE MATCH, else NO MATCH.
	    """
	    comparable = [
	        value for value in scores.values()
	        if isinstance(value, (int, float)) and value != -1
	    ]
	    if not comparable:
	        return "UNABLE TO DETERMINE", "Insufficient data to make a determination."

	    for strong_id in ("AADHAR", "PAN", "PASSPORTID", "LICENSEID", "VOTERID"):
	        if scores.get(strong_id) == 100:
	            return "MATCH", f"Strong identifier match on {strong_id}."

	    high_matches = sum(1 for value in comparable if value >= 80)
	    total_evaluated = len(comparable)  # non-zero: guarded above
	    match_ratio = high_matches / total_evaluated

	    if match_ratio >= 0.7:
	        return "MATCH", f"{high_matches}/{total_evaluated} fields matched at ≥80%."
	    if match_ratio >= 0.4:
	        return "POSSIBLE MATCH", f"{high_matches}/{total_evaluated} fields matched at ≥80%."
	    return "NO MATCH", f"Only {high_matches}/{total_evaluated} fields matched at ≥80%."

	def match_records(r1, r2):
	    """Run every field comparison for two pre-processed records.

	    Args:
	        r1, r2: flat dicts keyed by field name. Repeatable fields use the
	            ``<prefix><index>`` form (``addressline_0``, ``phone_1``, ...).

	    Returns:
	        Dict mapping an upper-case field label to a score: 0-100, or -1
	        when the field is missing on both sides. Keys that are neither a
	        built-in field nor a repeatable field are treated as custom fields
	        and compared exactly.
	    """
	    name_result = match_names(
	        r1.get("name"), r1.get("firstname"), r1.get("lastname"), r1.get("middlename"),
	        r2.get("name"), r2.get("firstname"), r2.get("lastname"), r2.get("middlename"),
	    )

	    address_score = match_addresses(
	        get_dynamic_fields(r1, "addressline_"), get_dynamic_fields(r2, "addressline_")
	    )
	    phone_score = compare_phones(
	        get_dynamic_fields(r1, "phone_"), get_dynamic_fields(r2, "phone_")
	    )
	    email_score = compare_emails(
	        get_dynamic_fields(r1, "email_"), get_dynamic_fields(r2, "email_")
	    )

	    def standardized(record, prefix, standardize):
	        # Valid values of one repeatable field, in standardised form.
	        return [standardize(v) for v in get_dynamic_fields(record, prefix) if is_valid(v)]

	    def any_shared(values1, values2):
	        # -1 when both sides are empty, else 100/0 on any common value.
	        if not values1 and not values2:
	            return -1
	        return 100 if set(values1) & set(values2) else 0

	    def first_valid(record, prefix):
	        return next((v for v in get_dynamic_fields(record, prefix) if is_valid(v)), None)

	    city_score = any_shared(
	        standardized(r1, "city_", standardize_city),
	        standardized(r2, "city_", standardize_city),
	    )
	    state_score = any_shared(
	        standardized(r1, "state_", standardize_state),
	        standardized(r2, "state_", standardize_state),
	    )
	    # NOTE(review): unlike city/state, only the FIRST valid zipcode of each
	    # record is compared. Kept as-is; confirm whether any-pair matching is
	    # wanted here too. compare_exact already yields -1 when both are None.
	    zipcode_score = compare_exact(first_valid(r1, "zipcode_"), first_valid(r2, "zipcode_"))

	    def safe_exact(k1, k2=None):
	        k2 = k2 or k1
	        return compare_exact(r1.get(k1), r2.get(k2))

	    g1 = normalize_gender(r1.get("gender"))
	    g2 = normalize_gender(r2.get("gender"))
	    if not g1 and not g2:
	        gender_score = -1
	    elif g1 and g2:
	        gender_score = 100 if g1 == g2 else 0
	    else:
	        gender_score = 0

	    results = {
	        "GENDER": gender_score,
	        "NAME": name_result["full_name_percent"],
	        "FIRSTNAME": name_result["firstname_percent"],
	        "MIDDLENAME": name_result["middlename_percent"],
	        "LASTNAME": name_result["lastname_percent"],
	        "SPOUSENAME": match_single(r1.get("spousename"), r2.get("spousename")),
	        "MOTHERNAME": match_single(r1.get("mothername"), r2.get("mothername")),
	        "FATHERNAME": match_single(r1.get("fathername"), r2.get("fathername")),
	        "COMPANYNAME": match_single(r1.get("companyname"), r2.get("companyname")),
	        "PARENTCOMPANYNAME": match_single(r1.get("parentcompanyname"), r2.get("parentcompanyname")),
	        "AADHAR": safe_exact("AADHAR"),
	        "PAN": safe_exact("pan"),
	        "LICENSEID": safe_exact("licenseid"),
	        "PASSPORTID": safe_exact("passportid"),
	        "VOTERID": safe_exact("voterid"),
	        "BIRTHDATE": compare_exact(r1.get("dob"), r2.get("dob")),
	        "PHONE": phone_score,
	        "EMAIL": email_score,
	        "ADDRESSLINE": address_score,
	        "CITY": city_score,
	        "STATE": state_score,
	        "ZIPCODE": zipcode_score,
	    }

	    # Custom fields: everything that is neither built-in nor repeatable.
	    known = {"name", "firstname", "middlename", "lastname", "spousename", "mothername",
	             "fathername", "dob", "gender", "AADHAR", "pan", "licenseid", "passportid",
	             "voterid", "companyname", "parentcompanyname"}
	    dyn_prefixes = ("zipcode_", "city_", "state_", "phone_", "email_", "addressline_")
	    builtin_labels = set(results)

	    # Sorted so the order of custom entries in the output is deterministic.
	    for key in sorted(set(r1.keys()) | set(r2.keys()), key=str):
	        ks = str(key)
	        if ks in known or ks.startswith(dyn_prefixes):
	            continue
	        label = ks.upper()
	        if label in builtin_labels:
	            # A custom field named e.g. "PAN" or "Name" must not overwrite
	            # the score already computed for the built-in field.
	            continue
	        v1, v2 = r1.get(key), r2.get(key)
	        if v1 or v2:
	            results[label] = compare_exact(v1, v2)

	    return results

	# =========================================================
	# UI HELPERS
	# =========================================================
	def preprocess_text(text):
	    """Trim ``text`` and collapse whitespace runs to one space (case is kept).

	    Falsy input yields ``""``. ``text`` is expected to be a string.
	    """
	    return re.sub(r"\s+", " ", text.strip()) if text else ""

	def create_section_card(title, icon_svg, content_func, *args, **kwargs):
	    """Render a titled card and the widgets produced by ``content_func``.

	    Emits the card's opening markup, calls ``content_func(*args, **kwargs)``
	    and then emits the closing tags.

	    Args:
	        title: text shown in the card header (inserted into HTML unescaped,
	            so pass trusted text only).
	        icon_svg: inline SVG markup placed before the title.
	        content_func: callable that draws the card's widgets.
	        *args, **kwargs: forwarded unchanged to ``content_func``.

	    Returns:
	        Whatever ``content_func`` returns.
	    """
	    st.markdown(f'''
	<div class="section-card">
	<div class="section-header-gradient">{icon_svg} {title}</div>
	<div class="section-content">
	''', unsafe_allow_html=True)
	    result = content_func(*args, **kwargs)
	    st.markdown('</div></div>', unsafe_allow_html=True)
	    return result

	# Inline SVG markup for the section-card headers, keyed by a short name.
	# Each icon is 16x16 and strokes with `currentColor`.
	ICONS = {
	"user": '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M20 21v-2a4 4 0 0 0-4-4H8a4 4 0 0 0-4 4v2"></path><circle cx="12" cy="7" r="4"></circle></svg>',
	"id": '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="3" y="4" width="18" height="16" rx="2"></rect><line x1="16" y1="2" x2="16" y2="6"></line><line x1="8" y1="2" x2="8" y2="6"></line><line x1="3" y1="10" x2="21" y2="10"></line></svg>',
	"map": '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polygon points="3 6 9 3 15 6 21 3 21 18 15 21 9 18 3 21"></polygon></svg>',
	"phone": '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 16.92v3a2 2 0 0 1-2.18 2 19.79 19.79 0 0 1-8.63-3.07 19.5 19.5 0 0 1-6-6 19.79 19.79 0 0 1-3.07-8.67A2 2 0 0 1 4.11 2h3a2 2 0 0 1 2 1.72c.127.96.361 1.903.7 2.81a2 2 0 0 1-.45 2.11L8.09 9.91a16 16 0 0 0 6 6l1.27-1.27a2 2 0 0 1 2.11-.45c.907.339 1.85.573 2.81.7A2 2 0 0 1 22 16.92z"></path></svg>',
	"briefcase": '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path></svg>'
	}

	# =========================================================
	# SECTION CONTENT FUNCTIONS
	# =========================================================

	def name_fields_content(record_num, prefix=""):
	    """Draw the personal-detail inputs, two per row, and return their values.

	    Widget keys follow ``{prefix}{field}_{record_num}``.

	    Returns:
	        Dict with keys name, firstname, middlename, lastname, mothername,
	        fathername, spousename, othername, gender and dob.
	    """
	    # One tuple per row; each entry is (field, label, placeholder).
	    rows = (
	        (("name", "Full Name", "Enter full name"),
	         ("firstname", "First Name", "Enter first name")),
	        (("middlename", "Middle Name", "Enter middle name"),
	         ("lastname", "Last Name", "Enter last name")),
	        (("mothername", "Mother's Name", "Enter mother's name"),
	         ("fathername", "Father's Name", "Enter father's name")),
	        (("spousename", "Spouse's Name", "Enter spouse's name"),
	         ("othername", "Other Name", "Enter other name")),
	        (("dob", "Date of Birth", "YYYY-MM-DD"),
	         ("gender", "Gender", "Male/Female/Other")),
	    )

	    entered = {}
	    for row in rows:
	        for column, (field, label, hint) in zip(st.columns(2), row):
	            with column:
	                entered[field] = st.text_input(
	                    label, key=f"{prefix}{field}_{record_num}", placeholder=hint
	                )

	    # Key order of the returned dict (gender precedes dob).
	    output_order = ("name", "firstname", "middlename", "lastname", "mothername",
	                    "fathername", "spousename", "othername", "gender", "dob")
	    return {field: entered[field] for field in output_order}

	def identifier_fields_content(record_num, prefix=""):
	    """Draw the identifier inputs plus the user-defined custom fields.

	    Custom fields live in ``st.session_state["custom_fields_<record>"]`` as
	    a list of dicts. Each dict carries a stable ``id`` that is used in the
	    widget keys, so removing one row does not make the remaining rows pick
	    up the removed row's widget state (which happens when keys are built
	    from the list position).

	    Returns:
	        Dict with AADHAR, pan, licenseid, passportid and voterid, updated
	        with one entry per named custom field (a custom field reusing one
	        of those names replaces that value).
	    """
	    col1, col2 = st.columns(2)
	    with col1:
	        aadhar = st.text_input("Aadhar Number", key=f"{prefix}taxid_{record_num}", placeholder="Enter Aadhar number")
	    with col2:
	        pan = st.text_input("PAN Number", key=f"{prefix}pan_{record_num}", placeholder="Enter PAN number")
	    col1, col2 = st.columns(2)
	    with col1:
	        license_id = st.text_input("License Number", key=f"{prefix}licenseid_{record_num}", placeholder="Enter license number")
	    with col2:
	        passport = st.text_input("Passport Number", key=f"{prefix}passportid_{record_num}", placeholder="Enter passport number")
	    col1, _ = st.columns(2)
	    with col1:
	        voter_id = st.text_input("Voter ID", key=f"{prefix}voterid_{record_num}", placeholder="Enter voter ID")

	    st.markdown('<div class="subsection-label" style="margin-top:15px;">Custom Fields</div>', unsafe_allow_html=True)

	    custom_fields = st.session_state[f"custom_fields_{prefix.strip('_')}"]

	    # Give every entry a stable id; entries stored without one get the
	    # next free id so ids never collide.
	    next_id = max((f["id"] for f in custom_fields if "id" in f), default=-1) + 1
	    for field in custom_fields:
	        if "id" not in field:
	            field["id"] = next_id
	            next_id += 1

	    custom_data = {}
	    for idx, field in enumerate(custom_fields):
	        field_id = field["id"]
	        col_c1, col_c2, col_rem = st.columns([5, 5, 1])
	        with col_c1:
	            field_name = st.text_input(f"Field Name {idx+1}", value=field.get('name', ''),
	                                       key=f"{prefix}custom_name_{field_id}_{record_num}", placeholder="Field Name")
	            field['name'] = field_name
	        with col_c2:
	            field_val = st.text_input(f"Field Value {idx+1}", value=field.get('value', ''),
	                                      key=f"{prefix}custom_val_{field_id}_{record_num}", placeholder="Value")
	            field['value'] = field_val
	        if field_name:
	            custom_data[field_name] = field_val
	        with col_rem:
	            # Two empty writes push the button down to the input's baseline.
	            st.write("")
	            st.write("")
	            if st.button("−", key=f"{prefix}remove_custom_{field_id}_{record_num}"):
	                custom_fields.pop(idx)
	                st.rerun()  # stops this run, so the mutated list is not iterated further

	    if st.button("+ ADD FIELD", key=f"{prefix}add_custom_{record_num}", type="secondary"):
	        custom_fields.append({'id': next_id, 'name': '', 'value': ''})
	        st.rerun()

	    result = {"AADHAR": aadhar, "pan": pan, "licenseid": license_id, "passportid": passport, "voterid": voter_id}
	    result.update(custom_data)
	    return result

	def address_item_content(record_num, addr_id, prefix=""):
	    """Draw the four inputs of one address and return their values.

	    Widget keys follow ``{prefix}{field}_{addr_id}_{record_num}``; the
	    returned dict is keyed ``{field}_{addr_id}`` for the fields
	    addressline, city, state and zipcode.
	    """
	    # (field, label, placeholder) in display order.
	    specs = (
	        ("addressline", "Street Address", "Street, Building, Area"),
	        ("city", "City", "Enter city"),
	        ("state", "State", "Enter state"),
	        ("zipcode", "Pincode", "6-digit postal code"),
	    )
	    return {
	        f"{field}_{addr_id}": st.text_input(
	            label, key=f"{prefix}{field}_{addr_id}_{record_num}", placeholder=hint
	        )
	        for field, label, hint in specs
	    }

	def addresses_section_content(record_num, prefix=""):
	    """Draw every address of a record with add/remove controls.

	    The ids of the addresses to show are kept in
	    ``st.session_state["address_ids_<record>"]``. Ids are stable (used in
	    widget keys and in the returned dict keys), while the header shown to
	    the user is derived from the row's position, so the first row is always
	    "Primary Address" and the numbering has no gaps after a removal.

	    Returns:
	        Dict merging the values of all rendered addresses, keyed
	        ``addressline_<id>``, ``city_<id>``, ``state_<id>``, ``zipcode_<id>``.
	    """
	    ids_key = f"address_ids_{prefix.strip('_')}"
	    ids = st.session_state[ids_key]
	    addresses = {}

	    col_title, col_add = st.columns([6, 1])
	    with col_title:
	        st.markdown('<div class="subsection-label">Manage Addresses</div>', unsafe_allow_html=True)
	    with col_add:
	        if len(ids) < MAX_FIELDS:
	            if st.button("＋", key=f"{prefix}add_address_{record_num}"):
	                ids.append(max(ids) + 1 if ids else 0)
	                st.rerun()

	    for idx, addr_id in enumerate(ids):
	        header_cols = st.columns([8, 1])
	        with header_cols[0]:
	            header_text = "Primary Address" if idx == 0 else f"Address {idx + 1}"
	            st.markdown(f"<div class='address-title'>{header_text}</div>", unsafe_allow_html=True)
	        with header_cols[1]:
	            # The last remaining address cannot be removed.
	            if len(ids) > 1:
	                if st.button("−", key=f"{prefix}remove_address_{addr_id}_{record_num}"):
	                    ids.remove(addr_id)
	                    st.rerun()  # stops this run, so the mutated list is not iterated further
	        addresses.update(address_item_content(record_num, addr_id, prefix))
	        if idx < len(ids) - 1:
	            st.markdown("<hr style='margin:20px 0;border:none;border-top:1px solid #e1e4e8;'>", unsafe_allow_html=True)
	    return addresses

	def contact_section_content(record_num, prefix=""):
	    """Draw the phone and e-mail rows of a record with add/remove buttons.

	    Row ids come from ``st.session_state["phone_ids_<record>"]`` and
	    ``st.session_state["email_ids_<record>"]``.

	    Returns:
	        Dict keyed ``phone_<id>`` / ``email_<id>`` with the entered text.
	    """
	    contacts = {}
	    r = prefix.strip("_")
	    phone_ids = st.session_state[f"phone_ids_{r}"]
	    email_ids = st.session_state[f"email_ids_{r}"]

	    def _render_rows(kind, label, hint, item_ids):
	        # One row per id: the input, a "+" button (while below MAX_FIELDS)
	        # and a "−" button (while more than one row exists).
	        state_key = f"{kind}_ids_{r}"
	        for item_id in item_ids:
	            value_col, add_col, remove_col = st.columns([8, 1, 1])
	            with value_col:
	                entered = st.text_input(f"{label} {item_id+1}", key=f"{prefix}{kind}_{item_id}_{record_num}",
	                                        placeholder=hint, label_visibility="collapsed")
	                contacts[f"{kind}_{item_id}"] = entered
	            with add_col:
	                if len(item_ids) < MAX_FIELDS and st.button("＋", key=f"{prefix}add_{kind}_{item_id}_{record_num}"):
	                    st.session_state[state_key].append(max(item_ids) + 1 if item_ids else 0)
	                    st.rerun()
	            with remove_col:
	                if len(item_ids) > 1 and st.button("−", key=f"{prefix}remove_{kind}_{item_id}_{record_num}"):
	                    st.session_state[state_key].remove(item_id)
	                    st.rerun()

	    st.markdown('<div class="subsection-label">📞 Phone Numbers</div>', unsafe_allow_html=True)
	    _render_rows("phone", "Phone", "Enter phone number", phone_ids)

	    st.markdown('<hr class="section-divider">', unsafe_allow_html=True)
	    st.markdown('<div class="subsection-label">✉️ Email Addresses</div>', unsafe_allow_html=True)
	    _render_rows("email", "Email", "Enter email address", email_ids)

	    return contacts

	def other_details_content(record_num, prefix=""):
	    """Draw the two employment inputs side by side and return their values.

	    Returns:
	        Dict with keys ``companyname`` and ``parentcompanyname``.
	    """
	    left, right = st.columns(2)
	    details = {}
	    with left:
	        details["companyname"] = st.text_input(
	            "Company Name", key=f"{prefix}companyname_{record_num}", placeholder="Enter company name"
	        )
	    with right:
	        details["parentcompanyname"] = st.text_input(
	            "Parent Company Name", key=f"{prefix}parentcompanyname_{record_num}",
	            placeholder="Enter parent company name"
	        )
	    return details

	# =========================================================
	# MAIN
	# =========================================================
	def main():
	    """Render the page: header, mode selector, both record forms and the result.

	    Each record is entered through five section cards. When the run button
	    is pressed, the section dicts of each record are merged into one flat
	    dict, pre-processed, scored with ``match_records`` and summarised with
	    ``evaluate_rules``; the outcome is shown as JSON.
	    """
	    st.markdown('''
	<div class="logo-title-container">
	<div style="font-size:36px;">🔍</div>
	<div class="header-title">Record Level Matching Using Transformer based Models</div>
	</div>
	''', unsafe_allow_html=True)
	    st.markdown('<div class="header-subtitle">Enter details for two records below and click "Run Record Match" to see the matching result</div>', unsafe_allow_html=True)

	    # Mode selector (UI only — Embedding is the only functional mode here)
	    mode_col1, _ = st.columns([4, 6])
	    with mode_col1:
	        matching_mode = st.radio(
	            "Matching Mode",
	            ["Embedding Mode", "LLM Mode"],
	            key="matching_mode",
	            horizontal=True,
	            help="Embedding: Fuzzy/Token-based matching | LLM Mode: Requires external LLM server (unavailable in standalone)"
	        )

	    if matching_mode == "LLM Mode":
	        st.warning("⚠️ LLM Mode requires an external vLLM server. Falling back to Embedding (fuzzy) matching for standalone use.")

	    col1, col2 = st.columns(2)

	    with col1:
	        st.markdown('<div class="record-header">Record 1</div>', unsafe_allow_html=True)
	        r1_names = create_section_card("Personal Details", ICONS["user"], name_fields_content, 1, "r1_")
	        r1_identifiers = create_section_card("Equalities", ICONS["id"], identifier_fields_content, 1, "r1_")
	        r1_addresses = create_section_card("Address Details", ICONS["map"], addresses_section_content, 1, "r1_")
	        r1_contacts = create_section_card("Contact Information", ICONS["phone"], contact_section_content, 1, "r1_")
	        r1_other = create_section_card("Employment Details", ICONS["briefcase"], other_details_content, 1, "r1_")

	    with col2:
	        st.markdown('<div class="record-header">Record 2</div>', unsafe_allow_html=True)
	        r2_names = create_section_card("Personal Details", ICONS["user"], name_fields_content, 2, "r2_")
	        r2_identifiers = create_section_card("Equalities", ICONS["id"], identifier_fields_content, 2, "r2_")
	        r2_addresses = create_section_card("Address Details", ICONS["map"], addresses_section_content, 2, "r2_")
	        r2_contacts = create_section_card("Contact Information", ICONS["phone"], contact_section_content, 2, "r2_")
	        r2_other = create_section_card("Employment Details", ICONS["briefcase"], other_details_content, 2, "r2_")

	    if st.button("🚀 Run Record Match", type="primary", use_container_width=True):
	        # Merge the per-section dicts into one flat record each.
	        r1 = {**r1_names, **r1_identifiers, **r1_addresses, **r1_contacts, **r1_other}
	        r2 = {**r2_names, **r2_identifiers, **r2_addresses, **r2_contacts, **r2_other}

	        def process(r):
	            # Standardise dob/city/state; collapse whitespace everywhere else.
	            out = {}
	            for k, v in r.items():
	                k_str = str(k)
	                if k_str == "dob":
	                    out[k_str] = standardize_dob(v or "")
	                elif k_str.startswith("city_"):
	                    out[k_str] = standardize_city(v) if v else None
	                elif k_str.startswith("state_"):
	                    out[k_str] = standardize_state(v) if v else None
	                else:
	                    out[k_str] = preprocess_text(v) if isinstance(v, str) else v
	            return out

	        r1p = process(r1)
	        r2p = process(r2)

	        with st.spinner("Matching records..."):
	            raw_scores = match_records(r1p, r2p)

	        # -1 becomes "missing value"; other scores are rounded floats.
	        field_scores = {k: score_to_label(v, k) for k, v in raw_scores.items()}
	        # The decision is taken on the raw numeric scores, not the labels.
	        overall_decision, reason = evaluate_rules(raw_scores)

	        result = {
	            "overall_decision": overall_decision,
	            "reason": reason,
	            "field_scores": field_scores,
	        }

	        st.markdown('''
	<div class="result-box">
	<div class="result-header">📊 Matching Result (JSON)</div>
	</div>
	''', unsafe_allow_html=True)
	        st.json(result, expanded=True)

	# Script entry point: build the page when the file is executed as the main
	# module (this is how `streamlit run` executes it).
	if __name__ == "__main__":
	    main()