""" Field_Format_Rules.py ===================== Validates the *format* of every common claim field — rejects malformed values, not just missing ones. Companion to `Hard_Rule_Engine` (which checks for presence) and `Demographic_CPT_Rules` (which checks clinical appropriateness). All accepted-value sets live at the top of this file. Add an entry, rule auto-picks it up. Rule names emitted ------------------ rFirstNameFormat, rLastNameFormat Names with digits / bad chars rCityFormat City with digits / odd chars rStateInvalid State not a recognised US code rZipFormat Zip not 5 or 9 digits rPOSInvalid POS not in CMS 2-digit list rBillAsInvalid BillAs not Primary/Secondary/etc. rDateFormat Date field not parseable rDOBRange DOB outside sensible range rUnitsFormat ServiceUnits not a positive integer rChargesFormat TotalCharges not a positive number rPhoneFormat Phone not 10-digit US format rPolicyFormat PolicyNumber contains bad chars """ from __future__ import annotations import re from datetime import date from typing import Dict, List, Optional from Hard_Rule_Engine import RuleResult, RuleFunc, fail, get # ═══════════════════════════════════════════════════════════════════════════ # VALID-VALUE TABLES (edit to extend) # ═══════════════════════════════════════════════════════════════════════════ VALID_US_STATES: set = { "AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA", "HI","ID","IL","IN","IA","KS","KY","LA","ME","MD", "MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ", "NM","NY","NC","ND","OH","OK","OR","PA","RI","SC", "SD","TN","TX","UT","VT","VA","WA","WV","WI","WY", # Territories + DC "DC","PR","VI","GU","AS","MP", # Military "AE","AP","AA", } # Medicare Place-of-Service 2-digit codes (CMS list abridged — add more as needed) VALID_POS_CODES: set = { "01","02","03","04","05","06","07","08","09", "10","11","12","13","14","15","16","17","18","19", "20","21","22","23","24","25","26","27","31","32", "33","34","41","42","49","50","51","52","53","54", "55","56","57","58","60","61","62","65","71","72", "81","99", } VALID_BILL_AS: set = { "primary", "secondary", "tertiary", "self-pay", "selfpay", "self pay", "workers comp", "workerscomp", "auto", "liability", "other", } # ═══════════════════════════════════════════════════════════════════════════ # REGEXES # ═══════════════════════════════════════════════════════════════════════════ # Names: letters + space/hyphen/apostrophe/period; 1-50 chars. _NAME_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,49}$") # City: letters + space/hyphen/period; 1-60 chars. _CITY_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,59}$") # Zip: 5 digits, optional -4 digits. _ZIP_PATTERN = re.compile(r"^\d{5}(-\d{4})?$") # Policy: alphanumeric + optional hyphens; 3-30 chars. _POLICY_PATTERN = re.compile(r"^[A-Za-z0-9\-]{3,30}$") # Phone: 10 digits (allowing common separators). _PHONE_PATTERN = re.compile(r"^[\d\s\-\.\(\)\+]{10,20}$") # ISO date (YYYY-MM-DD) — we accept this format only. _DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}") def _nonempty(row: Dict, key: str) -> str: return str(get(row, key, "")).strip() # ═══════════════════════════════════════════════════════════════════════════ # NAME RULES # ═══════════════════════════════════════════════════════════════════════════ def rule_firstname_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "FirstName") if v and not _NAME_PATTERN.match(v): return fail("rFirstNameFormat", f"FirstName '{v}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.") return None def rule_lastname_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "LastName") if v and not _NAME_PATTERN.match(v): return fail("rLastNameFormat", f"LastName '{v}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.") return None # ═══════════════════════════════════════════════════════════════════════════ # ADDRESS RULES # ═══════════════════════════════════════════════════════════════════════════ def rule_city_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "City") if v and not _CITY_PATTERN.match(v): return fail("rCityFormat", f"City '{v}' contains invalid characters. Use letters, hyphens, or spaces only.") return None def rule_state_valid(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "State").upper() if v and v not in VALID_US_STATES: return fail("rStateInvalid", f"State '{v}' is not a recognised US postal code (AL–WY, DC, PR, etc.).") return None def rule_zip_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "Zip") if v and not _ZIP_PATTERN.match(v): return fail("rZipFormat", f"Zip '{v}' must be 5 digits or 5+4 format (e.g. 60601 or 60601-1234).") return None # ═══════════════════════════════════════════════════════════════════════════ # POS / BILL-AS RULES # ═══════════════════════════════════════════════════════════════════════════ def rule_pos_valid(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "POS") if v and v not in VALID_POS_CODES: return fail("rPOSInvalid", f"POS '{v}' is not a recognised CMS Place-of-Service code.") return None def rule_bill_as_valid(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "BillAs").lower() if v and v not in VALID_BILL_AS: return fail("rBillAsInvalid", f"BillAs '{get(row,'BillAs')}' is not valid. Use Primary, Secondary, Tertiary, Self-Pay, or Other.") return None # ═══════════════════════════════════════════════════════════════════════════ # DATE RULES # ═══════════════════════════════════════════════════════════════════════════ def _parse_iso_date(value: str) -> Optional[date]: value = value.strip() if not _DATE_PATTERN.match(value): return None try: return date.fromisoformat(value[:10]) except ValueError: return None def _date_format_rule(field: str, rule_name: str, msg_label: str): """Factory: a rule that requires `field` to be a parseable ISO date.""" def rule(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, field) if v and _parse_iso_date(v) is None: return fail(rule_name, f"{msg_label} '{v}' is not a valid date (expected YYYY-MM-DD).") return None rule.__name__ = f"rule_{rule_name}" return rule rule_dob_format = _date_format_rule("DateOfBirth", "rDateFormatDOB", "DateOfBirth") rule_dos_format = _date_format_rule("DOS", "rDateFormatDOS", "DOS") rule_sub_date_format = _date_format_rule("SubmissionDate", "rDateFormatSub", "SubmissionDate") rule_adm_date_format = _date_format_rule("AdmissionDate", "rDateFormatAdm", "AdmissionDate") rule_dis_date_format = _date_format_rule("DischargeDate", "rDateFormatDis", "DischargeDate") def rule_dob_range(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "DateOfBirth") if not v: return None dob = _parse_iso_date(v) if dob is None: return None # format rule already reports today = date.today() if dob > today: return fail("rDOBRange", f"DateOfBirth {dob} is in the future.") if dob.year < 1900: return fail("rDOBRange", f"DateOfBirth year {dob.year} is unreasonable.") age = (today - dob).days / 365.25 if age > 125: return fail("rDOBRange", f"DateOfBirth implies age {age:.0f} years — check the date.") return None # ═══════════════════════════════════════════════════════════════════════════ # NUMERIC FIELDS # ═══════════════════════════════════════════════════════════════════════════ def rule_units_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "ServiceUnits") if not v: return None try: n = float(v) except ValueError: return fail("rUnitsFormat", f"ServiceUnits '{v}' must be a positive integer.") if n <= 0 or n != int(n): return fail("rUnitsFormat", f"ServiceUnits '{v}' must be a positive integer.") return None def rule_charges_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "TotalCharges") if not v: return None try: n = float(str(v).replace(",", "").replace("$", "").strip()) except ValueError: return fail("rChargesFormat", f"TotalCharges '{v}' is not a valid number.") if n < 0: return fail("rChargesFormat", f"TotalCharges {n} cannot be negative.") return None # ═══════════════════════════════════════════════════════════════════════════ # PHONE / POLICY # ═══════════════════════════════════════════════════════════════════════════ def rule_phone_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "Phone Number") or _nonempty(row, "Phone") if not v: return None if not _PHONE_PATTERN.match(v): return fail("rPhoneFormat", f"Phone '{v}' must contain 10 digits (formatting allowed).") digits = re.sub(r"\D", "", v) if len(digits) != 10: return fail("rPhoneFormat", f"Phone '{v}' must contain exactly 10 digits, got {len(digits)}.") return None def rule_policy_format(row: Dict) -> Optional[RuleResult]: v = _nonempty(row, "PolicyNumber") if v and not _POLICY_PATTERN.match(v): return fail("rPolicyFormat", f"PolicyNumber '{v}' must be 3–30 alphanumeric characters (hyphens OK).") return None # ═══════════════════════════════════════════════════════════════════════════ # MASTER LIST — imported by Additional_RBS_Rules.py # ═══════════════════════════════════════════════════════════════════════════ FIELD_FORMAT_RULES: List[RuleFunc] = [ rule_firstname_format, rule_lastname_format, rule_city_format, rule_state_valid, rule_zip_format, rule_pos_valid, rule_bill_as_valid, rule_dob_format, rule_dos_format, rule_sub_date_format, rule_adm_date_format, rule_dis_date_format, rule_dob_range, rule_units_format, rule_charges_format, rule_phone_format, rule_policy_format, ]