| """ |
| Field_Format_Rules.py |
| ===================== |
| Validates the *format* of every common claim field - rejects malformed values, |
| not just missing ones. Companion to `Hard_Rule_Engine` (which checks for |
| presence) and `Demographic_CPT_Rules` (which checks clinical appropriateness). |
| |
| All accepted-value sets live at the top of this file. Add an entry to a set |
| and the matching rule picks it up automatically. |
| |
| Rule names emitted |
| ------------------ |
| rFirstNameFormat, rLastNameFormat Names with digits / bad chars |
| rCityFormat City with digits / odd chars |
| rStateInvalid State not a recognised US code |
| rZipFormat Zip not 5 or 9 digits |
| rPOSInvalid POS not in CMS 2-digit list |
| rBillAsInvalid BillAs not Primary/Secondary/etc. |
| rDateFormatDOB, rDateFormatDOS, rDateFormatSub, rDateFormatAdm, rDateFormatDis Date field not parseable as YYYY-MM-DD |
| rDOBRange DOB outside sensible range |
| rUnitsFormat ServiceUnits not a positive integer |
| rChargesFormat TotalCharges not a non-negative number |
| rPhoneFormat Phone not 10-digit US format |
| rPolicyFormat PolicyNumber contains bad chars |
| """ |
|
|
| from __future__ import annotations |
|
|
| import re |
| from datetime import date |
| from typing import Dict, List, Optional |
|
|
| from Hard_Rule_Engine import RuleResult, RuleFunc, fail, get |
|
|
|
|
| |
| |
| |
|
|
# Two-letter codes accepted in the State field. All entries are upper-case;
# rule_state_valid upper-cases the incoming value before the lookup.
VALID_US_STATES: set = set(
    # The 50 states.
    "AL AK AZ AR CA CO CT DE FL GA "
    "HI ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY "
    # District of Columbia and US territories.
    "DC PR VI GU AS MP "
    # Armed Forces postal designators.
    "AE AP AA".split()
)
|
|
| |
# Place-of-Service codes accepted in the POS field, stored as zero-padded
# two-character strings ("01", "11", ...). rule_pos_valid compares the field
# text against these strings exactly, so "1" does not match "01".
# NOTE(review): the official code set is revised from time to time - confirm
# this list against the current CMS publication.
VALID_POS_CODES: set = {
    f"{code:02d}"
    for code in (
        *range(1, 28),    # 01-27
        31, 32, 33, 34,
        41, 42, 49,
        *range(50, 59),   # 50-58
        60, 61, 62, 65,
        71, 72,
        81, 99,
    )
}
|
|
# Values accepted in the BillAs field. Entries are lower-case because
# rule_bill_as_valid lower-cases the field before the lookup; several
# spellings of the same option are listed so each one is accepted.
VALID_BILL_AS: set = {
    "primary",
    "secondary",
    "tertiary",
    # Self-pay spelling variants.
    "self-pay",
    "selfpay",
    "self pay",
    # Workers' compensation spelling variants.
    "workers comp",
    "workerscomp",
    "auto",
    "liability",
    "other",
}
|
|
|
|
| |
| |
| |
|
|
| |
# Compiled once at import time; every rule below reuses these objects.

# Person name: must start with a letter, followed by up to 49 more characters
# drawn from letters, whitespace, hyphen, apostrophe and period (1-50 total).
_NAME_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,49}$")

# City: same character set as names, but up to 60 characters in total.
_CITY_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,59}$")

# Zip: five digits, optionally followed by a hyphen and four more digits.
_ZIP_PATTERN = re.compile(r"^\d{5}(-\d{4})?$")

# Policy number: 3 to 30 letters, digits or hyphens.
_POLICY_PATTERN = re.compile(r"^[A-Za-z0-9\-]{3,30}$")

# Phone: 10 to 20 characters made of digits, whitespace and - . ( ) +.
# This only screens the character set; rule_phone_format counts the digits.
_PHONE_PATTERN = re.compile(r"^[\d\s\-\.\(\)\+]{10,20}$")

# Date: the value must *start* with a YYYY-MM-DD shaped prefix. There is no
# end anchor, so trailing text (for example a time component) is tolerated.
_DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}")
|
|
|
|
def _nonempty(row: Dict, key: str) -> str:
    """Return the value stored under `key` in `row` as a stripped string.

    The value is fetched through `get` with "" as the default, converted with
    `str()` and stripped of surrounding whitespace. Callers treat an empty
    result as "field not supplied" and skip their format check.

    NOTE(review): if `get` can hand back None for a present-but-null field,
    `str()` turns it into the text "None", which is not empty - confirm how
    `get` treats nulls.
    """
    raw_value = get(row, key, "")
    as_text = str(raw_value)
    return as_text.strip()
|
|
|
|
| |
| |
| |
|
|
def rule_firstname_format(row: Dict) -> Optional[RuleResult]:
    """Flag a FirstName that does not match `_NAME_PATTERN`.

    Returns None when the field is empty (format is not checked here) or when
    it matches; otherwise returns the `fail(...)` result for rule
    "rFirstNameFormat". The pattern also accepts periods, although the message
    does not list them.
    """
    first_name = _nonempty(row, "FirstName")
    if not first_name or _NAME_PATTERN.match(first_name):
        return None
    return fail(
        "rFirstNameFormat",
        f"FirstName '{first_name}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.",
    )
|
|
|
|
def rule_lastname_format(row: Dict) -> Optional[RuleResult]:
    """Flag a LastName that does not match `_NAME_PATTERN`.

    Returns None when the field is empty or matches; otherwise returns the
    `fail(...)` result for rule "rLastNameFormat". The pattern also accepts
    periods, although the message does not list them.
    """
    last_name = _nonempty(row, "LastName")
    if not last_name or _NAME_PATTERN.match(last_name):
        return None
    return fail(
        "rLastNameFormat",
        f"LastName '{last_name}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.",
    )
|
|
|
|
| |
| |
| |
|
|
def rule_city_format(row: Dict) -> Optional[RuleResult]:
    """Flag a City that does not match `_CITY_PATTERN`.

    Returns None when the field is empty or matches; otherwise returns the
    `fail(...)` result for rule "rCityFormat".
    """
    city = _nonempty(row, "City")
    if not city or _CITY_PATTERN.match(city):
        return None
    return fail(
        "rCityFormat",
        f"City '{city}' contains invalid characters. Use letters, hyphens, or spaces only.",
    )
|
|
|
|
def rule_state_valid(row: Dict) -> Optional[RuleResult]:
    """Flag a State that is not one of `VALID_US_STATES`.

    The field is stripped and upper-cased before the lookup, so "il" and
    " IL " are both accepted. Returns None when the field is empty or
    recognised; otherwise returns the `fail(...)` result for rule
    "rStateInvalid". The message shows the upper-cased value.
    """
    state = _nonempty(row, "State").upper()
    if not state or state in VALID_US_STATES:
        return None
    # The range separator is a plain ASCII hyphen: the previous text carried a
    # mis-encoded dash that surfaced as a stray character in the message.
    return fail(
        "rStateInvalid",
        f"State '{state}' is not a recognised US postal code (AL-WY, DC, PR, etc.).",
    )
|
|
|
|
def rule_zip_format(row: Dict) -> Optional[RuleResult]:
    """Flag a Zip that is neither 5 digits nor the 5+4 hyphenated form.

    Returns None when the field is empty or matches `_ZIP_PATTERN`; otherwise
    returns the `fail(...)` result for rule "rZipFormat".
    """
    zip_code = _nonempty(row, "Zip")
    if not zip_code or _ZIP_PATTERN.match(zip_code):
        return None
    return fail(
        "rZipFormat",
        f"Zip '{zip_code}' must be 5 digits or 5+4 format (e.g. 60601 or 60601-1234).",
    )
|
|
|
|
| |
| |
| |
|
|
def rule_pos_valid(row: Dict) -> Optional[RuleResult]:
    """Flag a POS value that is not in `VALID_POS_CODES`.

    The comparison is an exact string match on the stripped field, so an
    unpadded value such as "1" is rejected. Returns None when the field is
    empty or recognised; otherwise returns the `fail(...)` result for rule
    "rPOSInvalid".
    """
    pos_code = _nonempty(row, "POS")
    if not pos_code or pos_code in VALID_POS_CODES:
        return None
    return fail(
        "rPOSInvalid",
        f"POS '{pos_code}' is not a recognised CMS Place-of-Service code.",
    )
|
|
|
|
def rule_bill_as_valid(row: Dict) -> Optional[RuleResult]:
    """Flag a BillAs value that is not in `VALID_BILL_AS`.

    The field is stripped and lower-cased for the lookup. Returns None when it
    is empty or recognised; otherwise returns the `fail(...)` result for rule
    "rBillAsInvalid". The message quotes the raw value as returned by `get`,
    not the normalised one, and lists only some of the accepted options.
    """
    normalised = _nonempty(row, "BillAs").lower()
    if not normalised or normalised in VALID_BILL_AS:
        return None
    original_value = get(row, 'BillAs')
    return fail(
        "rBillAsInvalid",
        f"BillAs '{original_value}' is not valid. Use Primary, Secondary, Tertiary, Self-Pay, or Other.",
    )
|
|
|
|
| |
| |
| |
|
|
def _parse_iso_date(value: str) -> Optional[date]:
    """Parse the leading YYYY-MM-DD portion of `value` into a `date`.

    Surrounding whitespace is ignored. Returns None when the text does not
    start with a YYYY-MM-DD shaped prefix, or when that prefix is not a real
    calendar date (for example month 13). Anything after the first ten
    characters is disregarded.
    """
    candidate = value.strip()
    if _DATE_PATTERN.match(candidate) is None:
        return None
    try:
        parsed = date.fromisoformat(candidate[:10])
    except ValueError:
        # Right shape, impossible date.
        return None
    return parsed
|
|
|
|
def _date_format_rule(field: str, rule_name: str, msg_label: str):
    """Build a rule that requires `field` to hold a parseable ISO date.

    Args:
        field: Row key to read.
        rule_name: Rule name passed to `fail` when the value is not parseable.
        msg_label: Label used for the field in the failure message.

    Returns:
        A function taking a row and returning None when the field is empty or
        parses via `_parse_iso_date`, and the `fail(...)` result otherwise.
        Its `__name__` is set to "rule_" + `rule_name`.
    """
    def rule(row: Dict) -> Optional[RuleResult]:
        text = _nonempty(row, field)
        if not text or _parse_iso_date(text) is not None:
            return None
        return fail(rule_name, f"{msg_label} '{text}' is not a valid date (expected YYYY-MM-DD).")

    # Give each generated rule a distinguishable name instead of plain "rule".
    rule.__name__ = f"rule_{rule_name}"
    return rule
|
|
# One ISO-date format rule per date field. Each emits its own rule name
# (rDateFormatDOB, rDateFormatDOS, rDateFormatSub, rDateFormatAdm,
# rDateFormatDis) so a failure identifies the offending field.
rule_dob_format = _date_format_rule(
    "DateOfBirth", "rDateFormatDOB", "DateOfBirth"
)
rule_dos_format = _date_format_rule(
    "DOS", "rDateFormatDOS", "DOS"
)
rule_sub_date_format = _date_format_rule(
    "SubmissionDate", "rDateFormatSub", "SubmissionDate"
)
rule_adm_date_format = _date_format_rule(
    "AdmissionDate", "rDateFormatAdm", "AdmissionDate"
)
rule_dis_date_format = _date_format_rule(
    "DischargeDate", "rDateFormatDis", "DischargeDate"
)
|
|
|
|
def rule_dob_range(row: Dict) -> Optional[RuleResult]:
    """Flag a DateOfBirth that parses but falls outside a plausible range.

    Returns None when the field is empty or unparseable (an unparseable value
    is left to the date-format rule). Otherwise the checks run in this order,
    each returning the `fail(...)` result for rule "rDOBRange":

    1. the date is later than today (`date.today()`),
    2. the year is before 1900,
    3. the implied age, in days / 365.25, exceeds 125 years.
    """
    v = _nonempty(row, "DateOfBirth")
    if not v:
        return None
    dob = _parse_iso_date(v)
    if dob is None:
        # Format problems are not this rule's concern.
        return None
    today = date.today()
    if dob > today:
        return fail("rDOBRange", f"DateOfBirth {dob} is in the future.")
    if dob.year < 1900:
        return fail("rDOBRange", f"DateOfBirth year {dob.year} is unreasonable.")
    age = (today - dob).days / 365.25
    if age > 125:
        # Plain ASCII hyphen: the previous text carried a mis-encoded dash
        # that surfaced as a stray character in the message.
        return fail("rDOBRange", f"DateOfBirth implies age {age:.0f} years - check the date.")
    return None
|
|
|
|
| |
| |
| |
|
|
def rule_units_format(row: Dict) -> Optional[RuleResult]:
    """Flag ServiceUnits that is not a positive whole number.

    Returns None when the field is empty or holds a positive integer value
    (integral floats such as "2.0" are accepted). Returns the `fail(...)`
    result for rule "rUnitsFormat" when the text is not numeric, is zero or
    negative, has a fractional part, or is "nan"/"inf".
    """
    v = _nonempty(row, "ServiceUnits")
    if not v:
        return None
    try:
        units = float(v)
    except ValueError:
        units = None
    # float.is_integer() is False for nan and +/-inf, so non-finite input is
    # reported as a format failure. The earlier `n != int(n)` comparison
    # raised ValueError / OverflowError on those values instead.
    if units is None or units <= 0 or not units.is_integer():
        return fail("rUnitsFormat", f"ServiceUnits '{v}' must be a positive integer.")
    return None
|
|
|
|
def rule_charges_format(row: Dict) -> Optional[RuleResult]:
    """Flag TotalCharges that is not a finite, non-negative number.

    Commas and dollar signs are removed before parsing, so "$1,250.00" is
    accepted. Zero is accepted. Returns None when the field is empty or valid;
    otherwise returns the `fail(...)` result for rule "rChargesFormat".
    """
    import math  # local import: only this rule needs it

    v = _nonempty(row, "TotalCharges")
    if not v:
        return None
    cleaned = v.replace(",", "").replace("$", "").strip()
    try:
        amount = float(cleaned)
    except ValueError:
        return fail("rChargesFormat", f"TotalCharges '{v}' is not a valid number.")
    # float() parses "nan" and "inf" without error; neither is a usable
    # charge amount, so treat them like any other unparseable value.
    if not math.isfinite(amount):
        return fail("rChargesFormat", f"TotalCharges '{v}' is not a valid number.")
    if amount < 0:
        return fail("rChargesFormat", f"TotalCharges {amount} cannot be negative.")
    return None
|
|
|
|
| |
| |
| |
|
|
def rule_phone_format(row: Dict) -> Optional[RuleResult]:
    """Flag a phone value that is not a 10-digit number.

    The value is read from "Phone Number", falling back to "Phone" when the
    first is empty. Returns None when both are empty. The check has two
    stages, each returning the `fail(...)` result for rule "rPhoneFormat":
    the text must match `_PHONE_PATTERN` (allowed characters and length), and
    it must then contain exactly 10 digits once everything else is removed.
    """
    phone = _nonempty(row, "Phone Number")
    if not phone:
        phone = _nonempty(row, "Phone")
    if not phone:
        return None

    if _PHONE_PATTERN.match(phone) is None:
        return fail("rPhoneFormat", f"Phone '{phone}' must contain 10 digits (formatting allowed).")

    digit_count = len(re.sub(r"\D", "", phone))
    if digit_count == 10:
        return None
    return fail("rPhoneFormat", f"Phone '{phone}' must contain exactly 10 digits, got {digit_count}.")
|
|
|
|
def rule_policy_format(row: Dict) -> Optional[RuleResult]:
    """Flag a PolicyNumber that does not match `_POLICY_PATTERN`.

    Returns None when the field is empty or consists of 3 to 30 letters,
    digits or hyphens; otherwise returns the `fail(...)` result for rule
    "rPolicyFormat".
    """
    policy = _nonempty(row, "PolicyNumber")
    if not policy or _POLICY_PATTERN.match(policy):
        return None
    # Plain ASCII hyphen in "3-30": the previous text carried a mis-encoded
    # dash that surfaced as a stray character in the message.
    return fail(
        "rPolicyFormat",
        f"PolicyNumber '{policy}' must be 3-30 alphanumeric characters (hyphens OK).",
    )
|
|
|
|
| |
| |
| |
|
|
# Every format rule defined in this module, in the order they are listed for
# execution. Each entry takes a row and returns None or a failure result.
FIELD_FORMAT_RULES: List[RuleFunc] = [
    # Names
    rule_firstname_format,
    rule_lastname_format,
    # Address
    rule_city_format,
    rule_state_valid,
    rule_zip_format,
    # Billing codes
    rule_pos_valid,
    rule_bill_as_valid,
    # Dates
    rule_dob_format,
    rule_dos_format,
    rule_sub_date_format,
    rule_adm_date_format,
    rule_dis_date_format,
    rule_dob_range,
    # Numeric fields
    rule_units_format,
    rule_charges_format,
    # Contact and policy
    rule_phone_format,
    rule_policy_format,
]
|
|