RejecetedDeniedClaimModel / Field_Format_Rules.py
AinaUmer's picture
Upload Field_Format_Rules.py
5b47095 verified
"""
Field_Format_Rules.py
=====================
Validates the *format* of every common claim field — rejects malformed values,
not just missing ones. Companion to `Hard_Rule_Engine` (which checks for
presence) and `Demographic_CPT_Rules` (which checks clinical appropriateness).
All accepted-value sets live at the top of this file. Add an entry, rule
auto-picks it up.
Rule names emitted
------------------
rFirstNameFormat, rLastNameFormat Names with digits / bad chars
rCityFormat City with digits / odd chars
rStateInvalid State not a recognised US code
rZipFormat Zip not 5 digits or ZIP+4 (12345-6789)
rPOSInvalid POS not in CMS 2-digit list
rBillAsInvalid BillAs not Primary/Secondary/etc.
rDateFormatDOB, rDateFormatDOS, rDateFormatSub, rDateFormatAdm, rDateFormatDis Date field not parseable as YYYY-MM-DD
rDOBRange DOB outside sensible range
rUnitsFormat ServiceUnits not a positive integer
rChargesFormat TotalCharges not a non-negative number
rPhoneFormat Phone not 10-digit US format
rPolicyFormat PolicyNumber contains bad chars
"""
from __future__ import annotations

import math
import re
from datetime import date
from typing import Dict, List, Optional

from Hard_Rule_Engine import RuleResult, RuleFunc, fail, get
# ═══════════════════════════════════════════════════════════════════════════
# VALID-VALUE TABLES (edit to extend)
# ═══════════════════════════════════════════════════════════════════════════
# Two-letter codes accepted for the State field. Lookups are done against the
# upper-cased field value, so every entry here must be upper case.
VALID_US_STATES: set = (
    # The 50 states
    {
        "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
        "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
        "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
        "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
        "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
    }
    # Territories + DC
    | {"DC", "PR", "VI", "GU", "AS", "MP"}
    # Military
    | {"AE", "AP", "AA"}
)
# Medicare Place-of-Service 2-digit codes (CMS list abridged — add more as needed).
# Codes are listed numerically and rendered as zero-padded 2-character strings,
# which is the form the POS field is compared against.
VALID_POS_CODES: set = {
    f"{code:02d}"
    for code in (
        *range(1, 28),          # 01-27
        31, 32, 33, 34,
        41, 42, 49,
        *range(50, 59),         # 50-58
        60, 61, 62, 65,
        71, 72,
        81, 99,
    )
}
# Accepted BillAs values. Entries are lower case because the field value is
# lower-cased before the membership test; several spellings of the same
# category are listed explicitly.
VALID_BILL_AS: set = {
    # Payer order
    "primary",
    "secondary",
    "tertiary",
    # Self-pay spellings
    "self-pay",
    "selfpay",
    "self pay",
    # Workers' compensation spellings
    "workers comp",
    "workerscomp",
    # Everything else
    "auto",
    "liability",
    "other",
}
# ═══════════════════════════════════════════════════════════════════════════
# REGEXES
# ═══════════════════════════════════════════════════════════════════════════
# Names: letters + space/hyphen/apostrophe/period; 1-50 chars.
_NAME_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,49}$")
# City: letters + space/hyphen/period; 1-60 chars.
_CITY_PATTERN = re.compile(r"^[A-Za-z][A-Za-z\s\-'\.]{0,59}$")
# Zip: 5 digits, optional -4 digits.
_ZIP_PATTERN = re.compile(r"^\d{5}(-\d{4})?$")
# Policy: alphanumeric + optional hyphens; 3-30 chars.
_POLICY_PATTERN = re.compile(r"^[A-Za-z0-9\-]{3,30}$")
# Phone: 10 digits (allowing common separators).
_PHONE_PATTERN = re.compile(r"^[\d\s\-\.\(\)\+]{10,20}$")
# ISO date (YYYY-MM-DD) β€” we accept this format only.
_DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}")
def _nonempty(row: Dict, key: str) -> str:
    """Return the value stored under *key* as a stripped string.

    The value is fetched with ``get(row, key, "")`` and converted with
    ``str``. ``None`` and float NaN are treated as "no value" and yield ``""``;
    without that guard they would stringify to ``"None"`` / ``"nan"`` and be
    validated as if they were real field content.

    NOTE(review): whether ``get`` already normalises missing markers is not
    visible from this module; the guard is a no-op if it does.

    Args:
        row: The claim row to read from.
        key: Name of the field to read.

    Returns:
        The stripped string form of the value, or ``""`` when it is missing.
    """
    value = get(row, key, "")
    # NaN is the only float that is not equal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value).strip()
# ═══════════════════════════════════════════════════════════════════════════
# NAME RULES
# ═══════════════════════════════════════════════════════════════════════════
def rule_firstname_format(row: Dict) -> Optional[RuleResult]:
    """Check that FirstName, when present, matches the name pattern.

    Returns None when the field is empty or matches ``_NAME_PATTERN``;
    otherwise returns the ``rFirstNameFormat`` failure.
    """
    first_name = _nonempty(row, "FirstName")
    # Empty values are not this rule's concern; only malformed ones are flagged.
    if not first_name or _NAME_PATTERN.match(first_name):
        return None
    return fail(
        "rFirstNameFormat",
        f"FirstName '{first_name}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.",
    )
def rule_lastname_format(row: Dict) -> Optional[RuleResult]:
    """Check that LastName, when present, matches the name pattern.

    Returns None when the field is empty or matches ``_NAME_PATTERN``;
    otherwise returns the ``rLastNameFormat`` failure.
    """
    last_name = _nonempty(row, "LastName")
    # Empty values are not this rule's concern; only malformed ones are flagged.
    if not last_name or _NAME_PATTERN.match(last_name):
        return None
    return fail(
        "rLastNameFormat",
        f"LastName '{last_name}' contains invalid characters. Use letters, hyphens, apostrophes, or spaces only.",
    )
# ═══════════════════════════════════════════════════════════════════════════
# ADDRESS RULES
# ═══════════════════════════════════════════════════════════════════════════
def rule_city_format(row: Dict) -> Optional[RuleResult]:
    """Check that City, when present, matches the city pattern.

    Returns None when the field is empty or matches ``_CITY_PATTERN``;
    otherwise returns the ``rCityFormat`` failure.
    """
    city = _nonempty(row, "City")
    if not city:
        return None
    if _CITY_PATTERN.match(city):
        return None
    return fail(
        "rCityFormat",
        f"City '{city}' contains invalid characters. Use letters, hyphens, or spaces only.",
    )
def rule_state_valid(row: Dict) -> Optional[RuleResult]:
    """Check that State, when present, is one of ``VALID_US_STATES``.

    The field value is upper-cased before the lookup, so the check is
    case-insensitive. The upper-cased form is what appears in the message.

    Returns:
        None when the field is empty or recognised; otherwise the
        ``rStateInvalid`` failure.
    """
    v = _nonempty(row, "State").upper()
    if v and v not in VALID_US_STATES:
        # The range separator is a real en dash; it was previously a
        # mis-encoded byte sequence that showed up garbled in the message.
        return fail("rStateInvalid",
                    f"State '{v}' is not a recognised US postal code (AL–WY, DC, PR, etc.).")
    return None
def rule_zip_format(row: Dict) -> Optional[RuleResult]:
    """Check that Zip, when present, matches ``_ZIP_PATTERN``.

    Returns None when the field is empty or well-formed; otherwise returns
    the ``rZipFormat`` failure.
    """
    zip_code = _nonempty(row, "Zip")
    is_acceptable = not zip_code or bool(_ZIP_PATTERN.match(zip_code))
    if is_acceptable:
        return None
    return fail(
        "rZipFormat",
        f"Zip '{zip_code}' must be 5 digits or 5+4 format (e.g. 60601 or 60601-1234).",
    )
# ═══════════════════════════════════════════════════════════════════════════
# POS / BILL-AS RULES
# ═══════════════════════════════════════════════════════════════════════════
def rule_pos_valid(row: Dict) -> Optional[RuleResult]:
    """Check that POS, when present, is listed in ``VALID_POS_CODES``.

    The comparison is an exact string match against the stripped field value.

    Returns None when the field is empty or recognised; otherwise returns the
    ``rPOSInvalid`` failure.
    """
    pos_code = _nonempty(row, "POS")
    if not pos_code or pos_code in VALID_POS_CODES:
        return None
    return fail(
        "rPOSInvalid",
        f"POS '{pos_code}' is not a recognised CMS Place-of-Service code.",
    )
def rule_bill_as_valid(row: Dict) -> Optional[RuleResult]:
    """Check that BillAs, when present, is listed in ``VALID_BILL_AS``.

    The lookup uses the stripped, lower-cased value; the failure message
    shows the field as returned by ``get`` rather than the normalised form.

    Returns None when the field is empty or recognised; otherwise returns the
    ``rBillAsInvalid`` failure.
    """
    normalized = _nonempty(row, "BillAs").lower()
    if not normalized or normalized in VALID_BILL_AS:
        return None
    # Only fetched on the failure path, to echo the value as it was supplied.
    supplied = get(row, 'BillAs')
    return fail(
        "rBillAsInvalid",
        f"BillAs '{supplied}' is not valid. Use Primary, Secondary, Tertiary, Self-Pay, or Other.",
    )
# ═══════════════════════════════════════════════════════════════════════════
# DATE RULES
# ═══════════════════════════════════════════════════════════════════════════
def _parse_iso_date(value: str) -> Optional[date]:
    """Parse a YYYY-MM-DD string into a ``date``.

    The input is stripped and must match ``_DATE_PATTERN``. Only the first
    ten characters are handed to ``date.fromisoformat``.

    Returns:
        The parsed date, or None when the text does not match the pattern or
        names an impossible calendar date (e.g. month 13).
    """
    text = value.strip()
    if _DATE_PATTERN.match(text) is None:
        return None
    try:
        parsed = date.fromisoformat(text[:10])
    except ValueError:
        # Right shape, but not a real calendar date.
        return None
    return parsed
def _date_format_rule(field: str, rule_name: str, msg_label: str):
    """Build a rule that requires `field` to hold a parseable ISO date.

    Args:
        field: Row key to read.
        rule_name: Name passed to ``fail`` when the value cannot be parsed.
        msg_label: Label used for the field in the failure message.

    Returns:
        A rule function taking a row. It returns None when the field is empty
        or parses with ``_parse_iso_date``, and a failure otherwise. Its
        ``__name__`` is set to ``rule_<rule_name>``.
    """
    def rule(row: Dict) -> Optional[RuleResult]:
        raw = _nonempty(row, field)
        if not raw:
            return None
        if _parse_iso_date(raw) is not None:
            return None
        return fail(
            rule_name,
            f"{msg_label} '{raw}' is not a valid date (expected YYYY-MM-DD).",
        )

    # Give each generated rule a distinguishable name instead of plain "rule".
    rule.__name__ = f"rule_{rule_name}"
    return rule
# One generated format rule per date field. Each emits its own rule name so
# a failure identifies which date was malformed.
rule_dob_format = _date_format_rule(
    field="DateOfBirth", rule_name="rDateFormatDOB", msg_label="DateOfBirth"
)
rule_dos_format = _date_format_rule(
    field="DOS", rule_name="rDateFormatDOS", msg_label="DOS"
)
rule_sub_date_format = _date_format_rule(
    field="SubmissionDate", rule_name="rDateFormatSub", msg_label="SubmissionDate"
)
rule_adm_date_format = _date_format_rule(
    field="AdmissionDate", rule_name="rDateFormatAdm", msg_label="AdmissionDate"
)
rule_dis_date_format = _date_format_rule(
    field="DischargeDate", rule_name="rDateFormatDis", msg_label="DischargeDate"
)
def rule_dob_range(row: Dict) -> Optional[RuleResult]:
    """Flag a DateOfBirth that parses but is not a plausible birth date.

    Fails with ``rDOBRange`` when the date is after today, has a year before
    1900, or implies an age above 125 years. "Today" is ``date.today()``.

    Returns:
        None when the field is empty, cannot be parsed, or is within range;
        otherwise the ``rDOBRange`` failure.
    """
    v = _nonempty(row, "DateOfBirth")
    if not v:
        return None
    dob = _parse_iso_date(v)
    if dob is None:
        return None  # unparseable values are reported by the DateOfBirth format rule
    today = date.today()
    if dob > today:
        return fail("rDOBRange", f"DateOfBirth {dob} is in the future.")
    if dob.year < 1900:
        return fail("rDOBRange", f"DateOfBirth year {dob.year} is unreasonable.")
    # Approximate age in years; 365.25 averages out leap years.
    age = (today - dob).days / 365.25
    if age > 125:
        # The separator is a real em dash; it was previously a mis-encoded
        # byte sequence that showed up garbled in the message.
        return fail("rDOBRange", f"DateOfBirth implies age {age:.0f} years — check the date.")
    return None
# ═══════════════════════════════════════════════════════════════════════════
# NUMERIC FIELDS
# ═══════════════════════════════════════════════════════════════════════════
def rule_units_format(row: Dict) -> Optional[RuleResult]:
    """Check that ServiceUnits, when present, is a positive whole number.

    The value is parsed with ``float`` so that forms such as ``"3.0"`` are
    accepted. Text that parses to NaN or infinity (``"nan"``, ``"inf"``,
    ``"1e400"``) is reported as a failure rather than raising.

    Returns:
        None when the field is empty or a positive whole number; otherwise
        the ``rUnitsFormat`` failure.
    """
    v = _nonempty(row, "ServiceUnits")
    if not v:
        return None
    message = f"ServiceUnits '{v}' must be a positive integer."
    try:
        n = float(v)
    except ValueError:
        return fail("rUnitsFormat", message)
    # float.is_integer() is simply False for NaN and +/-infinity, whereas
    # int(n) raises ValueError / OverflowError on them and would crash the rule.
    if n <= 0 or not n.is_integer():
        return fail("rUnitsFormat", message)
    return None
def rule_charges_format(row: Dict) -> Optional[RuleResult]:
    """Check that TotalCharges, when present, is a finite, non-negative number.

    Thousands separators (``,``) and dollar signs are removed before parsing.
    Zero is accepted. Text that parses to NaN or infinity (``"nan"``,
    ``"inf"``) is rejected as not a valid number; it would otherwise slip
    past the negativity check.

    Returns:
        None when the field is empty or valid; otherwise the
        ``rChargesFormat`` failure.
    """
    v = _nonempty(row, "TotalCharges")
    if not v:
        return None
    cleaned = v.replace(",", "").replace("$", "").strip()
    try:
        n = float(cleaned)
    except ValueError:
        return fail("rChargesFormat", f"TotalCharges '{v}' is not a valid number.")
    if not math.isfinite(n):
        return fail("rChargesFormat", f"TotalCharges '{v}' is not a valid number.")
    if n < 0:
        return fail("rChargesFormat", f"TotalCharges {n} cannot be negative.")
    return None
# ═══════════════════════════════════════════════════════════════════════════
# PHONE / POLICY
# ═══════════════════════════════════════════════════════════════════════════
def rule_phone_format(row: Dict) -> Optional[RuleResult]:
    """Check that the phone field, when present, holds exactly 10 digits.

    The value is read from "Phone Number", falling back to "Phone" when that
    is empty. It must first match ``_PHONE_PATTERN`` (allowed characters and
    overall length) and must then contain exactly 10 digits.

    Returns None when no phone is present or it is valid; otherwise returns
    the ``rPhoneFormat`` failure for whichever check failed first.
    """
    phone = ""
    for key in ("Phone Number", "Phone"):
        phone = _nonempty(row, key)
        if phone:
            break
    if not phone:
        return None

    if _PHONE_PATTERN.match(phone) is None:
        return fail("rPhoneFormat", f"Phone '{phone}' must contain 10 digits (formatting allowed).")

    digit_count = len(re.findall(r"\d", phone))
    if digit_count == 10:
        return None
    return fail("rPhoneFormat", f"Phone '{phone}' must contain exactly 10 digits, got {digit_count}.")
def rule_policy_format(row: Dict) -> Optional[RuleResult]:
    """Check that PolicyNumber, when present, matches ``_POLICY_PATTERN``.

    Returns:
        None when the field is empty or well-formed; otherwise the
        ``rPolicyFormat`` failure.
    """
    v = _nonempty(row, "PolicyNumber")
    if v and not _POLICY_PATTERN.match(v):
        # The range separator is a real en dash; it was previously a
        # mis-encoded byte sequence that showed up garbled in the message.
        return fail("rPolicyFormat",
                    f"PolicyNumber '{v}' must be 3–30 alphanumeric characters (hyphens OK).")
    return None
# ═══════════════════════════════════════════════════════════════════════════
# MASTER LIST β€” imported by Additional_RBS_Rules.py
# ═══════════════════════════════════════════════════════════════════════════
# All format rules defined in this module, listed in the same order as the
# sections above. A new rule must be added here to be included in the list.
FIELD_FORMAT_RULES: List[RuleFunc] = [
    # Names
    rule_firstname_format, rule_lastname_format,
    # Address
    rule_city_format, rule_state_valid, rule_zip_format,
    # POS / BillAs
    rule_pos_valid, rule_bill_as_valid,
    # Dates: per-field format checks, then the DOB plausibility check
    rule_dob_format, rule_dos_format, rule_sub_date_format,
    rule_adm_date_format, rule_dis_date_format,
    rule_dob_range,
    # Numeric fields
    rule_units_format, rule_charges_format,
    # Phone / policy
    rule_phone_format, rule_policy_format,
]