File size: 10,327 Bytes
1e664a3 1b8d0f1 1e664a3 1b8d0f1 1e664a3 1b8d0f1 1e664a3 1b8d0f1 1e664a3 1b8d0f1 b038fc2 1b8d0f1 1e664a3 b038fc2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 | import pandas as pd
import numpy as np
import os
import random
from datetime import datetime, timedelta
def create_data_folder():
    """Ensure the ``Data`` output directory exists in the working directory.

    Safe to call repeatedly: an existing directory is left untouched.

    Raises:
        FileExistsError: if ``Data`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the folder between check and creation.
    os.makedirs('Data', exist_ok=True)
def _claim_denial_risk(total_charge, service_line, complexity):
    """Return the denial probability for one claim, capped at 0.95."""
    risk = 0.02  # Baseline 2%
    # 1. High charges > 80k have high denial risk
    if total_charge > 80000:
        risk += 0.45
    # 2. Oncology and Cardiology have higher base risk
    if service_line == 'Oncology':
        risk += 0.25
    if service_line == 'Cardiology':
        risk += 0.15
    # 3. MCC complexity significantly adds risk
    if complexity == 'MCC':
        risk += 0.20
    return min(risk, 0.95)


def generate_claims_data(n=20000):
    """Write ``Data/claims.csv`` with ``n`` synthetic claims.

    Denials are sampled from a rule-based risk (charges above 80k, Oncology /
    Cardiology service lines and MCC complexity each raise it) so that the
    file contains learnable patterns.

    The output is reproducible: NumPy's global generator is seeded with 42
    (this reseeds global NumPy state, as before) and all standard-library
    randomness comes from a private ``random.Random(42)``, so the global
    ``random`` state is neither needed nor modified.

    Args:
        n: number of claim rows to generate.

    The ``Data`` directory must already exist.
    """
    np.random.seed(42)
    # Private generator: seeding only NumPy left every `random`-based column
    # different on each run.
    rng = random.Random(42)

    service_lines = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology']
    # Plain list of ints: choice() is only specified for sequences, and its
    # emptiness check has rejected NumPy arrays in some Python 3.11 releases.
    drgs = np.random.randint(100, 999, size=200).tolist()

    # Base data
    total_charges = np.random.uniform(5000, 150000, n)
    service_line_choices = [rng.choice(service_lines) for _ in range(n)]
    complexity_choices = np.random.choice(['MCC', 'CC', 'Non-CC'], n, p=[0.25, 0.45, 0.3])

    # Logic-based denial generation: one Bernoulli draw per claim against
    # that claim's computed risk.
    is_denied = [
        1 if rng.random() < _claim_denial_risk(charge, line, level) else 0
        for charge, line, level in zip(total_charges, service_line_choices, complexity_choices)
    ]

    data = {
        'Claim_ID': [f'CLM{100000+i}' for i in range(n)],
        'Patient_ID': [f'PT{rng.randint(1, 5000)}' for _ in range(n)],
        'Service_Line': service_line_choices,
        'DRG_Code': [rng.choice(drgs) for _ in range(n)],
        'Admission_Date': [datetime(2023, 1, 1) + timedelta(days=rng.randint(0, 700)) for _ in range(n)],
        'Primary_Diagnosis': [f'I{rng.randint(10, 99)}' for _ in range(n)],
        'Total_Charges': total_charges,
        'Reimbursement': np.random.uniform(2000, 120000, n),
        'Is_Denied': is_denied,
        'Complexity_Level': complexity_choices
    }
    df = pd.DataFrame(data)
    df.to_csv('Data/claims.csv', index=False)
    print(f"Created Data/claims.csv with {n} rows and ML patterns")
def generate_cms_rules(n=250):
    """Write ``Data/cms_rules_2025.csv`` with curated and random CMS rules.

    Six hand-written demo rules always come first; ``n - 6`` randomly
    generated filler rules follow (none when ``n <= 6``), so the file never
    holds fewer than six rules. Filler rules draw from the global ``random``
    generator; seed it beforehand for repeatable output.

    Args:
        n: requested total number of rules.

    The ``Data`` directory must already exist.
    """
    rule_types = ['DRG_Logic', 'CC_MCC_Update', 'Coding_Addition', 'HCC_Revisions', 'Payment_Policy', 'Quality_Penalty', 'Telehealth', 'Site_of_Care', 'OPPS_Bundling', 'NCD_LCD_Update', 'Value_Based_Program']
    targets = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology', 'Urology', 'Nephrology']
    changes = ['Weight Decrease', 'Weight Increase', 'Reclassification', 'New CPT Codes', 'Weight Shift', 'Site-of-care shift', 'Inclusion Shift', 'Readmission Adjustment', 'Rate Standardization', 'APC Bundling', 'Coverage Determination', 'Penalty Increase']

    rules = [
        # 1. Orthopedic Bundle (User Example 1)
        {
            'Rule_ID': 'R2025_BUND_01',
            'Type': 'OPPS_Bundling',
            'Target': 'Orthopedics',
            'Change': 'APC Bundling',
            'Impact_Score': 0.85,
            'Description': "CMS 2025 OPPS Update: Orthopedic supply costs (HCPCS C1713) are now 'packaged' into APC 5114 flat fee. Separate pass-through billing is no longer permitted."
        },
        # 2. Sepsis Reclassification (User Demo Question)
        {
            'Rule_ID': 'R2025_SEPSIS_02',
            'Type': 'DRG_Logic',
            'Target': 'Internal Medicine',
            'Change': 'Reclassification',
            'Impact_Score': 0.78,
            'Description': "2025 Sepsis Reclassification: Sepsis with Major Complications (MCC) now requires documented 'Organ System Failure' for DRG 871. Failure to document results in downcode to DRG 872, risking $4,200 loss per case."
        },
        # 3. TKA Outpatient Shift
        {
            'Rule_ID': 'R2025_TKA_03',
            'Type': 'Site_of_Care',
            'Target': 'Orthopedics',
            'Change': 'Site-of-care shift',
            'Impact_Score': 0.92,
            'Description': "2025 TKA Shift: Total Knee Arthroplasty (TKA) procedures are being shifted from Inpatient (IPPS) to Outpatient (OPPS) for healthy populations. Projected reimbursement drop from $14k to $9k per case."
        },
        # 4. New Code Addition (Requirement 3B)
        {
            'Rule_ID': 'R2025_CODE_05',
            'Type': 'Coding_Addition',
            'Target': 'Cardiology',
            'Change': 'New CPT Codes',
            'Impact_Score': 0.45,
            'Description': "CMS 2025 update adds new descriptor codes for remote cardiac monitoring. $250 increase in reimbursement per patient per month for eligible heart failure cases."
        },
        # 5. NCD Coverage Policy (Requirement 1)
        {
            'Rule_ID': 'R2025_NCD_06',
            'Type': 'NCD_LCD_Update',
            'Target': 'Neurology',
            'Change': 'Coverage Determination',
            'Impact_Score': 0.72,
            'Description': "New National Coverage Determination (NCD) for Alzheimer's therapeutics. Stringent clinical criteria for coverage must be met for Medicare payment eligibility."
        },
        # 6. Value-Based Penalty (Requirement 3E)
        {
            'Rule_ID': 'R2025_VBP_07',
            'Type': 'Value_Based_Program',
            'Target': 'General Surgery',
            'Change': 'Penalty Increase',
            'Impact_Score': 0.55,
            'Description': "MSSP Shared Savings Update: Increased weighting for surgical site infection metrics. Organizations in the bottom 25th percentile face up to 2% billing reduction."
        },
    ]

    # Random filler rules up to the requested total.
    for i in range(n - len(rules)):
        rule_type = random.choice(rule_types)
        target = random.choice(targets)
        change = random.choice(changes)
        impact = round(random.uniform(0.1, 0.95), 2)
        rules.append({
            'Rule_ID': f'R2025_{100+i}',
            'Type': rule_type,
            'Target': target,
            'Change': change,
            'Impact_Score': impact,
            # ':.0%' prints a whole percentage; multiplying the float by 100
            # inline produced text such as '56.99999999999999%'.
            'Description': f"2025 {rule_type} update for {target}. Focus on {change} with a systemic impact of {impact:.0%}."
        })

    df = pd.DataFrame(rules)
    df.to_csv('Data/cms_rules_2025.csv', index=False)
    # Report the real row count: it differs from n whenever n < 6.
    print(f"Created Data/cms_rules_2025.csv with {len(df)} rules")
def generate_chargemaster(n=2500):
    """Write ``Data/chargemaster.csv``, a synthetic chargemaster (CDM) table.

    The table starts with 750 fixed Orthopedics rows (350 implant supply rows
    and 400 knee-arthroplasty rows) and is padded with random rows up to
    ``n``. When ``n`` is 750 or less only the fixed rows are written. Random
    rows use the global ``random`` generator.

    The ``Data`` directory must already exist.
    """
    column_names = ['CDM_Code', 'Description', 'Service_Line', 'Status', 'Base_Charge']
    service_lines = ['Orthopedics', 'Cardiology', 'Pulmonology', 'Oncology', 'Internal Medicine', 'Neurology', 'General Surgery', 'Endocrinology', 'Gastroenterology']
    filler_statuses = ['Active', 'Pass-Through', 'Inactive']

    # 1. The "conflict case": implant supplies still flagged 'Pass-Through'
    #    although, per the original author's note, they should be 'Packaged'.
    #    350 rows is 14% of the default 2500-row table.
    records = [
        (f'HCPCS_C1713_{idx}', 'Orthopedic Implant (Plate/Screw) - Unit A', 'Orthopedics', 'Pass-Through', 7000)
        for idx in range(350)
    ]

    # 2. Other Orthopedic codes.
    records.extend(
        (f'CPT_27447_{idx}', 'Total Knee Arthroplasty (TKA)', 'Orthopedics', 'Active', 15000)
        for idx in range(400)
    )

    # 3. Random filler; draws happen in the order service line, status, charge.
    for idx in range(n - len(records)):
        line = random.choice(service_lines)
        status = random.choice(filler_statuses)
        charge = random.randint(50, 45000)
        records.append((f'CODE_{10000+idx}', f'Hospital Procedure/Supply {idx}', line, status, charge))

    df = pd.DataFrame(records, columns=column_names)
    df.to_csv('Data/chargemaster.csv', index=False)
    print(f"Created Data/chargemaster.csv with {len(df)} records")
def generate_hcc_weights():
    """Write ``Data/hcc_weights.csv`` with old and new weights per condition.

    Each of the 20 listed conditions gets a sequential ``HCC<k>`` code and two
    independent weights drawn uniformly from [0.1, 2.5) using NumPy's global
    generator. This function does not seed it, so results depend on the
    generator's state at call time.

    The ``Data`` directory must already exist.
    """
    conditions = [
        'Diabetes w/ Complications', 'COPD', 'CHF', 'End-Stage Renal Disease',
        'Major Depressive Disorder', 'Morbid Obesity', 'Rheumatoid Arthritis',
        'Vascular Disease', 'Cerebral Palsy', 'Multiple Sclerosis',
        'HIV/AIDS', 'Metastatic Cancer', 'Septicemia', 'Drug/Alcohol Use Disorder',
        'Paraplegia', 'Intestinal Obstruction', 'Severe Hematological Disorders',
        'Heart Failure', 'Acute Myocardial Infarction', 'Angina Pectoris'
    ]
    count = len(conditions)

    # Old weights are drawn before new ones; swapping the draws would change
    # the values produced from a given generator state.
    old_weights = np.random.uniform(0.1, 2.5, count)
    new_weights = np.random.uniform(0.1, 2.5, count)

    table = pd.DataFrame({
        'HCC_Code': [f'HCC{position}' for position, _ in enumerate(conditions, start=1)],
        'Description': conditions,
        'Old_Weight': old_weights,
        'New_Weight': new_weights,
    })
    table.to_csv('Data/hcc_weights.csv', index=False)
    print("Created Data/hcc_weights.csv")
def generate_denials_data(n=3000):
    """Generates sample denial data for executive summary KPIs.

    Writes ``Data/sample_denials_3000.csv`` with ``n`` rows; the file name is
    fixed and does not follow ``n``.

    The output is reproducible: NumPy's global generator is seeded with 99
    (this reseeds global NumPy state, as before) and all standard-library
    randomness comes from a private ``random.Random(99)``, so the global
    ``random`` state is neither needed nor modified.

    Args:
        n: number of denial rows to generate.

    The ``Data`` directory must already exist.
    """
    np.random.seed(99)
    # Private generator: seeding only NumPy left the service line, status and
    # reason columns different on every run.
    rng = random.Random(99)

    statuses = ['Paid', 'Denied', 'Open', 'Appealed', 'Partially Paid']
    # NOTE(review): 'Surgery' and 'Medicine' differ from the 'General Surgery'
    # and 'Internal Medicine' labels used by the other generators in this
    # file; confirm whether the mismatch is intended before joining on
    # Service_Line.
    service_lines = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'Surgery', 'Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology']

    data = {
        'Denial_ID': [f'DEN{50000+i}' for i in range(n)],
        'Claim_ID': [f'CLM{200000+i}' for i in range(n)],
        'Service_Line': [rng.choice(service_lines) for _ in range(n)],
        'Status': [rng.choice(statuses) for _ in range(n)],
        'Denied_Amount': np.random.uniform(500, 25000, n),
        'Reason': [f'Reason Code {rng.randint(1, 45)}' for _ in range(n)]
    }
    df = pd.DataFrame(data)
    df.to_csv('Data/sample_denials_3000.csv', index=False)
    print("Created Data/sample_denials_3000.csv")
if __name__ == '__main__':
    # NOTE(review): presumably meant to switch off the optional `bottleneck`
    # accelerator. pandas is already imported by the time this runs; confirm
    # the variable is still honored when set this late.
    os.environ['BOTTLENECK_DISABLE'] = '1'

    # Order matters: the output folder must exist before any generator
    # writes into it, and generate_hcc_weights draws from whatever NumPy
    # state the earlier generators leave behind.
    pipeline = (
        create_data_folder,
        generate_claims_data,
        generate_cms_rules,
        generate_chargemaster,
        generate_hcc_weights,
        generate_denials_data,
    )
    for step in pipeline:
        step()
|