| | import pandas as pd
|
| | import numpy as np
|
| | import os
|
| | import random
|
| | from datetime import datetime, timedelta
|
| |
|
def create_data_folder():
    """Create the ``Data`` output directory under the current working directory.

    Safe to call repeatedly: an already existing directory is left as is.
    """
    # exist_ok replaces the former exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs('Data', exist_ok=True)
|
| |
|
def generate_claims_data(n=20000):
    """Write ``n`` synthetic claims with a denial signal to ``Data/claims.csv``.

    Each claim's denial probability starts at 0.02 and rises by 0.45 for
    charges above 80,000, by 0.25 for Oncology, by 0.15 for Cardiology and by
    0.20 for MCC complexity (capped at 0.95). ``Is_Denied`` is sampled from
    that probability.

    The output is reproducible: NumPy's global generator is re-seeded with 42
    (this also affects later ``np.random`` calls in the same process) and
    every stdlib draw comes from a private ``random.Random(42)``.

    Args:
        n: Number of claim rows to generate.
    """
    np.random.seed(42)
    # The stdlib draws used to come from the unseeded global ``random``
    # module, so the file changed between runs despite the NumPy seed.
    rng = random.Random(42)

    service_lines = [
        'Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology',
        'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology',
        'Gastroenterology',
    ]
    # Pool of 200 DRG codes (upper bound 999 is exclusive); converted to plain
    # ints so the stdlib generator picks from an ordinary list.
    drg_pool = np.random.randint(100, 999, size=200).tolist()

    # NumPy draws stay in their original order so these columns are unchanged.
    total_charges = np.random.uniform(5000, 150000, n)
    service_line_choices = [rng.choice(service_lines) for _ in range(n)]
    complexity_choices = np.random.choice(['MCC', 'CC', 'Non-CC'], n, p=[0.25, 0.45, 0.3])

    is_denied = []
    for charge, line, complexity in zip(total_charges, service_line_choices, complexity_choices):
        risk = 0.02
        if charge > 80000:
            risk += 0.45
        if line == 'Oncology':
            risk += 0.25
        if line == 'Cardiology':
            risk += 0.15
        if complexity == 'MCC':
            risk += 0.20
        is_denied.append(1 if rng.random() < min(risk, 0.95) else 0)

    data = {
        'Claim_ID': [f'CLM{100000+i}' for i in range(n)],
        'Patient_ID': [f'PT{rng.randint(1, 5000)}' for _ in range(n)],
        'Service_Line': service_line_choices,
        'DRG_Code': [rng.choice(drg_pool) for _ in range(n)],
        'Admission_Date': [datetime(2023, 1, 1) + timedelta(days=rng.randint(0, 700)) for _ in range(n)],
        'Primary_Diagnosis': [f'I{rng.randint(10, 99)}' for _ in range(n)],
        'Total_Charges': total_charges,
        # NOTE(review): drawn independently of Total_Charges, so a row's
        # reimbursement can exceed its charges - confirm this is acceptable.
        'Reimbursement': np.random.uniform(2000, 120000, n),
        'Is_Denied': is_denied,
        'Complexity_Level': complexity_choices,
    }

    df = pd.DataFrame(data)
    # Make the function usable on its own, without a prior folder-creation step.
    os.makedirs('Data', exist_ok=True)
    df.to_csv('Data/claims.csv', index=False)
    print(f"Created Data/claims.csv with {n} rows and ML patterns")
|
| |
|
def generate_cms_rules(n=250):
    """Write a synthetic 2025 CMS rule set to ``Data/cms_rules_2025.csv``.

    Six hand-authored rules are always emitted first; the remainder, up to
    ``n`` rows in total, are random combinations of rule type, target service
    line and change. When ``n`` is smaller than six, only the six authored
    rules are written.

    Random rows draw from the global ``random`` module, so they are
    reproducible only if the caller seeds it.

    Args:
        n: Target number of rules, including the six authored ones.
    """
    rule_types = ['DRG_Logic', 'CC_MCC_Update', 'Coding_Addition', 'HCC_Revisions', 'Payment_Policy', 'Quality_Penalty', 'Telehealth', 'Site_of_Care', 'OPPS_Bundling', 'NCD_LCD_Update', 'Value_Based_Program']
    targets = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology', 'Urology', 'Nephrology']
    changes = ['Weight Decrease', 'Weight Increase', 'Reclassification', 'New CPT Codes', 'Weight Shift', 'Site-of-care shift', 'Inclusion Shift', 'Readmission Adjustment', 'Rate Standardization', 'APC Bundling', 'Coverage Determination', 'Penalty Increase']

    # Hand-authored scenario rules, always present at the top of the file.
    rules = [
        {
            'Rule_ID': 'R2025_BUND_01',
            'Type': 'OPPS_Bundling',
            'Target': 'Orthopedics',
            'Change': 'APC Bundling',
            'Impact_Score': 0.85,
            'Description': "CMS 2025 OPPS Update: Orthopedic supply costs (HCPCS C1713) are now 'packaged' into APC 5114 flat fee. Separate pass-through billing is no longer permitted.",
        },
        {
            'Rule_ID': 'R2025_SEPSIS_02',
            'Type': 'DRG_Logic',
            'Target': 'Internal Medicine',
            'Change': 'Reclassification',
            'Impact_Score': 0.78,
            'Description': "2025 Sepsis Reclassification: Sepsis with Major Complications (MCC) now requires documented 'Organ System Failure' for DRG 871. Failure to document results in downcode to DRG 872, risking $4,200 loss per case.",
        },
        {
            'Rule_ID': 'R2025_TKA_03',
            'Type': 'Site_of_Care',
            'Target': 'Orthopedics',
            'Change': 'Site-of-care shift',
            'Impact_Score': 0.92,
            'Description': "2025 TKA Shift: Total Knee Arthroplasty (TKA) procedures are being shifted from Inpatient (IPPS) to Outpatient (OPPS) for healthy populations. Projected reimbursement drop from $14k to $9k per case.",
        },
        {
            'Rule_ID': 'R2025_CODE_05',
            'Type': 'Coding_Addition',
            'Target': 'Cardiology',
            'Change': 'New CPT Codes',
            'Impact_Score': 0.45,
            'Description': "CMS 2025 update adds new descriptor codes for remote cardiac monitoring. $250 increase in reimbursement per patient per month for eligible heart failure cases.",
        },
        {
            'Rule_ID': 'R2025_NCD_06',
            'Type': 'NCD_LCD_Update',
            'Target': 'Neurology',
            'Change': 'Coverage Determination',
            'Impact_Score': 0.72,
            'Description': "New National Coverage Determination (NCD) for Alzheimer's therapeutics. Stringent clinical criteria for coverage must be met for Medicare payment eligibility.",
        },
        {
            'Rule_ID': 'R2025_VBP_07',
            'Type': 'Value_Based_Program',
            'Target': 'General Surgery',
            'Change': 'Penalty Increase',
            'Impact_Score': 0.55,
            'Description': "MSSP Shared Savings Update: Increased weighting for surgical site infection metrics. Organizations in the bottom 25th percentile face up to 2% billing reduction.",
        },
    ]

    # Clamp so a small n cannot produce a negative count.
    filler_count = max(n - len(rules), 0)
    for i in range(filler_count):
        rule_type = random.choice(rule_types)
        target = random.choice(targets)
        change = random.choice(changes)
        impact = round(random.uniform(0.1, 0.95), 2)

        rules.append({
            'Rule_ID': f'R2025_{100+i}',
            'Type': rule_type,
            'Target': target,
            'Change': change,
            'Impact_Score': impact,
            # ".0%" renders a clean whole percentage; the former
            # {impact*100} leaked float noise such as 56.99999999999999.
            'Description': f"2025 {rule_type} update for {target}. Focus on {change} with a systemic impact of {impact:.0%}.",
        })

    df = pd.DataFrame(rules)
    # Make the function usable on its own, without a prior folder-creation step.
    os.makedirs('Data', exist_ok=True)
    df.to_csv('Data/cms_rules_2025.csv', index=False)
    # Report what was actually written (differs from n when n < 6).
    print(f"Created Data/cms_rules_2025.csv with {len(df)} rules")
|
| |
|
def generate_chargemaster(n=2500):
    """Generate Temple's Chargemaster (CDM) table with batch records.

    The table is written to ``Data/chargemaster.csv`` and always starts with
    two fixed batches: 350 ``HCPCS_C1713_*`` orthopedic implant rows
    (Pass-Through, 7000) and 400 ``CPT_27447_*`` Total Knee Arthroplasty rows
    (Active, 15000). Random filler rows then bring the total up to ``n``; when
    ``n`` is below 750, only the fixed batches are written.

    Filler rows draw from the global ``random`` module, so they are
    reproducible only if the caller seeds it.

    Args:
        n: Target number of chargemaster rows, including the fixed batches.
    """
    service_lines = ['Orthopedics', 'Cardiology', 'Pulmonology', 'Oncology', 'Internal Medicine', 'Neurology', 'General Surgery', 'Endocrinology', 'Gastroenterology']
    columns = ['CDM_Code', 'Description', 'Service_Line', 'Status', 'Base_Charge']

    # One dict per row instead of five parallel lists that must stay aligned.
    records = [
        {
            'CDM_Code': f'HCPCS_C1713_{i}',
            'Description': 'Orthopedic Implant (Plate/Screw) - Unit A',
            'Service_Line': 'Orthopedics',
            'Status': 'Pass-Through',
            'Base_Charge': 7000,
        }
        for i in range(350)
    ]
    records.extend(
        {
            'CDM_Code': f'CPT_27447_{i}',
            'Description': 'Total Knee Arthroplasty (TKA)',
            'Service_Line': 'Orthopedics',
            'Status': 'Active',
            'Base_Charge': 15000,
        }
        for i in range(400)
    )

    # Clamp so a small n cannot produce a negative count.
    filler_count = max(n - len(records), 0)
    for i in range(filler_count):
        # Dict values are evaluated top to bottom, which keeps the draw order
        # service line -> status -> price.
        records.append({
            'CDM_Code': f'CODE_{10000+i}',
            'Description': f'Hospital Procedure/Supply {i}',
            'Service_Line': random.choice(service_lines),
            'Status': random.choice(['Active', 'Pass-Through', 'Inactive']),
            'Base_Charge': random.randint(50, 45000),
        })

    df = pd.DataFrame(records, columns=columns)
    # Make the function usable on its own, without a prior folder-creation step.
    os.makedirs('Data', exist_ok=True)
    df.to_csv('Data/chargemaster.csv', index=False)
    print(f"Created Data/chargemaster.csv with {len(df)} records")
|
| |
|
def generate_hcc_weights():
    """Write a table of old and new HCC weights to ``Data/hcc_weights.csv``.

    One row is produced per condition, labelled sequentially ``HCC1`` ..
    ``HCC20``. ``Old_Weight`` and ``New_Weight`` are independent uniform draws
    in [0.1, 2.5) from NumPy's global generator, so the values depend on
    whatever seed state the process is in when this is called.
    """
    # NOTE(review): 'CHF' and 'Heart Failure' look like the same condition
    # listed twice - confirm whether both rows are intended.
    conditions = [
        'Diabetes w/ Complications', 'COPD', 'CHF', 'End-Stage Renal Disease',
        'Major Depressive Disorder', 'Morbid Obesity', 'Rheumatoid Arthritis',
        'Vascular Disease', 'Cerebral Palsy', 'Multiple Sclerosis',
        'HIV/AIDS', 'Metastatic Cancer', 'Septicemia', 'Drug/Alcohol Use Disorder',
        'Paraplegia', 'Intestinal Obstruction', 'Severe Hematological Disorders',
        'Heart Failure', 'Acute Myocardial Infarction', 'Angina Pectoris',
    ]
    count = len(conditions)

    data = {
        'HCC_Code': [f'HCC{i}' for i in range(1, count + 1)],
        'Description': conditions,
        # Old weights are drawn before new ones, preserving the draw order.
        'Old_Weight': np.random.uniform(0.1, 2.5, count),
        'New_Weight': np.random.uniform(0.1, 2.5, count),
    }
    df = pd.DataFrame(data)
    # Make the function usable on its own, without a prior folder-creation step.
    os.makedirs('Data', exist_ok=True)
    df.to_csv('Data/hcc_weights.csv', index=False)
    print("Created Data/hcc_weights.csv")
|
| |
|
def generate_denials_data(n=3000):
    """Generate sample denial data for executive summary KPIs.

    Writes ``n`` rows to ``Data/sample_denials_3000.csv``; the file name is
    fixed regardless of ``n``.

    The output is reproducible: NumPy's global generator is re-seeded with 99
    (this also affects later ``np.random`` calls in the same process) and
    every stdlib draw comes from a private ``random.Random(99)``.

    Args:
        n: Number of denial rows to generate.
    """
    np.random.seed(99)
    # Service line, status and reason used to come from the unseeded global
    # ``random`` module, so the file changed between runs despite the seed.
    rng = random.Random(99)

    statuses = ['Paid', 'Denied', 'Open', 'Appealed', 'Partially Paid']
    # NOTE(review): 'Surgery' / 'Medicine' here versus 'General Surgery' /
    # 'Internal Medicine' in the claims and chargemaster tables - confirm
    # whether the labels are meant to line up across files.
    service_lines = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'Surgery', 'Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology']

    data = {
        'Denial_ID': [f'DEN{50000+i}' for i in range(n)],
        'Claim_ID': [f'CLM{200000+i}' for i in range(n)],
        'Service_Line': [rng.choice(service_lines) for _ in range(n)],
        'Status': [rng.choice(statuses) for _ in range(n)],
        'Denied_Amount': np.random.uniform(500, 25000, n),
        'Reason': [f'Reason Code {rng.randint(1, 45)}' for _ in range(n)],
    }
    df = pd.DataFrame(data)
    # Make the function usable on its own, without a prior folder-creation step.
    os.makedirs('Data', exist_ok=True)
    df.to_csv('Data/sample_denials_3000.csv', index=False)
    print("Created Data/sample_denials_3000.csv")
|
| |
|
def main():
    """Regenerate every synthetic dataset under ``Data/`` in a fixed order.

    The order matters for NumPy-backed columns: the claims step re-seeds the
    global NumPy generator and the HCC weights step continues from that state.
    """
    # NOTE(review): pandas is already imported when this runs, so anything
    # that reads the variable at import time will not see it - confirm that
    # this setting still has the intended effect.
    os.environ['BOTTLENECK_DISABLE'] = '1'
    create_data_folder()
    generate_claims_data()
    generate_cms_rules()
    generate_chargemaster()
    generate_hcc_weights()
    generate_denials_data()


if __name__ == '__main__':
    main()
|
| |
|