File size: 10,327 Bytes
1e664a3
 
 
 
 
 
 
1b8d0f1
 
1e664a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b8d0f1
 
1e664a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b8d0f1
 
1e664a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b8d0f1
 
1e664a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b8d0f1
 
b038fc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b8d0f1
 
1e664a3
 
 
 
 
 
 
 
 
b038fc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import pandas as pd
import numpy as np
import os
import random
from datetime import datetime, timedelta

def create_data_folder():
    """Ensure the ``Data`` output directory exists in the current working directory.

    Safe to call repeatedly. Every generator in this script writes its CSV
    into ``Data/``, so this should run before any of them.

    Raises:
        FileExistsError: If ``Data`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and avoids the window between a
    # separate "does it exist?" check and the actual creation.
    os.makedirs('Data', exist_ok=True)

def generate_claims_data(n=20000):
    """Generate a synthetic claims table and write it to ``Data/claims.csv``.

    The denial flag is not pure noise: each claim gets a denial probability
    made of a 2% baseline plus additive bumps, so a model trained on the file
    has real patterns to find:

    * ``Total_Charges`` above 80,000 adds 0.45
    * ``Service_Line`` of Oncology adds 0.25, Cardiology adds 0.15
    * ``Complexity_Level`` of MCC adds 0.20

    The probability is capped at 0.95 before sampling.

    Both NumPy's and the standard library's global random generators are
    seeded with 42, so repeated calls with the same ``n`` write identical
    files. The ``Data`` directory must already exist.

    Args:
        n: Number of claim rows to generate.
    """
    # This function draws from BOTH generators. Seeding only NumPy would leave
    # every random.* column (and the denial flag itself) different on each run.
    np.random.seed(42)
    random.seed(42)

    service_lines = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology']
    # Pool of 200 codes in [100, 998] (randint's upper bound is exclusive).
    # Converted to a plain list because random.choice() on a NumPy array raises
    # ValueError on some Python 3.11.x releases, which test the sequence's
    # truthiness.
    drgs = np.random.randint(100, 999, size=200).tolist()

    # Base data
    total_charges = np.random.uniform(5000, 150000, n)
    service_line_choices = [random.choice(service_lines) for _ in range(n)]
    complexity_choices = np.random.choice(['MCC', 'CC', 'Non-CC'], n, p=[0.25, 0.45, 0.3])

    # Logic-based denial generation: additive risk per claim, then one
    # Bernoulli draw per claim.
    service_line_risk = {'Oncology': 0.25, 'Cardiology': 0.15}
    is_denied = []
    for charge, line, complexity in zip(total_charges, service_line_choices, complexity_choices):
        risk = 0.02  # Baseline 2%
        if charge > 80000:
            risk += 0.45
        risk += service_line_risk.get(line, 0.0)
        if complexity == 'MCC':
            risk += 0.20

        # Clip risk and sample
        is_denied.append(1 if random.random() < min(risk, 0.95) else 0)

    data = {
        'Claim_ID': [f'CLM{100000+i}' for i in range(n)],
        'Patient_ID': [f'PT{random.randint(1, 5000)}' for _ in range(n)],
        'Service_Line': service_line_choices,
        'DRG_Code': [random.choice(drgs) for _ in range(n)],
        'Admission_Date': [datetime(2023, 1, 1) + timedelta(days=random.randint(0, 700)) for _ in range(n)],
        'Primary_Diagnosis': [f'I{random.randint(10, 99)}' for _ in range(n)],
        'Total_Charges': total_charges,
        # Drawn independently of Total_Charges, so it may exceed the charge.
        'Reimbursement': np.random.uniform(2000, 120000, n),
        'Is_Denied': is_denied,
        'Complexity_Level': complexity_choices
    }

    df = pd.DataFrame(data)
    df.to_csv('Data/claims.csv', index=False)
    print(f"Created Data/claims.csv with {n} rows and ML patterns")

def generate_cms_rules(n=250):
    """Generate a synthetic CMS 2025 rule catalogue and write it to ``Data/cms_rules_2025.csv``.

    The first six rows are fixed, hand-written scenario rules. The remaining
    ``n - 6`` rows are random combinations of rule type, target service line
    and change, each with an impact score in [0.10, 0.95] rounded to two
    decimals. The six fixed rules are always written, so the file has at
    least six rows even when ``n`` is smaller.

    Uses the global ``random`` generator without seeding it. The ``Data``
    directory must already exist.

    Args:
        n: Total number of rules wanted, including the six fixed ones.
    """
    rule_types = ['DRG_Logic', 'CC_MCC_Update', 'Coding_Addition', 'HCC_Revisions', 'Payment_Policy', 'Quality_Penalty', 'Telehealth', 'Site_of_Care', 'OPPS_Bundling', 'NCD_LCD_Update', 'Value_Based_Program']
    targets = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'General Surgery', 'Internal Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology', 'Urology', 'Nephrology']
    changes = ['Weight Decrease', 'Weight Increase', 'Reclassification', 'New CPT Codes', 'Weight Shift', 'Site-of-care shift', 'Inclusion Shift', 'Readmission Adjustment', 'Rate Standardization', 'APC Bundling', 'Coverage Determination', 'Penalty Increase']

    rules = [
        # 1. Orthopedic Bundle (User Example 1)
        {
            'Rule_ID': 'R2025_BUND_01',
            'Type': 'OPPS_Bundling',
            'Target': 'Orthopedics',
            'Change': 'APC Bundling',
            'Impact_Score': 0.85,
            'Description': "CMS 2025 OPPS Update: Orthopedic supply costs (HCPCS C1713) are now 'packaged' into APC 5114 flat fee. Separate pass-through billing is no longer permitted."
        },
        # 2. Sepsis Reclassification (User Demo Question)
        {
            'Rule_ID': 'R2025_SEPSIS_02',
            'Type': 'DRG_Logic',
            'Target': 'Internal Medicine',
            'Change': 'Reclassification',
            'Impact_Score': 0.78,
            'Description': "2025 Sepsis Reclassification: Sepsis with Major Complications (MCC) now requires documented 'Organ System Failure' for DRG 871. Failure to document results in downcode to DRG 872, risking $4,200 loss per case."
        },
        # 3. TKA Outpatient Shift
        {
            'Rule_ID': 'R2025_TKA_03',
            'Type': 'Site_of_Care',
            'Target': 'Orthopedics',
            'Change': 'Site-of-care shift',
            'Impact_Score': 0.92,
            'Description': "2025 TKA Shift: Total Knee Arthroplasty (TKA) procedures are being shifted from Inpatient (IPPS) to Outpatient (OPPS) for healthy populations. Projected reimbursement drop from $14k to $9k per case."
        },
        # 4. New Code Addition (Requirement 3B)
        {
            'Rule_ID': 'R2025_CODE_05',
            'Type': 'Coding_Addition',
            'Target': 'Cardiology',
            'Change': 'New CPT Codes',
            'Impact_Score': 0.45,
            'Description': "CMS 2025 update adds new descriptor codes for remote cardiac monitoring. $250 increase in reimbursement per patient per month for eligible heart failure cases."
        },
        # 5. NCD Coverage Policy (Requirement 1)
        {
            'Rule_ID': 'R2025_NCD_06',
            'Type': 'NCD_LCD_Update',
            'Target': 'Neurology',
            'Change': 'Coverage Determination',
            'Impact_Score': 0.72,
            'Description': "New National Coverage Determination (NCD) for Alzheimer's therapeutics. Stringent clinical criteria for coverage must be met for Medicare payment eligibility."
        },
        # 6. Value-Based Penalty (Requirement 3E)
        {
            'Rule_ID': 'R2025_VBP_07',
            'Type': 'Value_Based_Program',
            'Target': 'General Surgery',
            'Change': 'Penalty Increase',
            'Impact_Score': 0.55,
            'Description': "MSSP Shared Savings Update: Increased weighting for surgical site infection metrics. Organizations in the bottom 25th percentile face up to 2% billing reduction."
        },
    ]

    # Pad with random rules up to n rows; a negative count yields an empty range.
    for i in range(n - len(rules)):
        rule_type = random.choice(rule_types)
        target = random.choice(targets)
        change = random.choice(changes)
        impact = round(random.uniform(0.1, 0.95), 2)

        rules.append({
            'Rule_ID': f'R2025_{100+i}',
            'Type': rule_type,
            'Target': target,
            'Change': change,
            'Impact_Score': impact,
            # Fixed one-decimal formatting: a bare impact*100 leaks binary
            # float noise into the text (0.29 * 100 -> 28.999999999999996).
            'Description': f"2025 {rule_type} update for {target}. Focus on {change} with a systemic impact of {impact * 100:.1f}%."
        })

    df = pd.DataFrame(rules)
    df.to_csv('Data/cms_rules_2025.csv', index=False)
    # Report what was actually written; this differs from n when n < 6.
    print(f"Created Data/cms_rules_2025.csv with {len(df)} rules")

def generate_chargemaster(n=2500):
    """Build the Chargemaster (CDM) table and save it as ``Data/chargemaster.csv``.

    The table is assembled from three batches, in this order:

    1. 350 ``HCPCS_C1713_*`` orthopedic implant rows, charge 7000, status
       'Pass-Through' (the seeded conflict case for the bundling scenario).
    2. 400 ``CPT_27447_*`` Total Knee Arthroplasty rows, charge 15000,
       status 'Active'.
    3. ``n - 750`` filler rows with a random service line, status and charge.

    The first two batches are always emitted, so the file never has fewer
    than 750 rows. Filler rows draw from the global ``random`` generator.

    Args:
        n: Target total row count.
    """
    departments = ['Orthopedics', 'Cardiology', 'Pulmonology', 'Oncology', 'Internal Medicine', 'Neurology', 'General Surgery', 'Endocrinology', 'Gastroenterology']
    column_names = ['CDM_Code', 'Description', 'Service_Line', 'Status', 'Base_Charge']

    # Batch 1: the "missed CMS code change" conflict. These implants are left
    # as 'Pass-Through' on purpose; the scenario expects 'Packaged'.
    rows = [
        (f'HCPCS_C1713_{k}', 'Orthopedic Implant (Plate/Screw) - Unit A', 'Orthopedics', 'Pass-Through', 7000)
        for k in range(350)
    ]

    # Batch 2: additional orthopedic rows (TKA).
    rows.extend(
        (f'CPT_27447_{k}', 'Total Knee Arthroplasty (TKA)', 'Orthopedics', 'Active', 15000)
        for k in range(400)
    )

    # Batch 3: random filler. Per row the draws are service line, then
    # status, then charge.
    filler_count = n - len(rows)
    for k in range(filler_count):
        department = random.choice(departments)
        status = random.choice(['Active', 'Pass-Through', 'Inactive'])
        charge = random.randint(50, 45000)
        rows.append((f'CODE_{10000+k}', f'Hospital Procedure/Supply {k}', department, status, charge))

    table = pd.DataFrame(rows, columns=column_names)
    table.to_csv('Data/chargemaster.csv', index=False)
    print(f"Created Data/chargemaster.csv with {len(table)} records")

def generate_hcc_weights():
    """Write ``Data/hcc_weights.csv``: one row per condition with an old and a new weight.

    Codes are numbered ``HCC1`` .. ``HCC20`` in list order. Both weight
    columns are independent uniform draws in [0.1, 2.5) from NumPy's global
    generator (old weights first, then new weights). No seed is set here.
    """
    condition_names = [
        'Diabetes w/ Complications', 'COPD', 'CHF', 'End-Stage Renal Disease',
        'Major Depressive Disorder', 'Morbid Obesity', 'Rheumatoid Arthritis',
        'Vascular Disease', 'Cerebral Palsy', 'Multiple Sclerosis',
        'HIV/AIDS', 'Metastatic Cancer', 'Septicemia', 'Drug/Alcohol Use Disorder',
        'Paraplegia', 'Intestinal Obstruction', 'Severe Hematological Disorders',
        'Heart Failure', 'Acute Myocardial Infarction', 'Angina Pectoris'
    ]
    count = len(condition_names)

    hcc_codes = [f'HCC{position}' for position, _ in enumerate(condition_names, start=1)]
    # Draw order matters for reproducibility under a caller-set seed.
    old_weights = np.random.uniform(0.1, 2.5, count)
    new_weights = np.random.uniform(0.1, 2.5, count)

    table = pd.DataFrame({
        'HCC_Code': hcc_codes,
        'Description': condition_names,
        'Old_Weight': old_weights,
        'New_Weight': new_weights,
    })
    table.to_csv('Data/hcc_weights.csv', index=False)
    print("Created Data/hcc_weights.csv")

def generate_denials_data(n=3000):
    """Generate sample denial data for executive summary KPIs.

    Writes ``n`` rows to ``Data/sample_denials_3000.csv``. The file name is
    fixed and does not follow ``n``. Each row has sequential denial and claim
    IDs, a random service line, a random status, a denied amount drawn
    uniformly from [500, 25000) and a reason code between 1 and 45.

    Both NumPy's and the standard library's global random generators are
    seeded with 99, so repeated calls with the same ``n`` write identical
    files. The ``Data`` directory must already exist.

    Args:
        n: Number of denial rows to generate.
    """
    # Service_Line, Status and Reason come from the random module while
    # Denied_Amount comes from NumPy; seeding only NumPy would leave three of
    # the columns different on every run.
    np.random.seed(99)
    random.seed(99)

    statuses = ['Paid', 'Denied', 'Open', 'Appealed', 'Partially Paid']
    # NOTE(review): 'Surgery' / 'Medicine' differ from the 'General Surgery' /
    # 'Internal Medicine' labels used by the claims generator in this file;
    # confirm whether that is intentional before joining the two datasets.
    service_lines = ['Cardiology', 'Pulmonology', 'Orthopedics', 'Neurology', 'Surgery', 'Medicine', 'Oncology', 'Endocrinology', 'Gastroenterology']

    data = {
        'Denial_ID': [f'DEN{50000+i}' for i in range(n)],
        # NOTE(review): these IDs start at CLM200000, while the claims
        # generator in this file starts at CLM100000; verify whether the two
        # ranges are meant to overlap.
        'Claim_ID': [f'CLM{200000+i}' for i in range(n)],
        'Service_Line': [random.choice(service_lines) for _ in range(n)],
        'Status': [random.choice(statuses) for _ in range(n)],
        'Denied_Amount': np.random.uniform(500, 25000, n),
        'Reason': [f'Reason Code {random.randint(1, 45)}' for _ in range(n)]
    }
    df = pd.DataFrame(data)
    df.to_csv('Data/sample_denials_3000.csv', index=False)
    print("Created Data/sample_denials_3000.csv")

if __name__ == '__main__':
    # Bypass bottleneck.
    # NOTE(review): presumably meant to turn off the optional `bottleneck`
    # accelerator; confirm this variable is honored and that setting it after
    # pandas has been imported is early enough.
    os.environ['BOTTLENECK_DISABLE'] = '1'

    # Run order matters only for the first step: every generator writes into
    # the Data/ folder that create_data_folder() prepares.
    pipeline = (
        create_data_folder,
        generate_claims_data,
        generate_cms_rules,
        generate_chargemaster,
        generate_hcc_weights,
        generate_denials_data,
    )
    for step in pipeline:
        step()