| import streamlit as st |
| import pandas as pd |
| import numpy as np |
|
|
| |
| np.random.seed(42) |
|
|
| |
| def generate_realistic_data(num_patients=100): |
| |
| patient_ids = [] |
| ages = [] |
| menopausal_status = [] |
| tumor_sizes = [] |
| lymph_nodes = [] |
| grades = [] |
| stages = [] |
| er_status = [] |
| pr_status = [] |
| her2_status = [] |
| ki67_level = [] |
| tnbc_status = [] |
| brca_mutation = [] |
| overall_health = [] |
| genomic_score = [] |
| treatment = [] |
|
|
| for i in range(num_patients): |
| |
| patient_id = i + 1 |
| patient_ids.append(patient_id) |
|
|
| |
| age = int(np.random.normal(60, 10)) |
| age = max(30, min(age, 80)) |
| ages.append(age) |
|
|
| |
| menopausal = 'Post-menopausal' if age >= 50 else 'Pre-menopausal' |
| menopausal_status.append(menopausal) |
|
|
| |
| tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2) |
| tumor_sizes.append(tumor_size) |
|
|
| |
| lymph_node = 'Positive' if (tumor_size > 2.0 and np.random.rand() < 0.6) or (tumor_size <= 2.0 and np.random.rand() < 0.3) else 'Negative' |
| lymph_nodes.append(lymph_node) |
|
|
| |
| grade = np.random.choice([1, 2, 3], p=[0.1, 0.4, 0.5] if tumor_size > 2.0 else [0.3, 0.5, 0.2]) |
| grades.append(grade) |
|
|
| |
| if tumor_size <= 2.0 and lymph_node == 'Negative': |
| stage = 'I' |
| elif (tumor_size > 2.0 and tumor_size <= 5.0) and lymph_node == 'Negative': |
| stage = 'II' |
| elif lymph_node == 'Positive' or tumor_size > 5.0: |
| stage = 'III' |
| else: |
| stage = 'II' |
| if np.random.rand() < 0.05: |
| stage = 'IV' |
| stages.append(stage) |
|
|
| |
| er = np.random.choice(['Positive', 'Negative'], p=[0.75, 0.25]) |
| pr = 'Positive' if er == 'Positive' and np.random.rand() > 0.1 else 'Negative' |
| er_status.append(er) |
| pr_status.append(pr) |
|
|
| |
| her2 = np.random.choice(['Positive', 'Negative'], p=[0.3, 0.7] if grade == 3 else [0.15, 0.85]) |
| her2_status.append(her2) |
|
|
| |
| ki67 = 'High' if grade == 3 and np.random.rand() < 0.8 else 'Low' |
| ki67_level.append(ki67) |
|
|
| |
| tnbc = 'Positive' if er == 'Negative' and pr == 'Negative' and her2 == 'Negative' else 'Negative' |
| tnbc_status.append(tnbc) |
|
|
| |
| brca = 'Positive' if (tnbc == 'Positive' or age < 40) and np.random.rand() < 0.2 else 'Negative' |
| brca_mutation.append(brca) |
|
|
| |
| health = 'Good' if age < 65 and np.random.rand() < 0.9 else 'Poor' |
| overall_health.append(health) |
|
|
| |
| recurrence_score = np.random.choice(['Low', 'Intermediate', 'High'], p=[0.6, 0.3, 0.1]) if er == 'Positive' and her2 == 'Negative' else 'N/A' |
| genomic_score.append(recurrence_score) |
|
|
| |
| if stage in ['I', 'II']: |
| if tnbc == 'Positive': |
| treat = 'Surgery, Chemotherapy, and Radiation Therapy' + (', plus PARP Inhibitors' if brca == 'Positive' else '') |
| elif er == 'Positive' and recurrence_score != 'N/A': |
| if recurrence_score == 'High': |
| treat = 'Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy' |
| elif recurrence_score == 'Intermediate': |
| treat = 'Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy' |
| else: |
| treat = 'Surgery, Hormone Therapy, and Radiation Therapy' |
| elif her2 == 'Positive': |
| treat = 'Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy' |
| else: |
| treat = 'Surgery, Chemotherapy, and Radiation Therapy' |
| elif stage == 'III': |
| treat = 'Neoadjuvant Chemotherapy, Surgery, Radiation Therapy' + (', HER2-Targeted Therapy' if her2 == 'Positive' else '') + (', Hormone Therapy' if er == 'Positive' else '') |
| else: |
| treat = 'Systemic Therapy (' + ', '.join([option for option in ['Hormone Therapy' if er == 'Positive' else '', 'HER2-Targeted Therapy' if her2 == 'Positive' else '', 'Chemotherapy' if tnbc == 'Positive' else ''] if option]) + '), Palliative Care' if health == 'Good' else 'Palliative Care Only' |
|
|
| treatment.append(treat) |
|
|
| |
| data = { |
| 'Patient ID': patient_ids, |
| 'Age': ages, |
| 'Menopausal Status': menopausal_status, |
| 'Tumor Size (cm)': tumor_sizes, |
| 'Lymph Node Involvement': lymph_nodes, |
| 'Tumor Grade': grades, |
| 'Tumor Stage': stages, |
| 'ER Status': er_status, |
| 'PR Status': pr_status, |
| 'HER2 Status': her2_status, |
| 'Ki-67 Level': ki67_level, |
| 'TNBC Status': tnbc_status, |
| 'BRCA Mutation': brca_mutation, |
| 'Overall Health': overall_health, |
| 'Genomic Recurrence Score': genomic_score, |
| 'Treatment': treatment, |
| } |
|
|
| return pd.DataFrame(data) |
|
|
| |
| def generate_fuzzy_data(df, error_rate=0.1): |
| fuzzy_df = df.copy() |
| num_rows, num_cols = fuzzy_df.shape |
| |
| |
| for _ in range(int(num_rows * num_cols * error_rate)): |
| row = np.random.randint(0, num_rows) |
| col = np.random.randint(0, num_cols) |
| |
| value = fuzzy_df.iloc[row, col] |
| |
| if isinstance(value, str): |
| if value in ['Post-menopausal', 'Pre-menopausal']: |
| fuzzy_df.iloc[row, col] = 'Post-menopausal' if value == 'Pre-menopausal' else 'Pre-menopausal' |
| elif value in ['Positive', 'Negative']: |
| fuzzy_df.iloc[row, col] = 'Negative' if value == 'Positive' else 'Positive' |
| elif isinstance(value, (int, float)): |
| noise = np.random.normal(0, 0.1 * value) |
| fuzzy_df.iloc[row, col] += noise |
| |
| return fuzzy_df |
|
|
| def main(): |
| st.title('Synthetic Data Generator: Clean and Fuzzy (Noisy)') |
| st.write('This app generates synthetic breast cancer patient data and provides downloads for both clean and fuzzy datasets.') |
| |
| num_patients = st.number_input('Number of Patients to Generate', min_value=10, max_value=10000, value=100, step=10) |
| |
| if st.button('Generate Data'): |
| perfect_data = generate_realistic_data(num_patients) |
| fuzzy_data = generate_fuzzy_data(perfect_data, error_rate=0.1) |
|
|
| st.subheader('Perfect Data') |
| st.dataframe(perfect_data) |
| st.download_button('Download Perfect Data', perfect_data.to_csv(index=False), file_name='perfect_data.csv') |
|
|
| st.subheader('Fuzzy Data (10% Error Rate)') |
| st.dataframe(fuzzy_data) |
| st.download_button('Download Fuzzy Data', fuzzy_data.to_csv(index=False), file_name='fuzzy_data.csv') |
|
|
| if __name__ == '__main__': |
| main() |