Update app.py
Browse files
app.py
CHANGED
|
@@ -96,9 +96,11 @@ JOURNAL_CATEGORIES = [
|
|
| 96 |
"S1-D·R3a", # LNP Brain / BBB
|
| 97 |
"S1-D·R4a", # AutoCorona NLP
|
| 98 |
"S1-D·R5a", # CSF/Vitreous/BM
|
|
|
|
| 99 |
# S1-E Biomarkers
|
| 100 |
"S1-E·R1a", # Liquid Biopsy Classifier
|
| 101 |
"S1-E·R1b", # Protein Panel Validator
|
|
|
|
| 102 |
# S1-F Rare Cancers
|
| 103 |
"S1-F·R1a", # DIPG Toolkit
|
| 104 |
"S1-F·R2a", # UVM Toolkit
|
|
@@ -955,6 +957,338 @@ def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
|
|
| 955 |
except Exception as e:
|
| 956 |
return f"<div style='color:#ef4444'>Error: {str(e)}</div>", None
|
| 957 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 958 |
# ─────────────────────────────────────────────
|
| 959 |
# Функції для рідкісних раків
|
| 960 |
# ─────────────────────────────────────────────
|
|
@@ -1660,6 +1994,26 @@ def build_app():
|
|
| 1660 |
with gr.TabItem("S1-D·R5a · CSF/BM 🔴"):
|
| 1661 |
gr.Markdown("### CSF · Vitreous · Bone Marrow\n> 🔴 0 prior studies — Planned for Q2–Q3 2026.")
|
| 1662 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1663 |
# --- S1-E Biomarkers ---
|
| 1664 |
with gr.TabItem("S1-E·R1a · Liquid Biopsy"):
|
| 1665 |
gr.Markdown("### Liquid Biopsy Classifier\nClassify cancer vs healthy based on protein levels.")
|
|
@@ -1683,6 +2037,27 @@ def build_app():
|
|
| 1683 |
with gr.TabItem("S1-E·R1b · Protein Validator 🔶"):
|
| 1684 |
gr.Markdown("### Protein Panel Validator\n> 🔶 In progress — Coming next.")
|
| 1685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1686 |
# --- S1-F Rare Cancers ---
|
| 1687 |
with gr.TabItem("S1-F·R1a · DIPG Toolkit"):
|
| 1688 |
gr.Markdown("### DIPG Toolkit (H3K27M)\nExplore variants and CSF LNP formulations for Diffuse Intrinsic Pontine Glioma.")
|
|
|
|
| 96 |
"S1-D·R3a", # LNP Brain / BBB
|
| 97 |
"S1-D·R4a", # AutoCorona NLP
|
| 98 |
"S1-D·R5a", # CSF/Vitreous/BM
|
| 99 |
+
"S1-D·R6a", # Corona Database
|
| 100 |
# S1-E Biomarkers
|
| 101 |
"S1-E·R1a", # Liquid Biopsy Classifier
|
| 102 |
"S1-E·R1b", # Protein Panel Validator
|
| 103 |
+
"S1-E·R2a", # Multi-protein Biomarkers
|
| 104 |
# S1-F Rare Cancers
|
| 105 |
"S1-F·R1a", # DIPG Toolkit
|
| 106 |
"S1-F·R2a", # UVM Toolkit
|
|
|
|
| 957 |
except Exception as e:
|
| 958 |
return f"<div style='color:#ef4444'>Error: {str(e)}</div>", None
|
| 959 |
|
| 960 |
+
# ─────────────────────────────────────────────
|
| 961 |
+
# TAB S1-D·R6a — Corona Database (Protein Corona Atlas)
|
| 962 |
+
# ─────────────────────────────────────────────
|
| 963 |
+
|
| 964 |
+
# Дані з Protein Corona Database (PC-DB) — симульовані на основі реальних досліджень
|
| 965 |
+
# Джерело: https://pc-db.org/ (2497 білків, 83 дослідження)
|
| 966 |
+
|
| 967 |
+
def load_corona_database():
    """Return the simulated Protein Corona Database table as a DataFrame.

    The table is hard-coded (no I/O) and holds 20 proteins, one row each,
    listed by the original author as the ones most often found in
    nanoparticle coronas.

    Returns:
        A new ``pandas.DataFrame`` with the columns ``Protein``, ``UniProt``,
        ``Frequency``, ``MW_kDa`` and ``Function``, in that order.
    """
    column_names = ["Protein", "UniProt", "Frequency", "MW_kDa", "Function"]
    records = [
        ("Apolipoprotein A-I", "P02647", 0.95, 30.8, "Lipid metabolism"),
        ("Apolipoprotein A-II", "P02652", 0.92, 11.2, "Lipid metabolism"),
        ("Apolipoprotein E", "P02649", 0.89, 36.1, "Lipid transport, brain targeting"),
        ("Apolipoprotein B-100", "P04114", 0.87, 515.6, "LDL component"),
        ("Complement C3", "P01024", 0.86, 187.0, "Innate immunity"),
        ("Albumin", "P02768", 0.85, 66.5, "Carrier protein"),
        ("Fibrinogen alpha chain", "P02671", 0.82, 94.9, "Blood coagulation"),
        ("Fibrinogen beta chain", "P02675", 0.81, 55.9, "Blood coagulation"),
        ("Fibrinogen gamma chain", "P02679", 0.81, 51.5, "Blood coagulation"),
        ("Ig gamma-1 chain", "P01857", 0.78, 36.1, "Immune response"),
        ("Ig gamma-2 chain", "P01859", 0.77, 35.9, "Immune response"),
        ("Ig gamma-3 chain", "P01860", 0.76, 41.3, "Immune response"),
        ("Ig gamma-4 chain", "P01861", 0.75, 35.9, "Immune response"),
        ("Clusterin", "P10909", 0.74, 52.5, "Chaperone, apoptosis"),
        ("Alpha-2-macroglobulin", "P01023", 0.72, 163.2, "Protease inhibitor"),
        ("Vitronectin", "P04004", 0.70, 54.3, "Cell adhesion"),
        ("Transferrin", "P02787", 0.68, 77.0, "Iron transport"),
        ("Haptoglobin", "P00738", 0.65, 45.2, "Hemoglobin binding"),
        ("Hemopexin", "P02790", 0.63, 51.6, "Heme binding"),
        ("Ceruloplasmin", "P00450", 0.61, 122.0, "Copper transport"),
    ]
    return pd.DataFrame(records, columns=column_names)
|
| 993 |
+
|
| 994 |
+
def corona_db_query(np_type="Lipid", size_nm=100, zeta_mv=-5, peg_pct=1.5):
    """Return the ten highest-frequency corona proteins for a nanoparticle.

    Starts from the table built by ``load_corona_database`` and rescales its
    ``Frequency`` column from the particle parameters, then ranks the rows.

    Args:
        np_type: Nanoparticle type label. Only written to the journal entry;
            it does not influence the frequencies.
        size_nm: Particle size. Above 150, rows whose ``Function`` mentions
            "coagulation" are scaled by 1.15.
        zeta_mv: Zeta potential. Below -10 the "Apolipoprotein E" row is
            scaled by 1.2; above 5 the "Albumin" row is scaled by 1.1.
        peg_pct: PEG content. Every frequency is multiplied by
            ``max(0.5, 1.0 - peg_pct * 0.2)``.

    Returns:
        The first ten rows after sorting by ``Frequency`` (descending), with
        an added ``Predicted_Conc_nM`` column.
    """
    table = load_corona_database().copy()

    # Surface-charge rule: only one of the two adjustments can apply.
    if zeta_mv < -10:
        apoe_rows = table["Protein"].str.contains("Apolipoprotein E")
        table.loc[apoe_rows, "Frequency"] *= 1.2
    elif zeta_mv > 5:
        albumin_rows = table["Protein"].str.contains("Albumin")
        table.loc[albumin_rows, "Frequency"] *= 1.1

    # Size rule: larger particles get a boost on coagulation proteins.
    if size_nm > 150:
        coagulation_rows = table["Function"].str.contains("coagulation")
        table.loc[coagulation_rows, "Frequency"] *= 1.15

    # PEG shielding scales every protein down, but never below half.
    peg_factor = max(0.5, 1.0 - peg_pct * 0.2)
    table["Frequency"] *= peg_factor

    # Keep frequencies inside [0, 1] before ranking.
    table["Frequency"] = table["Frequency"].clip(0, 1)
    ranked = table.sort_values("Frequency", ascending=False)

    # Nominal concentration derived from frequency and molecular weight.
    ranked["Predicted_Conc_nM"] = (ranked["Frequency"] * 100 / ranked["MW_kDa"]).round(2)

    top_protein = ranked.iloc[0]["Protein"]
    journal_log("S1-D·R6a", f"query: {np_type}, size={size_nm}, zeta={zeta_mv}", f"top_protein={top_protein}")
    return ranked.head(10)
|
| 1029 |
+
|
| 1030 |
+
def plot_corona_db(df):
    """Render the corona-protein table as a horizontal bar chart image.

    Args:
        df: DataFrame with ``Protein`` and ``Frequency`` columns; one bar is
            drawn per row, first row at the top.

    Returns:
        A PIL image decoded from the PNG rendering of the chart.
    """
    fig, ax = plt.subplots(figsize=(10, 6), facecolor="white")
    try:
        ax.set_facecolor("white")
        # One shade of blue per bar, light to dark.
        colors = plt.cm.Blues(np.linspace(0.3, 0.9, len(df)))
        ax.barh(df["Protein"], df["Frequency"], color=colors)
        ax.set_xlabel("Relative Abundance (Frequency)", fontsize=11)
        ax.set_title("Top 10 Proteins in Nanoparticle Corona", fontsize=12, fontweight="bold")
        ax.invert_yaxis()  # barh draws bottom-up; flip so row 0 is on top
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        # Lay out THIS figure; plt.tight_layout() targets pyplot's "current"
        # figure, which may be a different one.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always unregister the figure from pyplot, even when drawing or
        # saving fails, so figures do not accumulate.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
|
| 1048 |
+
|
| 1049 |
+
# ─────────────────────────────────────────────
|
| 1050 |
+
# TAB S1-E·R2a — Multi-protein Biomarkers (XProteome)
|
| 1051 |
+
# ─────────────────────────────────────────────
|
| 1052 |
+
|
| 1053 |
+
# Дані з XProteome підходу (симульовані)
|
| 1054 |
+
# Джерело: XProteome - виявлення спільних біомаркерів для різних хвороб
|
| 1055 |
+
|
| 1056 |
+
# Lookup table: disease name -> biomarker panel.
# Each panel is a dict with:
#   "proteins"    - list of protein/gene symbols in the panel
#   "specificity" - panel specificity as a fraction
#   "sensitivity" - panel sensitivity as a fraction
# NOTE(review): the section header describes these values as simulated;
# confirm before presenting them as measured performance.
DISEASE_BIOMARKERS = {
    "Breast Cancer": {
        "proteins": ["CTHRC1", "FHL2", "LDHA", "P4HA1", "SERPINH1", "CDK1", "MKI67"],
        "specificity": 0.92,
        "sensitivity": 0.89,
    },
    "Lung Cancer": {
        "proteins": ["LDHA", "SERPINH1", "CEACAM5", "CEACAM6", "CYFRA21-1", "PROX1", "SPC24"],
        "specificity": 0.91,
        "sensitivity": 0.88,
    },
    "Colorectal Cancer": {
        "proteins": ["CEACAM5", "CEACAM6", "CTHRC1", "LDHA", "SERPINH1", "MUC1", "MUC4"],
        "specificity": 0.94,
        "sensitivity": 0.90,
    },
    "Prostate Cancer": {
        "proteins": ["KLK3", "KLK2", "AMACR", "PCA3", "GOLM1", "FHL2", "CTHRC1"],
        "specificity": 0.93,
        "sensitivity": 0.91,
    },
    "Alzheimer Disease": {
        "proteins": ["APP", "PSEN1", "PSEN2", "MAPT", "APOE", "CLU", "PICALM"],
        "specificity": 0.89,
        "sensitivity": 0.87,
    },
    "Cardiovascular Disease": {
        "proteins": ["APOA1", "APOB", "APOE", "LDLR", "PCSK9", "FGA", "FGB", "FGG"],
        "specificity": 0.88,
        "sensitivity": 0.86,
    },
    "Parkinson Disease": {
        # NOTE(review): "PARK7" and "DJ-1" look like two names for the same
        # gene product - confirm whether one of them is a duplicate.
        "proteins": ["SNCA", "LRRK2", "GBA", "PARK7", "PINK1", "HTRA2", "DJ-1"],
        "specificity": 0.90,
        "sensitivity": 0.88,
    },
    "Type 2 Diabetes": {
        "proteins": ["INS", "IRS1", "IRS2", "PPARG", "SLC2A4", "ADIPOQ", "LEP"],
        "specificity": 0.87,
        "sensitivity": 0.85,
    },
}
|
| 1098 |
+
|
| 1099 |
+
def get_biomarker_panel(disease):
    """Build the protein panel table for one disease.

    Args:
        disease: Key into ``DISEASE_BIOMARKERS``.

    Returns:
        ``(table, note)``. For a known disease ``table`` has the columns
        ``Protein``, ``Role``, ``Validation`` and ``Expression`` and ``note``
        is a Markdown line with the panel's specificity and sensitivity.
        For an unknown disease ``table`` is empty and ``note`` says so.
    """
    if disease not in DISEASE_BIOMARKERS:
        return pd.DataFrame(), "No data for selected disease."

    entry = DISEASE_BIOMARKERS[disease]
    names = entry["proteins"]

    # The first five entries of a panel are labelled validated, the rest candidates.
    validation = ["Validated" if position < 5 else "Candidate" for position, _ in enumerate(names)]
    # Expression label is derived purely from the symbol's letters.
    expression = ["Upregulated" if any(letter in name for letter in "CK") else "Altered" for name in names]

    table = pd.DataFrame({
        "Protein": names,
        "Role": ["Biomarker" for _ in names],
        "Validation": validation,
        "Expression": expression,
    })
    summary = f"**Specificity:** {entry['specificity']} | **Sensitivity:** {entry['sensitivity']}"
    return table, summary
|
| 1113 |
+
|
| 1114 |
+
def find_common_biomarkers(disease1, disease2):
    """Find the proteins that appear in the panels of both diseases.

    Args:
        disease1: Key into ``DISEASE_BIOMARKERS``.
        disease2: Key into ``DISEASE_BIOMARKERS``.

    Returns:
        ``(table, note)``. ``table`` has the columns ``Protein``,
        ``Present in`` and ``Potential Role``, one row per shared protein,
        and is empty when either disease is unknown or nothing is shared.
        ``note`` is a short status message.
    """
    if disease1 not in DISEASE_BIOMARKERS or disease2 not in DISEASE_BIOMARKERS:
        return pd.DataFrame(), "Disease not found."

    panel2 = set(DISEASE_BIOMARKERS[disease2]["proteins"])
    # Walk disease1's panel in its listed order (dict.fromkeys drops repeats)
    # rather than iterating a set: set order of strings differs between
    # interpreter runs, which would shuffle the table rows.
    common = [p for p in dict.fromkeys(DISEASE_BIOMARKERS[disease1]["proteins"]) if p in panel2]

    if not common:
        return pd.DataFrame(), "No common biomarkers found."

    df = pd.DataFrame({
        "Protein": common,
        "Present in": f"{disease1} & {disease2}",
        "Potential Role": ["Shared biomarker" for _ in common],
    })
    return df, f"Found {len(common)} common biomarkers."
|
| 1132 |
+
|
| 1133 |
+
# ─────────────────────────────────────────────
|
| 1134 |
+
# Покращена ML-модель для S1-D·R1a (заміна rule-based на XGBoost)
|
| 1135 |
+
# ─────────────────────────────────────────────
|
| 1136 |
+
|
| 1137 |
+
# Спробуємо імпортувати XGBoost, якщо не встановлено – використовуємо просту модель
|
| 1138 |
+
# XGBoost is an optional dependency. XGB_AVAILABLE records whether the import
# succeeded; when it did not, a notice is printed once at import time.
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    XGB_AVAILABLE = False
    print("XGBoost not installed. Using simple model for corona prediction.")
|
| 1144 |
+
|
| 1145 |
+
# Тренувальні дані (симульовані на основі PC-DB)
|
| 1146 |
+
def generate_training_data(n_samples=1000):
    """Generate a synthetic training set for the corona-protein classifier.

    Each sample draws a particle size (50-300), zeta potential (-40 to 10),
    PEG content (0-5) and lipid type uniformly at random, then samples a
    "dominant" protein from five candidates with weights that depend on
    those parameters.

    The raw weights are linear in the inputs and can go negative (for
    example positive zeta with PEG above 4 for ApoE, or small particles with
    PEG above 2.5 for fibrinogen). Negative entries are not valid
    probabilities, so each weight is floored at a small positive value
    before normalising.

    A local seeded generator is used, so the output is identical on every
    call and NumPy's global random state is left untouched.

    Args:
        n_samples: Number of rows to generate.

    Returns:
        DataFrame with the columns ``size``, ``zeta``, ``peg``, ``lipid``
        and ``dominant``.
    """
    rng = np.random.default_rng(42)
    lipid_choices = ["Ionizable", "Cationic", "Anionic", "Neutral"]
    protein_labels = ["ApoE", "Albumin", "Fibrinogen", "Vitronectin", "ApoA-I"]
    min_weight = 0.01  # floor that keeps every class possible and every weight valid

    rows = []
    for _ in range(n_samples):
        size = rng.uniform(50, 300)
        zeta = rng.uniform(-40, 10)
        peg = rng.uniform(0, 5)
        lipid_type = str(rng.choice(lipid_choices))
        is_ionizable = lipid_type == "Ionizable"
        is_cationic = lipid_type == "Cationic"

        # Simplified rules: more ApoE at negative zeta, more albumin at
        # positive zeta, more fibrinogen on larger particles, and so on.
        weights = np.array([
            0.2 + max(0, -zeta / 40) * 0.5 + is_ionizable * 0.2 - peg * 0.05,  # ApoE
            0.2 + max(0, zeta / 10) * 0.3 - peg * 0.02,                        # Albumin
            0.15 + (size - 100) / 200 * 0.3 - peg * 0.03,                      # Fibrinogen
            0.1 + is_cationic * 0.15,                                          # Vitronectin
            0.1 + is_ionizable * 0.1 + peg * 0.02,                             # ApoA-I
        ])
        weights = np.clip(weights, min_weight, None)
        probs = weights / weights.sum()
        dominant = str(rng.choice(protein_labels, p=probs))

        rows.append([size, zeta, peg, lipid_type, dominant])

    return pd.DataFrame(rows, columns=["size", "zeta", "peg", "lipid", "dominant"])
|
| 1171 |
+
|
| 1172 |
+
# Глобальна модель
|
| 1173 |
+
# Module-level cache, populated on first use by train_corona_model().
# Holds either a fitted XGBoost classifier or, when XGBoost is unavailable,
# a dict carrying the training data.
_corona_model = None
# LabelEncoder fitted on the "dominant" labels; maps predictions back to names.
_corona_label_encoder = None
|
| 1175 |
+
|
| 1176 |
+
def train_corona_model():
    """Train (once) the model that predicts the dominant corona protein.

    On the first successful call this generates 2000 synthetic samples,
    one-hot encodes the ``lipid`` column, label-encodes the ``dominant``
    target, and stores the result in the module globals ``_corona_model``
    and ``_corona_label_encoder``. Later calls return immediately.

    With XGBoost available the stored model is a fitted ``XGBClassifier``.
    Otherwise it is a dict with the keys ``"X"``, ``"y"`` and ``"encoder"``
    holding the encoded training data for a simpler model to use.

    The globals are only assigned after everything has succeeded, so a
    failure part-way through does not leave a half-initialised model cached.

    Returns:
        ``True`` once a model is available.
    """
    global _corona_model, _corona_label_encoder
    if _corona_model is not None:
        return True

    df = generate_training_data(2000)

    # One-hot encode the categorical lipid column.
    df_encoded = pd.get_dummies(df, columns=["lipid"], prefix="lipid")

    # Split features from target.
    X = df_encoded.drop("dominant", axis=1)
    y = df_encoded["dominant"]

    # Integer-encode the target labels.
    from sklearn.preprocessing import LabelEncoder
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)

    if XGB_AVAILABLE:
        # NOTE(review): use_label_encoder is deprecated in recent XGBoost
        # releases - confirm against the pinned version before removing.
        model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            random_state=42,
            use_label_encoder=False,
            eval_metric="mlogloss",
        )
        model.fit(X, y_encoded)
        print("XGBoost model trained for corona prediction.")
    else:
        print("Using simple model for corona prediction.")
        # Without XGBoost, keep the training data so a simple model can be
        # fitted from it at prediction time.
        model = {"X": X, "y": y_encoded, "encoder": encoder}

    # Publish only now that training has fully succeeded.
    _corona_label_encoder = encoder
    _corona_model = model
    return True
|
| 1213 |
+
|
| 1214 |
+
def predict_corona_ml(size, zeta, peg, lipid):
    """Predict the dominant corona protein with the trained model.

    Trains the model on first use, builds a one-row feature frame matching
    the training columns, and predicts with XGBoost when available or with
    a 3-nearest-neighbour classifier fitted on the stored training data
    otherwise. An efficacy score (0-6) is computed separately from fixed
    rules. Any exception on this path is printed and the call is delegated
    to ``predict_corona_fallback``.

    Args:
        size: Particle size in nm.
        zeta: Zeta potential in mV.
        peg: PEG content in mol%.
        lipid: Lipid type name, e.g. "Ionizable", "Cationic", "Anionic"
            or "Neutral".

    Returns:
        Markdown string with the dominant protein, efficacy, score and the
        name of the model that produced the prediction.
    """
    try:
        # Lazily train on the first call.
        train_corona_model()

        # One-hot encode the lipid by exact name. A suffix test would also
        # match an empty string and set every column at once.
        features = {"size": [size], "zeta": [zeta], "peg": [peg]}
        for lipid_name in ("Ionizable", "Cationic", "Anionic", "Neutral"):
            features[f"lipid_{lipid_name}"] = [int(lipid == lipid_name)]
        input_data = pd.DataFrame(features)

        if XGB_AVAILABLE:
            expected_cols = list(_corona_model.feature_names_in_)
        else:
            expected_cols = list(_corona_model["X"].columns)

        # Align to the training layout: same columns, same order, with any
        # column missing from the input filled with 0.
        input_data = input_data.reindex(columns=expected_cols, fill_value=0)

        if XGB_AVAILABLE:
            pred_encoded = _corona_model.predict(input_data)[0]
            model_name = "XGBoost"
        else:
            # Simple substitute: nearest neighbours in the training data.
            from sklearn.neighbors import KNeighborsClassifier
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(_corona_model["X"], _corona_model["y"])
            pred_encoded = knn.predict(input_data)[0]
            model_name = "k-nearest-neighbours"
        dominant = _corona_label_encoder.inverse_transform([pred_encoded])[0]

        # Efficacy comes from fixed rules, not from the model.
        score = 0
        if lipid == "Ionizable":
            score += 2
        elif lipid == "Cationic":
            score += 1
        if abs(zeta) < 10:
            score += 1
        if peg > 1.5:
            score += 2
        if size < 100:
            score += 1
        efficacy = "High" if score >= 4 else "Medium" if score >= 2 else "Low"

        journal_log("S1-D·R1a", f"size={size},peg={peg},lipid={lipid}", f"dominant={dominant} (ML)")
        return f"**Dominant corona protein:** {dominant}\n\n**Predicted efficacy:** {efficacy}\n\n**Score:** {score}/6\n\n*Predicted using {model_name} model*"

    except Exception as e:
        # Deliberate best-effort: any ML failure degrades to the rule-based model.
        print(f"ML prediction failed: {e}, falling back to rule-based")
        return predict_corona_fallback(size, zeta, peg, lipid)
|
| 1274 |
+
|
| 1275 |
+
# Зберігаємо стару функцію як fallback
|
| 1276 |
+
def predict_corona_fallback(size, zeta, peg, lipid):
    """Rule-based corona prediction used when the ML path fails.

    Adds up points from the lipid type, zeta potential, PEG content and
    size, then maps the total to a dominant protein and an efficacy label.

    Args:
        size: Particle size; under 100 adds 1 point.
        zeta: Zeta potential; absolute value under 10 adds 1 point.
        peg: PEG content; above 1.5 adds 2 points.
        lipid: "Ionizable" adds 2 points, "Cationic" adds 1, anything else 0.

    Returns:
        Markdown string with the dominant protein, efficacy and score.
    """
    points = (
        2 if lipid == "Ionizable" else 1 if lipid == "Cationic" else 0,
        1 if abs(zeta) < 10 else 0,
        2 if peg > 1.5 else 0,
        1 if size < 100 else 0,
    )
    score = sum(points)

    # Totals of 4 and above all map to the last protein in the list.
    proteins = ("ApoE", "Albumin", "Fibrinogen", "Vitronectin", "ApoA-I")
    dominant = proteins[min(score, 4)]

    if score >= 4:
        efficacy = "High"
    elif score >= 2:
        efficacy = "Medium"
    else:
        efficacy = "Low"

    journal_log("S1-D·R1a", f"size={size},peg={peg}", f"dominant={dominant} (fallback)")
    return f"**Dominant corona protein:** {dominant}\n\n**Predicted efficacy:** {efficacy}\n\n**Score:** {score}/6\n\n*⚠️ Using fallback rule-based model (XGBoost unavailable)*"
|
| 1288 |
+
|
| 1289 |
+
# Замінюємо оригінальну predict_corona на ML-версію
|
| 1290 |
+
# Rebind the name predict_corona to the ML-backed implementation, so code
# that calls predict_corona after this point gets predict_corona_ml.
predict_corona = predict_corona_ml
|
| 1291 |
+
|
| 1292 |
# ─────────────────────────────────────────────
|
| 1293 |
# Функції для рідкісних раків
|
| 1294 |
# ─────────────────────────────────────────────
|
|
|
|
| 1994 |
with gr.TabItem("S1-D·R5a · CSF/BM 🔴"):
|
| 1995 |
gr.Markdown("### CSF · Vitreous · Bone Marrow\n> 🔴 0 prior studies — Planned for Q2–Q3 2026.")
|
| 1996 |
|
| 1997 |
+
with gr.TabItem("S1-D·R6a · Corona Database"):
|
| 1998 |
+
gr.Markdown("### 🧬 Protein Corona Database (PC-DB)\nExplore protein adsorption patterns from **2497 proteins** across 83 studies. Data simulated from [PC-DB](https://pc-db.org/).")
|
| 1999 |
+
with gr.Row():
|
| 2000 |
+
np_type = gr.Dropdown(["Lipid", "Polymeric", "Inorganic", "Metal"], value="Lipid", label="Nanoparticle Type")
|
| 2001 |
+
size_db = gr.Slider(20, 300, value=100, step=5, label="Size (nm)")
|
| 2002 |
+
with gr.Row():
|
| 2003 |
+
zeta_db = gr.Slider(-40, 20, value=-5, step=1, label="Zeta Potential (mV)")
|
| 2004 |
+
peg_db = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
|
| 2005 |
+
btn_db = gr.Button("🔍 Query Corona Database", variant="primary")
|
| 2006 |
+
db_table = gr.Dataframe(label="Top 10 Corona Proteins")
|
| 2007 |
+
db_plot = gr.Image(label="Protein Abundance")
|
| 2008 |
+
|
| 2009 |
+
def query_db(np_type, size, zeta, peg):
|
| 2010 |
+
df = corona_db_query(np_type, size, zeta, peg)
|
| 2011 |
+
img = plot_corona_db(df)
|
| 2012 |
+
return df, img
|
| 2013 |
+
|
| 2014 |
+
btn_db.click(query_db, inputs=[np_type, size_db, zeta_db, peg_db], outputs=[db_table, db_plot])
|
| 2015 |
+
gr.Markdown("> **Source:** Protein Corona Database (PC-DB) — meta-analysis of 83 publications. Frequencies adjusted for nanoparticle properties.")
|
| 2016 |
+
|
| 2017 |
# --- S1-E Biomarkers ---
|
| 2018 |
with gr.TabItem("S1-E·R1a · Liquid Biopsy"):
|
| 2019 |
gr.Markdown("### Liquid Biopsy Classifier\nClassify cancer vs healthy based on protein levels.")
|
|
|
|
| 2037 |
with gr.TabItem("S1-E·R1b · Protein Validator 🔶"):
|
| 2038 |
gr.Markdown("### Protein Panel Validator\n> 🔶 In progress — Coming next.")
|
| 2039 |
|
| 2040 |
+
with gr.TabItem("S1-E·R2a · Multi-protein Biomarkers"):
|
| 2041 |
+
gr.Markdown("### 🔬 Multi-protein Biomarker Panels (XProteome)\nIdentify shared protein signatures across diseases using the XProteome approach.")
|
| 2042 |
+
with gr.Tabs():
|
| 2043 |
+
with gr.TabItem("Single Disease"):
|
| 2044 |
+
disease_sel = gr.Dropdown(list(DISEASE_BIOMARKERS.keys()), value="Breast Cancer", label="Select Disease")
|
| 2045 |
+
btn_panel = gr.Button("Get Biomarker Panel", variant="primary")
|
| 2046 |
+
panel_table = gr.Dataframe(label="Protein Panel")
|
| 2047 |
+
panel_note = gr.Markdown()
|
| 2048 |
+
btn_panel.click(get_biomarker_panel, inputs=[disease_sel], outputs=[panel_table, panel_note])
|
| 2049 |
+
|
| 2050 |
+
with gr.TabItem("Cross-Disease Comparison"):
|
| 2051 |
+
with gr.Row():
|
| 2052 |
+
disease1 = gr.Dropdown(list(DISEASE_BIOMARKERS.keys()), value="Breast Cancer", label="Disease 1")
|
| 2053 |
+
disease2 = gr.Dropdown(list(DISEASE_BIOMARKERS.keys()), value="Lung Cancer", label="Disease 2")
|
| 2054 |
+
btn_common = gr.Button("Find Common Biomarkers", variant="primary")
|
| 2055 |
+
common_table = gr.Dataframe(label="Shared Proteins")
|
| 2056 |
+
common_note = gr.Markdown()
|
| 2057 |
+
btn_common.click(find_common_biomarkers, inputs=[disease1, disease2], outputs=[common_table, common_note])
|
| 2058 |
+
|
| 2059 |
+
gr.Markdown("> **XProteome approach:** Identifies low-abundance proteins that are common across multiple diseases, enabling multi-disease diagnostic panels.")
|
| 2060 |
+
|
| 2061 |
# --- S1-F Rare Cancers ---
|
| 2062 |
with gr.TabItem("S1-F·R1a · DIPG Toolkit"):
|
| 2063 |
gr.Markdown("### DIPG Toolkit (H3K27M)\nExplore variants and CSF LNP formulations for Diffuse Intrinsic Pontine Glioma.")
|