Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ from transformers import AutoTokenizer, AutoModel, pipeline as hf_pipeline
|
|
| 18 |
# =================================================================
|
| 19 |
# PART 0: THE BRIDGE (Automatic Brain Setup)
|
| 20 |
# =================================================================
|
| 21 |
-
st.set_page_config(page_title="PFAS Discovery AI", layout="wide")
|
| 22 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
MODEL_NAME = "JuIm/SMILES_BERT"
|
| 24 |
|
|
@@ -85,18 +85,16 @@ def sanity_check_class(smiles, predicted_class):
|
|
| 85 |
def mutate_smart(s):
|
| 86 |
"""
|
| 87 |
Aggressive 'Safe-by-Design' Mutation
|
| 88 |
-
Prioritizes breaking chains and adding degradation handles.
|
| 89 |
"""
|
| 90 |
try:
|
| 91 |
chars = list(sf.split_selfies(sf.encoder(s)))
|
| 92 |
# Action 1: Insert Polar Handles (High Probability)
|
| 93 |
if random.random() < 0.9:
|
| 94 |
insert_idx = random.randint(0, len(chars))
|
| 95 |
-
# [O]=Ether, [N]=Amine, [C][=O]=Carbonyl (Degradable)
|
| 96 |
atom = random.choice(["[O]", "[N]", "[C][=O]", "[C][O]"])
|
| 97 |
chars.insert(insert_idx, atom)
|
| 98 |
|
| 99 |
-
# Action 2: Cap Ends
|
| 100 |
if random.random() < 0.6:
|
| 101 |
chars.append(random.choice(["[O]", "[N]", "[C][=O][O]"]))
|
| 102 |
|
|
@@ -121,11 +119,13 @@ if clf is None:
|
|
| 121 |
st.stop()
|
| 122 |
|
| 123 |
# =================================================================
|
| 124 |
-
# PART 2: THE UI (
|
| 125 |
# =================================================================
|
| 126 |
st.title("🧪 End-to-End PFAS Discovery AI")
|
| 127 |
-
st.markdown("Powered by
|
|
|
|
| 128 |
|
|
|
|
| 129 |
st.sidebar.header("1. Input Data")
|
| 130 |
input_type = st.sidebar.radio("Source:", ["Single Molecule", "Batch CSV"])
|
| 131 |
|
|
@@ -143,6 +143,15 @@ else:
|
|
| 143 |
st.sidebar.header("2. Pipeline Mode")
|
| 144 |
mode = st.sidebar.selectbox("Mode:", ["Screening (Analyze)", "Discovery (Optimize)"])
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
if st.sidebar.button("π Run Pipeline") and data:
|
| 147 |
st.info(f"Running **{mode}** on {len(data)} molecules...")
|
| 148 |
|
|
@@ -214,13 +223,12 @@ if st.sidebar.button("π Run Pipeline") and data:
|
|
| 214 |
})
|
| 215 |
|
| 216 |
# ------------------------------------------------------------------
|
| 217 |
-
#
|
| 218 |
# ------------------------------------------------------------------
|
| 219 |
res_df = pd.DataFrame(results)
|
| 220 |
|
| 221 |
-
|
| 222 |
-
st.
|
| 223 |
-
st.dataframe(res_df)
|
| 224 |
st.download_button("Download CSV", res_df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
|
| 225 |
|
| 226 |
st.markdown("---")
|
|
@@ -228,53 +236,65 @@ if st.sidebar.button("π Run Pipeline") and data:
|
|
| 228 |
|
| 229 |
col1, col2 = st.columns(2)
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
# GRAPH 1: 3D DISCOVERY CUBE
|
| 234 |
with col1:
|
| 235 |
-
st.subheader("🧊 1. Multi-Dimensional Risk")
|
| 236 |
fig_3d = px.scatter_3d(
|
| 237 |
res_df,
|
| 238 |
x='Bioaccumulation', y='Mobility', z='Persistence',
|
| 239 |
-
color='Subclass',
|
| 240 |
-
|
| 241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
)
|
| 243 |
-
fig_3d.update_layout(margin=dict(l=0, r=0, b=0, t=
|
| 244 |
st.plotly_chart(fig_3d, use_container_width=True)
|
| 245 |
|
| 246 |
# GRAPH 2: CLASS DISTRIBUTION (Bar Chart)
|
| 247 |
with col2:
|
| 248 |
-
st.subheader("π 2.
|
|
|
|
|
|
|
|
|
|
| 249 |
fig_bar = px.bar(
|
| 250 |
-
|
| 251 |
-
title="
|
| 252 |
-
color_discrete_map=
|
|
|
|
|
|
|
| 253 |
)
|
|
|
|
| 254 |
st.plotly_chart(fig_bar, use_container_width=True)
|
| 255 |
|
| 256 |
col3, col4 = st.columns(2)
|
| 257 |
|
| 258 |
-
# GRAPH 3: PARALLEL COORDINATES (
|
| 259 |
with col3:
|
| 260 |
-
st.subheader("π 3. Property
|
| 261 |
-
# Normalize Subclass to integer for coloring if needed, or use Bioacc
|
| 262 |
fig_para = px.parallel_coordinates(
|
| 263 |
res_df,
|
| 264 |
dimensions=['Persistence', 'Mobility', 'Bioaccumulation'],
|
| 265 |
color="Bioaccumulation",
|
| 266 |
-
color_continuous_scale=
|
| 267 |
-
title="Trace: Persist
|
|
|
|
| 268 |
)
|
|
|
|
| 269 |
st.plotly_chart(fig_para, use_container_width=True)
|
| 270 |
|
| 271 |
# GRAPH 4: DISTRIBUTION VIOLIN PLOT
|
| 272 |
with col4:
|
| 273 |
-
st.subheader("🎻 4.
|
| 274 |
fig_vio = px.violin(
|
| 275 |
res_df, y="Bioaccumulation", x="Subclass",
|
| 276 |
color="Subclass", box=True, points="all",
|
| 277 |
-
color_discrete_map=
|
| 278 |
-
|
|
|
|
| 279 |
)
|
|
|
|
|
|
|
| 280 |
st.plotly_chart(fig_vio, use_container_width=True)
|
|
|
|
| 18 |
# =================================================================
|
| 19 |
# PART 0: THE BRIDGE (Automatic Brain Setup)
|
| 20 |
# =================================================================
|
| 21 |
+
st.set_page_config(page_title="PFAS Discovery AI", layout="wide", initial_sidebar_state="expanded")
|
| 22 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
MODEL_NAME = "JuIm/SMILES_BERT"
|
| 24 |
|
|
|
|
| 85 |
def mutate_smart(s):
|
| 86 |
"""
|
| 87 |
Aggressive 'Safe-by-Design' Mutation
|
|
|
|
| 88 |
"""
|
| 89 |
try:
|
| 90 |
chars = list(sf.split_selfies(sf.encoder(s)))
|
| 91 |
# Action 1: Insert Polar Handles (High Probability)
|
| 92 |
if random.random() < 0.9:
|
| 93 |
insert_idx = random.randint(0, len(chars))
|
|
|
|
| 94 |
atom = random.choice(["[O]", "[N]", "[C][=O]", "[C][O]"])
|
| 95 |
chars.insert(insert_idx, atom)
|
| 96 |
|
| 97 |
+
# Action 2: Cap Ends
|
| 98 |
if random.random() < 0.6:
|
| 99 |
chars.append(random.choice(["[O]", "[N]", "[C][=O][O]"]))
|
| 100 |
|
|
|
|
| 119 |
st.stop()
|
| 120 |
|
| 121 |
# =================================================================
|
| 122 |
+
# PART 2: THE UI (Aesthetic Upgrade)
|
| 123 |
# =================================================================
|
| 124 |
st.title("🧪 End-to-End PFAS Discovery AI")
|
| 125 |
+
st.markdown("### 🤖 Powered by Evolutionary Optimization & Deep Learning")
|
| 126 |
+
st.markdown("---")
|
| 127 |
|
| 128 |
+
# Sidebar
|
| 129 |
st.sidebar.header("1. Input Data")
|
| 130 |
input_type = st.sidebar.radio("Source:", ["Single Molecule", "Batch CSV"])
|
| 131 |
|
|
|
|
| 143 |
st.sidebar.header("2. Pipeline Mode")
|
| 144 |
mode = st.sidebar.selectbox("Mode:", ["Screening (Analyze)", "Discovery (Optimize)"])
|
| 145 |
|
| 146 |
+
# Define Custom Color Palette (Professional & Safe)
|
| 147 |
+
COLOR_MAP = {
|
| 148 |
+
"Non-PFAS": "#2ecc71", # Emerald Green
|
| 149 |
+
"PFCA": "#e74c3c", # Alizarin Red
|
| 150 |
+
"PFSA": "#9b59b6", # Amethyst Purple
|
| 151 |
+
"General PFAS": "#f39c12", # Orange
|
| 152 |
+
"Invalid": "#95a5a6" # Grey
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
if st.sidebar.button("π Run Pipeline") and data:
|
| 156 |
st.info(f"Running **{mode}** on {len(data)} molecules...")
|
| 157 |
|
|
|
|
| 223 |
})
|
| 224 |
|
| 225 |
# ------------------------------------------------------------------
|
| 226 |
+
# AESTHETIC DASHBOARD
|
| 227 |
# ------------------------------------------------------------------
|
| 228 |
res_df = pd.DataFrame(results)
|
| 229 |
|
| 230 |
+
st.markdown("### π Analysis Results")
|
| 231 |
+
st.dataframe(res_df.style.highlight_min(axis=0, subset=['Bioaccumulation'], color='#d4edda'))
|
|
|
|
| 232 |
st.download_button("Download CSV", res_df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
|
| 233 |
|
| 234 |
st.markdown("---")
|
|
|
|
| 236 |
|
| 237 |
col1, col2 = st.columns(2)
|
| 238 |
|
| 239 |
+
# GRAPH 1: 3D DISCOVERY CUBE (Dark Mode)
|
|
|
|
|
|
|
| 240 |
with col1:
|
| 241 |
+
st.subheader("🧊 1. Multi-Dimensional Risk Space")
|
| 242 |
fig_3d = px.scatter_3d(
|
| 243 |
res_df,
|
| 244 |
x='Bioaccumulation', y='Mobility', z='Persistence',
|
| 245 |
+
color='Subclass',
|
| 246 |
+
symbol='Type' if 'Type' in res_df.columns else 'Subclass',
|
| 247 |
+
color_discrete_map=COLOR_MAP,
|
| 248 |
+
opacity=0.9,
|
| 249 |
+
size_max=12,
|
| 250 |
+
template="plotly_dark", # <--- PRETTY DARK MODE
|
| 251 |
+
title="Risk Landscape (Interactive)"
|
| 252 |
)
|
| 253 |
+
fig_3d.update_layout(margin=dict(l=0, r=0, b=0, t=40), height=500)
|
| 254 |
st.plotly_chart(fig_3d, use_container_width=True)
|
| 255 |
|
| 256 |
# GRAPH 2: CLASS DISTRIBUTION (Bar Chart)
|
| 257 |
with col2:
|
| 258 |
+
st.subheader("π 2. Safety Classification")
|
| 259 |
+
count_df = res_df['Subclass'].value_counts().reset_index()
|
| 260 |
+
count_df.columns = ['Subclass', 'Count']
|
| 261 |
+
|
| 262 |
fig_bar = px.bar(
|
| 263 |
+
count_df, x="Subclass", y="Count", color="Subclass",
|
| 264 |
+
title="Molecule Counts by Class",
|
| 265 |
+
color_discrete_map=COLOR_MAP,
|
| 266 |
+
template="plotly_dark",
|
| 267 |
+
text_auto=True # <--- Adds numbers on bars
|
| 268 |
)
|
| 269 |
+
fig_bar.update_layout(height=500)
|
| 270 |
st.plotly_chart(fig_bar, use_container_width=True)
|
| 271 |
|
| 272 |
col3, col4 = st.columns(2)
|
| 273 |
|
| 274 |
+
# GRAPH 3: PARALLEL COORDINATES (Fixed Color Scale)
|
| 275 |
with col3:
|
| 276 |
+
st.subheader("π 3. Property Trace")
|
|
|
|
| 277 |
fig_para = px.parallel_coordinates(
|
| 278 |
res_df,
|
| 279 |
dimensions=['Persistence', 'Mobility', 'Bioaccumulation'],
|
| 280 |
color="Bioaccumulation",
|
| 281 |
+
color_continuous_scale="Spectral_r", # <--- FIXED: Standard built-in scale (Red=High Risk)
|
| 282 |
+
title="Trace: Persist → Mobile → Bioacc",
|
| 283 |
+
template="plotly_dark"
|
| 284 |
)
|
| 285 |
+
fig_para.update_layout(height=500)
|
| 286 |
st.plotly_chart(fig_para, use_container_width=True)
|
| 287 |
|
| 288 |
# GRAPH 4: DISTRIBUTION VIOLIN PLOT
|
| 289 |
with col4:
|
| 290 |
+
st.subheader("🎻 4. Bioaccumulation Spread")
|
| 291 |
fig_vio = px.violin(
|
| 292 |
res_df, y="Bioaccumulation", x="Subclass",
|
| 293 |
color="Subclass", box=True, points="all",
|
| 294 |
+
color_discrete_map=COLOR_MAP,
|
| 295 |
+
template="plotly_dark",
|
| 296 |
+
title="Distribution Density"
|
| 297 |
)
|
| 298 |
+
fig_vio.add_hline(y=3.5, line_dash="dash", line_color="orange", annotation_text="Safety Limit")
|
| 299 |
+
fig_vio.update_layout(height=500)
|
| 300 |
st.plotly_chart(fig_vio, use_container_width=True)
|