tueniuu committed on
Commit
b14db7e
·
verified ·
1 Parent(s): b8b30b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -30
app.py CHANGED
@@ -18,7 +18,7 @@ from transformers import AutoTokenizer, AutoModel, pipeline as hf_pipeline
18
  # =================================================================
19
  # PART 0: THE BRIDGE (Automatic Brain Setup)
20
  # =================================================================
21
- st.set_page_config(page_title="PFAS Discovery AI", layout="wide")
22
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
23
  MODEL_NAME = "JuIm/SMILES_BERT"
24
 
@@ -85,18 +85,16 @@ def sanity_check_class(smiles, predicted_class):
85
  def mutate_smart(s):
86
  """
87
  Aggressive 'Safe-by-Design' Mutation
88
- Prioritizes breaking chains and adding degradation handles.
89
  """
90
  try:
91
  chars = list(sf.split_selfies(sf.encoder(s)))
92
  # Action 1: Insert Polar Handles (High Probability)
93
  if random.random() < 0.9:
94
  insert_idx = random.randint(0, len(chars))
95
- # [O]=Ether, [N]=Amine, [C][=O]=Carbonyl (Degradable)
96
  atom = random.choice(["[O]", "[N]", "[C][=O]", "[C][O]"])
97
  chars.insert(insert_idx, atom)
98
 
99
- # Action 2: Cap Ends to increase Solubility
100
  if random.random() < 0.6:
101
  chars.append(random.choice(["[O]", "[N]", "[C][=O][O]"]))
102
 
@@ -121,11 +119,13 @@ if clf is None:
121
  st.stop()
122
 
123
  # =================================================================
124
- # PART 2: THE UI (With 4-Graph Dashboard)
125
  # =================================================================
126
  st.title("🧪 End-to-End PFAS Discovery AI")
127
- st.markdown("Powered by **Evolutionary Optimization** & **Advanced Visualization**")
 
128
 
 
129
  st.sidebar.header("1. Input Data")
130
  input_type = st.sidebar.radio("Source:", ["Single Molecule", "Batch CSV"])
131
 
@@ -143,6 +143,15 @@ else:
143
  st.sidebar.header("2. Pipeline Mode")
144
  mode = st.sidebar.selectbox("Mode:", ["Screening (Analyze)", "Discovery (Optimize)"])
145
 
 
 
 
 
 
 
 
 
 
146
  if st.sidebar.button("🚀 Run Pipeline") and data:
147
  st.info(f"Running **{mode}** on {len(data)} molecules...")
148
 
@@ -214,13 +223,12 @@ if st.sidebar.button("🚀 Run Pipeline") and data:
214
  })
215
 
216
  # ------------------------------------------------------------------
217
- # VISUALIZATION DASHBOARD
218
  # ------------------------------------------------------------------
219
  res_df = pd.DataFrame(results)
220
 
221
- # 1. RESULTS TABLE
222
- st.subheader("📊 Data Table")
223
- st.dataframe(res_df)
224
  st.download_button("Download CSV", res_df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
225
 
226
  st.markdown("---")
@@ -228,53 +236,65 @@ if st.sidebar.button("🚀 Run Pipeline") and data:
228
 
229
  col1, col2 = st.columns(2)
230
 
231
- color_map = {"Non-PFAS": "green", "PFCA": "red", "PFSA": "purple", "General PFAS": "orange"}
232
-
233
- # GRAPH 1: 3D DISCOVERY CUBE
234
  with col1:
235
- st.subheader("🧊 1. Multi-Dimensional Risk")
236
  fig_3d = px.scatter_3d(
237
  res_df,
238
  x='Bioaccumulation', y='Mobility', z='Persistence',
239
- color='Subclass', symbol='Type' if 'Type' in res_df.columns else 'Subclass',
240
- color_discrete_map=color_map, opacity=0.8, size_max=10,
241
- title="Bioacc vs Mobility vs Persistence"
 
 
 
 
242
  )
243
- fig_3d.update_layout(margin=dict(l=0, r=0, b=0, t=30))
244
  st.plotly_chart(fig_3d, use_container_width=True)
245
 
246
  # GRAPH 2: CLASS DISTRIBUTION (Bar Chart)
247
  with col2:
248
- st.subheader("📊 2. Class Composition")
 
 
 
249
  fig_bar = px.bar(
250
- res_df, x="Subclass", color="Subclass",
251
- title="Count of Molecules by Class",
252
- color_discrete_map=color_map
 
 
253
  )
 
254
  st.plotly_chart(fig_bar, use_container_width=True)
255
 
256
  col3, col4 = st.columns(2)
257
 
258
- # GRAPH 3: PARALLEL COORDINATES (The "Trace" Graph)
259
  with col3:
260
- st.subheader("📉 3. Property Tracing")
261
- # Normalize Subclass to integer for coloring if needed, or use Bioacc
262
  fig_para = px.parallel_coordinates(
263
  res_df,
264
  dimensions=['Persistence', 'Mobility', 'Bioaccumulation'],
265
  color="Bioaccumulation",
266
- color_continuous_scale=px.colors.diverging.TealRose,
267
- title="Trace: Persist -> Mobile -> Bioacc"
 
268
  )
 
269
  st.plotly_chart(fig_para, use_container_width=True)
270
 
271
  # GRAPH 4: DISTRIBUTION VIOLIN PLOT
272
  with col4:
273
- st.subheader("🎻 4. Risk Distribution")
274
  fig_vio = px.violin(
275
  res_df, y="Bioaccumulation", x="Subclass",
276
  color="Subclass", box=True, points="all",
277
- color_discrete_map=color_map,
278
- title="Bioaccumulation Spread per Class"
 
279
  )
 
 
280
  st.plotly_chart(fig_vio, use_container_width=True)
 
18
  # =================================================================
19
  # PART 0: THE BRIDGE (Automatic Brain Setup)
20
  # =================================================================
21
+ st.set_page_config(page_title="PFAS Discovery AI", layout="wide", initial_sidebar_state="expanded")
22
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
23
  MODEL_NAME = "JuIm/SMILES_BERT"
24
 
 
85
  def mutate_smart(s):
86
  """
87
  Aggressive 'Safe-by-Design' Mutation
 
88
  """
89
  try:
90
  chars = list(sf.split_selfies(sf.encoder(s)))
91
  # Action 1: Insert Polar Handles (High Probability)
92
  if random.random() < 0.9:
93
  insert_idx = random.randint(0, len(chars))
 
94
  atom = random.choice(["[O]", "[N]", "[C][=O]", "[C][O]"])
95
  chars.insert(insert_idx, atom)
96
 
97
+ # Action 2: Cap Ends
98
  if random.random() < 0.6:
99
  chars.append(random.choice(["[O]", "[N]", "[C][=O][O]"]))
100
 
 
119
  st.stop()
120
 
121
  # =================================================================
122
+ # PART 2: THE UI (Aesthetic Upgrade)
123
  # =================================================================
124
  st.title("🧪 End-to-End PFAS Discovery AI")
125
+ st.markdown("### 🤖 Powered by Evolutionary Optimization & Deep Learning")
126
+ st.markdown("---")
127
 
128
+ # Sidebar
129
  st.sidebar.header("1. Input Data")
130
  input_type = st.sidebar.radio("Source:", ["Single Molecule", "Batch CSV"])
131
 
 
143
  st.sidebar.header("2. Pipeline Mode")
144
  mode = st.sidebar.selectbox("Mode:", ["Screening (Analyze)", "Discovery (Optimize)"])
145
 
146
+ # Define Custom Color Palette (Professional & Safe)
147
+ COLOR_MAP = {
148
+ "Non-PFAS": "#2ecc71", # Emerald Green
149
+ "PFCA": "#e74c3c", # Alizarin Red
150
+ "PFSA": "#9b59b6", # Amethyst Purple
151
+ "General PFAS": "#f39c12", # Orange
152
+ "Invalid": "#95a5a6" # Grey
153
+ }
154
+
155
  if st.sidebar.button("🚀 Run Pipeline") and data:
156
  st.info(f"Running **{mode}** on {len(data)} molecules...")
157
 
 
223
  })
224
 
225
  # ------------------------------------------------------------------
226
+ # AESTHETIC DASHBOARD
227
  # ------------------------------------------------------------------
228
  res_df = pd.DataFrame(results)
229
 
230
+ st.markdown("### 📊 Analysis Results")
231
+ st.dataframe(res_df.style.highlight_min(axis=0, subset=['Bioaccumulation'], color='#d4edda'))
 
232
  st.download_button("Download CSV", res_df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
233
 
234
  st.markdown("---")
 
236
 
237
  col1, col2 = st.columns(2)
238
 
239
+ # GRAPH 1: 3D DISCOVERY CUBE (Dark Mode)
 
 
240
  with col1:
241
+ st.subheader("🧊 1. Multi-Dimensional Risk Space")
242
  fig_3d = px.scatter_3d(
243
  res_df,
244
  x='Bioaccumulation', y='Mobility', z='Persistence',
245
+ color='Subclass',
246
+ symbol='Type' if 'Type' in res_df.columns else 'Subclass',
247
+ color_discrete_map=COLOR_MAP,
248
+ opacity=0.9,
249
+ size_max=12,
250
+ template="plotly_dark", # <--- PRETTY DARK MODE
251
+ title="Risk Landscape (Interactive)"
252
  )
253
+ fig_3d.update_layout(margin=dict(l=0, r=0, b=0, t=40), height=500)
254
  st.plotly_chart(fig_3d, use_container_width=True)
255
 
256
  # GRAPH 2: CLASS DISTRIBUTION (Bar Chart)
257
  with col2:
258
+ st.subheader("📊 2. Safety Classification")
259
+ count_df = res_df['Subclass'].value_counts().reset_index()
260
+ count_df.columns = ['Subclass', 'Count']
261
+
262
  fig_bar = px.bar(
263
+ count_df, x="Subclass", y="Count", color="Subclass",
264
+ title="Molecule Counts by Class",
265
+ color_discrete_map=COLOR_MAP,
266
+ template="plotly_dark",
267
+ text_auto=True # <--- Adds numbers on bars
268
  )
269
+ fig_bar.update_layout(height=500)
270
  st.plotly_chart(fig_bar, use_container_width=True)
271
 
272
  col3, col4 = st.columns(2)
273
 
274
+ # GRAPH 3: PARALLEL COORDINATES (Fixed Color Scale)
275
  with col3:
276
+ st.subheader("📉 3. Property Trace")
 
277
  fig_para = px.parallel_coordinates(
278
  res_df,
279
  dimensions=['Persistence', 'Mobility', 'Bioaccumulation'],
280
  color="Bioaccumulation",
281
+ color_continuous_scale="Spectral_r", # <--- FIXED: Standard built-in scale (Red=High Risk)
282
+ title="Trace: Persist → Mobile → Bioacc",
283
+ template="plotly_dark"
284
  )
285
+ fig_para.update_layout(height=500)
286
  st.plotly_chart(fig_para, use_container_width=True)
287
 
288
  # GRAPH 4: DISTRIBUTION VIOLIN PLOT
289
  with col4:
290
+ st.subheader("🎻 4. Bioaccumulation Spread")
291
  fig_vio = px.violin(
292
  res_df, y="Bioaccumulation", x="Subclass",
293
  color="Subclass", box=True, points="all",
294
+ color_discrete_map=COLOR_MAP,
295
+ template="plotly_dark",
296
+ title="Distribution Density"
297
  )
298
+ fig_vio.add_hline(y=3.5, line_dash="dash", line_color="orange", annotation_text="Safety Limit")
299
+ fig_vio.update_layout(height=500)
300
  st.plotly_chart(fig_vio, use_container_width=True)