42Cummer commited on
Commit
178c45c
·
verified ·
1 Parent(s): fdaed96

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -67
app.py CHANGED
@@ -1,13 +1,16 @@
1
  import gradio as gr # type: ignore
2
  import os
3
- from gradio_molecule3d import Molecule3D
4
- from Bio.PDB import PDBParser
 
 
5
 
6
  # Import your custom modules from the /scripts folder
7
  from scripts.download import download_and_clean_pdb
8
  from scripts.generator import run_broteinshake_generator
9
  from scripts.refine import polish_design, process_results
10
  from scripts.visualize import create_design_plot
 
11
 
12
  # --- HELPER FUNCTIONS ---
13
 
@@ -42,22 +45,22 @@ def load_pdb_and_extract_chains(pdb_id):
42
  # Single-chain proteins are supported (will use different ProteinMPNN command)
43
  # For single-chain, the only chain will be automatically selected for redesign
44
  if len(chains) == 1:
45
- status_msg = f"Loaded {pdb_id.upper()}: Single-chain protein - will redesign chain {chains[0]}"
46
  # Auto-select the chain for single-chain proteins
47
  return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=True), chains
48
 
49
- status_msg = f"Loaded {pdb_id.upper()}: Found {len(chains)} chain(s) - {', '.join(chains)}"
50
  # Initially disable button - user must select at least one chain
51
  return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=False), chains
52
  except Exception as e:
53
- error_msg = f"Error loading {pdb_id.upper()}: {str(e)}"
54
  print(error_msg)
55
  return gr.update(choices=[], value=[]), error_msg, gr.update(interactive=False), []
56
 
57
  def validate_chain_selection(selected_chains, available_chains_state):
58
  """Validate that at least one chain is selected and at least one remains fixed (for multi-chain)."""
59
  if not selected_chains or len(selected_chains) == 0:
60
- warning = "⚠️ Please select at least one chain to redesign"
61
  return gr.update(interactive=False), warning, available_chains_state
62
 
63
  # Get available chains from state
@@ -65,15 +68,15 @@ def validate_chain_selection(selected_chains, available_chains_state):
65
 
66
  # For single-chain proteins, allow selecting the only chain
67
  if len(available_chains) == 1:
68
- warning = f"Single-chain protein: Will redesign chain {available_chains[0]}"
69
  return gr.update(interactive=True), warning, available_chains_state
70
 
71
  # For multi-chain: Check if all chains are selected (would leave no fixed chains)
72
  if available_chains and len(selected_chains) >= len(available_chains):
73
- warning = f"⚠️ Cannot select all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}"
74
  return gr.update(interactive=False), warning, available_chains_state
75
 
76
- warning = f"{len(selected_chains)} chain(s) selected for redesign: {', '.join(selected_chains)}"
77
  return gr.update(interactive=True), warning, available_chains_state
78
 
79
  def get_all_sequences(fasta_file: str) -> str:
@@ -160,9 +163,9 @@ def run_part1(pdb_id, fixed_chains, variable_chains, temperature=0.1, selected_c
160
  raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}, Available: {', '.join(all_chains)}")
161
 
162
  if is_single_chain:
163
- print(f"📋 Single-chain mode: Redesigning chain {variable_chains}")
164
  else:
165
- print(f"📋 Using chain selector: Fixed={fixed_chains}, Variable={variable_chains}")
166
  else:
167
  # If no chains selected, use text inputs (default behavior)
168
  # For single-chain, if variable_chains is empty, use the only chain
@@ -174,12 +177,12 @@ def run_part1(pdb_id, fixed_chains, variable_chains, temperature=0.1, selected_c
174
  all_selected = set(fixed_chains + variable_chains)
175
  if len(all_selected) >= len(all_chains):
176
  raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed.")
177
- print(f"📋 Using text inputs: Fixed={fixed_chains}, Variable={variable_chains}")
178
 
179
  # Step 2: Generate Optimized Sequences
180
  # This creates the .fa files you need for the ESM Atlas
181
- print(f"🌡️ Temperature: {temperature}")
182
- print(f"🔧 Parameters: Fixed chains={fixed_chains}, Variable chains={variable_chains}, Temp={temperature}")
183
  run_broteinshake_generator(pdb_path, fixed_chains, variable_chains, num_seqs=20, temp=temperature)
184
 
185
  # Get all sequences and the best one
@@ -199,58 +202,72 @@ def run_part1(pdb_id, fixed_chains, variable_chains, temperature=0.1, selected_c
199
 
200
  # Format status with best sequence
201
  status_message = (
202
- f"Design Complete! {num_designs} designs generated.\n\n"
203
- f"Lead Candidate (Best Score: {best_score:.4f}):\n"
204
  f"{best_sequence}\n\n"
205
- f"📋 Copy the lead sequence above and fold it at [esmatlas.com](https://esmatlas.com/resources?action=fold)."
206
  )
207
 
208
  return all_sequences, evolution_plot, status_message
209
  except Exception as e:
210
- return "", None, f"Error in Part 1: {str(e)}"
211
 
212
- def run_part2(pdb_id, uploaded_esm_file):
213
- """Aligns the ESM-folded PDB to the target structure."""
 
 
 
214
  try:
215
- if uploaded_esm_file is None:
216
- return None, "⚠️ Please upload the PDB from ESM Atlas first."
 
217
 
218
- # Validate inputs
219
  if not pdb_id or not pdb_id.strip():
220
- return None, "Error: PDB ID is required."
 
 
 
 
 
 
 
221
 
222
- # Get file path
223
- file_path = uploaded_esm_file.name if hasattr(uploaded_esm_file, 'name') else uploaded_esm_file
224
- if not file_path or not os.path.exists(file_path):
225
- return None, f"❌ Error: Uploaded file not found: {file_path}"
 
 
 
 
 
 
 
 
 
226
 
227
- # Call the new lightweight Biopython-only script
228
- print(f"🔍 Starting alignment for PDB ID: {pdb_id}, File: {file_path}")
229
- final_pdb_path, global_rmsd, core_rmsd, high_conf_rmsd = polish_design(pdb_id, file_path)
230
 
231
- # Validate output
232
  if not final_pdb_path or not os.path.exists(final_pdb_path):
233
- return None, f"Error: Alignment failed - output file not created: {final_pdb_path}"
234
 
235
  if high_conf_rmsd is None:
236
- return None, "Error: Alignment failed - RMSD calculation returned None"
237
 
238
- print(f"✅ Alignment successful: Global RMSD = {global_rmsd:.3f} Å, Core RMSD = {core_rmsd:.3f} Å, High-Conf RMSD = {high_conf_rmsd:.3f} Å")
239
 
240
- # Generate detailed validation report
241
  report = process_results(pdb_id, final_pdb_path, global_rmsd, high_conf_rmsd)
242
 
 
 
 
243
  return final_pdb_path, report
244
- except FileNotFoundError as e:
245
- error_msg = f"❌ File Error: {str(e)}"
246
- print(error_msg)
247
- return None, error_msg
248
- except ValueError as e:
249
- error_msg = f"❌ Validation Error: {str(e)}"
250
- print(error_msg)
251
- return None, error_msg
252
  except Exception as e:
253
- error_msg = f"Unexpected Error: {str(e)}\n\nTraceback:\n{type(e).__name__}"
254
  print(error_msg)
255
  import traceback
256
  traceback.print_exc()
@@ -297,7 +314,7 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
297
  # Header
298
  gr.HTML("""
299
  <div id='biohub-header'>
300
- <h1 style='color: white; margin: 0;'>🧪 BroteinShake</h1>
301
  </div>
302
  """)
303
 
@@ -307,11 +324,11 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
307
  gr.Markdown("Enter a PDB ID to 'repaint' its binder interface using ProteinMPNN.")
308
 
309
  pdb_input = gr.Textbox(label="Target PDB ID", placeholder="e.g., 3kas", value="")
310
- load_pdb_btn = gr.Button("📥 Load PDB", variant="secondary")
311
- pdb_status = gr.Markdown("💡 Enter a PDB ID and click 'Load PDB' to begin")
312
 
313
  with gr.Column():
314
- gr.Markdown("### ⚙️ Design Parameters")
315
 
316
  # Temperature (T) is the most critical knob for sequence recovery
317
  sampling_temp = gr.Slider(
@@ -327,7 +344,7 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
327
  info="Identify which chains ProteinMPNN should modify (will populate after loading PDB)"
328
  )
329
 
330
- chain_warning = gr.Markdown("💡 Select at least one chain to enable generation", visible=True)
331
 
332
  # Hidden state to track if we've successfully parsed the PDB
333
  pdb_state = gr.State()
@@ -338,7 +355,7 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
338
  v_chains = gr.Textbox(label="Variable Chains (Key)", value="B")
339
 
340
  # Generate button (initially disabled)
341
- gen_btn = gr.Button("🚀 Generate Optimized Sequences", variant="primary", interactive=False)
342
 
343
  # Load PDB and extract chains when button is clicked
344
  load_pdb_btn.click(
@@ -373,7 +390,7 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
373
 
374
  # TAB 2: STRUCTURAL VALIDATION
375
  with gr.Tab("2. Structural Validation"):
376
- gr.Markdown("### 🧬 Final Structure Preview")
377
 
378
  # Updated REPS for local dev visibility
379
  REPS = [
@@ -382,48 +399,54 @@ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo:
382
  "style": "cartoon",
383
  "color": "spectrum",
384
  "opacity": 1.0
385
- # Removing "chain": "A" is the key fix here!
386
  }
387
  ]
388
 
389
- # This component ACTUALLY supports .pdb natively
390
  protein_view = Molecule3D(label="3D Structure Viewer (Refined Shuttle)", reps=REPS, elem_id="molecule-viewer")
391
 
392
  with gr.Row():
393
- esm_upload = gr.File(label="Upload ESM-Folded PDB", file_types=[".pdb"])
 
 
 
 
 
 
394
  refined_download = gr.File(label="Download Aligned Lead (.pdb)")
395
 
396
- validate_btn = gr.Button("Run Structural Alignment", variant="primary")
397
  status2 = gr.Textbox(label="Validation Report", interactive=False, lines=5)
398
 
399
- # Update viewer - REPS are set on component creation, just return the file path
400
- def run_validation_with_view(pdb_id, file):
401
  try:
402
- # This is your existing alignment/refine logic
403
- final_pdb, report = run_part2(pdb_id, file)
404
 
405
  if final_pdb is not None and os.path.exists(final_pdb):
406
- # Verify we're using the refined shuttle (Refined_Shuttle.pdb)
407
  if "Refined_Shuttle.pdb" in final_pdb or os.path.basename(final_pdb) == "Refined_Shuttle.pdb":
408
- print(f"🎯 Visualizing refined shuttle: {final_pdb}")
409
  else:
410
- print(f"⚠️ Warning: Expected Refined_Shuttle.pdb but got: {final_pdb}")
411
 
412
- # Molecule3D expects just the file path string (REPS are already set on component)
413
  return final_pdb, report, final_pdb
 
414
  except Exception as e:
415
- error_msg = f"Error generating 3D view: {str(e)}"
416
  print(error_msg)
417
  import traceback
418
  traceback.print_exc()
419
  return None, error_msg, None
420
 
 
421
  validate_btn.click(
422
  run_validation_with_view,
423
- inputs=[pdb_input, esm_upload],
424
  outputs=[refined_download, status2, protein_view]
425
  )
426
-
427
  # Launch the app
428
  if __name__ == "__main__":
429
  # Docker deployment for HuggingFace Spaces
 
1
  import gradio as gr # type: ignore
2
  import os
3
+ from gradio_molecule3d import Molecule3D #type: ignore
4
+ from Bio.PDB import PDBParser #type: ignore
5
+
6
+ import time
7
 
8
  # Import your custom modules from the /scripts folder
9
  from scripts.download import download_and_clean_pdb
10
  from scripts.generator import run_broteinshake_generator
11
  from scripts.refine import polish_design, process_results
12
  from scripts.visualize import create_design_plot
13
+ from scripts.foldprotein import fold_protein_sequence
14
 
15
  # --- HELPER FUNCTIONS ---
16
 
 
45
  # Single-chain proteins are supported (will use different ProteinMPNN command)
46
  # For single-chain, the only chain will be automatically selected for redesign
47
  if len(chains) == 1:
48
+ status_msg = f"Loaded {pdb_id.upper()}: Single-chain protein - will redesign chain {chains[0]}"
49
  # Auto-select the chain for single-chain proteins
50
  return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=True), chains
51
 
52
+ status_msg = f"Loaded {pdb_id.upper()}: Found {len(chains)} chain(s) - {', '.join(chains)}"
53
  # Initially disable button - user must select at least one chain
54
  return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=False), chains
55
  except Exception as e:
56
+ error_msg = f"Error loading {pdb_id.upper()}: {str(e)}"
57
  print(error_msg)
58
  return gr.update(choices=[], value=[]), error_msg, gr.update(interactive=False), []
59
 
60
  def validate_chain_selection(selected_chains, available_chains_state):
61
  """Validate that at least one chain is selected and at least one remains fixed (for multi-chain)."""
62
  if not selected_chains or len(selected_chains) == 0:
63
+ warning = "Please select at least one chain to redesign"
64
  return gr.update(interactive=False), warning, available_chains_state
65
 
66
  # Get available chains from state
 
68
 
69
  # For single-chain proteins, allow selecting the only chain
70
  if len(available_chains) == 1:
71
+ warning = f"Single-chain protein: Will redesign chain {available_chains[0]}"
72
  return gr.update(interactive=True), warning, available_chains_state
73
 
74
  # For multi-chain: Check if all chains are selected (would leave no fixed chains)
75
  if available_chains and len(selected_chains) >= len(available_chains):
76
+ warning = f"Cannot select all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}"
77
  return gr.update(interactive=False), warning, available_chains_state
78
 
79
+ warning = f"{len(selected_chains)} chain(s) selected for redesign: {', '.join(selected_chains)}"
80
  return gr.update(interactive=True), warning, available_chains_state
81
 
82
  def get_all_sequences(fasta_file: str) -> str:
 
163
  raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}, Available: {', '.join(all_chains)}")
164
 
165
  if is_single_chain:
166
+ print(f"Single-chain mode: Redesigning chain {variable_chains}")
167
  else:
168
+ print(f"Using chain selector: Fixed={fixed_chains}, Variable={variable_chains}")
169
  else:
170
  # If no chains selected, use text inputs (default behavior)
171
  # For single-chain, if variable_chains is empty, use the only chain
 
177
  all_selected = set(fixed_chains + variable_chains)
178
  if len(all_selected) >= len(all_chains):
179
  raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed.")
180
+ print(f"Using text inputs: Fixed={fixed_chains}, Variable={variable_chains}")
181
 
182
  # Step 2: Generate Optimized Sequences
183
  # This creates the .fa files you need for the ESM Atlas
184
+ print(f"Temperature: {temperature}")
185
+ print(f"Parameters: Fixed chains={fixed_chains}, Variable chains={variable_chains}, Temp={temperature}")
186
  run_broteinshake_generator(pdb_path, fixed_chains, variable_chains, num_seqs=20, temp=temperature)
187
 
188
  # Get all sequences and the best one
 
202
 
203
  # Format status with best sequence
204
  status_message = (
205
+ f"Design Complete! {num_designs} designs generated.\n\n"
206
+ f"Lead Candidate (Best Score: {best_score:.4f}):\n"
207
  f"{best_sequence}\n\n"
208
+
209
  )
210
 
211
  return all_sequences, evolution_plot, status_message
212
  except Exception as e:
213
+ return "", None, f"Error in Part 1: {str(e)}"
214
 
215
+ def run_part2(pdb_id, sequence):
216
+ """
217
+ 1. Folds the input sequence using ESM Atlas (API).
218
+ 2. Aligns the folded structure to the target PDB (polish_design).
219
+ """
220
  try:
221
+ # --- 1. Validate Inputs ---
222
+ if not sequence or not sequence.strip():
223
+ return None, "Error: Please enter a protein sequence."
224
 
 
225
  if not pdb_id or not pdb_id.strip():
226
+ return None, "Error: PDB ID is required."
227
+
228
+ print(f"Starting Pipeline for {pdb_id}...")
229
+ print(f" - Sequence Length: {len(sequence)} residues")
230
+
231
+ # --- 2. Fold Sequence (Automated) ---
232
+ # Calls the script in scripts/foldprotein.py
233
+ pdb_content = fold_protein_sequence(sequence)
234
 
235
+ if not pdb_content:
236
+ return None, "Error: Protein folding failed. The API might be down or the sequence is invalid."
237
+
238
+ # Save the raw folded structure to a temp file
239
+ raw_fold_path = f"temp_fold_{pdb_id}_{int(time.time())}.pdb"
240
+ with open(raw_fold_path, "w") as f:
241
+ f.write(pdb_content)
242
+
243
+ if not os.path.exists(raw_fold_path):
244
+ return None, f"Error: Could not save folded PDB to {raw_fold_path}"
245
+
246
+ # --- 3. Align & Polish (Existing Logic) ---
247
+ print(f"Aligning folded structure to target {pdb_id}...")
248
 
249
+ # Pass the generated file path to the existing polish_design function
250
+ final_pdb_path, global_rmsd, core_rmsd, high_conf_rmsd = polish_design(pdb_id, raw_fold_path)
 
251
 
252
+ # --- 4. Validate Alignment Output ---
253
  if not final_pdb_path or not os.path.exists(final_pdb_path):
254
+ return None, f"Error: Alignment failed - output file not created: {final_pdb_path}"
255
 
256
  if high_conf_rmsd is None:
257
+ return None, "Error: Alignment failed - RMSD calculation returned None"
258
 
259
+ print(f"Success: Global RMSD={global_rmsd:.3f}A | Core RMSD={core_rmsd:.3f}A")
260
 
261
+ # --- 5. Generate Report ---
262
  report = process_results(pdb_id, final_pdb_path, global_rmsd, high_conf_rmsd)
263
 
264
+ # Clean up the raw unaligned fold to save space
265
+ os.remove(raw_fold_path)
266
+
267
  return final_pdb_path, report
268
+
 
 
 
 
 
 
 
269
  except Exception as e:
270
+ error_msg = f"Unexpected Error: {str(e)}"
271
  print(error_msg)
272
  import traceback
273
  traceback.print_exc()
 
314
  # Header
315
  gr.HTML("""
316
  <div id='biohub-header'>
317
+ <h1 style='color: white; margin: 0;'>BroteinShake</h1>
318
  </div>
319
  """)
320
 
 
324
  gr.Markdown("Enter a PDB ID to 'repaint' its binder interface using ProteinMPNN.")
325
 
326
  pdb_input = gr.Textbox(label="Target PDB ID", placeholder="e.g., 3kas", value="")
327
+ load_pdb_btn = gr.Button("Load PDB", variant="secondary")
328
+ pdb_status = gr.Markdown("Enter a PDB ID and click 'Load PDB' to begin")
329
 
330
  with gr.Column():
331
+ gr.Markdown("### Design Parameters")
332
 
333
  # Temperature (T) is the most critical knob for sequence recovery
334
  sampling_temp = gr.Slider(
 
344
  info="Identify which chains ProteinMPNN should modify (will populate after loading PDB)"
345
  )
346
 
347
+ chain_warning = gr.Markdown("Select at least one chain to enable generation", visible=True)
348
 
349
  # Hidden state to track if we've successfully parsed the PDB
350
  pdb_state = gr.State()
 
355
  v_chains = gr.Textbox(label="Variable Chains (Key)", value="B")
356
 
357
  # Generate button (initially disabled)
358
+ gen_btn = gr.Button("Generate Optimized Sequences", variant="primary", interactive=False)
359
 
360
  # Load PDB and extract chains when button is clicked
361
  load_pdb_btn.click(
 
390
 
391
  # TAB 2: STRUCTURAL VALIDATION
392
  with gr.Tab("2. Structural Validation"):
393
+ gr.Markdown("### Final Structure Preview")
394
 
395
  # Updated REPS for local dev visibility
396
  REPS = [
 
399
  "style": "cartoon",
400
  "color": "spectrum",
401
  "opacity": 1.0
 
402
  }
403
  ]
404
 
405
+ # 3D Viewer component
406
  protein_view = Molecule3D(label="3D Structure Viewer (Refined Shuttle)", reps=REPS, elem_id="molecule-viewer")
407
 
408
  with gr.Row():
409
+ # REPLACEMENT: Text Area instead of File Upload
410
+ sequence_input = gr.Textbox(
411
+ label="Paste Protein Sequence",
412
+ placeholder="Paste the sequence generated in Tab 1 here (e.g., MKTII...)",
413
+ lines=4,
414
+ max_lines=8
415
+ )
416
  refined_download = gr.File(label="Download Aligned Lead (.pdb)")
417
 
418
+ validate_btn = gr.Button("Run Structural Alignment", variant="primary")
419
  status2 = gr.Textbox(label="Validation Report", interactive=False, lines=5)
420
 
421
+ # Wrapper function now accepts 'sequence' instead of 'file'
422
+ def run_validation_with_view(pdb_id, sequence):
423
  try:
424
+ # Call the updated run_part2 logic (API Fold -> Align)
425
+ final_pdb, report = run_part2(pdb_id, sequence)
426
 
427
  if final_pdb is not None and os.path.exists(final_pdb):
428
+ # Verify we're using the refined shuttle
429
  if "Refined_Shuttle.pdb" in final_pdb or os.path.basename(final_pdb) == "Refined_Shuttle.pdb":
430
+ print(f"Visualizing refined shuttle: {final_pdb}")
431
  else:
432
+ print(f"Warning: Expected Refined_Shuttle.pdb but got: {final_pdb}")
433
 
434
+ # Return path for download, report text, and path for 3D viewer
435
  return final_pdb, report, final_pdb
436
+
437
  except Exception as e:
438
+ error_msg = f"Error generating 3D view: {str(e)}"
439
  print(error_msg)
440
  import traceback
441
  traceback.print_exc()
442
  return None, error_msg, None
443
 
444
+ # Updated inputs to include sequence_input
445
  validate_btn.click(
446
  run_validation_with_view,
447
+ inputs=[pdb_input, sequence_input],
448
  outputs=[refined_download, status2, protein_view]
449
  )
 
450
  # Launch the app
451
  if __name__ == "__main__":
452
  # Docker deployment for HuggingFace Spaces