import gradio as gr # type: ignore import os from gradio_molecule3d import Molecule3D #type: ignore from Bio.PDB import PDBParser #type: ignore import time # Import your custom modules from the /scripts folder from scripts.download import download_and_clean_pdb from scripts.generator import run_broteinshake_generator from scripts.refine import polish_design, process_results from scripts.visualize import create_design_plot from scripts.foldprotein import fold_protein_sequence # --- HELPER FUNCTIONS --- def get_pdb_chains(pdb_file): """Extracts unique chain IDs from a PDB file.""" if not pdb_file or not os.path.exists(pdb_file): return [] try: parser = PDBParser(QUIET=True) structure = parser.get_structure("temp", pdb_file) chains = [chain.id for model in structure for chain in model] return sorted(list(set(chains))) except Exception as e: print(f"Error extracting chains: {e}") return [] def load_pdb_and_extract_chains(pdb_id): """Download PDB and extract chains for selection.""" if not pdb_id or not pdb_id.strip(): return gr.update(choices=[], value=[]), "⚠️ Please enter a PDB ID", gr.update(interactive=False), [] try: # Download the PDB pdb_path = download_and_clean_pdb(pdb_id.strip(), data_dir="data") # Extract chains chains = get_pdb_chains(pdb_path) if not chains: return gr.update(choices=[], value=[]), f"⚠️ No chains found in {pdb_id.upper()}", gr.update(interactive=False), [] # Single-chain proteins are supported (will use different ProteinMPNN command) # For single-chain, the only chain will be automatically selected for redesign if len(chains) == 1: status_msg = f"Loaded {pdb_id.upper()}: Single-chain protein - will redesign chain {chains[0]}" # Auto-select the chain for single-chain proteins return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=True), chains status_msg = f"Loaded {pdb_id.upper()}: Found {len(chains)} chain(s) - {', '.join(chains)}" # Initially disable button - user must select at least one chain return gr.update(choices=chains, value=chains), status_msg, gr.update(interactive=False), chains except Exception as e: error_msg = f"Error loading {pdb_id.upper()}: {str(e)}" print(error_msg) return gr.update(choices=[], value=[]), error_msg, gr.update(interactive=False), [] def validate_chain_selection(selected_chains, available_chains_state): """Validate that at least one chain is selected and at least one remains fixed (for multi-chain).""" if not selected_chains or len(selected_chains) == 0: warning = "Please select at least one chain to redesign" return gr.update(interactive=False), warning, available_chains_state # Get available chains from state available_chains = available_chains_state if available_chains_state else [] # For single-chain proteins, allow selecting the only chain if len(available_chains) == 1: warning = f"Single-chain protein: Will redesign chain {available_chains[0]}" return gr.update(interactive=True), warning, available_chains_state # For multi-chain: Check if all chains are selected (would leave no fixed chains) if available_chains and len(selected_chains) >= len(available_chains): warning = f"Cannot select all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}" return gr.update(interactive=False), warning, available_chains_state warning = f"{len(selected_chains)} chain(s) selected for redesign: {', '.join(selected_chains)}" return gr.update(interactive=True), warning, available_chains_state def get_all_sequences(fasta_file: str) -> str: """Get all designed sequences from FASTA file.""" sequences = [] with open(fasta_file, 'r') as f: lines = [line.strip() for line in f.readlines() if line.strip()] for i in range(0, len(lines), 2): if i + 1 >= len(lines): break header = lines[i] sequence = lines[i+1] # Skip the original native sequence (first entry) if "sample" not in header: continue sequences.append(f"{header}\n{sequence}") if sequences: return "\n\n".join(sequences) else: raise ValueError(f"No valid designs found in {fasta_file}") def extract_best_sequence(fasta_file: str) -> str: """Extract the best sequence (lowest score) from FASTA file.""" best_score = float('inf') best_header = "" best_seq = "" with open(fasta_file, 'r') as f: lines = [line.strip() for line in f.readlines() if line.strip()] for i in range(0, len(lines), 2): if i + 1 >= len(lines): break header = lines[i] sequence = lines[i+1] # Skip the original native sequence (first entry) if "sample" not in header: continue # Parse the score: "score=0.7647" try: score_part = [p for p in header.split(',') if 'score' in p][0] score = float(score_part.split('=')[1]) if score < best_score: best_score = score best_header = header best_seq = sequence except (IndexError, ValueError): continue if best_seq: return f"{best_header}\n{best_seq}" else: raise ValueError(f"No valid designs found in {fasta_file}") def run_part1(pdb_id, fixed_chains, variable_chains, temperature=0.1, selected_chains=None): """Downloads the PDB and runs ProteinMPNN design.""" try: # Step 1: Secure the template pdb_path = download_and_clean_pdb(pdb_id, data_dir="data") # Handle chain selection logic # If chains are selected via checkbox, use those as variable chains # Otherwise, use the text input (backward compatibility) all_chains = get_pdb_chains(pdb_path) # Check if single-chain protein is_single_chain = len(all_chains) == 1 if selected_chains and len(selected_chains) > 0: # Selected chains = variable chains, rest = fixed variable_chains = "".join(selected_chains) fixed_chains = "".join([c for c in all_chains if c not in selected_chains]) # For single-chain: no fixed chains (will use different ProteinMPNN command) # For multi-chain: Validate must have at least one fixed chain if not is_single_chain and (not fixed_chains or len(fixed_chains) == 0): raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed. Selected: {', '.join(selected_chains)}, Available: {', '.join(all_chains)}") if is_single_chain: print(f"Single-chain mode: Redesigning chain {variable_chains}") else: print(f"Using chain selector: Fixed={fixed_chains}, Variable={variable_chains}") else: # If no chains selected, use text inputs (default behavior) # For single-chain, if variable_chains is empty, use the only chain if is_single_chain and not variable_chains: variable_chains = all_chains[0] fixed_chains = "" # For multi-chain: Validate text inputs don't select all chains elif not is_single_chain and fixed_chains and variable_chains: all_selected = set(fixed_chains + variable_chains) if len(all_selected) >= len(all_chains): raise ValueError(f"Cannot redesign all chains - at least one chain must remain fixed.") print(f"Using text inputs: Fixed={fixed_chains}, Variable={variable_chains}") # Step 2: Generate Optimized Sequences # This creates the .fa files you need for the ESM Atlas print(f"Temperature: {temperature}") print(f"Parameters: Fixed chains={fixed_chains}, Variable chains={variable_chains}, Temp={temperature}") run_broteinshake_generator(pdb_path, fixed_chains, variable_chains, num_seqs=20, temp=temperature) # Get all sequences and the best one fa_file = os.path.join("generated", pdb_id.lower(), "seqs", f"{pdb_id.lower()}_clones.fa") all_sequences = get_all_sequences(fa_file) best_sequence = extract_best_sequence(fa_file) # Generate the dashboard plot evolution_plot = create_design_plot(fa_file) # Parse score from header for status message score_part = [p for p in best_sequence.split('\n')[0].split(',') if 'score' in p][0] best_score = float(score_part.split('=')[1]) # Count number of designs num_designs = len([s for s in all_sequences.split('\n\n') if s.strip()]) # Format status with best sequence status_message = ( f"Design Complete! {num_designs} designs generated.\n\n" f"Lead Candidate (Best Score: {best_score:.4f}):\n" f"{best_sequence}\n\n" ) return all_sequences, evolution_plot, status_message except Exception as e: return "", None, f"Error in Part 1: {str(e)}" def run_part2(pdb_id, sequence): """ 1. Folds the input sequence using ESM Atlas (API). 2. Aligns the folded structure to the target PDB (polish_design). """ try: # --- 1. Validate Inputs --- if not sequence or not sequence.strip(): return None, "Error: Please enter a protein sequence." if not pdb_id or not pdb_id.strip(): return None, "Error: PDB ID is required." print(f"Starting Pipeline for {pdb_id}...") print(f" - Sequence Length: {len(sequence)} residues") # --- 2. Fold Sequence (Automated) --- # Calls the script in scripts/foldprotein.py pdb_content = fold_protein_sequence(sequence) if not pdb_content: return None, "Error: Protein folding failed. The API might be down or the sequence is invalid." # Save the raw folded structure to a temp file raw_fold_path = f"temp_fold_{pdb_id}_{int(time.time())}.pdb" with open(raw_fold_path, "w") as f: f.write(pdb_content) if not os.path.exists(raw_fold_path): return None, f"Error: Could not save folded PDB to {raw_fold_path}" # --- 3. Align & Polish (Existing Logic) --- print(f"Aligning folded structure to target {pdb_id}...") # Pass the generated file path to the existing polish_design function final_pdb_path, global_rmsd, core_rmsd, high_conf_rmsd = polish_design(pdb_id, raw_fold_path) # --- 4. Validate Alignment Output --- if not final_pdb_path or not os.path.exists(final_pdb_path): return None, f"Error: Alignment failed - output file not created: {final_pdb_path}" if high_conf_rmsd is None: return None, "Error: Alignment failed - RMSD calculation returned None" print(f"Success: Global RMSD={global_rmsd:.3f}A | Core RMSD={core_rmsd:.3f}A") # --- 5. Generate Report --- report = process_results(pdb_id, final_pdb_path, global_rmsd, high_conf_rmsd) # Clean up the raw unaligned fold to save space os.remove(raw_fold_path) return final_pdb_path, report except Exception as e: error_msg = f"Unexpected Error: {str(e)}" print(error_msg) import traceback traceback.print_exc() return None, error_msg # --- GRADIO INTERFACE --- # 1. Simple Dark Theme with Blue and Emerald Accents dark_biohub = gr.themes.Base( primary_hue="blue", secondary_hue="emerald", neutral_hue="slate", ).set( body_background_fill="#0f172a", block_background_fill="#1e293b", body_text_color="#f1f5f9", button_primary_background_fill="#10b981", button_primary_text_color="#ffffff", ) # 2. Targeted CSS for the 3D Viewer & Header biohub_css = """ /* Remove the footer for a clean portfolio look */ footer {display: none !important;} /* Fix the 3D viewer background to match the dark theme */ #molecule-viewer { background-color: #111827 !important; border: 1px solid #374151 !important; border-radius: 12px; } /* Header Styling */ #biohub-header { background: linear-gradient(135deg, #064e3b 0%, #1e40af 100%); padding: 1.5rem; border-radius: 12px; border: 1px solid #10b981; margin-bottom: 1rem; } """ with gr.Blocks(theme=dark_biohub, css=biohub_css) as demo: # Header gr.HTML("""