42Cummer committed on
Commit
5e03e0c
·
verified ·
1 Parent(s): 14bee30

Upload refine.py

Browse files
Files changed (1) hide show
  1. scripts/refine.py +26 -18
scripts/refine.py CHANGED
@@ -1,52 +1,60 @@
1
  import os
2
- from Bio.PDB import PDBParser, Superimposer
3
  from openmm.app import PDBFile, ForceField, Simulation, Modeller
4
  from openmm import LangevinIntegrator
5
  from openmm.unit import kelvin, picosecond
6
 
 
 
 
 
 
 
7
  def polish_design(target_pdb_id, uploaded_file_path):
8
  # 1. Setup paths
9
  target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")
 
10
 
11
- # 2. ALIGNMENT (Biopython)
12
  parser = PDBParser(QUIET=True)
13
  target_struct = parser.get_structure("target", target_path)
14
  design_struct = parser.get_structure("design", uploaded_file_path)
15
 
 
 
 
 
 
 
16
  sup = Superimposer()
17
  t_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
18
  d_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
19
-
20
  sup.set_atoms(t_atoms[:len(d_atoms)], d_atoms)
21
  sup.apply(design_struct.get_atoms())
22
  rmsd = sup.rms
23
 
24
- # 3. PHYSICS (OpenMM Modeller replaces PDBFixer)
25
- # Load the "broken" PDB
26
- pdb = PDBFile(uploaded_file_path)
27
-
28
- # Use Modeller to repair the C-terminus
29
- # Modeller.addHydrogens and addExtraParticles are usually the fix,
30
- # but we can force it to ignore the missing bond during template matching.
31
- forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
32
  modeller = Modeller(pdb.topology, pdb.positions)
 
33
 
34
- # THE FIX: This adds the missing OXT or Hydrogens that OpenMM is whining about
35
- modeller.addHydrogens(forcefield)
36
 
37
- # Setup the system using the REPAIRED modeller topology
38
- # Note: ignoreExternalBonds=True kills the LYS 155 error
39
  system = forcefield.createSystem(modeller.topology, ignoreExternalBonds=True)
 
 
40
  integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picosecond)
41
  simulation = Simulation(modeller.topology, system, integrator)
42
  simulation.context.setPositions(modeller.positions)
43
-
44
  simulation.minimizeEnergy(maxIterations=100)
45
 
46
- # 4. EXPORT
47
  output_name = "Broteinshake_Lead_Refined.pdb"
48
  with open(output_name, "w") as f:
49
  PDBFile.writeFile(simulation.topology,
50
  simulation.context.getState(getPositions=True).getPositions(), f)
51
 
52
- return output_name, rmsd
 
1
  import os
2
+ from Bio.PDB import PDBParser, Superimposer, PDBIO, Select
3
  from openmm.app import PDBFile, ForceField, Simulation, Modeller
4
  from openmm import LangevinIntegrator
5
  from openmm.unit import kelvin, picosecond
6
 
7
+ # A custom selector to strip out any 'weird' terminal atoms OpenMM hates
8
class TerminalSanitizer(Select):
    """Bio.PDB ``Select`` filter that drops a fixed set of atom names on save.

    Atoms named ``OXT``, ``H1``, ``H2`` or ``H3`` are rejected; every other
    atom is kept. The check is by atom name only, with no test of where the
    residue sits in its chain.
    """

    # Names filtered out when a structure is written through this selector.
    _REJECTED_NAMES = frozenset({"OXT", "H1", "H2", "H3"})

    def accept_atom(self, atom):
        """Return True to keep *atom*, False if its name is in the rejected set."""
        is_rejected = atom.get_name() in self._REJECTED_NAMES
        return not is_rejected
12
+
13
  def polish_design(target_pdb_id, uploaded_file_path):
14
  # 1. Setup paths
15
  target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")
16
+ sanitized_path = "sanitized_design.pdb"
17
 
18
+ # 2. SANITIZE & ALIGN (The "Shut Up" Step)
19
  parser = PDBParser(QUIET=True)
20
  target_struct = parser.get_structure("target", target_path)
21
  design_struct = parser.get_structure("design", uploaded_file_path)
22
 
23
+ # Strip terminal junk before OpenMM sees it
24
+ io = PDBIO()
25
+ io.set_structure(design_struct)
26
+ io.save(sanitized_path, TerminalSanitizer())
27
+
28
+ # Standard Alignment to hit that 0.75 Å RMSD
29
  sup = Superimposer()
30
  t_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
31
  d_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
 
32
  sup.set_atoms(t_atoms[:len(d_atoms)], d_atoms)
33
  sup.apply(design_struct.get_atoms())
34
  rmsd = sup.rms
35
 
36
+ # 3. PHYSICS (The Lobotomized Loader)
37
+ # We load the SANITIZED file, not the raw upload
38
+ pdb = PDBFile(sanitized_path)
 
 
 
 
 
39
  modeller = Modeller(pdb.topology, pdb.positions)
40
+ forcefield = ForceField('amber14-all.xml', 'amber14/tip3p.xml')
41
 
42
+ # Re-add hydrogens properly according to the forcefield
43
+ modeller.addHydrogens(forcefield, pH=7.4)
44
 
45
+ # Ignore bonds and force system creation
 
46
  system = forcefield.createSystem(modeller.topology, ignoreExternalBonds=True)
47
+
48
+ # 4. MINIMIZE
49
  integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picosecond)
50
  simulation = Simulation(modeller.topology, system, integrator)
51
  simulation.context.setPositions(modeller.positions)
 
52
  simulation.minimizeEnergy(maxIterations=100)
53
 
54
+ # 5. EXPORT FINAL LEAD
55
  output_name = "Broteinshake_Lead_Refined.pdb"
56
  with open(output_name, "w") as f:
57
  PDBFile.writeFile(simulation.topology,
58
  simulation.context.getState(getPositions=True).getPositions(), f)
59
 
60
+ return output_name, rmsd