# AmberFlow / add_caps.py
# hemantn's picture
# Deploy AmberFlow to Hugging Face Spaces
# cc7c981
# Written by Mohd Ibrahim
# Technical University of Munich
# Email: ibrahim.mohd@tum.de
import numpy as np
import MDAnalysis as mda
import argparse
import warnings
# Suppress every warning category for the lifetime of the script.
warnings.filterwarnings("ignore")
# Seed NumPy's global generator so the random draws made later in the
# script (the ACE orientation vector) are the same on every run.
np.random.seed(42)

# Command-line interface: one input PDB path and one output PDB path.
parser = argparse.ArgumentParser(
    description=(
        "Add capping groups ACE and NME to protein termini. "
        "Remove hydrogens before using this script"
    )
)
parser.add_argument(
    "-i",
    dest="in_file",
    type=str,
    default="protein_noh.pdb",
    help="pdb file",
)
parser.add_argument(
    "-o",
    dest="out_file",
    type=str,
    default="protein_noh_cap.pdb",
    help="output file",
)
args = parser.parse_args()
in_file, out_file = args.in_file, args.out_file
def create_universe(n_atoms, name, resname, positions, resids, segid):
    """Build a small in-memory Universe holding ``n_atoms`` new atoms.

    The topology is laid out with one residue and one segment per atom
    (atom ``i`` -> residue ``i`` -> segment ``i``).

    Parameters
    ----------
    n_atoms : int
        Number of atoms (and therefore residues and segments) to create.
    name : sequence of str
        Atom names, one per atom.
    resname : sequence of str
        Residue names, one per residue.
    positions : sequence of array-like
        Coordinates, one row per atom.
    resids : sequence
        Residue numbers, one per residue.
    segid : str
        Identifier stored as both ``segid`` and ``chainID`` for every entry.

    Returns
    -------
    MDAnalysis.Universe
        The populated universe.
    """
    u_new = mda.Universe.empty(
        n_atoms=n_atoms,
        n_residues=n_atoms,
        n_segments=n_atoms,
        atom_resindex=np.arange(n_atoms),
        residue_segindex=np.arange(n_atoms),
        # A trajectory is requested so that coordinates can be assigned below.
        trajectory=True,
    )

    # Per-atom / per-residue labels, attached in the same order as before.
    for attr_name, attr_values in (
        ("name", name),
        ("resid", resids),
        ("resname", resname),
    ):
        u_new.add_TopologyAttr(attr_name, attr_values)

    u_new.atoms.positions = positions

    # The same identifier is replicated for the segment id and the chain id;
    # a fresh list is built for each attribute.
    for attr_name in ("segid", "chainID"):
        u_new.add_TopologyAttr(attr_name, n_atoms * [segid])

    return u_new
def get_nme_pos(end_residue):
    """Compute coordinates for the N and C atoms of a C-terminal NME cap.

    Parameters
    ----------
    end_residue
        Object exposing ``names`` (array of atom names) and ``positions``
        (array with one coordinate row per atom) for the last residue.

    Returns
    -------
    tuple
        ``(N_position, C_position)``.

    Notes
    -----
    * With an ``OXT`` atom present, the cap nitrogen takes the OXT
      coordinates and the cap carbon sits 1.36 coordinate units further
      along the C -> OXT direction.
    * Without ``OXT``, the direction is taken from the midpoint of O and CA
      towards C; the nitrogen is placed 1.36 units beyond C along it and the
      carbon another 1.36 units beyond the nitrogen.

    A missing atom name raises ``IndexError`` from the lookup.
    """
    def coords_of(atom_name):
        # Coordinates of the first atom carrying this name.
        row = np.where(end_residue.names == atom_name)[0][0]
        return end_residue.positions[row]

    def unit(vec):
        # Scale to length one (same sum-of-squares formula as elsewhere here).
        return vec / np.sqrt(sum(vec**2))

    if "OXT" not in end_residue.names:
        oxygen = coords_of("O")
        alpha_carbon = coords_of("CA")
        centre = (oxygen + alpha_carbon) / 2
        outward = unit(coords_of("C") - centre)
        N_position = coords_of("C") + 1.36 * outward
        C_position = N_position + 1.36 * outward
        return N_position, C_position

    N_position = coords_of("OXT")
    outward = unit(N_position - coords_of("C"))
    C_position = N_position + outward * 1.36
    return N_position, C_position
def get_ace_pos(end_residue):
    """Compute coordinates for the three heavy atoms of an N-terminal ACE cap.

    Parameters
    ----------
    end_residue
        Object exposing ``names`` (array of atom names) and ``positions``
        (array with one coordinate row per atom) for the first residue.

    Returns
    -------
    tuple
        ``(C1_position, C2_position, O_position)``: the cap atom placed next
        to N, followed by the two atoms attached to it.

    Notes
    -----
    C1 is placed 1.36 coordinate units beyond N along the CA -> N direction.
    The other two atoms are built on either side of the CA-C1 line using a
    randomly drawn axis (three draws from NumPy's global generator), then
    each is moved to a distance of 1.36 from C1.
    """
    def unit(vec):
        # Scale to length one.
        return vec / np.sqrt(sum(vec**2))

    atom_names = end_residue.names
    alpha_carbon = end_residue.positions[np.where(atom_names == "CA")[0][0]]
    nitrogen = end_residue.positions[np.where(atom_names == "N")[0][0]]

    C1_position = nitrogen + 1.36 * unit(nitrogen - alpha_carbon)

    # Random axis with components drawn uniformly from [-1, 1), normalised.
    axis = unit(np.array([2 * np.random.rand() - 1 for _ in range(3)]))

    # Both candidates share the point half a CA-C1 vector behind C1 and
    # differ by +/- a sideways offset proportional to axis x (CA - C1).
    arm = alpha_carbon - C1_position
    behind = C1_position - arm / 2
    sideways = np.sqrt(3) * np.cross(axis, arm) / 2

    # Rescale each candidate so that it lies exactly 1.36 from C1.
    C2_position = C1_position + 1.36 * unit((behind + sideways) - C1_position)
    O_position = C1_position + 1.36 * unit((behind - sideways) - C1_position)
    return C1_position, C2_position, O_position
# ----------- Main processing -----------
# For every segment of the input structure: build an ACE cap in front of the
# first residue and an NME cap behind the last one, drop the terminal OXT atom
# if there is one, and renumber residues consecutively. The capped segments
# are then merged and written to ``out_file``.
u = mda.Universe(in_file)
res_start = 0  # highest residue number handed out so far, across segments
segment_universes = []
for seg in u.segments:
    # Take the atoms directly from the segment object. A "segid ..." selection
    # string would also match atoms of any other segment carrying the same id.
    chain = seg.atoms
    # Pick the terminal residues by their place in the chain rather than by
    # "resid N", so other residues that share the same number are not mixed in.
    n_term = chain.residues[0].atoms
    c_term = chain.residues[-1].atoms

    # ACE
    c1_pos, c2_pos, o_pos = get_ace_pos(n_term)
    # keep original mapping (C, CH3, O)
    ace_names = ["C", "CH3", "O"]
    ace_positions = [c1_pos, c2_pos, o_pos]
    ace_universe = create_universe(
        n_atoms=len(ace_positions),
        name=ace_names,
        resname=len(ace_names) * ["ACE"],
        positions=ace_positions,
        # Placeholder numbering only; final resids are assigned after merging.
        resids=len(ace_names) * [chain.residues.resids[0]],
        segid=seg.segid,
    )
    # >>> Reorder rows only: CH3, C, O <<<
    ace_universe = mda.Merge(
        ace_universe.atoms.select_atoms("name CH3"),
        ace_universe.atoms.select_atoms("name C"),
        ace_universe.atoms.select_atoms("name O"),
    )

    # NME
    nme_names = ["N", "C"]
    nme_positions = get_nme_pos(c_term)
    nme_universe = create_universe(
        n_atoms=len(nme_names),
        name=nme_names,
        resname=len(nme_names) * ["NME"],
        positions=nme_positions,
        # Placeholder numbering only; final resids are assigned after merging.
        resids=len(nme_names) * [chain.residues.resids[-1] + 2],
        segid=seg.segid,
    )

    # Remove OXT if present (get_nme_pos reuses its coordinates for the cap N)
    oxt_rows = np.where(c_term.names == "OXT")[0]
    if oxt_rows.size:
        oxt_index = c_term[oxt_rows[0]].index
        protein = chain[chain.indices != oxt_index]
    else:
        protein = chain

    # Merge ACE, protein, NME
    u_all = mda.Merge(ace_universe.atoms, protein, nme_universe.atoms)

    # Renumber residues. create_universe gives every cap atom its own residue,
    # so the cap resid is repeated once per cap atom to cover those entries.
    ace_resid = res_start + 1
    resids_ace = len(ace_names) * [ace_resid]
    resids_pro = np.arange(ace_resid + 1,
                           ace_resid + 1 + protein.residues.n_residues)
    resids_nme = len(nme_names) * [resids_pro[-1] + 1]
    u_all.atoms.residues.resids = np.concatenate(
        [resids_ace, resids_pro, resids_nme]
    )
    # The next segment continues numbering after this segment's NME.
    res_start = u_all.atoms.residues.resids[-1]
    segment_universes.append(u_all)

# Join and write output
all_uni = mda.Merge(*(capped.atoms for capped in segment_universes))
all_uni.atoms.write(out_file)