File size: 6,240 Bytes
cc7c981 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# Written by Mohd Ibrahim
# Technical University of Munich
# Email: ibrahim.mohd@tum.de
import numpy as np
import MDAnalysis as mda
import argparse
import warnings
# Suppress every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Fixed seed: get_ace_pos draws from np.random, so a given input file always
# yields the same cap orientation.
np.random.seed(42)

# Command-line interface: one input structure, one output structure.
parser = argparse.ArgumentParser(
    description=(
        "Add capping groups ACE and NME to protein termini. "
        "Remove hydrogens before using this script"
    ),
)
parser.add_argument(
    '-i',
    dest='in_file',
    type=str,
    default='protein_noh.pdb',
    help='pdb file',
)
parser.add_argument(
    '-o',
    dest='out_file',
    type=str,
    default='protein_noh_cap.pdb',
    help='output file',
)
args = parser.parse_args()

# Module-level names read by the main processing section below.
in_file, out_file = args.in_file, args.out_file
def create_universe(n_atoms, name, resname, positions, resids, segid):
    """Build a small standalone Universe holding the atoms of one cap group.

    Every atom is placed in its own residue and every residue in its own
    segment (``n_residues == n_segments == n_atoms``); the residues are told
    apart only by the ``resids`` supplied by the caller.

    Parameters
    ----------
    n_atoms : int
        Number of atoms to create.
    name : sequence of str
        Atom names, one per atom.
    resname : sequence of str
        Residue names, one per atom/residue.
    positions : array-like
        Coordinates, one row per atom.
    resids : array-like
        Residue ids, one per atom/residue.
    segid : str
        Identifier written both as segid and as chainID for every atom.

    Returns
    -------
    MDAnalysis.Universe
        The newly created universe.
    """
    # Identity mappings: atom i -> residue i -> segment i.
    atom_to_residue = np.arange(n_atoms)
    residue_to_segment = np.arange(n_atoms)

    u_new = mda.Universe.empty(
        n_atoms=n_atoms,
        n_residues=n_atoms,
        atom_resindex=atom_to_residue,
        residue_segindex=residue_to_segment,
        n_segments=n_atoms,
        trajectory=True,  # allocate coordinates so positions can be assigned
    )

    for attr_name, values in (
        ('name', name),
        ('resid', resids),
        ('resname', resname),
    ):
        u_new.add_TopologyAttr(attr_name, values)

    u_new.atoms.positions = positions

    # The same identifier is used for the segment id and the chain id.
    for attr_name in ('segid', 'chainID'):
        u_new.add_TopologyAttr(attr_name, n_atoms * [segid])

    return u_new
def get_nme_pos(end_residue):
    """Return positions for the N and C atoms of a C-terminal NME cap.

    Parameters
    ----------
    end_residue : atom group
        Atoms of the C-terminal residue; must expose ``names`` and
        ``positions`` and contain an atom named ``C`` plus either ``OXT`` or
        both ``O`` and ``CA``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(N_position, C_position)``.

    Notes
    -----
    * With an ``OXT`` atom present, the cap nitrogen takes the OXT coordinates
      and the cap carbon is placed 1.36 further along the C -> OXT direction.
    * Otherwise the nitrogen is placed 1.36 from ``C`` along the direction
      pointing from the O/CA midpoint to ``C``, and the cap carbon another
      1.36 along the same direction.

    The 1.36 step is in the coordinate units of ``positions`` (presumably
    Angstrom -- confirm against the input reader).
    """
    atom_names = end_residue.names

    def coords_of(atom_name):
        # Coordinates of the first atom carrying this name.
        return end_residue.positions[np.where(atom_names == atom_name)[0][0]]

    def unit(direction):
        # Scale the vector to length one.
        return direction / np.sqrt(sum(direction**2))

    if np.any(atom_names == "OXT"):
        # Re-use the terminal oxygen site for the amide nitrogen.
        nitrogen = coords_of("OXT")
        carbonyl = coords_of("C")
        outward = unit(nitrogen - carbonyl)
        return nitrogen, nitrogen + outward * 1.36

    # No OXT: extend away from the midpoint of the two existing substituents
    # of the carbonyl carbon (O and CA).
    oxygen = coords_of("O")
    alpha_carbon = coords_of("CA")
    centre = (oxygen + alpha_carbon) / 2
    carbonyl = coords_of("C")
    outward = unit(carbonyl - centre)
    nitrogen = carbonyl + 1.36 * outward
    return nitrogen, nitrogen + 1.36 * outward
def get_ace_pos(end_residue):
    """Return positions for the C, CH3 and O atoms of an N-terminal ACE cap.

    The cap carbonyl carbon (C1) is placed 1.36 beyond the backbone ``N``
    along the CA -> N direction. The methyl carbon (C2) and the oxygen are
    then placed 1.36 from C1 in a trigonal-planar arrangement: both make an
    angle of 120 degrees with the C1 -> N bond and with each other. The
    orientation of that plane around the bond axis is chosen at random from
    ``np.random`` (three draws per call).

    Parameters
    ----------
    end_residue : atom group
        Atoms of the N-terminal residue; must expose ``names`` and
        ``positions`` and contain atoms named ``CA`` and ``N``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(C1_position, C2_position, O_position)`` as float64 arrays.

    Notes
    -----
    The random vector is projected onto the plane perpendicular to the bond
    axis before it is used. Without that projection the in-plane offset is
    scaled by the sine of the angle between the random vector and the axis,
    so the CH3-C-O angle falls below 120 degrees and the two atoms nearly
    coincide when the random vector happens to lie close to the axis.

    The 1.36 step is in the coordinate units of ``positions`` (presumably
    Angstrom -- confirm against the input reader).
    """
    bond_length = 1.36

    names = end_residue.names
    positions = end_residue.positions
    ca_position = np.asarray(positions[np.where(names == "CA")[0][0]], dtype=float)
    n_position = np.asarray(positions[np.where(names == "N")[0][0]], dtype=float)

    # Unit vector along CA -> N; the cap carbon continues in this direction.
    axis = n_position - ca_position
    axis = axis / np.linalg.norm(axis)
    C1_position = n_position + bond_length * axis

    # Three draws, exactly as before, so the global random stream advances
    # by the same amount per call.
    orientation = np.array([2 * np.random.rand() - 1,
                            2 * np.random.rand() - 1,
                            2 * np.random.rand() - 1])

    # Keep only the component perpendicular to the bond axis.
    normal = orientation - np.dot(orientation, axis) * axis
    if np.linalg.norm(normal) < 1e-6:
        # Random vector (almost) parallel to the axis: fall back to the
        # coordinate axis that is least aligned with the bond.
        fallback = np.zeros(3)
        fallback[np.argmin(np.abs(axis))] = 1.0
        normal = fallback - np.dot(fallback, axis) * axis
    normal = normal / np.linalg.norm(normal)

    # Unit vector lying in the plane of the cap, perpendicular to the axis.
    side = np.cross(axis, normal)

    # Directions at +/-60 degrees from the axis, i.e. 120 degrees from C1 -> N.
    half_sqrt3 = np.sqrt(3) / 2
    C2_position = C1_position + bond_length * (0.5 * axis + half_sqrt3 * side)
    O_position = C1_position + bond_length * (0.5 * axis - half_sqrt3 * side)
    return C1_position, C2_position, O_position
# ----------- Main processing -----------
# For every segment of the input structure: build an ACE cap in front of the
# first residue and an NME cap after the last residue, drop the terminal OXT
# atom if there is one, and renumber residues consecutively across segments.
u = mda.Universe(in_file)
res_start = 0  # last residue number assigned so far (continues across chains)
segment_universes = []
for seg in u.segments:
    # Work on the segment's own atoms and residues directly instead of
    # re-selecting them through "segid ... and resid ..." strings, which
    # would also match any other residue sharing that resid or any other
    # segment sharing that segid.
    chain = seg.atoms
    first_residue = chain.residues[0]
    last_residue = chain.residues[-1]

    # ACE
    end_residue = first_residue.atoms
    c1_pos, c2_pos, o_pos = get_ace_pos(end_residue)
    # keep original mapping (C, CH3, O)
    ace_names = ["C", "CH3", "O"]
    ace_positions = [c1_pos, c2_pos, o_pos]
    ace_universe = create_universe(
        n_atoms=len(ace_positions),
        name=ace_names,
        resname=len(ace_names) * ["ACE"],
        positions=ace_positions,
        # Placeholder resid; the final numbering is assigned further down.
        resids=first_residue.resid * np.ones(len(ace_names)),
        segid=seg.segid,
    )
    # Reorder rows only: CH3, C, O
    ace_universe = mda.Merge(
        ace_universe.atoms.select_atoms("name CH3"),
        ace_universe.atoms.select_atoms("name C"),
        ace_universe.atoms.select_atoms("name O"),
    )

    # NME
    end_residue = last_residue.atoms
    nme_positions = get_nme_pos(end_residue)
    nme_names = ["N", "C"]
    nme_universe = create_universe(
        n_atoms=len(nme_names),
        name=nme_names,
        resname=len(nme_names) * ["NME"],
        positions=nme_positions,
        # Placeholder resid; the final numbering is assigned further down.
        resids=(last_residue.resid + 2) * np.ones(len(nme_names)),
        segid=seg.segid,
    )

    # Remove OXT if present (its site is re-used for the NME nitrogen)
    oxt_hits = np.where(end_residue.names == "OXT")[0]
    if oxt_hits.size:
        oxt = end_residue[oxt_hits[0]]
        protein = chain.select_atoms(f"not index {oxt.index}")
    else:
        protein = chain

    # Merge ACE, protein, NME
    u_all = mda.Merge(ace_universe.atoms, protein, nme_universe.atoms)

    # Renumber residues. The cap universes hold one residue entry per atom,
    # so take the entry counts from them rather than hard-coding 3 and 2.
    n_ace = ace_universe.residues.n_residues
    n_pro = protein.residues.n_residues
    n_nme = nme_universe.residues.n_residues
    ace_resid = res_start + 1
    resids_pro = np.arange(ace_resid + 1, ace_resid + 1 + n_pro)
    nme_resid = ace_resid + n_pro + 1
    u_all.atoms.residues.resids = np.concatenate(
        [[ace_resid] * n_ace, resids_pro, [nme_resid] * n_nme]
    )
    res_start = nme_resid
    segment_universes.append(u_all)

# Join and write output
all_uni = mda.Merge(*(capped.atoms for capped in segment_universes))
all_uni.atoms.write(out_file)
|