|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
import MDAnalysis as mda |
|
|
import argparse |
|
|
import warnings |
|
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
np.random.seed(42) |
|
|
|
|
|
parser = argparse.ArgumentParser( |
|
|
description="Add capping groups ACE and NME to protein termini. " |
|
|
"Remove hydrogens before using this script") |
|
|
parser.add_argument('-i', dest='in_file', type=str, |
|
|
default='protein_noh.pdb', help='pdb file') |
|
|
parser.add_argument('-o', dest='out_file', type=str, |
|
|
default='protein_noh_cap.pdb', help='output file') |
|
|
|
|
|
args = parser.parse_args() |
|
|
in_file = args.in_file |
|
|
out_file = args.out_file |
|
|
|
|
|
|
|
|
def create_universe(n_atoms, name, resname, positions, resids, segid): |
|
|
u_new = mda.Universe.empty( |
|
|
n_atoms=n_atoms, |
|
|
n_residues=n_atoms, |
|
|
atom_resindex=np.arange(n_atoms), |
|
|
residue_segindex=np.arange(n_atoms), |
|
|
n_segments=n_atoms, |
|
|
trajectory=True |
|
|
) |
|
|
u_new.add_TopologyAttr('name', name) |
|
|
u_new.add_TopologyAttr('resid', resids) |
|
|
u_new.add_TopologyAttr('resname', resname) |
|
|
u_new.atoms.positions = positions |
|
|
u_new.add_TopologyAttr('segid', n_atoms * [segid]) |
|
|
u_new.add_TopologyAttr('chainID', n_atoms * [segid]) |
|
|
return u_new |
|
|
|
|
|
|
|
|
def get_nme_pos(end_residue): |
|
|
if "OXT" in end_residue.names: |
|
|
index = np.where(end_residue.names == "OXT")[0][0] |
|
|
N_position = end_residue.positions[index] |
|
|
index_c = np.where(end_residue.names == "C")[0][0] |
|
|
carbon_position = end_residue.positions[index_c] |
|
|
vector = N_position - carbon_position |
|
|
vector /= np.sqrt(sum(vector**2)) |
|
|
C_position = N_position + vector * 1.36 |
|
|
return N_position, C_position |
|
|
else: |
|
|
index_o = np.where(end_residue.names == "O")[0][0] |
|
|
index_ca = np.where(end_residue.names == "CA")[0][0] |
|
|
mid_point = (end_residue.positions[index_o] + |
|
|
end_residue.positions[index_ca]) / 2 |
|
|
index_c = np.where(end_residue.names == "C")[0][0] |
|
|
vector = end_residue.positions[index_c] - mid_point |
|
|
vector /= np.sqrt(sum(vector**2)) |
|
|
N_position = end_residue.positions[index_c] + 1.36 * vector |
|
|
C_position = N_position + 1.36 * vector |
|
|
return N_position, C_position |
|
|
|
|
|
|
|
|
def get_ace_pos(end_residue): |
|
|
index_ca = np.where(end_residue.names == "CA")[0][0] |
|
|
index_n = np.where(end_residue.names == "N")[0][0] |
|
|
vector = end_residue.positions[index_n] - end_residue.positions[index_ca] |
|
|
vector /= np.sqrt(sum(vector**2)) |
|
|
C1_position = end_residue.positions[index_n] + 1.36 * vector |
|
|
|
|
|
xa, ya, za = end_residue.positions[index_ca] |
|
|
xg, yg, zg = C1_position |
|
|
|
|
|
orientation = np.array([2 * np.random.rand() - 1, |
|
|
2 * np.random.rand() - 1, |
|
|
2 * np.random.rand() - 1]) |
|
|
nx, ny, nz = orientation / np.sqrt(sum(orientation**2)) |
|
|
|
|
|
x1 = xg - (xa - xg) / 2 + np.sqrt(3) * (ny * (za - zg) - nz * (ya - yg)) / 2 |
|
|
y1 = yg - (ya - yg) / 2 + np.sqrt(3) * (nz * (xa - xg) - nx * (za - zg)) / 2 |
|
|
z1 = zg - (za - zg) / 2 + np.sqrt(3) * (nx * (ya - yg) - ny * (xa - xg)) / 2 |
|
|
|
|
|
x2 = xg - (xa - xg) / 2 - np.sqrt(3) * (ny * (za - zg) - nz * (ya - yg)) / 2 |
|
|
y2 = yg - (ya - yg) / 2 - np.sqrt(3) * (nz * (xa - xg) - nx * (za - zg)) / 2 |
|
|
z2 = zg - (za - zg) / 2 - np.sqrt(3) * (nx * (ya - yg) - ny * (xa - xg)) / 2 |
|
|
|
|
|
C2_position = np.array([x1, y1, z1]) |
|
|
O_position = np.array([x2, y2, z2]) |
|
|
|
|
|
vector = C2_position - C1_position |
|
|
vector /= np.sqrt(sum(vector**2)) |
|
|
C2_position = C1_position + 1.36 * vector |
|
|
|
|
|
vector = O_position - C1_position |
|
|
vector /= np.sqrt(sum(vector**2)) |
|
|
O_position = C1_position + 1.36 * vector |
|
|
|
|
|
return C1_position, C2_position, O_position |
|
|
|
|
|
|
|
|
|
|
|
u = mda.Universe(in_file) |
|
|
res_start = 0 |
|
|
segment_universes = [] |
|
|
|
|
|
for seg in u.segments: |
|
|
chain = u.select_atoms(f"segid {seg.segid}") |
|
|
|
|
|
|
|
|
resid_c = chain.residues.resids[0] |
|
|
end_residue = u.select_atoms(f"segid {seg.segid} and resid {resid_c}") |
|
|
c1_pos, c2_pos, o_pos = get_ace_pos(end_residue) |
|
|
|
|
|
|
|
|
ace_names = ["C", "CH3", "O"] |
|
|
ace_positions = [c1_pos, c2_pos, o_pos] |
|
|
resid = chain.residues.resids[0] |
|
|
ace_universe = create_universe( |
|
|
n_atoms=len(ace_positions), |
|
|
name=ace_names, |
|
|
resname=len(ace_names) * ["ACE"], |
|
|
positions=ace_positions, |
|
|
resids=resid * np.ones(len(ace_names)), |
|
|
segid=chain.segids[0] |
|
|
) |
|
|
|
|
|
|
|
|
ace_universe = mda.Merge( |
|
|
ace_universe.atoms.select_atoms("name CH3"), |
|
|
ace_universe.atoms.select_atoms("name C"), |
|
|
ace_universe.atoms.select_atoms("name O") |
|
|
) |
|
|
|
|
|
|
|
|
resid_c = chain.residues.resids[-1] |
|
|
end_residue = u.select_atoms(f"segid {seg.segid} and resid {resid_c}") |
|
|
nme_positions = get_nme_pos(end_residue) |
|
|
nme_names = ["N", "C"] |
|
|
resid = chain.residues.resids[-1] + 2 |
|
|
nme_universe = create_universe( |
|
|
n_atoms=len(nme_names), |
|
|
name=nme_names, |
|
|
resname=len(nme_names) * ["NME"], |
|
|
positions=nme_positions, |
|
|
resids=resid * np.ones(len(nme_names)), |
|
|
segid=chain.segids[0] |
|
|
) |
|
|
|
|
|
|
|
|
if "OXT" in end_residue.names: |
|
|
index = np.where(end_residue.names == "OXT")[0][0] |
|
|
OXT = end_residue[index] |
|
|
Chain = u.select_atoms(f"segid {seg.segid} and not index {OXT.index}") |
|
|
else: |
|
|
Chain = u.select_atoms(f"segid {seg.segid}") |
|
|
|
|
|
|
|
|
u_all = mda.Merge(ace_universe.atoms, Chain, nme_universe.atoms) |
|
|
|
|
|
|
|
|
resids_ace = [res_start + 1] * 3 |
|
|
resids_pro = np.arange(resids_ace[0] + 1, |
|
|
Chain.residues.n_residues + resids_ace[0] + 1) |
|
|
resids_nme = [resids_pro[-1] + 1] * 2 |
|
|
u_all.atoms.residues.resids = np.concatenate( |
|
|
[resids_ace, resids_pro, resids_nme] |
|
|
) |
|
|
res_start = u_all.atoms.residues.resids[-1] |
|
|
segment_universes.append(u_all) |
|
|
|
|
|
|
|
|
all_uni = mda.Merge(*(seg.atoms for seg in segment_universes)) |
|
|
all_uni.atoms.write(out_file) |
|
|
|