# Written by Mohd Ibrahim
# Technical University of Munich
# Email: ibrahim.mohd@tum.de
"""Add ACE and NME capping groups to the termini of every segment of a protein.

The input structure is expected to contain heavy atoms only (remove hydrogens
first).  For each segment an ACE cap (CH3, C, O) is placed in front of the
first residue and an NME cap (N, C) after the last residue; a terminal OXT
atom, if present, is dropped.  Residues are renumbered consecutively over the
whole output, starting at 1.

Usage:
    python <script> -i protein_noh.pdb -o protein_noh_cap.pdb
"""
import argparse
import warnings

import numpy as np

# Length used for every bond created by the capping code, in the same units as
# the input coordinates.
CAP_BOND_LENGTH = 1.36


def _parse_args(argv=None):
    """Parse the command line (``argv=None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser(
        description="Add capping groups ACE and NME to protein termini. "
                    "Remove hydrogens before using this script")
    parser.add_argument('-i', dest='in_file', type=str,
                        default='protein_noh.pdb', help='pdb file')
    parser.add_argument('-o', dest='out_file', type=str,
                        default='protein_noh_cap.pdb', help='output file')
    return parser.parse_args(argv)


def create_universe(n_atoms, name, resname, positions, resids, segid):
    """Build a small stand-alone Universe holding the atoms of one cap.

    Every atom is put into its own residue and its own segment, so all
    per-residue attributes (``resid``, ``resname``, ``segid``) must be given
    with one entry per atom.

    Parameters
    ----------
    n_atoms : int
        Number of atoms to create.
    name : sequence of str
        Atom names, one per atom.
    resname : sequence of str
        Residue names, one per atom.
    positions : array-like, shape (n_atoms, 3)
        Cartesian coordinates of the atoms.
    resids : sequence of int
        Residue ids, one per atom.
    segid : str
        Segment id; also used as the chainID of every atom.

    Returns
    -------
    MDAnalysis.Universe
    """
    # Imported lazily so that ``--help`` and the pure-geometry helpers work
    # without loading MDAnalysis.
    import MDAnalysis as mda

    u_new = mda.Universe.empty(
        n_atoms=n_atoms,
        n_residues=n_atoms,
        atom_resindex=np.arange(n_atoms),
        residue_segindex=np.arange(n_atoms),
        n_segments=n_atoms,
        trajectory=True,  # required to be able to assign positions
    )
    u_new.add_TopologyAttr('name', name)
    u_new.add_TopologyAttr('resid', resids)
    u_new.add_TopologyAttr('resname', resname)
    u_new.atoms.positions = positions
    u_new.add_TopologyAttr('segid', n_atoms * [segid])
    u_new.add_TopologyAttr('chainID', n_atoms * [segid])
    return u_new


def _unit(vector):
    """Return *vector* scaled to unit length."""
    return vector / np.linalg.norm(vector)


def _atom_position(residue, atom_name):
    """Return the position of the first atom called *atom_name* in *residue*.

    *residue* only needs ``names`` and ``positions`` attributes (an
    MDAnalysis AtomGroup provides both).

    Raises
    ------
    IndexError
        If the residue has no atom with that name.
    """
    matches = np.flatnonzero(residue.names == atom_name)
    if matches.size == 0:
        raise IndexError(
            f"atom {atom_name!r} not found in terminal residue "
            f"(atoms present: {list(residue.names)})"
        )
    return residue.positions[matches[0]]


def get_nme_pos(end_residue):
    """Compute the positions of the NME atoms for a C-terminal residue.

    If the residue has an OXT atom, the NME nitrogen takes over its position
    and the NME carbon is placed ``CAP_BOND_LENGTH`` further along the
    C -> OXT direction.  Otherwise the direction is taken from the midpoint of
    O and CA through C; N is placed ``CAP_BOND_LENGTH`` beyond C and the NME
    carbon another ``CAP_BOND_LENGTH`` beyond N.

    Parameters
    ----------
    end_residue : object with ``names`` and ``positions``
        Atoms of the last residue of the chain.

    Returns
    -------
    (N_position, C_position) : tuple of numpy.ndarray

    Raises
    ------
    IndexError
        If a required backbone atom (C, and O/CA when OXT is absent) is
        missing.
    """
    carbon = _atom_position(end_residue, "C")
    if "OXT" in end_residue.names:
        n_position = _atom_position(end_residue, "OXT")
        direction = _unit(n_position - carbon)
    else:
        mid_point = (_atom_position(end_residue, "O")
                     + _atom_position(end_residue, "CA")) / 2
        direction = _unit(carbon - mid_point)
        n_position = carbon + CAP_BOND_LENGTH * direction
    c_position = n_position + CAP_BOND_LENGTH * direction
    return n_position, c_position


def get_ace_pos(end_residue):
    """Compute the positions of the ACE atoms for an N-terminal residue.

    The carbonyl carbon (C1) is placed ``CAP_BOND_LENGTH`` beyond N on the
    CA -> N line.  The methyl carbon (C2) and the oxygen are then placed
    ``CAP_BOND_LENGTH`` from C1, in a randomly oriented plane that contains
    the CA-C1 axis, each at 120 degrees to the C1 -> N direction and to each
    other.

    The plane normal is drawn from ``np.random`` and then made perpendicular
    to the CA-C1 axis.  Without that projection the angles deviate from
    120 degrees, and C2 and O fall onto the same point whenever the random
    vector happens to be (nearly) parallel to the axis.

    Parameters
    ----------
    end_residue : object with ``names`` and ``positions``
        Atoms of the first residue of the chain.

    Returns
    -------
    (C1_position, C2_position, O_position) : tuple of numpy.ndarray

    Raises
    ------
    IndexError
        If the residue lacks a CA or N atom.
    """
    ca_position = _atom_position(end_residue, "CA")
    n_position = _atom_position(end_residue, "N")
    c1_position = n_position + CAP_BOND_LENGTH * _unit(n_position - ca_position)

    axis = ca_position - c1_position
    axis_unit = _unit(axis)

    # Random orientation of the cap plane around the CA-C1 axis.
    orientation = 2 * np.random.rand(3) - 1
    normal = orientation - np.dot(orientation, axis_unit) * axis_unit
    if np.linalg.norm(normal) < 1e-6:
        # Degenerate draw (parallel to the axis): fall back to the coordinate
        # axis least aligned with it, which always has a perpendicular part.
        helper = np.eye(3)[np.argmin(np.abs(axis_unit))]
        normal = helper - np.dot(helper, axis_unit) * axis_unit
    normal = _unit(normal)

    # In-plane vector perpendicular to the axis, with the same length as axis.
    in_plane = np.cross(normal, axis)
    half_sqrt3 = np.sqrt(3) / 2
    c2_direction = _unit(-axis / 2 + half_sqrt3 * in_plane)
    o_direction = _unit(-axis / 2 - half_sqrt3 * in_plane)

    c2_position = c1_position + CAP_BOND_LENGTH * c2_direction
    o_position = c1_position + CAP_BOND_LENGTH * o_direction
    return c1_position, c2_position, o_position


def _cap_segment(segment, res_start):
    """Return a new Universe with *segment* capped by ACE and NME.

    Residues are renumbered consecutively: the ACE atoms get ``res_start + 1``,
    the protein residues follow, and the NME atoms get the next number.
    """
    import MDAnalysis as mda

    # Work on the segment's own atoms instead of re-selecting them through a
    # "segid ..." string: that fails for blank segids and picks up extra atoms
    # when segids or resids are not unique.
    chain = segment.atoms
    segid = chain.segids[0]
    first_residue = chain.residues[0].atoms
    last_residue = chain.residues[-1].atoms

    # ACE: created as (C, CH3, O) ...
    c1_pos, c2_pos, o_pos = get_ace_pos(first_residue)
    ace_names = ["C", "CH3", "O"]
    ace_universe = create_universe(
        n_atoms=len(ace_names),
        name=ace_names,
        resname=len(ace_names) * ["ACE"],
        positions=np.asarray([c1_pos, c2_pos, o_pos]),
        resids=np.full(len(ace_names), chain.residues.resids[0]),
        segid=segid,
    )
    # ... then reordered so the atoms are written as CH3, C, O.
    ace_universe = mda.Merge(
        ace_universe.atoms.select_atoms("name CH3"),
        ace_universe.atoms.select_atoms("name C"),
        ace_universe.atoms.select_atoms("name O"),
    )

    # NME
    nme_names = ["N", "C"]
    nme_universe = create_universe(
        n_atoms=len(nme_names),
        name=nme_names,
        resname=len(nme_names) * ["NME"],
        positions=np.asarray(get_nme_pos(last_residue)),
        resids=np.full(len(nme_names), chain.residues.resids[-1] + 2),
        segid=segid,
    )

    # The NME nitrogen replaces the terminal OXT, so drop OXT if present.
    if "OXT" in last_residue.names:
        oxt = last_residue[np.flatnonzero(last_residue.names == "OXT")[0]]
        protein = chain.select_atoms(f"not index {oxt.index}")
    else:
        protein = chain

    u_all = mda.Merge(ace_universe.atoms, protein, nme_universe.atoms)

    # Each cap atom is its own residue in the merged topology, hence one resid
    # entry per cap atom (3 for ACE, 2 for NME).
    resids_ace = [res_start + 1] * len(ace_names)
    resids_pro = np.arange(resids_ace[0] + 1,
                           protein.residues.n_residues + resids_ace[0] + 1)
    resids_nme = [resids_pro[-1] + 1] * len(nme_names)
    u_all.atoms.residues.resids = np.concatenate(
        [resids_ace, resids_pro, resids_nme]
    )
    return u_all


def main(argv=None):
    """Read the input structure, cap every segment and write the result."""
    args = _parse_args(argv)

    import MDAnalysis as mda

    warnings.filterwarnings("ignore")
    # Fixed seed so the random orientation of the ACE caps is reproducible.
    np.random.seed(42)

    u = mda.Universe(args.in_file)

    res_start = 0
    segment_universes = []
    for seg in u.segments:
        u_all = _cap_segment(seg, res_start)
        # Continue the numbering of the next segment after this one's NME.
        res_start = u_all.atoms.residues.resids[-1]
        segment_universes.append(u_all)

    # Join and write output
    all_uni = mda.Merge(*(seg.atoms for seg in segment_universes))
    all_uni.atoms.write(args.out_file)


if __name__ == "__main__":
    main()