Spaces:
Runtime error
Runtime error
| import os | |
| import multiprocessing | |
| from multiprocessing import Pool | |
| from turtle import width | |
| import numpy as np | |
| from moleculekit.molecule import Molecule | |
| from scipy.spatial import KDTree | |
| from sklearn.cluster import AgglomerativeClustering | |
def create_grid_fromBB(boundingBox, voxelSize=1):
    """Create a regular grid of points spanning a bounding box.

    Parameters
    ----------
    boundingBox : sequence
        Two corner points ``[[xmin, ymin, zmin], [xmax, ymax, zmax]]``
        (the layout produced by ``get_bb``).
    voxelSize : float
        Spacing between neighbouring grid points along every axis, in the
        same length unit as the bounding box.

    Returns
    -------
    gridpoints : numpy.ndarray
        Float array of shape (nx * ny * nz, 3) holding one grid point per
        row. Rows are ordered with x as the outermost and z as the fastest
        varying coordinate.
    box_N : tuple
        Number of grid points along each axis, ``(nx, ny, nz)``.
    """
    lower, upper = boundingBox[0], boundingBox[1]
    # The upper corner is padded by 0.5 so that the maximum itself is still
    # sampled by the half-open range produced by np.arange.
    axes = [
        np.arange(lower[dim], upper[dim] + 0.5, step=voxelSize) for dim in range(3)
    ]
    # indexing="ij" keeps x as the slowest and z as the fastest axis, which is
    # the same order a nested x/y/z loop would produce.
    mesh = np.meshgrid(*axes, indexing="ij")
    gridpoints = np.empty((mesh[0].size, 3), dtype=np.float64)
    for dim in range(3):
        gridpoints[:, dim] = mesh[dim].ravel()
    return gridpoints, tuple(axis.shape[0] for axis in axes)
def get_bb(points):
    """Compute the axis-aligned bounding box of a point set.

    Parameters
    ----------
    points : numpy.ndarray
        Coordinates of shape (N, 3); only the first three columns are read.

    Returns
    -------
    boundingBox : list
        Two corner points ``[[xmin, ymin, zmin], [xmax, ymax, zmax]]``.
    """
    lower_corner = [np.min(points[:, axis]) for axis in range(3)]
    upper_corner = [np.max(points[:, axis]) for axis in range(3)]
    return [lower_corner, upper_corner]
def get_all_protein_resids(pdb_file):
    """Return the atom indices of all protein CA atoms in a pdb file.

    Parameters
    ----------
    pdb_file : str
        Path to pdb file

    Returns
    -------
    resids : numpy.ndarray
        Values of the "index" field for every atom matching ``name CA``
        after the structure was reduced to ``protein and not hydrogen``.

    Raises
    ------
    SystemExit
        With the message "could not read file" when the structure cannot
        be loaded.
    """
    try:
        prot = Molecule(pdb_file)
    except Exception as err:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit;
        # raising SystemExit directly avoids depending on the ``site`` helper
        # ``exit`` while keeping the same exit message.
        raise SystemExit("could not read file") from err
    prot.filter("protein and not hydrogen")
    return prot.get("index", sel="name CA")
def get_all_metalbinding_resids(pdb_file):
    """Return the CA atom indices of potentially metal binding residues.

    Parameters
    ----------
    pdb_file : str
        Path to pdb file

    Returns
    -------
    resids : numpy.ndarray
        Values of the "index" field for every CA atom whose residue name is
        one of the names listed in the selection string below, evaluated
        after the structure was reduced to ``protein and not hydrogen``.

    Raises
    ------
    SystemExit
        With the message "could not read file" when the structure cannot
        be loaded.
    """
    try:
        prot = Molecule(pdb_file)
    except Exception as err:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit;
        # raising SystemExit directly avoids depending on the ``site`` helper
        # ``exit`` while keeping the same exit message.
        raise SystemExit("could not read file") from err
    prot.filter("protein and not hydrogen")
    return prot.get(
        "index",
        sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
    )
def get_all_resids_from_list(pdb_file, resids):
    """Return the CA atom indices of a user supplied set of residues.

    Parameters
    ----------
    pdb_file : str
        Path to pdb file
    resids : str, int or sequence of int
        Residue ids to select. A string is inserted into the selection
        verbatim (e.g. ``"10 12 45"``); a list, tuple or numpy array is
        joined with spaces first.

    Returns
    -------
    resids : numpy.ndarray
        Values of the "index" field for every CA atom matching the
        requested residue ids, evaluated after the structure was reduced to
        ``protein and not hydrogen``.

    Raises
    ------
    SystemExit
        With the message "could not read file" when the structure cannot
        be loaded.
    """
    try:
        prot = Molecule(pdb_file)
    except Exception as err:
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit;
        # raising SystemExit directly avoids depending on the ``site`` helper
        # ``exit`` while keeping the same exit message.
        raise SystemExit("could not read file") from err
    prot.filter("protein and not hydrogen")
    if isinstance(resids, (list, tuple, np.ndarray)):
        # Formatting a sequence directly would yield "[1, 2, 3]" inside the
        # selection; join the ids with spaces instead.
        resids = " ".join(str(resid) for resid in np.ravel(resids))
    return prot.get(
        "index",
        sel=f"name CA and resid {resids}",
    )
def compute_average_p_fast(point, cutoff=1):
    """Average the probabilities stored around a query point.

    Looks up at most 15 neighbours of ``point`` within ``cutoff`` in the
    module-level KDTree ``tree`` and averages the matching entries of the
    module-level array ``output_v``. Both globals are assigned by
    ``get_probability_mean`` before this function is used.

    Parameters
    ----------
    point : numpy.ndarray
        Query coordinates, passed unchanged to ``tree.query``
    cutoff : float
        Upper bound on the neighbour distance

    Returns
    -------
    average_p : float
        Mean of the selected ``output_v`` values, or ``0`` when no
        neighbour lies within the cutoff.
    """
    distances, neighbor_ids = tree.query(
        point, k=15, distance_upper_bound=cutoff, workers=1
    )
    # Slots for which no neighbour was found are reported with an infinite
    # distance; only the remaining slots carry usable indices.
    within_cutoff = distances != np.inf
    if not within_cutoff.any():
        return 0
    return np.mean(output_v[neighbor_ids[within_cutoff]])
def get_probability_mean(grid, prot_centers, pvalues):
    """Compute the mean probability for every point of the global grid.

    Each row of ``grid`` is handed to ``compute_average_p_fast``, which
    averages the ``pvalues`` of the ``prot_centers`` found near that row.
    The work is distributed over a process pool with one worker per CPU.

    Parameters
    ----------
    grid : numpy.ndarray
        Query points; iterated row by row
        (presumably shape (M, 3) - confirm against the caller).
    prot_centers : numpy.ndarray
        Coordinates the KDTree is built from
    pvalues : numpy.ndarray
        Probability values, indexed with the neighbour indices returned by
        the KDTree (so one entry per row of ``prot_centers``)

    Returns
    -------
    mean_p : numpy.ndarray
        One averaged probability per row of ``grid``, in input order.
    """
    # The worker function reads these module-level names, so they have to be
    # assigned before the pool is created.
    # NOTE(review): this appears to rely on worker processes inheriting the
    # parent's globals (fork start method) - confirm on spawn platforms.
    global output_v, prot_v, tree
    output_v = pvalues
    prot_v = prot_centers
    tree = KDTree(prot_v)
    cpuCount = multiprocessing.cpu_count()
    # The context manager shuts the worker processes down once the results
    # are collected instead of leaving them alive after every call.
    with Pool(cpuCount) as pool:
        results = pool.map(compute_average_p_fast, grid)
    return np.array(results)
def write_cubefile(bb, pvalues, box_N, outname="Metal3D_pmap.cube", gridres=1):
    """Write a probability map to a cube file.

    The header consists of two comment lines, one line with the grid
    origin, one line per axis with the voxel count and step vector, and a
    single atom line. Lengths are multiplied by 1.89 to convert them from
    Angstrom to bohr.

    Parameters
    ----------
    bb : list
        Bounding box; only the first corner ``bb[0] = [xmin, ymin, zmin]``
        is read and used as grid origin.
    pvalues : numpy.ndarray
        Probability values; reshaped to ``box_N`` before writing
    box_N : tuple
        Number of voxels in each dimension ``(nx, ny, nz)``
    outname : str
        Name of the output file
    gridres : float
        Resolution of the grid used for writing the voxels
    """
    with open(outname, "w") as cube:
        emit = cube.write
        emit(" Metal3D Cube File\n")
        emit(" Outer Loop: X, Middle Loop y, inner Loop z\n")
        angstromToBohr = 1.89
        origin = bb[0]
        emit(
            f" 1 {origin[0]*angstromToBohr: .6f} {origin[1]*angstromToBohr: .6f} {origin[2]*angstromToBohr: .6f}\n"
        )
        # One line per axis: voxel count followed by the step vector in bohr.
        emit(f"{str(box_N[0]).rjust(5)} {angstromToBohr*gridres:.9f} 0.000000 0.000000\n")
        emit(f"{str(box_N[1]).rjust(5)} 0.000000 {angstromToBohr*gridres:.9f} 0.000000\n")
        emit(f"{str(box_N[2]).rjust(5)} 0.000000 0.000000 {angstromToBohr*gridres:.9f}\n")
        emit(" 1 1.000000 0.000000 0.000000 0.000000\n")
        volume = pvalues.reshape(box_N)
        for plane in volume:
            for z_row in plane:
                for z_index, value in enumerate(z_row):
                    emit(f" {value: .5E}")
                    # Break the line after every sixth value of a z row.
                    if (z_index + 1) % 6 == 0:
                        emit("\n")
                # Every z row is terminated by a newline of its own.
                emit("\n")
def find_unique_sites(
    pvalues, grid, writeprobes=False, probefile="probes.pdb", threshold=5, p=0.75
):
    """Reduce the cloud of high-probability voxels to one site per cluster.

    Voxels with a probability above ``p`` are grouped with complete-linkage
    AgglomerativeClustering; for every cluster the voxel with the highest
    probability is kept as the site. ``threshold`` is the distance threshold
    passed to the clustering and can be changed to get more or fewer sites.

    Parameters
    ----------
    pvalues : numpy.ndarray
        Probability values, one per row of ``grid`` (used as a boolean mask
        on ``grid``, so it has to be one-dimensional of length N)
    grid : numpy.ndarray
        Grid of shape (N, 3)
    writeprobes : bool
        If True, write one HETATM record per site to ``probefile``
    probefile : str
        Name of the output file
    threshold : float
        Distance threshold handed to the clustering
    p : float
        Minimum probability of a point to be considered

    Returns
    -------
    message : str
        ``"no metals found"`` when no voxel exceeds ``p``, otherwise a
        summary containing ``p`` and the number of sites found.
    """
    above = pvalues > p
    points = grid[above]
    point_p = pvalues[above]
    if len(points) == 0:
        return "no metals found"
    if len(points) == 1:
        # The clustering estimator rejects inputs with fewer than two
        # samples; a single voxel trivially forms its own cluster.
        labels = np.zeros(1, dtype=int)
        n_clusters = 1
    else:
        clustering = AgglomerativeClustering(
            n_clusters=None, linkage="complete", distance_threshold=threshold
        ).fit(points)
        labels = clustering.labels_
        n_clusters = clustering.n_clusters_
    message = f"min metal p={p}, n(metals) found: {n_clusters}"
    sites = []
    for label in range(n_clusters):
        members = labels == label
        c_points = points[members]
        c_points_p = point_p[members]
        # Keep the most probable voxel of the cluster as its representative.
        position = c_points[np.argmax(c_points_p)]
        sites.append((position, np.max(c_points_p)))
    if writeprobes:
        print(f"writing probes to {probefile}")
        with open(probefile, "w") as f:
            for i, site in enumerate(sites):
                f.write(
                    f"HETATM {i+1:3} ZN ZN A {i+1:3} {site[0][0]: 8.3f}{site[0][1]: 8.3f}{site[0][2]: 8.3f} {site[1]:.2f} 0.0 ZN2+\n"
                )
    return message