| | |
| | import os,sys,glob,torch,random |
| | import numpy as np |
| | import argparse |
| | try: |
| | import pyrosetta |
| | pyrosetta.init() |
| | APPROX = False |
| | except: |
| | print("WARNING: pyRosetta not found, will use an approximate SSE calculation") |
| | APPROX = True |
| |
|
| | def main(): |
| | args=get_args() |
| | assert args.input_pdb or args.pdb_dir is not None, 'Need to provide either an input pdb (--input_pdb) or a path to pdbs (--pdb_dir)' |
| | assert not (args.input_pdb is not None and args.pdb_dir is not None), 'Need to provide either --input_pdb or --pdb_dir, not both' |
| |
|
| | os.makedirs(args.out_dir, exist_ok=True) |
| | if args.pdb_dir is not None: |
| | pdbs=glob.glob(f'{args.pdb_dir}/*pdb') |
| | else: |
| | pdbs=[args.input_pdb] |
| | for pdb in pdbs: |
| | name=os.path.split(pdb)[1][:-4] |
| | secstruc_dict=extract_secstruc(pdb) |
| | xyz,_,_ = parse_pdb_torch(pdb) |
| | ss, idx = ss_to_tensor(secstruc_dict) |
| | block_adj = construct_block_adj_matrix(torch.FloatTensor(ss), torch.tensor(xyz)).float() |
| | ss_tens, mask = mask_ss(ss, idx, max_mask=0) |
| | ss_argmax = torch.argmax(ss_tens[:,:4], dim=1).float() |
| | torch.save(ss_argmax, os.path.join(args.out_dir, f'{name}_ss.pt')) |
| | torch.save(block_adj, os.path.join(args.out_dir, f'{name}_adj.pt')) |
| |
|
| | def get_args(): |
| | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) |
| | parser.add_argument("--pdb_dir",required=False, help="path to directory of pdbs. Either pass this or the path to a specific pdb (--input_pdb)", default=None) |
| | parser.add_argument("--input_pdb", required=False, help="path to input pdb. Either provide this of path to directory of pdbs (--pdb_dir)", default=None) |
| | parser.add_argument("--out_dir",dest="out_dir", required=True, help='need to specify an output path') |
| | args = parser.parse_args() |
| | return args |
| |
|
| |
|
| | def extract_secstruc(fn): |
| | pdb=parse_pdb(fn) |
| | idx = pdb['idx'] |
| | if APPROX: |
| | aa_sequence = pdb["seq"] |
| | secstruct = get_sse(pdb["xyz"][:,1]) |
| | else: |
| | dssp = pyrosetta.rosetta.core.scoring.dssp |
| | pose = pyrosetta.io.pose_from_pdb(fn) |
| | dssp.Dssp(pose).insert_ss_into_pose(pose, True) |
| | aa_sequence = pose.sequence() |
| | secstruct = pose.secstruct() |
| | secstruc_dict = {'sequence':[i for i in aa_sequence], |
| | 'idx':[int(i) for i in idx], |
| | 'ss':[i for i in secstruct]} |
| | return secstruc_dict |
| |
|
| | def ss_to_tensor(ss): |
| | """ |
| | Function to convert ss files to indexed tensors |
| | 0 = Helix |
| | 1 = Strand |
| | 2 = Loop |
| | 3 = Mask/unknown |
| | 4 = idx for pdb |
| | """ |
| | ss_conv = {'H':0,'E':1,'L':2} |
| | idx = np.array(ss['idx']) |
| | ss_int = np.array([int(ss_conv[i]) for i in ss['ss']]) |
| | return ss_int, idx |
| |
|
| | def mask_ss(ss, idx, min_mask = 0, max_mask = 1.0): |
| | mask_prop = random.uniform(min_mask, max_mask) |
| | transitions = np.where(ss[:-1] - ss[1:] != 0)[0] |
| | stuck_counter = 0 |
| | while len(ss[ss == 3])/len(ss) < mask_prop or stuck_counter > 100: |
| | width = random.randint(1,9) |
| | start = random.choice(transitions) |
| | offset = random.randint(-8,1) |
| | try: |
| |
|
| | ss[start+offset:start+offset+width] = 3 |
| | except: |
| | stuck_counter += 1 |
| | pass |
| | ss = torch.tensor(ss) |
| | ss = torch.nn.functional.one_hot(ss, num_classes=4) |
| | ss = torch.cat((ss, torch.tensor(idx)[...,None]), dim=-1) |
| | |
| | mask=torch.tensor(np.where(np.argmax(ss[:,:-1].numpy(), axis=-1) == 3)) |
| | return ss, mask |
| |
|
| | def generate_Cbeta(N,Ca,C): |
| | |
| | b = Ca - N |
| | c = C - Ca |
| | a = torch.cross(b, c, dim=-1) |
| | |
| | |
| | Cb = -0.57910144*a + 0.5689693*b - 0.5441217*c + Ca |
| |
|
| | return Cb |
| |
|
| | def get_pair_dist(a, b): |
| | """calculate pair distances between two sets of points |
| | |
| | Parameters |
| | ---------- |
| | a,b : pytorch tensors of shape [batch,nres,3] |
| | store Cartesian coordinates of two sets of atoms |
| | Returns |
| | ------- |
| | dist : pytorch tensor of shape [batch,nres,nres] |
| | stores paitwise distances between atoms in a and b |
| | """ |
| |
|
| | dist = torch.cdist(a, b, p=2) |
| | return dist |
| |
|
| |
|
| | def construct_block_adj_matrix( sstruct, xyz, cutoff=6, include_loops=False ): |
| | ''' |
| | Given a sstruct specification and backbone coordinates, build a block adjacency matrix. |
| | |
| | Input: |
| | |
| | sstruct (torch.FloatTensor): (L) length tensor with numeric encoding of sstruct at each position |
| | |
| | xyz (torch.FloatTensor): (L,3,3) tensor of Cartesian coordinates of backbone N,Ca,C atoms |
| | |
| | cutoff (float): The Cb distance cutoff under which residue pairs are considered adjacent |
| | By eye, Nate thinks 6A is a good Cb distance cutoff |
| | |
| | Output: |
| | |
| | block_adj (torch.FloatTensor): (L,L) boolean matrix where adjacent secondary structure contacts are 1 |
| | ''' |
| |
|
| | L = xyz.shape[0] |
| | |
| | |
| | N = xyz[:,0] |
| | Ca = xyz[:,1] |
| | C = xyz[:,2] |
| | |
| | |
| | Cb = generate_Cbeta(N,Ca,C) |
| | |
| | |
| | dist = get_pair_dist(Cb,Cb) |
| | dist[torch.isnan(dist)] = 999.9 |
| |
|
| | dist += 999.9*torch.eye(L,device=xyz.device) |
| | |
| | |
| | |
| | |
| | in_segment = True |
| | segments = [] |
| |
|
| | begin = -1 |
| | end = -1 |
| |
|
| | for i in range(sstruct.shape[0]): |
| | |
| | if i == 0: |
| | begin = 0 |
| | continue |
| |
|
| | if not sstruct[i] == sstruct[i-1]: |
| | end = i |
| | segments.append( (sstruct[i-1], begin, end) ) |
| |
|
| | begin = i |
| |
|
| | |
| | if not end == sstruct.shape[0]: |
| | segments.append( (sstruct[-1], begin, sstruct.shape[0]) ) |
| |
|
| |
|
| | block_adj = torch.zeros_like(dist) |
| | for i in range(len(segments)): |
| | curr_segment = segments[i] |
| |
|
| | if curr_segment[0] == 2 and not include_loops: continue |
| |
|
| | begin_i = curr_segment[1] |
| | end_i = curr_segment[2] |
| | for j in range(i+1, len(segments)): |
| | j_segment = segments[j] |
| |
|
| | if j_segment[0] == 2 and not include_loops: continue |
| |
|
| | begin_j = j_segment[1] |
| | end_j = j_segment[2] |
| |
|
| | if torch.any( dist[begin_i:end_i, begin_j:end_j] < cutoff ): |
| | |
| | block_adj[begin_i:end_i, begin_j:end_j] = torch.ones(end_i - begin_i, end_j - begin_j) |
| | block_adj[begin_j:end_j, begin_i:end_i] = torch.ones(end_j - begin_j, end_i - begin_i) |
| | return block_adj |
| |
|
| | def parse_pdb_torch(filename): |
| | lines = open(filename,'r').readlines() |
| | return parse_pdb_lines_torch(lines) |
| |
|
| | |
| | def parse_pdb_lines_torch(lines): |
| |
|
| | |
| | pdb_idx = [] |
| | for l in lines: |
| | if l[:4]=="ATOM" and l[12:16].strip()=="CA": |
| | idx = ( l[21:22].strip(), int(l[22:26].strip()) ) |
| | if idx not in pdb_idx: |
| | pdb_idx.append(idx) |
| | |
| | |
| | xyz = np.full((len(pdb_idx), 27, 3), np.nan, dtype=np.float32) |
| | for l in lines: |
| | if l[:4] != "ATOM": |
| | continue |
| | chain, resNo, atom, aa = l[21:22], int(l[22:26]), ' '+l[12:16].strip().ljust(3), l[17:20] |
| | idx = pdb_idx.index((chain,resNo)) |
| | for i_atm, tgtatm in enumerate(aa2long[aa2num[aa]]): |
| | if tgtatm == atom: |
| | xyz[idx,i_atm,:] = [float(l[30:38]), float(l[38:46]), float(l[46:54])] |
| | break |
| | |
| | mask = np.logical_not(np.isnan(xyz[...,0])) |
| | xyz[np.isnan(xyz[...,0])] = 0.0 |
| |
|
| | return xyz,mask,np.array(pdb_idx) |
| |
|
| | def parse_pdb(filename, **kwargs): |
| | '''extract xyz coords for all heavy atoms''' |
| | lines = open(filename,'r').readlines() |
| | return parse_pdb_lines(lines, **kwargs) |
| |
|
| | def parse_pdb_lines(lines, parse_hetatom=False, ignore_het_h=True): |
| | |
| | res = [(l[22:26],l[17:20]) for l in lines if l[:4]=="ATOM" and l[12:16].strip()=="CA"] |
| | seq = [aa2num[r[1]] if r[1] in aa2num.keys() else 20 for r in res] |
| | pdb_idx = [( l[21:22].strip(), int(l[22:26].strip()) ) for l in lines if l[:4]=="ATOM" and l[12:16].strip()=="CA"] |
| | |
| | |
| | xyz = np.full((len(res), 27, 3), np.nan, dtype=np.float32) |
| | for l in lines: |
| | if l[:4] != "ATOM": |
| | continue |
| | chain, resNo, atom, aa = l[21:22], int(l[22:26]), ' '+l[12:16].strip().ljust(3), l[17:20] |
| | idx = pdb_idx.index((chain,resNo)) |
| | for i_atm, tgtatm in enumerate(aa2long[aa2num[aa]]): |
| | if tgtatm is not None and tgtatm.strip() == atom.strip(): |
| | xyz[idx,i_atm,:] = [float(l[30:38]), float(l[38:46]), float(l[46:54])] |
| | break |
| | |
| | |
| | mask = np.logical_not(np.isnan(xyz[...,0])) |
| | xyz[np.isnan(xyz[...,0])] = 0.0 |
| | |
| | new_idx = [] |
| | i_unique = [] |
| | for i,idx in enumerate(pdb_idx): |
| | if idx not in new_idx: |
| | new_idx.append(idx) |
| | i_unique.append(i) |
| | |
| | pdb_idx = new_idx |
| | xyz = xyz[i_unique] |
| | mask = mask[i_unique] |
| | seq = np.array(seq)[i_unique] |
| |
|
| | out = {'xyz':xyz, |
| | 'mask':mask, |
| | 'idx':np.array([i[1] for i in pdb_idx]), |
| | 'seq':np.array(seq), |
| | 'pdb_idx': pdb_idx, |
| | } |
| | |
| | if parse_hetatom: |
| | xyz_het, info_het = [], [] |
| | for l in lines: |
| | if l[:6]=='HETATM' and not (ignore_het_h and l[77]=='H'): |
| | info_het.append(dict( |
| | idx=int(l[7:11]), |
| | atom_id=l[12:16], |
| | atom_type=l[77], |
| | name=l[16:20] |
| | )) |
| | xyz_het.append([float(l[30:38]), float(l[38:46]), float(l[46:54])]) |
| |
|
| | out['xyz_het'] = np.array(xyz_het) |
| | out['info_het'] = info_het |
| |
|
| | return out |
| |
|
| | num2aa=[ |
| | 'ALA','ARG','ASN','ASP','CYS', |
| | 'GLN','GLU','GLY','HIS','ILE', |
| | 'LEU','LYS','MET','PHE','PRO', |
| | 'SER','THR','TRP','TYR','VAL', |
| | 'UNK','MAS', |
| | ] |
| |
|
| | aa2num= {x:i for i,x in enumerate(num2aa)} |
| | |
| | aa2long=[ |
| | (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None," H "," HA ","1HB ","2HB ","3HB ", None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH1"," NH2", None, None, None," H "," HA ","1HB ","2HB ","1HG ","2HG ","1HD ","2HD "," HE ","1HH1","2HH1","1HH2","2HH2"), |
| | (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None," H "," HA ","1HB ","2HB ","1HD2","2HD2", None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," OD1"," OD2", None, None, None, None, None, None," H "," HA ","1HB ","2HB ", None, None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None," H "," HA ","1HB ","2HB "," HG ", None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None," H "," HA ","1HB ","2HB ","1HG ","2HG ","1HE2","2HE2", None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," OE2", None, None, None, None, None," H "," HA ","1HB ","2HB ","1HG ","2HG ", None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None," H ","1HA ","2HA ", None, None, None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None," H "," HA ","1HB ","2HB "," HD2"," HE1"," HE2", None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None," H "," HA "," HB ","1HG2","2HG2","3HG2","1HG1","2HG1","1HD1","2HD1","3HD1", None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None," H "," HA ","1HB ","2HB "," HG ","1HD1","2HD1","3HD1","1HD2","2HD2","3HD2", None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None," H "," HA ","1HB ","2HB ","1HG ","2HG ","1HD ","2HD ","1HE ","2HE ","1HZ ","2HZ ","3HZ "), |
| | (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None," H "," HA ","1HB ","2HB ","1HG ","2HG ","1HE ","2HE ","3HE ", None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None," H "," HA ","1HB ","2HB "," HD1"," HD2"," HE1"," HE2"," HZ ", None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None," HA ","1HB ","2HB ","1HG ","2HG ","1HD ","2HD ", None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None," H "," HG "," HA ","1HB ","2HB ", None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None," H "," HG1"," HA "," HB ","1HG2","2HG2","3HG2", None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," NE1"," CE2"," CE3"," CZ2"," CZ3"," CH2"," H "," HA ","1HB ","2HB "," HD1"," HE1"," HZ2"," HH2"," HZ3"," HE3", None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None," H "," HA ","1HB ","2HB "," HD1"," HE1"," HE2"," HD2"," HH ", None, None, None, None), |
| | (" N "," CA "," C "," O "," CB "," CG1"," CG2", None, None, None, None, None, None, None," H "," HA "," HB ","1HG1","2HG1","3HG1","1HG2","2HG2","3HG2", None, None, None, None), |
| | (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None," H "," HA ","1HB ","2HB ","3HB ", None, None, None, None, None, None, None, None), |
| | (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None," H "," HA ","1HB ","2HB ","3HB ", None, None, None, None, None, None, None, None), |
| | ] |
| |
|
| | def get_sse(ca_coord): |
| | ''' |
| | calculates the SSE of a peptide chain based on the P-SEA algorithm (Labesse 1997) |
| | code borrowed from biokite: https://github.com/biokit/biokit |
| | ''' |
| | def vector_dot(v1,v2): return (v1*v2).sum(-1) |
| | def norm_vector(v): return v / np.linalg.norm(v, axis=-1, keepdims=True) |
| | def displacement(atoms1, atoms2): |
| | v1 = np.asarray(atoms1) |
| | v2 = np.asarray(atoms2) |
| | if len(v1.shape) <= len(v2.shape): |
| | diff = v2 - v1 |
| | else: |
| | diff = -(v1 - v2) |
| | return diff |
| | def distance(atoms1, atoms2): |
| | diff = displacement(atoms1, atoms2) |
| | return np.sqrt(vector_dot(diff, diff)) |
| |
|
| | def angle(atoms1, atoms2, atoms3): |
| | v1 = norm_vector(displacement(atoms1, atoms2)) |
| | v2 = norm_vector(displacement(atoms3, atoms2)) |
| | return np.arccos(vector_dot(v1,v2)) |
| |
|
| | def dihedral(atoms1, atoms2, atoms3, atoms4): |
| | v1 = norm_vector(displacement(atoms1, atoms2)) |
| | v2 = norm_vector(displacement(atoms2, atoms3)) |
| | v3 = norm_vector(displacement(atoms3, atoms4)) |
| | |
| | n1 = np.cross(v1, v2) |
| | n2 = np.cross(v2, v3) |
| | |
| | |
| | x = vector_dot(n1,n2) |
| | y = vector_dot(np.cross(n1,n2), v2) |
| | return np.arctan2(y,x) |
| |
|
| | _radians_to_angle = 2*np.pi/360 |
| |
|
| | _r_helix = ((89-12)*_radians_to_angle, (89+12)*_radians_to_angle) |
| | _a_helix = ((50-20)*_radians_to_angle, (50+20)*_radians_to_angle) |
| | _d2_helix = ((5.5-0.5), (5.5+0.5)) |
| | _d3_helix = ((5.3-0.5), (5.3+0.5)) |
| | _d4_helix = ((6.4-0.6), (6.4+0.6)) |
| |
|
| | _r_strand = ((124-14)*_radians_to_angle, (124+14)*_radians_to_angle) |
| | _a_strand = ((-180)*_radians_to_angle, (-125)*_radians_to_angle, |
| | (145)*_radians_to_angle, (180)*_radians_to_angle) |
| | _d2_strand = ((6.7-0.6), (6.7+0.6)) |
| | _d3_strand = ((9.9-0.9), (9.9+0.9)) |
| | _d4_strand = ((12.4-1.1), (12.4+1.1)) |
| |
|
| | |
| |
|
| | d2i_coord = np.full(( len(ca_coord), 2, 3 ), np.nan) |
| | d3i_coord = np.full(( len(ca_coord), 2, 3 ), np.nan) |
| | d4i_coord = np.full(( len(ca_coord), 2, 3 ), np.nan) |
| | ri_coord = np.full(( len(ca_coord), 3, 3 ), np.nan) |
| | ai_coord = np.full(( len(ca_coord), 4, 3 ), np.nan) |
| | |
| | |
| | |
| | |
| | for i in range(1, len(ca_coord)-1): d2i_coord[i] = (ca_coord[i-1], ca_coord[i+1]) |
| | for i in range(1, len(ca_coord)-2): d3i_coord[i] = (ca_coord[i-1], ca_coord[i+2]) |
| | for i in range(1, len(ca_coord)-3): d4i_coord[i] = (ca_coord[i-1], ca_coord[i+3]) |
| | for i in range(1, len(ca_coord)-1): ri_coord[i] = (ca_coord[i-1], ca_coord[i], ca_coord[i+1]) |
| | for i in range(1, len(ca_coord)-2): ai_coord[i] = (ca_coord[i-1], ca_coord[i], ca_coord[i+1], ca_coord[i+2]) |
| | |
| | d2i = distance(d2i_coord[:,0], d2i_coord[:,1]) |
| | d3i = distance(d3i_coord[:,0], d3i_coord[:,1]) |
| | d4i = distance(d4i_coord[:,0], d4i_coord[:,1]) |
| | ri = angle(ri_coord[:,0], ri_coord[:,1], ri_coord[:,2]) |
| | ai = dihedral(ai_coord[:,0], ai_coord[:,1], ai_coord[:,2], ai_coord[:,3]) |
| | |
| | sse = ["L"] * len(ca_coord) |
| | |
| | |
| | |
| | is_pot_helix = np.zeros(len(sse), dtype=bool) |
| | for i in range(len(sse)): |
| | if ( |
| | d3i[i] >= _d3_helix[0] and d3i[i] <= _d3_helix[1] |
| | and d4i[i] >= _d4_helix[0] and d4i[i] <= _d4_helix[1] |
| | ) or ( |
| | ri[i] >= _r_helix[0] and ri[i] <= _r_helix[1] |
| | and ai[i] >= _a_helix[0] and ai[i] <= _a_helix[1] |
| | ): |
| | is_pot_helix[i] = True |
| | |
| | is_helix = np.zeros(len(sse), dtype=bool) |
| | counter = 0 |
| | for i in range(len(sse)): |
| | if is_pot_helix[i]: |
| | counter += 1 |
| | else: |
| | if counter >= 5: |
| | is_helix[i-counter : i] = True |
| | counter = 0 |
| | |
| | i = 0 |
| | while i < len(sse): |
| | if is_helix[i]: |
| | sse[i] = "H" |
| | if ( |
| | d3i[i-1] >= _d3_helix[0] and d3i[i-1] <= _d3_helix[1] |
| | ) or ( |
| | ri[i-1] >= _r_helix[0] and ri[i-1] <= _r_helix[1] |
| | ): |
| | sse[i-1] = "H" |
| | sse[i] = "H" |
| | if ( |
| | d3i[i+1] >= _d3_helix[0] and d3i[i+1] <= _d3_helix[1] |
| | ) or ( |
| | ri[i+1] >= _r_helix[0] and ri[i+1] <= _r_helix[1] |
| | ): |
| | sse[i+1] = "H" |
| | i += 1 |
| | |
| | |
| | |
| | is_pot_strand = np.zeros(len(sse), dtype=bool) |
| | for i in range(len(sse)): |
| | if ( d2i[i] >= _d2_strand[0] and d2i[i] <= _d2_strand[1] |
| | and d3i[i] >= _d3_strand[0] and d3i[i] <= _d3_strand[1] |
| | and d4i[i] >= _d4_strand[0] and d4i[i] <= _d4_strand[1] |
| | ) or ( |
| | ri[i] >= _r_strand[0] and ri[i] <= _r_strand[1] |
| | and ( (ai[i] >= _a_strand[0] and ai[i] <= _a_strand[1]) |
| | or (ai[i] >= _a_strand[2] and ai[i] <= _a_strand[3])) |
| | ): |
| | is_pot_strand[i] = True |
| | |
| | |
| | |
| | pot_strand_coord = ca_coord[is_pot_strand] |
| | is_strand = np.zeros(len(sse), dtype=bool) |
| | counter = 0 |
| | contacts = 0 |
| | for i in range(len(sse)): |
| | if is_pot_strand[i]: |
| | counter += 1 |
| | coord = ca_coord[i] |
| | for strand_coord in ca_coord: |
| | dist = distance(coord, strand_coord) |
| | if dist >= 4.2 and dist <= 5.2: |
| | contacts += 1 |
| | else: |
| | if counter >= 4: |
| | is_strand[i-counter : i] = True |
| | elif counter == 3 and contacts >= 5: |
| | is_strand[i-counter : i] = True |
| | counter = 0 |
| | contacts = 0 |
| | |
| | i = 0 |
| | while i < len(sse): |
| | if is_strand[i]: |
| | sse[i] = "E" |
| | if d3i[i-1] >= _d3_strand[0] and d3i[i-1] <= _d3_strand[1]: |
| | sse[i-1] = "E" |
| | sse[i] = "E" |
| | if d3i[i+1] >= _d3_strand[0] and d3i[i+1] <= _d3_strand[1]: |
| | sse[i+1] = "E" |
| | i += 1 |
| | return sse |
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|