Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| sys.path.append("..") | |
| import rdkit | |
| import rdkit.Chem as Chem | |
| import copy | |
| import pickle | |
| from tqdm.auto import tqdm | |
| from protein_ligand import PDBProtein, parse_sdf_file | |
if __name__ == "__main__":
    # Collect summary statistics over a fixed slice (entries 20000-20999) of
    # the pocket/ligand index: how many entries load cleanly, the average
    # number of residues per pocket, and the average number of residues
    # selected by `query_residues_ligand` ("editable" residues).
    #
    # NOTE(review): the indentation of this script was lost in the copy this
    # was restored from. The nesting below assumes the counters are updated
    # once per index entry (after the residue loop) - confirm against the
    # original file.

    # Expected atom count per residue type, indexed by
    # PDBProtein.AA_NAME_NUMBER[residue name].
    NUM_ATOMS = [0, 5, 11, 8, 8, 6, 9, 9, 4, 10, 8, 8, 9, 8, 11, 7, 6, 7, 14, 12, 7]
    cnt = 0
    edit = 0
    num_res = 0
    index_path = '/data/zaixi/Pocket_Design/data/crossdocked_pocket10/index.pkl'
    raw_path = '/data/zaixi/Pocket_Design/data/crossdocked_pocket10'

    # NOTE: unpickling can execute arbitrary code; only point `index_path`
    # at an index file you trust.
    with open(index_path, 'rb') as f:
        index = pickle.load(f)

    # Each index record is a 4-tuple; only the pocket and ligand file names
    # are used here.
    for pocket_fn, ligand_fn, _, _rmsd_str in tqdm(index[20000:21000]):
        if pocket_fn is None:
            continue
        try:
            pdb_data = PDBProtein(os.path.join(raw_path, pocket_fn))
            # Result is unused; the call is kept so that any failure it
            # raises still excludes the entry, as before.
            pdb_data.to_dict_atom()
            residue_dict = pdb_data.to_dict_residue()
            ligand_dict = parse_sdf_file(os.path.join(raw_path, ligand_fn))
            mask = pdb_data.query_residues_ligand(ligand_dict)
            for k, residue in enumerate(pdb_data.residues):
                if mask[k]:
                    expected = NUM_ATOMS[pdb_data.AA_NAME_NUMBER[residue['name']]]
                    # Explicit raise instead of `assert`, so the filter is
                    # still applied when Python runs with -O.
                    if len(residue['atoms']) != expected:
                        raise ValueError('unexpected atom count for residue')
            # Compute both per-entry values before touching the running
            # totals, so a failure here cannot leave them half-updated.
            n_edit = mask.sum()
            n_res = len(residue_dict['amino_acid'])
        except Exception:
            # Best-effort scan: entries that fail to parse or validate are
            # skipped. `Exception` (not a bare `except`) lets Ctrl-C and
            # SystemExit propagate.
            continue
        edit += n_edit
        num_res += n_res
        cnt += 1

    # Report the entry count and the per-entry averages.
    print('Total number of molecules', cnt)
    if cnt:
        print('average residues:', num_res / cnt)
        print('average editable residues:', edit / cnt)
    else:
        # Avoid ZeroDivisionError when every entry was skipped.
        print('No entries were processed successfully; averages are undefined.')