Spaces:
Running
on
Zero
Running
on
Zero
# Convert every "<name>_<chain>.pdb" file found in the configured PDB directory
# into one record and write all records, one JSON object per line, to
# "<preprocessed_dir>/chain_set.jsonl".
#
# Both directories are read from configs/data_config.yaml (keys "pdb_dir" and
# "preprocessed_dir").
import json
import os
import re

import yaml
from tqdm import tqdm

from data_utils import parse_PDB, align_pdb_dict_formats

# Parse the config once inside a context manager so the file handle is closed
# (it was previously opened twice and never closed).
# NOTE(review): FullLoader is kept as-is; this assumes the config is a trusted
# local file. Consider yaml.safe_load if that ever stops being true.
with open('configs/data_config.yaml', 'r') as config_file:
    data_config = yaml.load(config_file, Loader=yaml.FullLoader)
in_dir = data_config['pdb_dir']
out_dir = data_config['preprocessed_dir']

# Raw string: "\." inside a plain literal is an invalid escape sequence and
# triggers a warning on current Python versions. Compiled once, outside the loop.
pdb_filename = re.compile(r".*\.pdb$")
fold_files = [filename for filename in os.listdir(in_dir) if pdb_filename.match(filename)]

# Create the output directory up front so a missing directory is detected
# before the (potentially long) parsing loop rather than after it.
os.makedirs(out_dir, exist_ok=True)

fold_list = []
for filename in tqdm(fold_files):
    # Filenames are split on "_": the first field is the structure name, the
    # second field (up to its first ".") is the chain identifier.
    name_parts = filename.split('_')
    if len(name_parts) < 2:
        # Previously surfaced as a bare IndexError that did not name the file.
        raise ValueError(
            f"cannot derive name and chain from {filename!r}; "
            "expected a filename of the form '<name>_<chain>.pdb'"
        )
    _name = name_parts[0]
    _chain = name_parts[1].split('.')[0]
    _path = f'{in_dir}/{filename}'
    # parse_PDB returns an indexable result; only its first element is used.
    # NOTE(review): presumably one entry per parsed structure — confirm in data_utils.
    old_pdb = parse_PDB(_path, name=_name, input_chain_list=[_chain])[0]
    new_pdb = align_pdb_dict_formats(old_pdb, _chain)
    fold_list.append(new_pdb)

# JSON Lines output: one serialized record per line.
with open(f'{out_dir}/chain_set.jsonl', 'w') as f:
    for record in fold_list:
        json.dump(record, f)
        f.write('\n')