Spaces:
Running
on
Zero
Running
on
Zero
File size: 873 Bytes
7968cb0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from data_utils import parse_PDB, align_pdb_dict_formats
import os
import re
import json
import yaml
from tqdm import tqdm
# Preprocess a directory of PDB files into a single JSON-lines file.
#
# Reads `pdb_dir` and `preprocessed_dir` from configs/data_config.yaml, runs
# every directory entry of `pdb_dir` whose name ends in ".pdb" through
# parse_PDB and align_pdb_dict_formats, and writes one JSON object per line
# to `<preprocessed_dir>/chain_set.jsonl`.

# Parse the config once, and use a context manager so the handle is closed.
with open('configs/data_config.yaml', 'r') as config_file:
    # NOTE(review): FullLoader is acceptable for a trusted, repo-local config;
    # prefer yaml.safe_load if this file could ever come from an untrusted source.
    data_config = yaml.load(config_file, Loader=yaml.FullLoader)
in_dir = data_config['pdb_dir']
out_dir = data_config['preprocessed_dir']

fold_list = []
# os.listdir returns names in arbitrary order; sorting makes the line order of
# the output file reproducible across runs and machines.
fold_files = sorted(
    filename
    for filename in os.listdir(in_dir)
    if re.match(r".*\.pdb$", filename)  # raw string: `\.` is a regex escape
)
for file in tqdm(fold_files):
    # The name is the text before the first underscore; the chain is the
    # second underscore-separated field, cut at its first dot
    # (i.e. names shaped like `<name>_<chain>.pdb`). A name without an
    # underscore raises IndexError here, as it did before.
    name_parts = file.split('_')
    _name = name_parts[0]
    _chain = name_parts[1].split('.')[0]
    _path = f'{in_dir}/{file}'
    # Only the first element of parse_PDB's result is used.
    # NOTE(review): presumably one entry per requested chain — confirm in data_utils.
    old_pdb = parse_PDB(_path, name=_name, input_chain_list=[_chain])[0]
    new_pdb = align_pdb_dict_formats(old_pdb, _chain)
    fold_list.append(new_pdb)

# Written only after every file has been processed, so a failure while parsing
# never leaves a partially written chain_set.jsonl behind.
with open(f'{out_dir}/chain_set.jsonl', 'w') as f:
    for entry in fold_list:  # `entry`, not `dict`, to avoid shadowing the builtin
        json.dump(entry, f)
        f.write('\n')
|