File size: 873 Bytes
7968cb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from data_utils import parse_PDB, align_pdb_dict_formats
import os
import re
import json
import yaml
from tqdm import tqdm

# Load the data configuration once. The original opened and parsed the same
# file twice and never closed either handle; the `with` block closes it
# deterministically and both settings are read from the single parsed result.
# NOTE(review): FullLoader is acceptable for a trusted, repo-local config;
# prefer yaml.safe_load if this path could ever point at untrusted input.
with open('configs/data_config.yaml', 'r') as _config_file:
    _data_config = yaml.load(_config_file, Loader=yaml.FullLoader)

in_dir = _data_config['pdb_dir']  # directory that is listed for *.pdb inputs
out_dir = _data_config['preprocessed_dir']  # directory where chain_set.jsonl is written

# Filled by the conversion loop further down: one converted entry per input file.
fold_list = []

# Keep only the directory entries whose name ends in ".pdb".
# The pattern is a raw string so that "\." reaches the regex engine as an
# escaped dot; in a plain string literal "\." is an invalid escape sequence,
# which newer Python versions report as a SyntaxWarning.
_pdb_name_pattern = re.compile(r".*\.pdb$")
fold_files = [
    filename
    for filename in os.listdir(in_dir)
    if _pdb_name_pattern.match(filename)
]

# Convert each selected PDB file and collect the result in fold_list.
# The file name is split on "_": the first piece is passed as the entry name,
# and the second piece (up to its first ".") is used as the chain identifier.
for pdb_filename in tqdm(fold_files):
    pieces = pdb_filename.split('_')
    chain_id = pieces[1].split('.')[0]
    parsed = parse_PDB(
        f'{in_dir}/{pdb_filename}',
        name=pieces[0],
        input_chain_list=[chain_id],
    )
    # parse_PDB's result is indexed; only its first element is converted.
    fold_list.append(align_pdb_dict_formats(parsed[0], chain_id))

# Write the collected entries as JSON Lines: one JSON document per line.
with open(f'{out_dir}/chain_set.jsonl', 'w') as f:
    # The loop variable is intentionally not named `dict`: at module scope that
    # would rebind the builtin `dict` for any code that runs afterwards.
    for entry in fold_list:
        json.dump(entry, f)
        f.write('\n')