Spaces:
Sleeping
Sleeping
change
Browse files
- .gitignore +1 -0
- generate.py +21 -3
- main.py +16 -10
- test_cut.csv +11 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
generate.py
CHANGED
|
@@ -47,6 +47,7 @@ class GenerateRunner():
|
|
| 47 |
self.exist_flag = Path(f'{self.save_path}/generated_molecules.csv').exists()
|
| 48 |
self.overwrite = opt.overwrite
|
| 49 |
self.dev_no = opt.dev_no
|
|
|
|
| 50 |
global LOG
|
| 51 |
LOG = ul.get_logger(name="generate",
|
| 52 |
log_path=os.path.join(self.save_path, 'generate.log'))
|
|
@@ -59,6 +60,17 @@ class GenerateRunner():
|
|
| 59 |
self.vocab = vocab
|
| 60 |
self.tokenizer = mv.SMILESTokenizer()
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def initialize_dataloader(self, opt, vocab, test_file):
|
| 63 |
"""
|
| 64 |
Initialize dataloader
|
|
@@ -84,7 +96,10 @@ class GenerateRunner():
|
|
| 84 |
# torch.cuda.set_device(1)
|
| 85 |
# current_device = torch.cuda.current_device()
|
| 86 |
# print("当前使用的 CUDA 设备编号是:", current_device)
|
| 87 |
-
device = torch.device(f'cuda:{self.dev_no}')
|
|
|
|
|
|
|
|
|
|
| 88 |
# 构造loader
|
| 89 |
dataloader_test = self.initialize_dataloader(opt, self.vocab, opt.test_file_name)
|
| 90 |
|
|
@@ -273,12 +288,15 @@ def run_main():
|
|
| 273 |
parser = argparse.ArgumentParser(
|
| 274 |
description='generate.py',
|
| 275 |
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
| 276 |
-
|
|
|
|
| 277 |
opts.generate_opts(parser)
|
| 278 |
opt = parser.parse_args()
|
| 279 |
opt.test_file_name = prepare_input(opt)
|
| 280 |
-
|
|
|
|
| 281 |
runner = GenerateRunner(opt)
|
|
|
|
| 282 |
runner.generate(opt)
|
| 283 |
|
| 284 |
|
|
|
|
| 47 |
self.exist_flag = Path(f'{self.save_path}/generated_molecules.csv').exists()
|
| 48 |
self.overwrite = opt.overwrite
|
| 49 |
self.dev_no = opt.dev_no
|
| 50 |
+
self.device = torch.device('cpu')
|
| 51 |
global LOG
|
| 52 |
LOG = ul.get_logger(name="generate",
|
| 53 |
log_path=os.path.join(self.save_path, 'generate.log'))
|
|
|
|
| 60 |
self.vocab = vocab
|
| 61 |
self.tokenizer = mv.SMILESTokenizer()
|
| 62 |
|
| 63 |
+
# 加载模型
|
| 64 |
+
file_name = os.path.join(opt.model_path, f'model_{opt.epoch}.pt')
|
| 65 |
+
if opt.model_choice == 'transformer':
|
| 66 |
+
self.model = EncoderDecoder.load_from_file(file_name)
|
| 67 |
+
self.model.to(self.device)
|
| 68 |
+
self.model.eval()
|
| 69 |
+
elif opt.model_choice == 'seq2seq':
|
| 70 |
+
self.model = Model.load_from_file(file_name, evaluation_mode=True)
|
| 71 |
+
self.model.network.encoder.to(self.device)
|
| 72 |
+
self.model.network.decoder.to(self.device)
|
| 73 |
+
|
| 74 |
def initialize_dataloader(self, opt, vocab, test_file):
|
| 75 |
"""
|
| 76 |
Initialize dataloader
|
|
|
|
| 96 |
# torch.cuda.set_device(1)
|
| 97 |
# current_device = torch.cuda.current_device()
|
| 98 |
# print("当前使用的 CUDA 设备编号是:", current_device)
|
| 99 |
+
# device = torch.device(f'cuda:{self.dev_no}')
|
| 100 |
+
device = torch.device('cpu')
|
| 101 |
+
print(f"-------device:---------")
|
| 102 |
+
print(device)
|
| 103 |
# 构造loader
|
| 104 |
dataloader_test = self.initialize_dataloader(opt, self.vocab, opt.test_file_name)
|
| 105 |
|
|
|
|
| 288 |
parser = argparse.ArgumentParser(
|
| 289 |
description='generate.py',
|
| 290 |
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
| 291 |
+
print("--------parser-------------")
|
| 292 |
+
print(parser)
|
| 293 |
opts.generate_opts(parser)
|
| 294 |
opt = parser.parse_args()
|
| 295 |
opt.test_file_name = prepare_input(opt)
|
| 296 |
+
print("opt输出如下")
|
| 297 |
+
print(opt)
|
| 298 |
runner = GenerateRunner(opt)
|
| 299 |
+
print()
|
| 300 |
runner.generate(opt)
|
| 301 |
|
| 302 |
|
main.py
CHANGED
|
@@ -69,19 +69,24 @@ def calculate_descriptors(smiles):
|
|
| 69 |
def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_value, num_samples):
|
| 70 |
# 初始化生成器的配置选项
|
| 71 |
opt = {
|
| 72 |
-
'
|
| 73 |
-
'
|
| 74 |
-
'
|
| 75 |
-
'
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
|
| 82 |
# 将 opt 字典转换为 Options 对象
|
| 83 |
opt = Options(**opt)
|
| 84 |
-
|
|
|
|
| 85 |
runner = GenerateRunner(opt)
|
| 86 |
|
| 87 |
# 创建数据
|
|
@@ -134,7 +139,7 @@ def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_val
|
|
| 134 |
return result
|
| 135 |
|
| 136 |
|
| 137 |
-
@app.get("/fragmentize
|
| 138 |
async def fragmentize(smiles: str = Query(..., description="SMILES string of the molecule")):
|
| 139 |
try:
|
| 140 |
fragment_df = fragmentize_molecule(smiles)
|
|
@@ -147,6 +152,7 @@ async def fragmentize(smiles: str = Query(..., description="SMILES string of the
|
|
| 147 |
async def generate_molecules(request: GenerateRequest):
|
| 148 |
try:
|
| 149 |
# 调用 SMILES 生成逻辑
|
|
|
|
| 150 |
result = run_generate_runner(request.constSmiles, request.varSmiles, request.mainCls, request.minorCls, request.deltaValue, request.num)
|
| 151 |
return result
|
| 152 |
except Exception as e:
|
|
|
|
| 69 |
def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_value, num_samples):
|
| 70 |
# 初始化生成器的配置选项
|
| 71 |
opt = {
|
| 72 |
+
'batch_size': num_samples,
|
| 73 |
+
'data_path' : './',
|
| 74 |
+
'decode_type' : 'multinomial',
|
| 75 |
+
'dev_no' : 0,
|
| 76 |
+
'epoch' : 20,
|
| 77 |
+
'model_choice' : 'transformer',
|
| 78 |
+
'model_path' : './raw_pretrain_frag/checkpoint',
|
| 79 |
+
'num_samples' : 50,
|
| 80 |
+
'overwrite' : True,
|
| 81 |
+
'save_directory' : './demo_gen',
|
| 82 |
+
'test_file_name' : 'test_cut',
|
| 83 |
+
'vocab_path' : './'
|
| 84 |
}
|
| 85 |
|
| 86 |
# 将 opt 字典转换为 Options 对象
|
| 87 |
opt = Options(**opt)
|
| 88 |
+
print("--------------opt---------------")
|
| 89 |
+
print(opt)
|
| 90 |
runner = GenerateRunner(opt)
|
| 91 |
|
| 92 |
# 创建数据
|
|
|
|
| 139 |
return result
|
| 140 |
|
| 141 |
|
| 142 |
+
@app.get("/fragmentize", response_model=FragmentResponse)
|
| 143 |
async def fragmentize(smiles: str = Query(..., description="SMILES string of the molecule")):
|
| 144 |
try:
|
| 145 |
fragment_df = fragmentize_molecule(smiles)
|
|
|
|
| 152 |
async def generate_molecules(request: GenerateRequest):
|
| 153 |
try:
|
| 154 |
# 调用 SMILES 生成逻辑
|
| 155 |
+
print("123123")
|
| 156 |
result = run_generate_runner(request.constSmiles, request.varSmiles, request.mainCls, request.minorCls, request.deltaValue, request.num)
|
| 157 |
return result
|
| 158 |
except Exception as e:
|
test_cut.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cpd1SMILES,cpd2SMILES,constantSMILES,fromVarSMILES,toVarSMILES,Delta_Value,main_cls,minor_cls,value_type,target_name
|
| 2 |
+
Cc1c(CCC(=O)NC(Cc2ccccc2)C(=O)O)c(=O)oc2cc3oc4c(c3cc12)CCCC4,Cc1c(CCC(=O)Nc2ccc(C(=O)O)cc2)c(=O)oc2cc3oc4c(c3cc12)CCCC4,[*:1]C(=O)O.[*:2]NC(=O)CCc1c(C)c2cc3c4c(oc3cc2oc1=O)CCCC4,[*:1]C([*:2])Cc1ccccc1,[*:1]c1ccc([*:2])cc1,"(0.5, 1.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 3 |
+
Cc1oc2c(C)c3oc(=O)c(CCC(=O)Nc4cccc(C(=O)O)c4)c(C)c3cc2c1C,Cc1oc2c(C)c3oc(=O)c(CC(=O)Nc4cccc(C(=O)O)c4)c(C)c3cc2c1C,[*:1]C(=O)Nc1cccc(C(=O)O)c1.[*:2]c1c(C)c2cc3c(C)c(C)oc3c(C)c2oc1=O,[*:1]CC[*:2],[*:1]C[*:2],"(-1.0, -0.5]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 4 |
+
Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CC[C@@H](C(=O)O)C6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@](C)(O)C3)cc2n1,Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@](C)(O)C3)cc2n1,[*:1]C(=O)O.[*:2]Cc1cc(C#N)c2oc(-c3cccc(-c4cccc(Nc5nc(C)nc6cc(CN7CC[C@](C)(O)C7)cnc56)c4C)c3C)nc2c1,[*:1][C@@H]1CCN([*:2])C1,[*:1]C1CCN([*:2])CC1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 5 |
+
Cc1c(CCC(=O)Nc2ccc(C(=O)O)cc2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1ccc([*:2])cc1,[*:1]C(=O)Nc1cccc([*:2])c1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 6 |
+
Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2ccccc2C(=O)O)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1cccc([*:2])c1,[*:1]C(=O)Nc1ccccc1[*:2],"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 7 |
+
Cc1c(CCC(=O)Nc2ccc(CC(=O)O)cc2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:1]C(=O)O.[*:2]NC(=O)CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]Cc1ccc([*:2])cc1,[*:1]c1cccc([*:2])c1,"(0.0, 0.5]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 8 |
+
Cc1c(CC(=O)Nc2ccc(C(N)=O)cc2)c(=O)oc2c(C)c3oc4c(c3cc12)CCCC4,Cc1c(CC(=O)Nc2ccc(O)cc2)c(=O)oc2c(C)c3oc4c(c3cc12)CCCC4,[*:1]NC(=O)Cc1c(C)c2cc3c4c(oc3c(C)c2oc1=O)CCCC4,[*:1]c1ccc(C(N)=O)cc1,[*:1]c1ccc(O)cc1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 9 |
+
Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,Cc1c(Nc2nc(CO)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CCC(C(=O)O)CC4)cc(C#N)c3o2)c1C,[*:1]Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,[*:1][H],[*:1]O,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 10 |
+
Cc1c(CCC(=O)Nc2ccccc2C(=O)O)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1ccccc1[*:2],[*:1]C(=O)Nc1cccc([*:2])c1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|
| 11 |
+
Cc1c(Nc2nc(C(F)F)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CC[C@@](C)(C(=O)O)C4)cc(C#N)c3o2)c1C,Cc1c(Nc2nc(C(F)F)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CCC(C)(C(=O)O)CC4)cc(C#N)c3o2)c1C,[*:2]C(=O)O.[*:1]Cc1cc(C#N)c2oc(-c3cccc(-c4cccc(Nc5nc(C(F)F)nc6cc(CN7CC[C@@H](O)C7)cnc56)c4C)c3C)nc2c1,[*:1]N1CC[C@]([*:2])(C)C1,[*:1]N1CCC([*:2])(C)CC1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
|