Songyou committed on
Commit
17b0e42
·
1 Parent(s): 2df9869
Files changed (4) hide show
  1. .gitignore +1 -0
  2. generate.py +21 -3
  3. main.py +16 -10
  4. test_cut.csv +11 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
generate.py CHANGED
@@ -47,6 +47,7 @@ class GenerateRunner():
47
  self.exist_flag = Path(f'{self.save_path}/generated_molecules.csv').exists()
48
  self.overwrite = opt.overwrite
49
  self.dev_no = opt.dev_no
 
50
  global LOG
51
  LOG = ul.get_logger(name="generate",
52
  log_path=os.path.join(self.save_path, 'generate.log'))
@@ -59,6 +60,17 @@ class GenerateRunner():
59
  self.vocab = vocab
60
  self.tokenizer = mv.SMILESTokenizer()
61
 
 
 
 
 
 
 
 
 
 
 
 
62
  def initialize_dataloader(self, opt, vocab, test_file):
63
  """
64
  Initialize dataloader
@@ -84,7 +96,10 @@ class GenerateRunner():
84
  # torch.cuda.set_device(1)
85
  # current_device = torch.cuda.current_device()
86
  # print("当前使用的 CUDA 设备编号是:", current_device)
87
- device = torch.device(f'cuda:{self.dev_no}')
 
 
 
88
  # 构造loader
89
  dataloader_test = self.initialize_dataloader(opt, self.vocab, opt.test_file_name)
90
 
@@ -273,12 +288,15 @@ def run_main():
273
  parser = argparse.ArgumentParser(
274
  description='generate.py',
275
  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
276
-
 
277
  opts.generate_opts(parser)
278
  opt = parser.parse_args()
279
  opt.test_file_name = prepare_input(opt)
280
-
 
281
  runner = GenerateRunner(opt)
 
282
  runner.generate(opt)
283
 
284
 
 
47
  self.exist_flag = Path(f'{self.save_path}/generated_molecules.csv').exists()
48
  self.overwrite = opt.overwrite
49
  self.dev_no = opt.dev_no
50
+ self.device = torch.device('cpu')
51
  global LOG
52
  LOG = ul.get_logger(name="generate",
53
  log_path=os.path.join(self.save_path, 'generate.log'))
 
60
  self.vocab = vocab
61
  self.tokenizer = mv.SMILESTokenizer()
62
 
63
+ # 加载模型
64
+ file_name = os.path.join(opt.model_path, f'model_{opt.epoch}.pt')
65
+ if opt.model_choice == 'transformer':
66
+ self.model = EncoderDecoder.load_from_file(file_name)
67
+ self.model.to(self.device)
68
+ self.model.eval()
69
+ elif opt.model_choice == 'seq2seq':
70
+ self.model = Model.load_from_file(file_name, evaluation_mode=True)
71
+ self.model.network.encoder.to(self.device)
72
+ self.model.network.decoder.to(self.device)
73
+
74
  def initialize_dataloader(self, opt, vocab, test_file):
75
  """
76
  Initialize dataloader
 
96
  # torch.cuda.set_device(1)
97
  # current_device = torch.cuda.current_device()
98
  # print("当前使用的 CUDA 设备编号是:", current_device)
99
+ # device = torch.device(f'cuda:{self.dev_no}')
100
+ device = torch.device('cpu')
101
+ print(f"-------device:---------")
102
+ print(device)
103
  # 构造loader
104
  dataloader_test = self.initialize_dataloader(opt, self.vocab, opt.test_file_name)
105
 
 
288
  parser = argparse.ArgumentParser(
289
  description='generate.py',
290
  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
291
+ print("--------parser-------------")
292
+ print(parser)
293
  opts.generate_opts(parser)
294
  opt = parser.parse_args()
295
  opt.test_file_name = prepare_input(opt)
296
+ print("opt输出如下")
297
+ print(opt)
298
  runner = GenerateRunner(opt)
299
+ print()
300
  runner.generate(opt)
301
 
302
 
main.py CHANGED
@@ -69,19 +69,24 @@ def calculate_descriptors(smiles):
69
  def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_value, num_samples):
70
  # 初始化生成器的配置选项
71
  opt = {
72
- 'model_choice': 'transformer',
73
- 'model_path': '$(pwd)/raw_pretrain_frag/checkpoint',
74
- 'vocab_path': '$(pwd)',
75
- 'epoch': 20,
76
- # 'save_directory': '$(pwd)/demo_gen',
77
- # 'data_path': '/home/yichao/zhilian/GenAICode/CLModel_v2_zl',
78
- # 'test_file_name': 'test_100',
79
- 'batch_size': num_samples
 
 
 
 
80
  }
81
 
82
  # 将 opt 字典转换为 Options 对象
83
  opt = Options(**opt)
84
-
 
85
  runner = GenerateRunner(opt)
86
 
87
  # 创建数据
@@ -134,7 +139,7 @@ def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_val
134
  return result
135
 
136
 
137
- @app.get("/fragmentize/", response_model=FragmentResponse)
138
  async def fragmentize(smiles: str = Query(..., description="SMILES string of the molecule")):
139
  try:
140
  fragment_df = fragmentize_molecule(smiles)
@@ -147,6 +152,7 @@ async def fragmentize(smiles: str = Query(..., description="SMILES string of the
147
  async def generate_molecules(request: GenerateRequest):
148
  try:
149
  # 调用 SMILES 生成逻辑
 
150
  result = run_generate_runner(request.constSmiles, request.varSmiles, request.mainCls, request.minorCls, request.deltaValue, request.num)
151
  return result
152
  except Exception as e:
 
69
  def run_generate_runner(const_smiles, var_smiles, main_cls, minor_cls, delta_value, num_samples):
70
  # 初始化生成器的配置选项
71
  opt = {
72
+ 'batch_size': num_samples,
73
+ 'data_path' : './',
74
+ 'decode_type' : 'multinomial',
75
+ 'dev_no' : 0,
76
+ 'epoch' : 20,
77
+ 'model_choice' : 'transformer',
78
+ 'model_path' : './raw_pretrain_frag/checkpoint',
79
+ 'num_samples' : 50,
80
+ 'overwrite' : True,
81
+ 'save_directory' : './demo_gen',
82
+ 'test_file_name' : 'test_cut',
83
+ 'vocab_path' : './'
84
  }
85
 
86
  # 将 opt 字典转换为 Options 对象
87
  opt = Options(**opt)
88
+ print("--------------opt---------------")
89
+ print(opt)
90
  runner = GenerateRunner(opt)
91
 
92
  # 创建数据
 
139
  return result
140
 
141
 
142
+ @app.get("/fragmentize", response_model=FragmentResponse)
143
  async def fragmentize(smiles: str = Query(..., description="SMILES string of the molecule")):
144
  try:
145
  fragment_df = fragmentize_molecule(smiles)
 
152
  async def generate_molecules(request: GenerateRequest):
153
  try:
154
  # 调用 SMILES 生成逻辑
155
+ print("123123")
156
  result = run_generate_runner(request.constSmiles, request.varSmiles, request.mainCls, request.minorCls, request.deltaValue, request.num)
157
  return result
158
  except Exception as e:
test_cut.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cpd1SMILES,cpd2SMILES,constantSMILES,fromVarSMILES,toVarSMILES,Delta_Value,main_cls,minor_cls,value_type,target_name
2
+ Cc1c(CCC(=O)NC(Cc2ccccc2)C(=O)O)c(=O)oc2cc3oc4c(c3cc12)CCCC4,Cc1c(CCC(=O)Nc2ccc(C(=O)O)cc2)c(=O)oc2cc3oc4c(c3cc12)CCCC4,[*:1]C(=O)O.[*:2]NC(=O)CCc1c(C)c2cc3c4c(oc3cc2oc1=O)CCCC4,[*:1]C([*:2])Cc1ccccc1,[*:1]c1ccc([*:2])cc1,"(0.5, 1.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
3
+ Cc1oc2c(C)c3oc(=O)c(CCC(=O)Nc4cccc(C(=O)O)c4)c(C)c3cc2c1C,Cc1oc2c(C)c3oc(=O)c(CC(=O)Nc4cccc(C(=O)O)c4)c(C)c3cc2c1C,[*:1]C(=O)Nc1cccc(C(=O)O)c1.[*:2]c1c(C)c2cc3c(C)c(C)oc3c(C)c2oc1=O,[*:1]CC[*:2],[*:1]C[*:2],"(-1.0, -0.5]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
4
+ Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CC[C@@H](C(=O)O)C6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@](C)(O)C3)cc2n1,Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@](C)(O)C3)cc2n1,[*:1]C(=O)O.[*:2]Cc1cc(C#N)c2oc(-c3cccc(-c4cccc(Nc5nc(C)nc6cc(CN7CC[C@](C)(O)C7)cnc56)c4C)c3C)nc2c1,[*:1][C@@H]1CCN([*:2])C1,[*:1]C1CCN([*:2])CC1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
5
+ Cc1c(CCC(=O)Nc2ccc(C(=O)O)cc2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1ccc([*:2])cc1,[*:1]C(=O)Nc1cccc([*:2])c1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
6
+ Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2ccccc2C(=O)O)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1cccc([*:2])c1,[*:1]C(=O)Nc1ccccc1[*:2],"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
7
+ Cc1c(CCC(=O)Nc2ccc(CC(=O)O)cc2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:1]C(=O)O.[*:2]NC(=O)CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]Cc1ccc([*:2])cc1,[*:1]c1cccc([*:2])c1,"(0.0, 0.5]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
8
+ Cc1c(CC(=O)Nc2ccc(C(N)=O)cc2)c(=O)oc2c(C)c3oc4c(c3cc12)CCCC4,Cc1c(CC(=O)Nc2ccc(O)cc2)c(=O)oc2c(C)c3oc4c(c3cc12)CCCC4,[*:1]NC(=O)Cc1c(C)c2cc3c4c(oc3c(C)c2oc1=O)CCCC4,[*:1]c1ccc(C(N)=O)cc1,[*:1]c1ccc(O)cc1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
9
+ Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,Cc1c(Nc2nc(CO)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CCC(C(=O)O)CC4)cc(C#N)c3o2)c1C,[*:1]Cc1nc(Nc2cccc(-c3cccc(-c4nc5cc(CN6CCC(C(=O)O)CC6)cc(C#N)c5o4)c3C)c2C)c2ncc(CN3CC[C@@H](O)C3)cc2n1,[*:1][H],[*:1]O,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
10
+ Cc1c(CCC(=O)Nc2ccccc2C(=O)O)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,Cc1c(CCC(=O)Nc2cccc(C(=O)O)c2)c(=O)oc2cc3occ(C(C)(C)C)c3cc12,[*:2]C(=O)O.[*:1]CCc1c(C)c2cc3c(C(C)(C)C)coc3cc2oc1=O,[*:1]C(=O)Nc1ccccc1[*:2],[*:1]C(=O)Nc1cccc([*:2])c1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit
11
+ Cc1c(Nc2nc(C(F)F)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CC[C@@](C)(C(=O)O)C4)cc(C#N)c3o2)c1C,Cc1c(Nc2nc(C(F)F)nc3cc(CN4CC[C@@H](O)C4)cnc23)cccc1-c1cccc(-c2nc3cc(CN4CCC(C)(C(=O)O)CC4)cc(C#N)c3o2)c1C,[*:2]C(=O)O.[*:1]Cc1cc(C#N)c2oc(-c3cccc(-c4cccc(Nc5nc(C(F)F)nc6cc(CN7CC[C@@H](O)C7)cnc56)c4C)c3C)nc2c1,[*:1]N1CC[C@]([*:2])(C)C1,[*:1]N1CCC([*:2])(C)CC1,"(-0.5, 0.0]",activity,EC50,seq,Nuclear factor NF-kappa-B p105 subunit