Joey Callanan committed on
Commit ·
43e7ae4
1
Parent(s): a70ff5b
Creating new Space
Browse files- .gitignore +1 -0
- Gen_PartialSMILES2.py +307 -0
- QUICK_START.md +63 -0
- README.md +107 -4
- README_MODULAR.md +96 -0
- __pycache__/app.cpython-313.pyc +0 -0
- __pycache__/molecule_render_demo.cpython-314.pyc +0 -0
- __pycache__/run_apps.cpython-313.pyc +0 -0
- __pycache__/test_molecular_analysis.cpython-313.pyc +0 -0
- app.py +20 -0
- generated_molecules.csv +214 -0
- legacy/app_legacy.py +1081 -0
- main.py +11 -0
- molecule_render_demo.py +146 -0
- requirements.txt +3 -0
- run.py +85 -0
- smiles_to_csv.py +25 -0
- src/__init__.py +9 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/__init__.cpython-314.pyc +0 -0
- src/__pycache__/app.cpython-313.pyc +0 -0
- src/__pycache__/app.cpython-314.pyc +0 -0
- src/ai/__init__.py +20 -0
- src/ai/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ai/__pycache__/services.cpython-313.pyc +0 -0
- src/ai/services.py +163 -0
- src/app.py +247 -0
- src/clm/__init__.py +0 -0
- src/clm/model_new_torch.pt +3 -0
- src/config/__init__.py +30 -0
- src/config/__pycache__/__init__.cpython-313.pyc +0 -0
- src/config/__pycache__/settings.cpython-313.pyc +0 -0
- src/config/settings.py +401 -0
- src/molecules/__init__.py +31 -0
- src/molecules/__pycache__/__init__.cpython-313.pyc +0 -0
- src/molecules/__pycache__/analysis.cpython-313.pyc +0 -0
- src/molecules/__pycache__/variations.cpython-313.pyc +0 -0
- src/molecules/analysis.py +247 -0
- src/molecules/variations.py +116 -0
- src/ui/__init__.py +29 -0
- src/ui/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ui/__pycache__/components.cpython-313.pyc +0 -0
- src/ui/__pycache__/handlers.cpython-313.pyc +0 -0
- src/ui/components.py +473 -0
- src/ui/handlers.py +297 -0
- test_error_handling.py +55 -0
- test_molecule_image.py +48 -0
- test_variation_selection.py +44 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
venv/
|
Gen_PartialSMILES2.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rdkit import RDLogger
|
| 2 |
+
from augmentation import *
|
| 3 |
+
|
| 4 |
+
# Disable all RDKit warnings and errors
|
| 5 |
+
RDLogger.DisableLog('rdApp.*')
|
| 6 |
+
from rdkit import Chem
|
| 7 |
+
import random
|
| 8 |
+
import torch
|
| 9 |
+
# import F
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
import math
|
| 12 |
+
import torch
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from rdkit.Chem import AllChem
|
| 15 |
+
import argparse
|
| 16 |
+
from SmilesPE.pretokenizer import atomwise_tokenizer
|
| 17 |
+
# SMILES tokenizer
|
| 18 |
+
import pathlib
|
| 19 |
+
from rdkit.Chem.Scaffolds.MurckoScaffold import GetScaffoldForMol
|
| 20 |
+
from rdkit import Chem
|
| 21 |
+
from rdkit.Chem import AllChem, DataStructs
|
| 22 |
+
import numpy as np
|
| 23 |
+
from itertools import combinations
|
| 24 |
+
import re
|
| 25 |
+
from collections import defaultdict
|
| 26 |
+
import partialsmiles as ps
|
| 27 |
+
# from Join import join_scaf_deco
|
| 28 |
+
from collections import OrderedDict
|
| 29 |
+
from SmilesPE.pretokenizer import atomwise_tokenizer
|
| 30 |
+
|
| 31 |
+
class AtomwiseTokenizer():
    """Minimal tokenizer facade built on ``atomwise_tokenizer``.

    The vocabulary is not supplied at construction time; call
    :meth:`assign_vocab` before using :meth:`decode` or the ``*_token_id``
    attributes.
    """

    def __init__(self, str_bos="<can>", str_eos="<eos>"):
        """Remember the begin-of-sequence and end-of-sequence token strings."""
        self.bos_token, self.eos_token = str_bos, str_eos

    def tokenize(self, smiles):
        """Split ``smiles`` into tokens by delegating to ``atomwise_tokenizer``."""
        return atomwise_tokenizer(smiles)

    def convert_tokens_to_string(self, tokens):
        """Concatenate ``tokens`` back into a single string."""
        return "".join(tokens)

    def assign_vocab(self, vocab):
        """Attach a token -> id mapping and derive the lookups that depend on it.

        Raises:
            KeyError: if the EOS or BOS token string is missing from ``vocab``.
        """
        self.vocab = vocab
        # Reverse lookup (id -> token) used by decode().
        self.vocab_inv = {index: symbol for symbol, index in vocab.items()}
        self.eos_token_id = vocab[self.eos_token]
        self.bos_token_id = vocab[self.bos_token]

    def decode(self, ids, skip_special_tokens=True):
        """Map a sequence of token ids back to the concatenated token string.

        ``ids`` may be a ``torch.Tensor`` or any iterable of ids.
        ``skip_special_tokens`` is accepted but not consulted: special tokens
        are always kept in the output.
        """
        sequence = ids.cpu().numpy() if isinstance(ids, torch.Tensor) else ids
        return "".join(self.vocab_inv[token_id] for token_id in sequence)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def gen_psv_table(partial_smiles, vocab, eos_str, sep_str, partial_valid):
    """Build a per-token validity table for extending ``partial_smiles``.

    Args:
        partial_smiles: The SMILES prefix generated so far.
        vocab: Mapping whose keys are the candidate next tokens; the table
            follows its iteration order.
        eos_str: End-of-sequence token string.
        sep_str: Separator token string.
        partial_valid: Value recorded for the ``eos_str`` and ``sep_str``
            entries (these tokens are never sent to the parser).

    Returns:
        A list with one entry per vocabulary key: ``partial_valid`` for the
        EOS/separator tokens, otherwise ``True`` when
        ``ps.ParseSmiles(prefix + token, partial=True)`` succeeds and returns
        a non-``None`` result, ``False`` when it raises or returns ``None``.
    """
    terminators = (eos_str, sep_str)
    psv_table = []
    for token in vocab:
        if token in terminators:
            psv_table.append(partial_valid)
            continue
        try:
            mol = ps.ParseSmiles(partial_smiles + token, partial=True)
        except Exception:
            # Any parser error means this token cannot extend the prefix.
            # ``Exception`` (not a bare except) lets Ctrl+C / SystemExit through.
            psv_table.append(False)
        else:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            psv_table.append(mol is not None)
    return psv_table
|
| 63 |
+
|
| 64 |
+
def calculate_bm_scaffold(smiles):
    """Return the scaffold SMILES of ``smiles`` via ``GetScaffoldForMol``.

    Returns ``None`` when the input cannot be parsed or when scaffold
    extraction fails, so callers can treat this as a best-effort lookup.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Unparseable input: say so directly instead of relying on
            # GetScaffoldForMol(None) blowing up.
            return None
        return Chem.MolToSmiles(GetScaffoldForMol(mol))
    except Exception:
        # Best-effort: any toolkit failure maps to None, but Ctrl+C and
        # SystemExit are no longer swallowed as they were by a bare except.
        return None
|
| 71 |
+
|
| 72 |
+
def get_morgan_fp(smiles, radius=2, n_bits=2048):
    """Return a Morgan bit-vector fingerprint for ``smiles``.

    Args:
        smiles: SMILES string to fingerprint.
        radius: Radius passed to ``GetMorganFingerprintAsBitVect``.
        n_bits: Forwarded as ``nBits``.

    Returns:
        The fingerprint, or ``None`` when ``Chem.MolFromSmiles`` returns ``None``.
    """
    parsed = Chem.MolFromSmiles(smiles)
    return (
        None
        if parsed is None
        else AllChem.GetMorganFingerprintAsBitVect(parsed, radius, nBits=n_bits)
    )
|
| 77 |
+
|
| 78 |
+
def compute_internal_diversity(smiles_list):
    """Return ``1 - mean pairwise Tanimoto similarity`` over ``smiles_list``.

    Entries for which ``get_morgan_fp`` returns ``None`` are dropped first.
    With fewer than two usable fingerprints the result is ``0.0``.
    """
    candidate_fps = [get_morgan_fp(sm) for sm in smiles_list]
    usable_fps = [fp for fp in candidate_fps if fp is not None]
    if len(usable_fps) < 2:
        # Not enough valid molecules to form a single pair.
        return 0.0
    pair_scores = [
        DataStructs.TanimotoSimilarity(left, right)
        for left, right in combinations(usable_fps, 2)
    ]
    return 1 - np.mean(pair_scores)
|
| 90 |
+
|
| 91 |
+
def atomwise_tokenizer_fixed(x):
    """Tokenize ``x`` atom-wise while keeping each ``"|"`` as its own token.

    The input is split on ``"|"``, every piece is passed through
    ``atomwise_tokenizer``, and the pieces are re-joined with a literal
    ``"|"`` token between consecutive pieces.
    """
    pieces = [atomwise_tokenizer(fragment) for fragment in x.split("|")]
    tokens = pieces[0]
    for following in pieces[1:]:
        tokens += ["|"] + following
    return tokens
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def customized_forward(model, x_in, y_in, y_out=None, boundary=None, return_last_hidden_state=False):
    """Run the model's encoder/decoder stack by hand and return logits.

    Args:
        model: Object exposing ``tok_emb``, ``pos_emb``, ``drop``,
            ``encoder_blocks``, ``decoder_blocks``, ``ln_f`` and ``head``.
        x_in: Encoder-side token ids (indexed as ``[batch, seq]``).
        y_in: Decoder-side token ids (indexed as ``[batch, seq]``).
        y_out: Optional target ids; when given, a cross-entropy loss over the
            flattened logits is computed.
        boundary: Accepted for call compatibility; not used in this function.
        return_last_hidden_state: Selects the second element of the result.

    Returns:
        ``(logits, hidden)`` when ``return_last_hidden_state`` is true, where
        ``hidden`` is the layer-normalised decoder output fed to ``model.head``;
        otherwise ``(logits, loss)`` with ``loss`` being ``None`` if ``y_out``
        was not supplied.
    """
    # Token + positional embeddings, encoder side first and decoder side
    # second (same order as the dropout calls were originally issued).
    memory = model.drop(model.tok_emb(x_in) + model.pos_emb[:, :x_in.size()[1], :])
    hidden = model.drop(model.tok_emb(y_in) + model.pos_emb[:, :y_in.size()[1], :])

    for enc in model.encoder_blocks:
        memory = enc(memory)
    memory = model.ln_f(memory)

    # Every decoder block receives the final encoder output as first argument.
    for dec in model.decoder_blocks:
        hidden = dec(memory, hidden)
    hidden = model.ln_f(hidden)

    logits = model.head(hidden)
    loss = (
        F.cross_entropy(logits.view(-1, logits.size(-1)), y_out.view(-1))
        if y_out is not None
        else None
    )
    return (logits, hidden) if return_last_hidden_state else (logits, loss)
|
| 118 |
+
|
| 119 |
+
def _filter_logits(logits, top_k, top_p, filter_value=-float('Inf')):
    """Apply top-k and then nucleus (top-p) filtering to ``[batch, vocab]`` logits."""
    if top_k > 0:
        # Clamp so a top_k larger than the vocabulary no longer makes
        # torch.topk raise; for smaller values the result is unchanged.
        k = min(top_k, logits.size(-1))
        kth_best = torch.topk(logits, k, dim=-1)[0][:, [-1]]
        logits[logits < kth_best] = filter_value
    if top_p < 1.:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift right so the first token that crosses the threshold is kept.
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0
        sorted_logits[sorted_indices_to_remove] = filter_value
        # Undo the sort so columns line up with vocabulary ids again.
        logits = torch.gather(sorted_logits, -1, sorted_indices.argsort(-1))
    return logits


def path_aligned_generation(
    model,
    tokenizer,
    max_length=256,
    batch_size=128,
    device="cuda:0",
    budget_generation=10,
    sample_suffix="Cc1ccccc1",
    tensor_scaffold=None,
    boundary=None,
    n_generation=100,
    supress_eos=False,
    max_molwt=1000,
    max_clogp=10,
    max_rotatable_bond=10,
    use_merge=True,
    top_k=0,
    top_p=1.,
    min_prefix_length=4,
    typical_sampling=False,
    contrastive_search=False,
    pre_check_merge=False,
):
    """Sample token sequences and collect every prefix that parses as a molecule.

    Each outer iteration starts ``batch_size`` sequences as ``[BOS, "[*]"]``
    and extends them one sampled token per step. Before the newly sampled
    token is appended, each row's current prefix (BOS excluded) is decoded; if
    ``Chem.MolFromSmiles`` accepts it and it has not been recorded yet, it is
    stored in the result and that row is dropped from the batch. Iterations
    repeat until at least ``n_generation`` distinct strings are recorded.

    Args:
        model: Network to sample from. It is moved to ``device`` and put in
            eval mode.
        tokenizer: Must expose ``bos_token_id``, ``vocab`` (containing
            ``"[*]"``) and ``decode``.
        max_length: Steps run over ``range(1, max_length - 1)``.
        batch_size: Number of sequences started per outer iteration.
        device: Torch device used for the model and the generation tensors.
        tensor_scaffold: Optional encoder input; when given, logits come from
            ``customized_forward``, otherwise from ``model.forward(inputs).logits``.
        boundary: Forwarded to ``customized_forward``.
        n_generation: Minimum number of distinct strings to collect.
        top_k: Keep only the ``top_k`` highest logits per row when > 0.
        top_p: Nucleus sampling threshold, applied when < 1.
        budget_generation, sample_suffix, supress_eos, max_molwt, max_clogp,
        max_rotatable_bond, use_merge, min_prefix_length, typical_sampling,
        contrastive_search, pre_check_merge: Accepted for interface
            compatibility; not read by this implementation.

    Returns:
        A 7-tuple ``(generated_smiles, dict_inchikey_merged_path,
        dict_inchikey_count, dict_path_inchikey, total_merge_count, n_calls,
        n_repeated)``. ``generated_smiles`` is an ``OrderedDict`` mapping each
        accepted string to ``1``; ``n_calls`` is the summed batch size over all
        forward passes. The three other dictionaries and the merge/repeat
        counters are never updated here and come back empty / zero.

    NOTE(review): the outer loop has no iteration cap, so it runs until
    ``n_generation`` distinct valid strings exist; if the model cannot
    produce that many, the call does not return.
    """
    model.to(device)
    model.eval()
    generated_smiles = OrderedDict()
    dict_inchikey_count = defaultdict(int)
    dict_inchikey_merged_path = defaultdict(OrderedDict)
    dict_path_inchikey = {}
    iteration_counter = 0
    total_merge_count = 0
    n_calls = 0
    n_repeated = 0
    n_invalid = 0
    count_merged = 0
    with torch.no_grad():
        while len(generated_smiles) < n_generation:
            tensor_generation = torch.zeros(batch_size, 2).long().to(device)
            tensor_generation[:, 0] = tokenizer.bos_token_id
            tensor_generation[:, 1] = tokenizer.vocab["[*]"]
            for step_idx in range(1, max_length - 1):
                inputs = tensor_generation[:, :step_idx + 1].to(device)
                if tensor_scaffold is not None:
                    # Trim the scaffold batch to the rows that are still alive.
                    logits, _ = customized_forward(
                        model,
                        tensor_scaffold[:inputs.shape[0]],
                        inputs,
                        None,
                        boundary,
                        return_last_hidden_state=True,
                    )
                    logits = logits[:, -1, :]
                else:
                    logits = model.forward(inputs).logits[:, -1, :]
                n_calls += inputs.shape[0]

                # Sample the next token for every row.
                logits = _filter_logits(logits, top_k, top_p)
                next_token_id = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)

                # Decode prefixes without the BOS token and without the token
                # that was just sampled.
                current_prefix = [
                    tokenizer.decode(tensor_generation[sample_idx, 1:step_idx + 1])
                    for sample_idx in range(tensor_generation.shape[0])
                ]
                list_finished_idx = []
                for sample_idx, current_decoded in enumerate(current_prefix):
                    try:
                        mol = Chem.MolFromSmiles(current_decoded)
                    except Exception:
                        # Treat any parser failure as "not a molecule" without
                        # swallowing KeyboardInterrupt / SystemExit.
                        mol = None
                    if mol is not None and current_decoded not in generated_smiles:
                        generated_smiles[current_decoded] = 1
                        list_finished_idx.append(sample_idx)

                # Drop rows that were just recorded, extend the rest.
                keep_mask = torch.ones(tensor_generation.shape[0], dtype=torch.bool)
                keep_mask[list_finished_idx] = False
                tensor_generation = torch.cat(
                    [tensor_generation[keep_mask], next_token_id[keep_mask]], dim=1
                )
                # terminate if all samples reached the end
                if tensor_generation.shape[0] == 0:
                    break
                print(
                    f"Iteration {iteration_counter:05d}"
                    f" step {step_idx:05d}"
                    f" merged_t {total_merge_count:05d}"
                    f" merged_c {count_merged:05d}"
                    f" dict_prefix {len(dict_path_inchikey):05d}"
                    f" dict_inch {len(dict_inchikey_merged_path):05d}"
                    f" gen_c {tensor_generation.shape[0]:05d}"
                    f" gen_t {len(generated_smiles):08d}"
                    f" n_calls {n_calls:08d}"
                    f" n_repeated {n_repeated:05d}"
                    f" n_invalid {n_invalid:05d}"
                )
            iteration_counter += 1
            total_merge_count += count_merged
    return generated_smiles, dict_inchikey_merged_path, dict_inchikey_count, dict_path_inchikey, total_merge_count, n_calls, n_repeated
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# Attachment-point marker and the regular expressions built from it.
ATTACHMENT_POINT_TOKEN = "*"
# Bracketed, numbered attachment point such as "[*:3]"; group 1 is the number.
ATTACHMENT_POINT_NUM_REGEXP = rf"\[{re.escape(ATTACHMENT_POINT_TOKEN)}:(\d+)\]"
# Either a bare token or a bracket expression that starts with the token.
ATTACHMENT_POINT_REGEXP = rf"(?:{re.escape(ATTACHMENT_POINT_TOKEN)}|\[{re.escape(ATTACHMENT_POINT_TOKEN)}[^\]]*\])"
# A token that is not immediately preceded by "[".
ATTACHMENT_POINT_NO_BRACKETS_REGEXP = rf"(?<!\[){re.escape(ATTACHMENT_POINT_TOKEN)}"
|
| 238 |
+
|
| 239 |
+
# Command-line options as (flag, type, default) rows, registered in this order.
# NOTE(review): several options (e.g. --top_k, --top_p, --filepath_scaffold)
# do not appear to be forwarded to the generation call further down — TODO confirm.
_CLI_OPTIONS = (
    ("--save_dir", str, "entropy/gpt2_zinc_87m"),
    ("--model_name", str, "gpt2_zinc_87m"),
    ("--generate_mode", str, "scaffold_decorator"),
    ("--filepath_scaffold", str, "/shared/healthinfolab/xiw14035/TF_debug/SCMG/SCMG/20250505/scaf_5.smi"),
    ("--model_path", str, ""),
    ("--n_to_gen", int, 100),
    ("--max_length", int, 30),
    ("--max_molwt", float, 500),
    ("--max_clogp", float, 4.5),
    ("--max_rotatable_bond", int, 8),
    ("--min_prefix_length", int, 4),
    ("--top_p", float, 1.0),
    ("--top_k", int, 10),
    # Name of the decoding method.
    ("--decode_methods", str, "Structure-Aware_Decoding"),
)
parser = argparse.ArgumentParser()
for _flag, _type, _default in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_type, default=_default)
args = parser.parse_args()
# Example invocation:
#   python Gen_PartialSMILES2.py --save_dir "entropy/gpt2_zinc_87m" --model_name "gpt2_zinc_87m" --generate_mode "scaffold_decorator" --filepath_scaffold "scaf_5.smi" --model_path "" --decode_methods "Structure-Aware_Decoding"

# Make sure the output directory exists before anything is generated.
pathlib.Path(args.save_dir).mkdir(parents=True, exist_ok=True)

# Generation runs on CPU; switch to torch.device("cuda:0") to use a GPU.
device = torch.device("cpu")
|
| 260 |
+
|
| 261 |
+
# Load the checkpoint. --model_path used to be parsed but ignored; it is now
# honoured when non-empty, and the empty default still selects the bundled file.
# NOTE(security): weights_only=False unpickles arbitrary Python objects, so only
# point this at checkpoints from a trusted source.
model = torch.load(
    args.model_path or "src/clm/model_new_torch.pt",
    weights_only=False,
    map_location="cpu",
)
# The token -> id mapping is stored on the checkpoint object itself.
vocab = model.vocab_encoder
tokenizer = AtomwiseTokenizer(str_bos="<scmg_char_cano>", str_eos="<eos>")
tokenizer.assign_vocab(vocab)
# "|" is registered as the separator token on top of BOS/EOS.
tokenizer.sep_token = "|"
tokenizer.sep_token_id = vocab[tokenizer.sep_token]
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def path_aligned_generation_supress_eos(
    model,
    tokenizer,
    max_length=256,
    n_generation=100,
    batch_size=128,
    device="cuda:0",
    tensor_scaffold=None,
    boundary=None,
    budget_generation=10,
    max_molwt=1000,
    max_clogp=10,
    max_rotatable_bond=10,
):
    """Call ``path_aligned_generation`` with ``supress_eos=True``.

    Every other argument is forwarded unchanged and the return value is
    passed straight through.
    """
    forwarded = dict(
        max_length=max_length,
        n_generation=n_generation,
        batch_size=batch_size,
        device=device,
        tensor_scaffold=tensor_scaffold,
        boundary=boundary,
        budget_generation=budget_generation,
        supress_eos=True,
        max_molwt=max_molwt,
        max_clogp=max_clogp,
        max_rotatable_bond=max_rotatable_bond,
    )
    return path_aligned_generation(model, tokenizer, **forwarded)
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
model.to(device)
model.eval()
budget_generation = 10
batch_size = 512

# Scaffold used to condition the generation; it must carry a "[*]" attachment point.
scaf_smi = "[*]c1ccccc1"
if not scaf_smi:
    # No scaffold: fall back to unconditioned generation.
    tensor_scaffold = None
    boundary = None
else:
    if "[*]" not in scaf_smi:
        raise ValueError("Scaffold does not contain attachment point")
    # BOS + scaffold token ids + EOS, replicated once per batch row.
    sequence_scaffold = [
        tokenizer.bos_token_id,
        *[vocab[a] for a in tokenizer.tokenize(scaf_smi)],
        tokenizer.eos_token_id,
    ]
    tensor_scaffold = torch.tensor(sequence_scaffold).unsqueeze(0).to(device).repeat(batch_size, 1)
    boundary = torch.zeros(batch_size, 1).long().to(device) + tensor_scaffold.shape[1] + 1

# Empty results frame; nothing below writes to it.
df_result = pd.DataFrame(columns=["n_to_gen", "gen_func_name", "internal_diversity", "n_bm_scaffold"])

# Seed every random number generator in use so runs are repeatable.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

n_to_gen = args.n_to_gen
# NOTE(review): top_k / top_p are not passed here, so the function defaults apply.
(
    generated_smiles_raw,
    dict_inchikey_merged_path,
    dict_inchikey_count,
    dict_path_inchikey,
    total_merge_count,
    n_calls,
    n_repeated,
) = path_aligned_generation(
    model,
    tokenizer=tokenizer,
    max_length=args.max_length,
    n_generation=n_to_gen,
    batch_size=batch_size,
    device=device,
    tensor_scaffold=tensor_scaffold,
    boundary=boundary,
    budget_generation=budget_generation,
    max_molwt=args.max_molwt,
    max_clogp=args.max_clogp,
    max_rotatable_bond=args.max_rotatable_bond,
    use_merge=True,
    min_prefix_length=args.min_prefix_length,
)
# Keep only the text after the last "<can>" marker of each generated string.
generated_smiles = {
    smiles.split("<can>")[-1]: freq
    for smiles, freq in generated_smiles_raw.items()
}

# Persist the results as a two-column CSV in the working directory.
pd.DataFrame(
    {
        "smiles": list(generated_smiles.keys()),
        "count": list(generated_smiles.values()),
    }
).to_csv("generated_molecules.csv", index=False)
|
QUICK_START.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 Quick Start Guide
|
| 2 |
+
|
| 3 |
+
## Running the HITL Drug Discovery Application
|
| 4 |
+
|
| 5 |
+
### Option 1: Easy Launcher (Recommended)
|
| 6 |
+
```bash
|
| 7 |
+
python run.py
|
| 8 |
+
```
|
| 9 |
+
|
| 10 |
+
### Option 2: Direct Launch
|
| 11 |
+
```bash
|
| 12 |
+
# Activate virtual environment
|
| 13 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 14 |
+
|
| 15 |
+
# Run the application
|
| 16 |
+
python app.py
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
### Option 3: Legacy Version (Backup)
|
| 20 |
+
```bash
|
| 21 |
+
# Activate virtual environment
|
| 22 |
+
source venv/bin/activate
|
| 23 |
+
|
| 24 |
+
# Run legacy version
|
| 25 |
+
python legacy/app_legacy.py
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## 🌐 Access the Application
|
| 29 |
+
|
| 30 |
+
Once running, open your browser and go to:
|
| 31 |
+
**http://localhost:7860**
|
| 32 |
+
|
| 33 |
+
## 🛑 Stop the Application
|
| 34 |
+
|
| 35 |
+
Press `Ctrl+C` in the terminal to stop the application.
|
| 36 |
+
|
| 37 |
+
## 📁 Project Structure
|
| 38 |
+
|
| 39 |
+
- **`app.py`** - Main entry point (works for both local and Hugging Face Spaces)
|
| 40 |
+
- **`run.py`** - Easy launcher script
|
| 41 |
+
- **`src/`** - Modular source code
|
| 42 |
+
- **`legacy/`** - Legacy monolithic version (backup)
|
| 43 |
+
- **`venv/`** - Virtual environment
|
| 44 |
+
|
| 45 |
+
## 🔧 Troubleshooting
|
| 46 |
+
|
| 47 |
+
### Virtual Environment Issues
|
| 48 |
+
```bash
|
| 49 |
+
# Create virtual environment
|
| 50 |
+
python -m venv venv
|
| 51 |
+
|
| 52 |
+
# Activate it
|
| 53 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 54 |
+
|
| 55 |
+
# Install dependencies
|
| 56 |
+
pip install -r requirements.txt
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
### Import Errors
|
| 60 |
+
Make sure you're in the project directory and virtual environment is activated.
|
| 61 |
+
|
| 62 |
+
### Port Already in Use
|
| 63 |
+
If port 7860 is busy, the app will automatically use the next available port.
|
README.md
CHANGED
|
@@ -1,12 +1,115 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: HITL Drug Discovery
|
| 3 |
+
emoji: 🧬
|
| 4 |
+
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
hf_oauth: true
|
| 11 |
+
hf_oauth_scopes:
|
| 12 |
+
- inference-api
|
| 13 |
+
license: apache-2.0
|
| 14 |
+
short_description: HITL Drug Discovery with AI Chat and Molecular Analysis
|
| 15 |
---
|
| 16 |
|
| 17 |
+
# 🧬 HITL Drug Discovery Platform
|
| 18 |
+
|
| 19 |
+
An advanced drug discovery platform combining AI-powered chat with molecular visualization and property analysis using [Gradio](https://gradio.app), [RDKit](https://www.rdkit.org/), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
| 20 |
+
|
| 21 |
+
## 🚀 Quick Start
|
| 22 |
+
|
| 23 |
+
### Running the Application
|
| 24 |
+
```bash
|
| 25 |
+
# Easy launcher (recommended)
|
| 26 |
+
python run.py
|
| 27 |
+
|
| 28 |
+
# Direct launch
|
| 29 |
+
python app.py
|
| 30 |
+
|
| 31 |
+
# Legacy version (backup)
|
| 32 |
+
python legacy/app_legacy.py
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### Access
|
| 36 |
+
Once running, open your browser to: **http://localhost:7860**
|
| 37 |
+
|
| 38 |
+
## ✨ Key Features
|
| 39 |
+
|
| 40 |
+
- **🤖 AI Chat Assistant**: Expert medicinal chemistry guidance and structure generation
|
| 41 |
+
- **🔬 Molecular Analysis**: Calculate drug-likeness properties and Lipinski's Rule of Five
|
| 42 |
+
- **🎨 Interactive Visualization**: High-quality molecular structure rendering with multiple styles
|
| 43 |
+
- **📚 Drug Discovery Library**: Curated collection of pharmaceutical compounds
|
| 44 |
+
- **⚡ Real-time Property Calculation**: Molecular weight, LogP, TPSA, hydrogen bonding, and more
|
| 45 |
+
- **🔄 Chemical Variations**: Generate multiple visualization styles of molecular structures
|
| 46 |
+
- **💾 Bookmarking**: Save and manage favorite molecular structures
|
| 47 |
+
|
| 48 |
+
## 🏗️ Project Structure
|
| 49 |
+
|
| 50 |
+
```
|
| 51 |
+
HITL_Drug_Discovery/
|
| 52 |
+
├── app.py # Main entry point
|
| 53 |
+
├── run.py # Easy launcher script
|
| 54 |
+
├── requirements.txt # Dependencies
|
| 55 |
+
├── src/ # Modular source code
|
| 56 |
+
│ ├── app.py # Main application orchestrator
|
| 57 |
+
│ ├── molecules/ # Molecular analysis and variations
|
| 58 |
+
│ │ ├── analysis.py # Property calculations & validation
|
| 59 |
+
│ │ └── variations.py # Structure variation generation
|
| 60 |
+
│ ├── ai/ # AI services
|
| 61 |
+
│ │ └── services.py # AI chat and structure generation
|
| 62 |
+
│ ├── ui/ # UI components and handlers
|
| 63 |
+
│ │ ├── components.py # UI component definitions
|
| 64 |
+
│ │ └── handlers.py # Event handlers and business logic
|
| 65 |
+
│ └── config/ # Configuration and settings
|
| 66 |
+
│ └── settings.py # App configuration and styling
|
| 67 |
+
└── legacy/ # Legacy monolithic version (backup)
|
| 68 |
+
└── app_legacy.py
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
## 🔧 Technical Architecture
|
| 72 |
+
|
| 73 |
+
### Modular Design Benefits
|
| 74 |
+
- **Maintainability**: Each module has a single responsibility
|
| 75 |
+
- **Testability**: Individual modules can be tested in isolation
|
| 76 |
+
- **Scalability**: Easy to add new features without affecting existing code
|
| 77 |
+
- **Readability**: Clear separation of concerns
|
| 78 |
+
- **Reusability**: Components can be reused across different parts of the app
|
| 79 |
+
|
| 80 |
+
### Key Modules
|
| 81 |
+
- **`molecules/analysis.py`**: SMILES validation, property calculations, drug-likeness assessment
|
| 82 |
+
- **`molecules/variations.py`**: Generation of multiple molecular structure visualizations
|
| 83 |
+
- **`ai/services.py`**: AI-powered chat responses using Hugging Face models
|
| 84 |
+
- **`ui/components.py`**: Reusable UI component definitions for Gradio interface
|
| 85 |
+
- **`ui/handlers.py`**: Event handlers, business logic, and state management
|
| 86 |
+
|
| 87 |
+
## 🛠️ Troubleshooting
|
| 88 |
+
|
| 89 |
+
### Virtual Environment Issues
|
| 90 |
+
```bash
|
| 91 |
+
# Create virtual environment
|
| 92 |
+
python -m venv venv
|
| 93 |
+
|
| 94 |
+
# Activate it
|
| 95 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 96 |
+
|
| 97 |
+
# Install dependencies
|
| 98 |
+
pip install -r requirements.txt
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### Common Issues
|
| 102 |
+
- **Import Errors**: Make sure you're in the project directory and virtual environment is activated
|
| 103 |
+
- **Port Already in Use**: The app will automatically use the next available port
|
| 104 |
+
- **Missing Dependencies**: Run `pip install -r requirements.txt`
|
| 105 |
+
|
| 106 |
+
## 🚀 Future Enhancements
|
| 107 |
+
|
| 108 |
+
With the modular structure, it's easy to:
|
| 109 |
+
- Add new molecular analysis algorithms
|
| 110 |
+
- Implement additional AI models
|
| 111 |
+
- Create new UI components
|
| 112 |
+
- Add database integration
|
| 113 |
+
- Implement user authentication
|
| 114 |
+
- Add API endpoints
|
| 115 |
+
- Create unit tests for each module
|
README_MODULAR.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HITL Drug Discovery - Modular Architecture
|
| 2 |
+
|
| 3 |
+
## 🏗️ Project Structure
|
| 4 |
+
|
| 5 |
+
```
|
| 6 |
+
HITL_Drug_Discovery/
|
| 7 |
+
├── main.py # Main entry point
|
| 8 |
+
├── app.py # Entry point that imports and runs the modular app (src/app.py)
|
| 9 |
+
├── requirements.txt # Dependencies
|
| 10 |
+
├── README.md # Original README
|
| 11 |
+
├── README_MODULAR.md # This file
|
| 12 |
+
└── src/ # Modular source code
|
| 13 |
+
├── __init__.py
|
| 14 |
+
├── app.py # Main application orchestrator
|
| 15 |
+
├── molecules/ # Molecular analysis and variations
|
| 16 |
+
│ ├── __init__.py
|
| 17 |
+
│ ├── analysis.py # Molecular property calculations
|
| 18 |
+
│ └── variations.py # Structure variation generation
|
| 19 |
+
├── ai/ # AI services
|
| 20 |
+
│ ├── __init__.py
|
| 21 |
+
│ └── services.py # AI chat and structure generation
|
| 22 |
+
├── ui/ # UI components and handlers
|
| 23 |
+
│ ├── __init__.py
|
| 24 |
+
│ ├── components.py # UI component definitions
|
| 25 |
+
│ └── handlers.py # Event handlers and business logic
|
| 26 |
+
└── config/ # Configuration and settings
|
| 27 |
+
├── __init__.py
|
| 28 |
+
└── settings.py # App configuration and constants
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## 🚀 Running the Application
|
| 32 |
+
|
| 33 |
+
### Option 1: Modular Version (Recommended)
|
| 34 |
+
```bash
|
| 35 |
+
python main.py
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### Option 2: Legacy Monolithic Version
|
| 39 |
+
```bash
|
| 40 |
+
python legacy/app_legacy.py
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
## 📁 Module Descriptions
|
| 44 |
+
|
| 45 |
+
### `src/molecules/`
|
| 46 |
+
- **`analysis.py`**: Molecular property calculations, SMILES validation, drug-likeness assessment
|
| 47 |
+
- **`variations.py`**: Generation of multiple molecular structure visualizations
|
| 48 |
+
|
| 49 |
+
### `src/ai/`
|
| 50 |
+
- **`services.py`**: AI-powered chat responses and structure generation using OpenAI GPT-OSS-20B
|
| 51 |
+
|
| 52 |
+
### `src/ui/`
|
| 53 |
+
- **`components.py`**: Reusable UI component definitions for Gradio interface
|
| 54 |
+
- **`handlers.py`**: Event handlers, business logic, and state management
|
| 55 |
+
|
| 56 |
+
### `src/config/`
|
| 57 |
+
- **`settings.py`**: Application configuration, constants, and styling
|
| 58 |
+
|
| 59 |
+
### `src/app.py`
|
| 60 |
+
- **Main orchestrator**: Ties all modules together and creates the Gradio interface
|
| 61 |
+
|
| 62 |
+
## 🔧 Benefits of Modular Architecture
|
| 63 |
+
|
| 64 |
+
1. **Maintainability**: Each module has a single responsibility
|
| 65 |
+
2. **Testability**: Individual modules can be tested in isolation
|
| 66 |
+
3. **Scalability**: Easy to add new features without affecting existing code
|
| 67 |
+
4. **Readability**: Clear separation of concerns
|
| 68 |
+
5. **Reusability**: Components can be reused across different parts of the app
|
| 69 |
+
6. **Professional**: Industry-standard project structure
|
| 70 |
+
|
| 71 |
+
## 🧪 Key Features
|
| 72 |
+
|
| 73 |
+
- **Molecular Analysis**: SMILES validation, property calculations, drug-likeness assessment
|
| 74 |
+
- **Chemical Variations**: Multiple visualization styles and rendering options
|
| 75 |
+
- **AI Structure Generation**: AI-powered molecular structure suggestions
|
| 76 |
+
- **Interactive UI**: Clean, modern interface with real-time feedback
|
| 77 |
+
- **Bookmarking**: Save and manage favorite molecular structures
|
| 78 |
+
|
| 79 |
+
## 🔄 Migration from Monolithic
|
| 80 |
+
|
| 81 |
+
The original `app.py` (1082 lines) has been refactored into:
|
| 82 |
+
- **6 focused modules** with clear responsibilities
|
| 83 |
+
- **~200 lines per module** for better maintainability
|
| 84 |
+
- **Clean imports** and dependencies
|
| 85 |
+
- **Preserved functionality** with improved organization
|
| 86 |
+
|
| 87 |
+
## 🚀 Future Enhancements
|
| 88 |
+
|
| 89 |
+
With the modular structure, it's now easy to:
|
| 90 |
+
- Add new molecular analysis algorithms
|
| 91 |
+
- Implement additional AI models
|
| 92 |
+
- Create new UI components
|
| 93 |
+
- Add database integration
|
| 94 |
+
- Implement user authentication
|
| 95 |
+
- Add API endpoints
|
| 96 |
+
- Create unit tests for each module
|
__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (888 Bytes). View file
|
|
|
__pycache__/molecule_render_demo.cpython-314.pyc
ADDED
|
Binary file (13.8 kB). View file
|
|
|
__pycache__/run_apps.cpython-313.pyc
ADDED
|
Binary file (3.75 kB). View file
|
|
|
__pycache__/test_molecular_analysis.cpython-313.pyc
ADDED
|
Binary file (4.23 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HITL Drug Discovery Application
|
| 3 |
+
|
| 4 |
+
Main entry point for both local development and Hugging Face Spaces.
|
| 5 |
+
This file imports and runs the modular application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from src.app import create_app
|
| 9 |
+
|
| 10 |
+
# Create the application at module level so that `app` is available to
# anything importing this module.
app = create_app()

# Launch the application only when this file is executed directly.
if __name__ == "__main__":
    print("🧬 HITL Drug Discovery Application")
    print("🌐 Starting application...")
    print("📍 Available at: http://localhost:7860")
    print("⏹️ Press Ctrl+C to stop")
    print("-" * 50)
    app.launch()
|
generated_molecules.csv
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
smiles,count
|
| 2 |
+
[*],1
|
| 3 |
+
[*]C,1
|
| 4 |
+
[*]Cl,1
|
| 5 |
+
[*]N,1
|
| 6 |
+
[*]Br,1
|
| 7 |
+
[*]O,1
|
| 8 |
+
[*][C@@H],1
|
| 9 |
+
[*]F,1
|
| 10 |
+
[*]S,1
|
| 11 |
+
[*][C@H],1
|
| 12 |
+
[*]CC,1
|
| 13 |
+
[*]OC,1
|
| 14 |
+
[*]CO,1
|
| 15 |
+
[*]NC,1
|
| 16 |
+
[*]CN,1
|
| 17 |
+
[*]SC,1
|
| 18 |
+
[*]N[C@@H],1
|
| 19 |
+
[*]/C,1
|
| 20 |
+
[*]C#C,1
|
| 21 |
+
[*]C#N,1
|
| 22 |
+
[*]CO[C@H],1
|
| 23 |
+
[*]CNS,1
|
| 24 |
+
[*]OCC,1
|
| 25 |
+
[*]CCC,1
|
| 26 |
+
[*]NCC,1
|
| 27 |
+
[*]C=C,1
|
| 28 |
+
[*]SCC,1
|
| 29 |
+
[*]CCN,1
|
| 30 |
+
[*]CN[C@H],1
|
| 31 |
+
[*]CC[C@@],1
|
| 32 |
+
[*]CNC,1
|
| 33 |
+
[*][C@@H](C),1
|
| 34 |
+
[*]C(F),1
|
| 35 |
+
[*]CC=C,1
|
| 36 |
+
[*]N(C),1
|
| 37 |
+
[*]C(C),1
|
| 38 |
+
[*]C#CC,1
|
| 39 |
+
[*][C@H](C),1
|
| 40 |
+
[*]S(C),1
|
| 41 |
+
[*]C(N),1
|
| 42 |
+
[*]CCCC,1
|
| 43 |
+
[*]/C=C,1
|
| 44 |
+
[*]OCCN,1
|
| 45 |
+
[*]CC#N,1
|
| 46 |
+
[*]C(=O),1
|
| 47 |
+
[*]C(=C),1
|
| 48 |
+
[*]OC(C),1
|
| 49 |
+
[*]N(C)C,1
|
| 50 |
+
[*]S(=O),1
|
| 51 |
+
[*]C(=N),1
|
| 52 |
+
[*]OC(F),1
|
| 53 |
+
[*]CN(C),1
|
| 54 |
+
[*]C(C)N,1
|
| 55 |
+
[*]OCC#C,1
|
| 56 |
+
[*][C@H](C)N,1
|
| 57 |
+
[*]C1CC1,1
|
| 58 |
+
[*]C(C)C,1
|
| 59 |
+
[*]C(=O)O,1
|
| 60 |
+
[*]C(=O)C,1
|
| 61 |
+
[*]C(=O)N,1
|
| 62 |
+
[*]NC(=O),1
|
| 63 |
+
[*]C(=N)N,1
|
| 64 |
+
[*]C(C)=O,1
|
| 65 |
+
[*]OC(C)C,1
|
| 66 |
+
[*]CC(=O),1
|
| 67 |
+
[*]C(C#C),1
|
| 68 |
+
[*]c1cn[nH]c1,1
|
| 69 |
+
[*]c1ccn[nH]1,1
|
| 70 |
+
[*]N1CCCC1,1
|
| 71 |
+
[*]C(F)(F),1
|
| 72 |
+
[*]c1ccco1,1
|
| 73 |
+
[*]N1CCC[C@@H]1,1
|
| 74 |
+
[*]C(=O)OC,1
|
| 75 |
+
[*]C(=O)CC,1
|
| 76 |
+
[*]C(=O)NN,1
|
| 77 |
+
[*]NC(=O)N,1
|
| 78 |
+
[*]C(=O)NC,1
|
| 79 |
+
[*]n1cncc1,1
|
| 80 |
+
[*]OC(C)=O,1
|
| 81 |
+
[*]C(=O)NS,1
|
| 82 |
+
[*]NC(=O)C,1
|
| 83 |
+
[*]CCC(=O),1
|
| 84 |
+
[*]c1ccc[nH]1,1
|
| 85 |
+
[*]n1cccn1,1
|
| 86 |
+
[*]C(C)(C),1
|
| 87 |
+
[*]n1ccnc1,1
|
| 88 |
+
[*]C(=O)CN,1
|
| 89 |
+
[*]c1cccs1,1
|
| 90 |
+
[*]c1ccccc1,1
|
| 91 |
+
[*]c1cccnc1,1
|
| 92 |
+
[*]c1cncnc1,1
|
| 93 |
+
[*]C(F)(F)F,1
|
| 94 |
+
[*]C1CCCCN1,1
|
| 95 |
+
[*]C(=O)N/N,1
|
| 96 |
+
[*]C1CCOCC1,1
|
| 97 |
+
[*]C(=O)NCC,1
|
| 98 |
+
[*]N1CCOCC1,1
|
| 99 |
+
[*]C(=O)NNC,1
|
| 100 |
+
[*]NC(=O)CS,1
|
| 101 |
+
[*]C(=O)OCC,1
|
| 102 |
+
[*]c1ccncc1,1
|
| 103 |
+
[*]N1CCCCC1,1
|
| 104 |
+
[*]NC(=O)CO,1
|
| 105 |
+
[*]S(C)(=O),1
|
| 106 |
+
[*]NC(=O)NC,1
|
| 107 |
+
[*]c1ccccn1,1
|
| 108 |
+
[*]NC(=O)CN,1
|
| 109 |
+
[*]Oc1ccccc1,1
|
| 110 |
+
[*]c1ccccc1N,1
|
| 111 |
+
[*]OC1CCCCC1,1
|
| 112 |
+
[*]C(=O)NCCC,1
|
| 113 |
+
[*]c1ccccc1Cl,1
|
| 114 |
+
[*]C(=O)NCCN,1
|
| 115 |
+
[*]S(=O)(=O),1
|
| 116 |
+
[*]CN1CCOCC1,1
|
| 117 |
+
[*]c1ccccc1C,1
|
| 118 |
+
[*]c1ccccc1F,1
|
| 119 |
+
[*]c1cnn(C)c1,1
|
| 120 |
+
[*]C(=O)NC(C),1
|
| 121 |
+
[*]n1nc(C)cc1,1
|
| 122 |
+
[*]C1NC[C@@H](C)N1,1
|
| 123 |
+
[*]n1cnc(C)c1,1
|
| 124 |
+
[*]S(C)(=O)=O,1
|
| 125 |
+
[*][C@@H](NC(C)=O),1
|
| 126 |
+
[*]S(=O)(=O)N,1
|
| 127 |
+
[*]OCc1ccccc1,1
|
| 128 |
+
[*]c1cccc(F)c1,1
|
| 129 |
+
[*]c1ccc(F)cc1,1
|
| 130 |
+
[*]c1cccc(Cl)c1,1
|
| 131 |
+
[*]C1CC(=O)NN1,1
|
| 132 |
+
[*]c1ccnc(N)n1,1
|
| 133 |
+
[*]c1ccc(O)cc1,1
|
| 134 |
+
[*]C1CC(=O)NC1,1
|
| 135 |
+
[*]c1ccc(O)nn1,1
|
| 136 |
+
[*]C(=O)N1CCCC1,1
|
| 137 |
+
[*]C(=O)NNC(=O),1
|
| 138 |
+
[*]C(=O)OCC(=O),1
|
| 139 |
+
[*]CC1OC(=O)CC1,1
|
| 140 |
+
[*]C1=NNC(=O)CC1,1
|
| 141 |
+
[*]C(=O)NNC(=O)C,1
|
| 142 |
+
[*]C1CCCN(CC#C)C1,1
|
| 143 |
+
[*]Cc1cccc(=O)n1O,1
|
| 144 |
+
[*]C1(C)SCC(=O)N1,1
|
| 145 |
+
[*]Cc1ccc2cc[nH]c2c1,1
|
| 146 |
+
[*]c1cccc2c1CNCC2,1
|
| 147 |
+
[*]c1cncc2c1CCCC2,1
|
| 148 |
+
[*]CN1CCc2cncnc2C1,1
|
| 149 |
+
[*]C(Nc1ccc(C)cc1),1
|
| 150 |
+
[*]Nc1cnc2ccccc2n1,1
|
| 151 |
+
[*]c1nc2cc(N)ccc2o1,1
|
| 152 |
+
[*]NCc1cc2ccccc2nc1,1
|
| 153 |
+
[*]N1C(=O)CC(=O)NC1,1
|
| 154 |
+
[*]N(C)Cc1noc(CC)n1,1
|
| 155 |
+
[*]COc1cccc2cccnc12,1
|
| 156 |
+
[*]C#Cc1ccnc(OCC)c1,1
|
| 157 |
+
[*]c1ccc(Cl)c2c1CNC2,1
|
| 158 |
+
[*]c1cn(C)c(=O)[nH]c1=O,1
|
| 159 |
+
[*]C(=O)Nc1ccc(Cl)cc1,1
|
| 160 |
+
[*]Nc1ncc2c(n1)CCCC2,1
|
| 161 |
+
[*]c1ccnc(C2CCCN2)n1,1
|
| 162 |
+
[*]c1ccc(CC(=O)NO)s1,1
|
| 163 |
+
[*]N1C2CCCC1CC(=O)C2,1
|
| 164 |
+
[*]C(=O)c1cc(CCC)ccc1,1
|
| 165 |
+
[*]NCc1cccc(C(=O)O)c1,1
|
| 166 |
+
[*]N1CCN(Cc2ccco2)CC1,1
|
| 167 |
+
[*]C(=O)N1CC2CCNCC2C1,1
|
| 168 |
+
[*]c1cccc(N2CCOCC2)n1,1
|
| 169 |
+
[*]Nc1ccnc2cc(Cl)ccc12,1
|
| 170 |
+
[*]CCc1nc2cc(N)ccc2o1,1
|
| 171 |
+
[*]Cc1c[nH]c2nc(O)nc-2c1,1
|
| 172 |
+
[*]c1ccc(C(=O)O)c(O)c1,1
|
| 173 |
+
[*]C(=O)CC1(C#N)CCOCC1,1
|
| 174 |
+
[*]c1ccc(CCN=C(NN)N)o1,1
|
| 175 |
+
[*]c1ccc2c(c1)OC(=O)C2,1
|
| 176 |
+
[*]c1ccc(S(N)(=O)=O)cc1,1
|
| 177 |
+
[*]C(=O)Nc1c[nH]c(=O)[nH]c1=O,1
|
| 178 |
+
[*]c1nc2c(c(=O)[nH]1)COCC2,1
|
| 179 |
+
[*]NC(=O)c1ccc(F)c(F)c1,1
|
| 180 |
+
[*]Cc1ccnc2c1CC[C@@H]1CNCCN21,1
|
| 181 |
+
[*]c1cc(=O)c2cc(N)ccc2o1,1
|
| 182 |
+
[*]NC(=O)Nc1nc2c(s1)CCC2,1
|
| 183 |
+
[*]c1cc(C)nc2ccc3nc[nH]c3c12,1
|
| 184 |
+
[*]c1cnc2ccc(NC(C)=O)cn12,1
|
| 185 |
+
[*]c1cc(=O)c2cc(OC)ccc2o1,1
|
| 186 |
+
[*]NC(=O)c1cc2cc(Cl)ccc2s1,1
|
| 187 |
+
[*]NC(=O)C1CCCN([C@@H](C)CC)C1,1
|
| 188 |
+
[*]C(=O)N1CCN(C)C(C)(C)C1,1
|
| 189 |
+
[*]C(=O)Nc1ccc2nc(C)sc2c1,1
|
| 190 |
+
[*]OC1CCN(Cc2ccc(Cl)s2)CC1,1
|
| 191 |
+
[*]C1CCN(C(CN)c2ccccc2)CC1,1
|
| 192 |
+
[*]c1ccc(NCc2cccc(O)c2)cc1,1
|
| 193 |
+
[*]N1CC(NC(=O)C2CCCCC2)CC1,1
|
| 194 |
+
[*]NC(=O)c1cc2c([nH]c1=O)CCC2,1
|
| 195 |
+
[*]c1ccc(SC(F)(F)C(=O)O)cc1,1
|
| 196 |
+
[*]c1ccc(C(=O)NCc2ccco2)cc1,1
|
| 197 |
+
[*]N1CC(OC)C2=C1CC(C)(C)NS2,1
|
| 198 |
+
[*]C1=NCC(=O)Nc2cc(OC)ccc21,1
|
| 199 |
+
[*]c1ccc(-c2cccc3[nH]ccc23)nc1,1
|
| 200 |
+
[*]c1ccnc(COc2ccc(CO)cc2)n1,1
|
| 201 |
+
[*]c1cc2c(c(C(=O)O)c1)OCCNC2,1
|
| 202 |
+
[*]NCc1cc2n(n1)-c1ccccc1CCC2,1
|
| 203 |
+
[*]Nc1cc(=O)oc2c1ccc1ccccc12,1
|
| 204 |
+
[*]c1ccc(NC(=N)c2ccc(Cl)s2)cc1,1
|
| 205 |
+
[*]c1cc(C(F)(F)F)nn1-c1ccccc1,1
|
| 206 |
+
[*]C1CNCc2ncnc(CCc3ccccc3)c21,1
|
| 207 |
+
[*]c1ccc(Cl)c2c1NS(=O)(=O)C/C2,1
|
| 208 |
+
[*]NC(=O)C12CC3CC(CC(C3)C1)C2,1
|
| 209 |
+
[*]Oc1ccc2oc(CCN3CCC[C@H]3C)cc2c1,1
|
| 210 |
+
[*]N1C(=O)CC(N2CC(C)CC(C)C2)C1,1
|
| 211 |
+
[*]C(=O)N1CC(=O)Nc2c(C)cccc2C1,1
|
| 212 |
+
[*]NCc1cccc(-c2c[nH]c(=O)[nH]c2=O)c1,1
|
| 213 |
+
[*]C1CNCCc2cc(Cl)c(OCCC)c(O)c21,1
|
| 214 |
+
[*]c1cnc(NC)c(-c2ccc(OC)cc2)n1,1
|
legacy/app_legacy.py
ADDED
|
@@ -0,0 +1,1081 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from huggingface_hub import InferenceClient
|
| 3 |
+
from rdkit import Chem
|
| 4 |
+
from rdkit.Chem import Draw, Descriptors, Crippen
|
| 5 |
+
|
| 6 |
+
def calculate_molecular_properties(smiles):
    """Calculate key molecular properties for drug discovery.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A dict with the keys 'Molecular Weight', 'LogP', 'HBD', 'HBA',
        'TPSA', 'Rotatable Bonds', 'Aromatic Rings', 'Heavy Atoms',
        'Lipinski Violations' and 'Drug-like', or None when ``smiles`` is
        empty, not a string, or cannot be parsed by RDKit.
    """
    # Reject empty / non-string input up front instead of handing it to RDKit.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    properties = {
        'Molecular Weight': round(Descriptors.MolWt(mol), 2),
        'LogP': round(Crippen.MolLogP(mol), 2),
        'HBD': Descriptors.NumHDonors(mol),
        'HBA': Descriptors.NumHAcceptors(mol),
        'TPSA': round(Descriptors.TPSA(mol), 2),
        'Rotatable Bonds': Descriptors.NumRotatableBonds(mol),
        'Aromatic Rings': Descriptors.NumAromaticRings(mol),
        'Heavy Atoms': mol.GetNumHeavyAtoms(),
    }

    # Lipinski's Rule of Five: one violation per property above its limit.
    lipinski_limits = (
        ('Molecular Weight', 500),
        ('LogP', 5),
        ('HBD', 5),
        ('HBA', 10),
    )
    lipinski_violations = sum(
        1 for key, limit in lipinski_limits if properties[key] > limit
    )

    properties['Lipinski Violations'] = lipinski_violations
    # At most one violation is still treated as drug-like.
    properties['Drug-like'] = lipinski_violations <= 1

    return properties
|
| 38 |
+
|
| 39 |
+
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """
    Enhanced drug discovery chatbot with molecular property integration.

    Streams a chat completion from the Hugging Face Inference API and yields
    the accumulated response text after every received chunk.

    Args:
        message: The new user message.
        history: Previous chat messages as ``{"role", "content"}`` dicts.
        system_message: User-configurable prefix of the system prompt.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion``.
        top_p: Forwarded to ``chat_completion``.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the inference client.

    Yields:
        The response text accumulated so far.
    """
    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    # Enhanced system message for drug discovery
    enhanced_system_message = f"""{system_message}

You are an expert medicinal chemist and drug discovery specialist. You help researchers understand:
- Molecular properties and drug-likeness
- Structure-activity relationships (SAR)
- ADMET properties (Absorption, Distribution, Metabolism, Excretion, Toxicity)
- Drug design principles and optimization strategies
- Chemical synthesis and medicinal chemistry

When discussing molecules, consider their molecular weight, LogP, hydrogen bonding, and other key properties. Reference the molecular gallery below for visual context."""

    messages = [{"role": "system", "content": enhanced_system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is `chunk` so it does not rebind the `message`
    # parameter while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        # Some stream chunks carry no choices or an empty delta.
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content

        response += token
        yield response
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# ChatInterface configuration for drug discovery.
# The additional inputs map, in order, onto respond()'s `system_message`,
# `max_tokens`, `temperature` and `top_p` parameters.
# NOTE(review): `gr.OAuthToken` is not an input component, so it is no longer
# listed here; respond()'s `hf_token: gr.OAuthToken` annotation is what asks
# Gradio for the token — confirm a login button is rendered alongside the chat.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly drug discovery assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# Enhanced drug discovery molecule library (SMILES strings shown in the gallery).
list_smiles = [
    "C[C@H](N)C(=O)O",  # Alanine (amino acid)
    "CC(=O)OC1=CC=CC=C1C(=O)O",  # Aspirin (NSAID)
    "CCN(CC)CC",  # Triethylamine (base)
    "c1ccccc1O",  # Phenol (aromatic)
    "CC(C)CC(=O)O",  # Isovaleric acid (3-methylbutanoic acid; branched, not n-valeric acid)
    # NOTE(review): originally labelled "Adenine (nucleobase)", but this is not
    # the adenine structure (adenine is Nc1ncnc2[nH]cnc12) — confirm the
    # intended molecule.
    "CN1C=NC2=C1N=CN2",
    "O=C(O)C1=CC=CC=C1",  # Benzoic acid (aromatic acid)
    "C1CCCCC1",  # Cyclohexane (cycloalkane)
    "CC(=O)N1CCCCC1",  # 1-Acetylpiperidine (amide; was mislabelled N-methylpiperidine)
]
|
| 119 |
+
|
| 120 |
+
# Bookmarked molecules. This is a module-level list, so it lives for the
# lifetime of the process rather than being stored per user session.
bookmarked_molecules = []
|
| 122 |
+
|
| 123 |
+
# Drug discovery molecules: a para-substituted N-[1-(p-tolyl)ethyl]benzamide
# series that differs only in the substituent on the benzoyl ring.
# NOTE(review): these entries were originally labelled Ibuprofen, Flurbiprofen,
# Bromfenac, "Iodofenac" and "Nitrofenac"; the SMILES do not encode those
# drugs, so the labels below describe the actual substituent instead.
drug_smiles = [
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)Cl",  # 4-chloro analog
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)F",  # 4-fluoro analog
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)Br",  # 4-bromo analog
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)I",  # 4-iodo analog
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)[N+](=O)[O-]",  # 4-nitro analog
]
|
| 131 |
+
|
| 132 |
+
def generate_molecule_images():
    """
    Render every SMILES in ``list_smiles`` as a 200x200 image.

    Returns:
        A list with one entry per SMILES, in order: the rendered image, a
        blank white placeholder image when the SMILES cannot be parsed, or
        None when the SMILES cannot be parsed and PIL is not importable.
    """
    images = []
    for smiles in list_smiles:
        mol = Chem.MolFromSmiles(smiles)
        if mol is not None:
            images.append(Draw.MolToImage(mol, size=(200, 200), kekulize=True))
            continue

        # Invalid SMILES: keep the list aligned with list_smiles by adding a
        # placeholder instead of skipping the entry.
        try:
            from PIL import Image
        except ImportError:
            # Fallback if PIL is not available
            images.append(None)
        else:
            images.append(Image.new('RGB', (200, 200), color='white'))
    return images
|
| 152 |
+
|
| 153 |
+
def generate_molecule_image(smiles):
    """Generate a 300x300 molecular structure image from a SMILES string.

    Returns:
        The rendered image, or None when ``smiles`` is empty, not a string,
        or cannot be parsed by RDKit.
    """
    # Empty input would otherwise render as a blank picture.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    return Draw.MolToImage(mol, size=(300, 300), kekulize=True)
|
| 162 |
+
|
| 163 |
+
def generate_molecule_variations(base_smiles, num_variations=12):
    """Generate multiple renderings of one chemical structure for the grid display.

    The molecule itself is never modified; each variation is the same
    structure drawn with a different image size / kekulize setting.

    Args:
        base_smiles: SMILES string of the molecule to render.
        num_variations: Number of renderings to attempt.

    Returns:
        A list of dicts with the keys 'image', 'smiles', 'variation_id',
        'size' and 'kekulize'. Empty when ``base_smiles`` is empty, not a
        string, or cannot be parsed.
    """
    if not isinstance(base_smiles, str) or not base_smiles.strip():
        return []

    mol = Chem.MolFromSmiles(base_smiles)
    if mol is None:
        return []

    variations = []

    # Rendering parameters are cycled through by variation index.
    sizes = [(150, 150), (180, 180), (200, 200), (160, 160)]
    styles = [True, False]  # kekulize vs non-kekulize

    for i in range(num_variations):
        size = sizes[i % len(sizes)]
        kekulize = styles[i % len(styles)]

        try:
            img = Draw.MolToImage(mol, size=size, kekulize=kekulize)
        except Exception:
            # Fallback to basic rendering; skip this variation when even
            # that fails (best effort, never abort the whole grid).
            size, kekulize = (150, 150), True
            try:
                img = Draw.MolToImage(mol, size=size, kekulize=kekulize)
            except Exception:
                continue

        variations.append({
            'image': img,
            'smiles': base_smiles,
            'variation_id': i + 1,
            'size': size,
            'kekulize': kekulize,
        })

    return variations
|
| 204 |
+
|
| 205 |
+
def generate_chemical_series_variations(base_smiles):
    """Render one molecule in a fixed set of named visualization styles.

    Despite the name, no new chemical structures are produced: every entry
    is the same molecule drawn with a different size / kekulize setting.

    Args:
        base_smiles: SMILES string of the molecule to render.

    Returns:
        A list of dicts with the keys 'image', 'smiles', 'variation_id',
        'style', 'size' and 'kekulize' (up to 12 entries). Empty when
        ``base_smiles`` is empty, not a string, or cannot be parsed.
    """
    if not isinstance(base_smiles, str) or not base_smiles.strip():
        return []

    mol = Chem.MolFromSmiles(base_smiles)
    if mol is None:
        return []

    # Create different visualization styles
    styles = [
        {'size': (200, 200), 'kekulize': True, 'style': 'Standard'},
        {'size': (180, 180), 'kekulize': False, 'style': 'Kekulé'},
        {'size': (220, 220), 'kekulize': True, 'style': 'Large'},
        {'size': (160, 160), 'kekulize': False, 'style': 'Compact'},
        {'size': (200, 200), 'kekulize': True, 'style': 'Detailed'},
        {'size': (190, 190), 'kekulize': False, 'style': 'Minimal'},
        {'size': (210, 210), 'kekulize': True, 'style': 'Enhanced'},
        {'size': (170, 170), 'kekulize': False, 'style': 'Focused'},
        {'size': (200, 200), 'kekulize': True, 'style': 'Classic'},
        {'size': (185, 185), 'kekulize': False, 'style': 'Modern'},
        {'size': (195, 195), 'kekulize': True, 'style': 'Scientific'},
        {'size': (175, 175), 'kekulize': False, 'style': 'Clean'},
    ]

    variations = []
    for i, style_config in enumerate(styles):
        try:
            img = Draw.MolToImage(
                mol,
                size=style_config['size'],
                kekulize=style_config['kekulize'],
            )
        except Exception:
            # Best effort: a style that fails to render is simply omitted.
            continue

        variations.append({
            'image': img,
            'smiles': base_smiles,
            'variation_id': i + 1,
            'style': style_config['style'],
            'size': style_config['size'],
            'kekulize': style_config['kekulize'],
        })

    return variations
|
| 244 |
+
|
| 245 |
+
# Global variables for variations management
current_variations = []  # variation dicts most recently generated for display
current_page = 0  # zero-based index of the gallery page being shown
variations_per_page = 12  # number of variations per gallery page
|
| 249 |
+
|
| 250 |
+
def generate_variations_for_display(smiles, num_variations=12):
    """Generate variations and format them for gallery display.

    Stores the generated variations in the module-level
    ``current_variations`` list.

    Args:
        smiles: SMILES string of the molecule to render.
        num_variations: Maximum number of variations to keep.

    Returns:
        A 4-tuple ``(gallery_items, first_image, smiles, first_style)``.
        ``gallery_items`` is a list of ``(image, caption)`` pairs. When no
        variation could be generated, ``first_image`` is None and
        ``first_style`` is the string "None".
    """
    global current_variations
    current_variations = generate_chemical_series_variations(smiles)[:num_variations]

    # Format for gallery display
    gallery_items = [
        (var['image'], f"Style: {var['style']}") for var in current_variations
    ]

    if not current_variations:
        return gallery_items, None, smiles, "None"

    first = current_variations[0]
    return gallery_items, first['image'], smiles, first['style']
|
| 262 |
+
|
| 263 |
+
def select_variation(evt: gr.SelectData):
    """Handle selection of a variation from the grid.

    Args:
        evt: Selection event; ``evt.index`` is used as an index into
            ``current_variations``.

    Returns:
        ``(image, smiles, style)`` of the selected variation, or
        ``(None, "", "")`` when nothing is loaded or the index is out of range.
    """
    # NOTE(review): the index is applied to the full list, not to the page
    # produced by navigate_variations() — confirm the gallery always shows
    # the list from its first element when this handler fires.
    if not current_variations or evt.index >= len(current_variations):
        return None, "", ""

    selected_var = current_variations[evt.index]
    return selected_var['image'], selected_var['smiles'], selected_var['style']
|
| 270 |
+
|
| 271 |
+
def clear_variations():
    """Clear all variations and reset the display.

    Resets the module-level ``current_variations`` and ``current_page``.

    Returns:
        ``([], None, "", "")``: an empty gallery, no image, and empty
        SMILES / style strings.
    """
    global current_variations, current_page
    current_variations = []
    current_page = 0
    return [], None, "", ""
|
| 277 |
+
|
| 278 |
+
def update_grid_size(columns):
    """Update the grid columns dynamically.

    Returns:
        A ``gr.Gallery`` configured with the requested number of columns.
    """
    return gr.Gallery(columns=columns)
|
| 281 |
+
|
| 282 |
+
def navigate_variations(direction):
    """Navigate through the pages of generated variations.

    Args:
        direction: "next" or "prev"; any other value re-renders the
            current page.

    Returns:
        A 5-tuple ``(gallery_items, page_info, image, smiles, style)`` where
        ``gallery_items`` is a list of ``(image, caption)`` pairs for the
        page, ``page_info`` is a "Page X of Y" label and the remaining three
        values describe the first variation on the page. With no variations
        loaded the result is ``([], "Page 1 of 1", None, "", "")``.
    """
    global current_page, current_variations, variations_per_page

    if not current_variations:
        return [], "Page 1 of 1", None, "", ""

    # A page size of 0 would divide by zero and a float would break slicing,
    # so work with a positive integer page size.
    per_page = max(1, int(variations_per_page))
    total_pages = (len(current_variations) + per_page - 1) // per_page

    if direction == "next":
        current_page += 1
    elif direction == "prev":
        current_page -= 1
    # Clamp in every case: the stored page can be stale if the variation
    # list or the page size changed since the last navigation.
    current_page = min(max(current_page, 0), total_pages - 1)

    # Get variations for current page
    start_idx = current_page * per_page
    page_variations = current_variations[start_idx:start_idx + per_page]

    # Format for gallery display
    gallery_items = [
        (var['image'], f"Style: {var['style']}") for var in page_variations
    ]

    page_info = f"Page {current_page + 1} of {total_pages}"

    if not page_variations:
        return gallery_items, page_info, None, "", ""

    first = page_variations[0]
    return gallery_items, page_info, first['image'], first['smiles'], first['style']
|
| 309 |
+
|
| 310 |
+
def update_variation_count(count):
    """Set the number of variations shown per page.

    Stores ``count`` in the module-level ``variations_per_page``.

    Returns:
        ``count`` unchanged.
    """
    global variations_per_page
    variations_per_page = count
    return count
|
| 315 |
+
|
| 316 |
+
def generate_ai_structures(message, history, selected_smiles, hf_token: gr.OAuthToken):
    """Generate new structures using AI based on user request and selected molecule.

    This is a generator: it streams a chat completion and yields the
    accumulated response text after every chunk.

    Args:
        message: The user's request.
        history: Previous chat messages as ``{"role", "content"}`` dicts.
        selected_smiles: SMILES of the currently selected molecule, embedded
            in the system prompt.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the inference client.

    Yields:
        The response text accumulated so far. Nothing is yielded when
        ``message`` is empty or whitespace.
    """
    if not message or not message.strip():
        # Inside a generator this only ends iteration; the value is not
        # delivered to callers that loop over the generator.
        return history, []

    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

    # Enhanced system message for structure generation
    system_message = f"""You are an expert medicinal chemist and drug discovery specialist. You help generate new chemical structures based on user requests and existing molecules.

Current selected molecule SMILES: {selected_smiles}

Your task is to:
1. Understand the user's request for structure modifications
2. Generate 3-6 new SMILES strings that meet their requirements
3. Provide brief explanations for each generated structure
4. Consider drug-likeness, ADMET properties, and medicinal chemistry principles

Format your response as:
STRUCTURE 1: [SMILES] - [Brief explanation]
STRUCTURE 2: [SMILES] - [Brief explanation]
etc.

Focus on practical, synthesizable structures that address the user's specific request."""

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.9,
    ):
        choices = message_chunk.choices
        token = ""
        # Some stream chunks carry no choices or an empty delta.
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content
        response += token
        yield response
|
| 359 |
+
|
| 360 |
+
def _smiles_candidates(line):
    """Return candidate SMILES strings from a 'STRUCTURE n: SMILES - text' line, best guess first."""
    payload = line.split(':', 1)[1]

    # Cut the explanation off at a whitespace-delimited dash only: a bare '-'
    # is valid inside SMILES (charges such as [O-], explicit single bonds).
    field = payload
    for separator in (' - ', ' – ', ' — '):
        field = field.split(separator, 1)[0]
    field = field.strip().strip('`*').strip()

    # SMILES never contain whitespace, so only the first token can be one.
    tokens = field.split()
    token = tokens[0] if tokens else ''

    candidates = []
    # The prompt template shows "[SMILES]"; a model may echo the brackets.
    if len(token) > 2 and token.startswith('[') and token.endswith(']'):
        candidates.append(token[1:-1].strip())
    candidates.append(token)
    # Last resort: the original, lossy extraction, so anything that parsed
    # before still parses.
    candidates.append(payload.split('-')[0].strip().strip('[]()').strip())

    unique = []
    for candidate in candidates:
        if candidate and candidate not in unique:
            unique.append(candidate)
    return unique


def parse_ai_structures(ai_response, selected_smiles):
    """Parse an AI response to extract SMILES strings and generate images.

    Lines containing "STRUCTURE" (case-insensitive) and a colon are expected
    to look like ``STRUCTURE 1: CCO - ethanol``. For each such line the first
    candidate SMILES that RDKit can parse is rendered.

    Args:
        ai_response: Full text returned by the model (may be empty or None).
        selected_smiles: SMILES of the currently selected molecule, used as
            a fallback when no structure could be parsed.

    Returns:
        A list of ``(image, caption)`` pairs. Parsed structures are captioned
        "Generated: <smiles>". When none were parsed and ``selected_smiles``
        is valid, three renderings of it captioned "Variation: <smiles>" are
        returned instead.
    """
    structures = []

    for line in (ai_response or "").split('\n'):
        if 'STRUCTURE' not in line.upper() or ':' not in line:
            continue

        for smiles in _smiles_candidates(line):
            try:
                # Validate and generate image
                mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    continue
                img = Draw.MolToImage(mol, size=(150, 150), kekulize=True)
            except Exception:
                # Best effort: an unrenderable candidate is skipped.
                continue
            structures.append((img, f"Generated: {smiles}"))
            break

    # If no structures were parsed, generate some variations of the selected molecule
    if not structures and selected_smiles:
        try:
            mol = Chem.MolFromSmiles(selected_smiles)
            if mol is not None:
                # Generate a few variations with different sizes
                for size in [(120, 120), (150, 150), (180, 180)]:
                    img = Draw.MolToImage(mol, size=size, kekulize=True)
                    structures.append((img, f"Variation: {selected_smiles}"))
        except Exception:
            # Best effort: keep whatever was rendered before the failure.
            pass

    return structures
|
| 396 |
+
|
| 397 |
+
def handle_structure_chat(message, history, selected_smiles, hf_token: gr.OAuthToken):
    """Handle one turn of the structure-generation chat.

    Appends the user message and the final AI reply to the chat history and
    renders any structures found in that reply.

    Args:
        message: Text typed by the user; ``None`` or blank is ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            ``None`` is treated as an empty history.
        selected_smiles: SMILES of the currently selected molecule.
        hf_token: OAuth token forwarded to ``generate_ai_structures``.

    Returns:
        ``(history, structures)`` where ``history`` is a new list and
        ``structures`` is the list produced by ``parse_ai_structures``.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate a
    # missing history.
    prior_turns = list(history or [])

    if not message or not message.strip():
        return prior_turns, []

    updated = prior_turns + [{"role": "user", "content": message}]

    # The generator yields the accumulated response so far; only the last
    # yielded value is the complete reply.
    ai_response = ""
    for chunk in generate_ai_structures(message, prior_turns, selected_smiles, hf_token):
        ai_response = chunk

    updated.append({"role": "assistant", "content": ai_response})

    structures = parse_ai_structures(ai_response, selected_smiles)

    return updated, structures
|
| 417 |
+
|
| 418 |
+
def bookmark_molecule(smiles, molecule_name=""):
    """Add a molecule to the module-level bookmark collection.

    Args:
        smiles: SMILES string to bookmark.  Surrounding whitespace is
            ignored; ``None``/blank input is rejected without calling RDKit.
        molecule_name: Optional display name.  When empty, a name of the
            form ``Bookmarked_<n>`` is generated from the current count.

    Returns:
        A status message: invalid SMILES, already bookmarked, or success.
    """
    global bookmarked_molecules

    # Normalise first so "CCO" and " CCO " are the same bookmark, and so an
    # empty string never reaches the parser.
    smiles = (smiles or "").strip()
    mol = Chem.MolFromSmiles(smiles) if smiles else None
    if not mol:
        return "❌ Invalid SMILES string - cannot bookmark"

    # Duplicate detection compares the SMILES text, not the structure.
    if any(bm['smiles'] == smiles for bm in bookmarked_molecules):
        return "⚠️ Molecule already bookmarked"

    position = len(bookmarked_molecules) + 1

    if not molecule_name:
        molecule_name = f"Bookmarked_{position}"

    bookmarked_molecules.append({
        'smiles': smiles,
        'name': molecule_name,
        'timestamp': position,  # Simple counter, not a wall-clock time
    })

    return f"✅ Bookmarked: {molecule_name}"
|
| 443 |
+
|
| 444 |
+
def get_bookmarked_molecules():
    """Return the module-level list of bookmarked molecules for display.

    The list object itself is returned, not a copy, so changes a caller
    makes to it are visible to every other user of the bookmarks.
    """
    return bookmarked_molecules
|
| 447 |
+
|
| 448 |
+
def remove_bookmark(smiles):
    """Drop every bookmark whose SMILES text equals ``smiles``.

    The module-level bookmark list is rebound to a new list holding the
    remaining entries.  The same confirmation message is returned whether
    or not anything matched.
    """
    global bookmarked_molecules

    remaining = []
    for entry in bookmarked_molecules:
        if entry['smiles'] != smiles:
            remaining.append(entry)
    bookmarked_molecules = remaining

    return "🗑️ Removed from bookmarks"
|
| 454 |
+
|
| 455 |
+
def validate_smiles(smiles):
    """Validate a SMILES string.

    Args:
        smiles: The text to validate.

    Returns:
        ``None`` when RDKit can parse the (whitespace-stripped) string,
        otherwise a Markdown-formatted error message.  Blank or ``None``
        input yields a short "please enter" prompt.
    """
    if not smiles or not smiles.strip():
        return "Please enter a SMILES string"

    mol = Chem.MolFromSmiles(smiles.strip())
    if mol:
        return None

    # Cheap textual heuristics to hint at the most likely mistake; they are
    # hints only, the parser's verdict above is what decides validity.
    if smiles.count('(') != smiles.count(')'):
        issue = "Unmatched parentheses"
    elif smiles.count('[') != smiles.count(']'):
        issue = "Unmatched brackets"
    elif any(char in smiles for char in ['@', '\\', '/']) and 'C' not in smiles:
        issue = "Invalid stereochemistry notation"
    else:
        issue = "General syntax error"

    return "".join([
        f"❌ **Invalid SMILES string:** `{smiles}`\n\n",
        f"🔍 **Issue detected:** {issue}\n",
        "\n**💡 Tips for complex SMILES:**\n",
        "- Complex molecules are supported! The issue is likely syntax\n",
        "- Check parentheses and brackets are balanced\n",
        "- Verify ring closure numbers (e.g., C1CCCC1)\n",
        "- Use proper stereochemistry notation (@, @@, /, \\)\n",
        "- Try breaking complex molecules into smaller parts first\n\n",
        "**🧪 Examples of complex valid SMILES:**\n",
        "- `CC(=O)OC1=CC=CC=C1C(=O)O` (Aspirin)\n",
        "- `CN1C=NC2=C1C(=O)N(C(=O)N2C)C` (Caffeine)\n",
        "- `C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O` (Glucose)\n",
        # Trailing "C" is the second gem-dimethyl carbon of penicillin G.
        "- `CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C` (Penicillin)\n",
    ])
|
| 491 |
+
|
| 492 |
+
def validate_smiles_realtime(smiles):
    """Validate a SMILES string as the user types and build a preview.

    Args:
        smiles: Current content of the SMILES text box.

    Returns:
        ``(status_markdown, preview_image)``.  The image is ``None`` when
        the input is blank or invalid, or when rendering fails.
    """
    if not smiles or not smiles.strip():
        return "✅ Ready to analyze", None

    validation_error = validate_smiles(smiles)
    if validation_error:
        # The validator's message normally already carries the ❌ marker;
        # only add one when it is missing so it is never shown twice.
        if not validation_error.startswith("❌"):
            validation_error = f"❌ {validation_error}"
        return validation_error, None

    status = "✅ Valid SMILES - Click 'Analyze Molecule'"

    # The preview is optional: a rendering failure must not turn a valid
    # SMILES into an error for the user.
    try:
        mol = Chem.MolFromSmiles(smiles.strip())
        if mol:
            img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)
            return status, img
    except Exception:
        pass

    return status, None
|
| 511 |
+
|
| 512 |
+
def analyze_molecule(smiles):
    """Build a Markdown property report and a structure image for a SMILES.

    Returns:
        ``(report, image)``.  On a validation failure the report is the
        validator's message and the image is ``None``; when the property
        calculation yields nothing the report is a fixed error string.
        If the image cannot be produced the report gains a warning and the
        image is ``None`` (or whatever falsy value the renderer returned).
    """
    problem = validate_smiles(smiles)
    if problem:
        return problem, None

    props = calculate_molecular_properties(smiles)
    if not props:
        return "Error calculating molecular properties", None

    # The SMILES goes inside a fenced code block so Markdown renders it
    # verbatim instead of interpreting its characters.
    report = "".join([
        f"**Molecular Analysis for:**\n```\n{smiles}\n```\n\n",
        "**Basic Properties:**\n",
        f"- Molecular Weight: {props['Molecular Weight']} g/mol\n",
        f"- LogP: {props['LogP']}\n",
        f"- TPSA: {props['TPSA']} Ų\n",
        f"- Heavy Atoms: {props['Heavy Atoms']}\n\n",
        "**Hydrogen Bonding:**\n",
        f"- HBD (Donors): {props['HBD']}\n",
        f"- HBA (Acceptors): {props['HBA']}\n\n",
        "**Structural Features:**\n",
        f"- Rotatable Bonds: {props['Rotatable Bonds']}\n",
        f"- Aromatic Rings: {props['Aromatic Rings']}\n\n",
        "**Drug-likeness:**\n",
        f"- Lipinski Violations: {props['Lipinski Violations']}/4\n",
        f"- Drug-like: {'Yes' if props['Drug-like'] else 'No'}\n",
    ])

    # Rendering problems are reported inside the text rather than raised,
    # so the numeric analysis is still shown.
    try:
        picture = generate_molecule_image(smiles)
    except Exception as exc:
        report += f"\n\n⚠️ **Warning:** Error generating molecular structure: {exc!s}"
        picture = None
    else:
        if not picture:
            report += "\n\n⚠️ **Warning:** Could not generate molecular structure image"

    return report, picture
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
# Custom CSS for the Chemical Diagram Variations interface
|
| 557 |
+
custom_css = """
|
| 558 |
+
#main_structure {
|
| 559 |
+
border: 3px solid #4CAF50;
|
| 560 |
+
border-radius: 15px;
|
| 561 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 562 |
+
}
|
| 563 |
+
|
| 564 |
+
#variations_gallery {
|
| 565 |
+
border: 2px solid #e0e0e0;
|
| 566 |
+
border-radius: 10px;
|
| 567 |
+
background: #f9f9f9;
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
#variations_gallery .gallery-item {
|
| 571 |
+
border: 2px solid #ddd;
|
| 572 |
+
border-radius: 8px;
|
| 573 |
+
margin: 5px;
|
| 574 |
+
transition: all 0.3s ease;
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
#variations_gallery .gallery-item:hover {
|
| 578 |
+
border-color: #4CAF50;
|
| 579 |
+
transform: scale(1.05);
|
| 580 |
+
box-shadow: 0 4px 12px rgba(76, 175, 80, 0.3);
|
| 581 |
+
}
|
| 582 |
+
|
| 583 |
+
#variations_container {
|
| 584 |
+
max-height: 600px;
|
| 585 |
+
overflow-y: auto;
|
| 586 |
+
padding: 10px;
|
| 587 |
+
}
|
| 588 |
+
|
| 589 |
+
#selected_smiles, #selected_style {
|
| 590 |
+
background: #f5f5f5;
|
| 591 |
+
border: 1px solid #ddd;
|
| 592 |
+
border-radius: 5px;
|
| 593 |
+
}
|
| 594 |
+
|
| 595 |
+
.page-info {
|
| 596 |
+
text-align: center;
|
| 597 |
+
font-weight: bold;
|
| 598 |
+
color: #666;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
.grid-controls {
|
| 602 |
+
background: #f0f0f0;
|
| 603 |
+
padding: 10px;
|
| 604 |
+
border-radius: 8px;
|
| 605 |
+
margin: 10px 0;
|
| 606 |
+
}
|
| 607 |
+
"""
|
| 608 |
+
|
| 609 |
+
with gr.Blocks(title="HITL Drug Discovery", theme=gr.themes.Soft(), css=custom_css) as demo:
|
| 610 |
+
gr.Markdown("# 🧬 Human-in-the-Loop Drug Discovery")
|
| 611 |
+
gr.Markdown("Interactive molecular analysis and AI assistant for drug discovery research")
|
| 612 |
+
|
| 613 |
+
with gr.Row():
|
| 614 |
+
with gr.Column(scale=1):
|
| 615 |
+
gr.Markdown("## 🔬 Molecular Analysis")
|
| 616 |
+
smiles_input = gr.Textbox(
|
| 617 |
+
label="Enter SMILES string",
|
| 618 |
+
placeholder="e.g., C[C@H](N)C(=O)O",
|
| 619 |
+
value="C[C@H](N)C(=O)O",
|
| 620 |
+
info="Enter a valid SMILES string for molecular analysis"
|
| 621 |
+
)
|
| 622 |
+
|
| 623 |
+
# Real-time validation status
|
| 624 |
+
validation_status = gr.Markdown("✅ Ready to analyze")
|
| 625 |
+
preview_image = gr.Image(
|
| 626 |
+
label="Preview",
|
| 627 |
+
show_download_button=False,
|
| 628 |
+
width=200,
|
| 629 |
+
height=200,
|
| 630 |
+
visible=False
|
| 631 |
+
)
|
| 632 |
+
|
| 633 |
+
# Quick access buttons for common molecules
|
| 634 |
+
gr.Markdown("### Quick Examples")
|
| 635 |
+
with gr.Row():
|
| 636 |
+
alanine_btn = gr.Button("Alanine", size="sm")
|
| 637 |
+
aspirin_btn = gr.Button("Aspirin", size="sm")
|
| 638 |
+
ibuprofen_btn = gr.Button("Ibuprofen", size="sm")
|
| 639 |
+
|
| 640 |
+
with gr.Row():
|
| 641 |
+
caffeine_btn = gr.Button("Caffeine", size="sm")
|
| 642 |
+
glucose_btn = gr.Button("Methanol", size="sm")
|
| 643 |
+
benzene_btn = gr.Button("Benzene", size="sm")
|
| 644 |
+
|
| 645 |
+
with gr.Row():
|
| 646 |
+
glucose_btn2 = gr.Button("Acetone", size="sm")
|
| 647 |
+
ethanol_btn = gr.Button("Ethanol", size="sm")
|
| 648 |
+
water_btn = gr.Button("Water", size="sm")
|
| 649 |
+
|
| 650 |
+
with gr.Row():
|
| 651 |
+
complex_btn1 = gr.Button("Glucose", size="sm")
|
| 652 |
+
complex_btn2 = gr.Button("Cholesterol", size="sm")
|
| 653 |
+
complex_btn3 = gr.Button("Penicillin", size="sm")
|
| 654 |
+
|
| 655 |
+
analyze_btn = gr.Button("🔍 Analyze Molecule", variant="primary", size="lg")
|
| 656 |
+
|
| 657 |
+
# Bookmark functionality
|
| 658 |
+
with gr.Row():
|
| 659 |
+
bookmark_name = gr.Textbox(
|
| 660 |
+
placeholder="Enter molecule name (optional)",
|
| 661 |
+
label="Molecule Name",
|
| 662 |
+
scale=2
|
| 663 |
+
)
|
| 664 |
+
bookmark_btn = gr.Button("🔖 Bookmark", variant="secondary", size="sm", scale=1)
|
| 665 |
+
|
| 666 |
+
bookmark_status = gr.Markdown("")
|
| 667 |
+
|
| 668 |
+
gr.Markdown("### 📊 Analysis Results")
|
| 669 |
+
analysis_output = gr.Markdown()
|
| 670 |
+
|
| 671 |
+
with gr.Column(scale=1):
|
| 672 |
+
gr.Markdown("### 🧪 Molecular Structure")
|
| 673 |
+
molecule_image = gr.Image(
|
| 674 |
+
label="Chemical Structure",
|
| 675 |
+
show_download_button=False,
|
| 676 |
+
width=400,
|
| 677 |
+
height=400
|
| 678 |
+
)
|
| 679 |
+
|
| 680 |
+
with gr.Tabs():
|
| 681 |
+
with gr.Tab("🤖 AI Chat Assistant"):
|
| 682 |
+
chatbot.render()
|
| 683 |
+
|
| 684 |
+
with gr.Tab("🧬 Molecular Gallery"):
|
| 685 |
+
gr.Markdown("### Common Drug Discovery Molecules")
|
| 686 |
+
image_components = []
|
| 687 |
+
|
| 688 |
+
# Display images in a 3x3 grid
|
| 689 |
+
with gr.Row():
|
| 690 |
+
for row in range(3):
|
| 691 |
+
with gr.Row():
|
| 692 |
+
for col in range(3):
|
| 693 |
+
idx = row * 3 + col
|
| 694 |
+
img = gr.Image(
|
| 695 |
+
show_download_button=False,
|
| 696 |
+
width=200,
|
| 697 |
+
height=200,
|
| 698 |
+
label=list_smiles[idx],
|
| 699 |
+
)
|
| 700 |
+
image_components.append(img)
|
| 701 |
+
|
| 702 |
+
# Bookmarked molecules section
|
| 703 |
+
gr.Markdown("### 🔖 Your Bookmarked Molecules")
|
| 704 |
+
bookmarked_gallery = gr.Gallery(
|
| 705 |
+
label="Bookmarked Structures",
|
| 706 |
+
show_label=False,
|
| 707 |
+
elem_id="bookmarked_gallery",
|
| 708 |
+
columns=4,
|
| 709 |
+
rows=1,
|
| 710 |
+
height=200,
|
| 711 |
+
object_fit="contain"
|
| 712 |
+
)
|
| 713 |
+
|
| 714 |
+
with gr.Tab("💊 Drug Discovery Library"):
|
| 715 |
+
gr.Markdown("### NSAID Drug Series")
|
| 716 |
+
drug_images = []
|
| 717 |
+
|
| 718 |
+
# Display drug molecules
|
| 719 |
+
with gr.Row():
|
| 720 |
+
for i, smiles in enumerate(drug_smiles):
|
| 721 |
+
if i < 3: # First row
|
| 722 |
+
img = gr.Image(
|
| 723 |
+
show_download_button=False,
|
| 724 |
+
width=200,
|
| 725 |
+
height=200,
|
| 726 |
+
label=smiles,
|
| 727 |
+
)
|
| 728 |
+
drug_images.append(img)
|
| 729 |
+
|
| 730 |
+
with gr.Row():
|
| 731 |
+
for i, smiles in enumerate(drug_smiles):
|
| 732 |
+
if i >= 3: # Second row
|
| 733 |
+
img = gr.Image(
|
| 734 |
+
show_download_button=False,
|
| 735 |
+
width=200,
|
| 736 |
+
height=200,
|
| 737 |
+
label=smiles,
|
| 738 |
+
)
|
| 739 |
+
drug_images.append(img)
|
| 740 |
+
|
| 741 |
+
with gr.Tab("🔬 Chemical Diagram Variations"):
|
| 742 |
+
gr.Markdown("### Interactive Chemical Structure Variations")
|
| 743 |
+
gr.Markdown("Generate and explore multiple visualizations of your chemical structures")
|
| 744 |
+
|
| 745 |
+
with gr.Row():
|
| 746 |
+
with gr.Column(scale=2):
|
| 747 |
+
# Top left: SMILES input and main structure display
|
| 748 |
+
gr.Markdown("#### Input & Main Structure")
|
| 749 |
+
variation_smiles_input = gr.Textbox(
|
| 750 |
+
label="Enter SMILES for variations",
|
| 751 |
+
placeholder="e.g., C[C@H](N)C(=O)O",
|
| 752 |
+
value="C[C@H](N)C(=O)O",
|
| 753 |
+
elem_id="variation_smiles_input"
|
| 754 |
+
)
|
| 755 |
+
|
| 756 |
+
with gr.Row():
|
| 757 |
+
generate_variations_btn = gr.Button("🔄 Generate Variations", variant="primary")
|
| 758 |
+
clear_variations_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 759 |
+
|
| 760 |
+
# Main display area - large chemical structure
|
| 761 |
+
main_structure_display = gr.Image(
|
| 762 |
+
label="Selected Chemical Structure",
|
| 763 |
+
show_download_button=False,
|
| 764 |
+
width=400,
|
| 765 |
+
height=400,
|
| 766 |
+
elem_id="main_structure"
|
| 767 |
+
)
|
| 768 |
+
|
| 769 |
+
# Structure information
|
| 770 |
+
gr.Markdown("#### Structure Information")
|
| 771 |
+
selected_smiles_display = gr.Textbox(
|
| 772 |
+
label="SMILES String",
|
| 773 |
+
interactive=False,
|
| 774 |
+
elem_id="selected_smiles"
|
| 775 |
+
)
|
| 776 |
+
selected_style_display = gr.Textbox(
|
| 777 |
+
label="Visualization Style",
|
| 778 |
+
interactive=False,
|
| 779 |
+
elem_id="selected_style"
|
| 780 |
+
)
|
| 781 |
+
|
| 782 |
+
# AI Chat for structure generation
|
| 783 |
+
gr.Markdown("#### AI Structure Generator")
|
| 784 |
+
gr.Markdown("Ask the AI to generate new structures based on the selected molecule and your preferences")
|
| 785 |
+
|
| 786 |
+
# Chat interface for structure generation
|
| 787 |
+
structure_chatbot = gr.Chatbot(
|
| 788 |
+
label="Structure Generation Chat",
|
| 789 |
+
height=300,
|
| 790 |
+
elem_id="structure_chatbot",
|
| 791 |
+
type="messages"
|
| 792 |
+
)
|
| 793 |
+
|
| 794 |
+
with gr.Row():
|
| 795 |
+
structure_chat_input = gr.Textbox(
|
| 796 |
+
placeholder="e.g., 'Generate a more drug-like version' or 'Create a derivative with better solubility'",
|
| 797 |
+
label="Your request",
|
| 798 |
+
scale=4
|
| 799 |
+
)
|
| 800 |
+
structure_send_btn = gr.Button("Send", variant="primary", scale=1)
|
| 801 |
+
|
| 802 |
+
# Generated structures display
|
| 803 |
+
gr.Markdown("#### AI Generated Structures")
|
| 804 |
+
ai_generated_grid = gr.Gallery(
|
| 805 |
+
label="AI Generated Structures",
|
| 806 |
+
show_label=False,
|
| 807 |
+
elem_id="ai_generated_grid",
|
| 808 |
+
columns=3,
|
| 809 |
+
rows=2,
|
| 810 |
+
height=200,
|
| 811 |
+
object_fit="contain",
|
| 812 |
+
allow_preview=True
|
| 813 |
+
)
|
| 814 |
+
|
| 815 |
+
with gr.Column(scale=3):
|
| 816 |
+
# Right side: Clean grid of diagram variations
|
| 817 |
+
gr.Markdown("#### Chemical Structure Variations (Click to select)")
|
| 818 |
+
|
| 819 |
+
# Create a scrollable container for the grid
|
| 820 |
+
with gr.Column(elem_id="variations_container"):
|
| 821 |
+
variations_grid = gr.Gallery(
|
| 822 |
+
label="Chemical Structure Variations",
|
| 823 |
+
show_label=False,
|
| 824 |
+
elem_id="variations_gallery",
|
| 825 |
+
columns=4,
|
| 826 |
+
rows=3,
|
| 827 |
+
height=600,
|
| 828 |
+
object_fit="contain",
|
| 829 |
+
allow_preview=True,
|
| 830 |
+
selected_index=0
|
| 831 |
+
)
|
| 832 |
+
|
| 833 |
+
# Navigation controls
|
| 834 |
+
with gr.Row():
|
| 835 |
+
prev_page_btn = gr.Button("⬅️ Previous", size="sm")
|
| 836 |
+
page_info = gr.Markdown("Page 1 of 1", elem_classes="page-info")
|
| 837 |
+
next_page_btn = gr.Button("➡️ Next", size="sm")
|
| 838 |
+
|
| 839 |
+
# Grid controls
|
| 840 |
+
with gr.Row(elem_classes="grid-controls"):
|
| 841 |
+
grid_size_slider = gr.Slider(
|
| 842 |
+
minimum=4,
|
| 843 |
+
maximum=8,
|
| 844 |
+
value=4,
|
| 845 |
+
step=1,
|
| 846 |
+
label="Grid Columns",
|
| 847 |
+
elem_id="grid_size_slider"
|
| 848 |
+
)
|
| 849 |
+
variation_count_slider = gr.Slider(
|
| 850 |
+
minimum=6,
|
| 851 |
+
maximum=24,
|
| 852 |
+
value=12,
|
| 853 |
+
step=6,
|
| 854 |
+
label="Number of Variations",
|
| 855 |
+
elem_id="variation_count_slider"
|
| 856 |
+
)
|
| 857 |
+
|
| 858 |
+
# Event handlers
|
| 859 |
+
demo.load(
|
| 860 |
+
fn=generate_molecule_images,
|
| 861 |
+
inputs=None,
|
| 862 |
+
outputs=image_components
|
| 863 |
+
)
|
| 864 |
+
|
| 865 |
+
# Generate drug images on load
|
| 866 |
+
def generate_drug_images():
    """Render one 200x200 image per entry of ``drug_smiles``.

    Entries RDKit cannot parse get a blank white placeholder so the result
    keeps one item per input; if Pillow cannot be imported the placeholder
    is ``None``.
    """
    def render_one(smiles_text):
        parsed = Chem.MolFromSmiles(smiles_text)
        if parsed:
            return Draw.MolToImage(parsed, size=(200, 200), kekulize=True)
        try:
            from PIL import Image
        except ImportError:
            # Fallback if PIL not available
            return None
        return Image.new('RGB', (200, 200), color='white')

    return [render_one(entry) for entry in drug_smiles]
|
| 881 |
+
|
| 882 |
+
demo.load(
|
| 883 |
+
fn=generate_drug_images,
|
| 884 |
+
inputs=None,
|
| 885 |
+
outputs=drug_images
|
| 886 |
+
)
|
| 887 |
+
|
| 888 |
+
# Load default molecular structure on startup
|
| 889 |
+
def load_default_molecule():
    """Analyze the default molecule (alanine) for the initial page load."""
    report, picture = analyze_molecule("C[C@H](N)C(=O)O")  # Alanine
    return report, picture
|
| 893 |
+
|
| 894 |
+
# Event handlers
|
| 895 |
+
demo.load(
|
| 896 |
+
fn=load_default_molecule,
|
| 897 |
+
inputs=None,
|
| 898 |
+
outputs=[analysis_output, molecule_image]
|
| 899 |
+
)
|
| 900 |
+
|
| 901 |
+
# Event handlers for quick example buttons
|
| 902 |
+
alanine_btn.click(
|
| 903 |
+
fn=lambda: ("C[C@H](N)C(=O)O", *analyze_molecule("C[C@H](N)C(=O)O")),
|
| 904 |
+
inputs=None,
|
| 905 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 906 |
+
)
|
| 907 |
+
|
| 908 |
+
aspirin_btn.click(
|
| 909 |
+
fn=lambda: ("CC(=O)OC1=CC=CC=C1C(=O)O", *analyze_molecule("CC(=O)OC1=CC=CC=C1C(=O)O")),
|
| 910 |
+
inputs=None,
|
| 911 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 912 |
+
)
|
| 913 |
+
|
| 914 |
+
# Quick example: ibuprofen, 2-(4-isobutylphenyl)propanoic acid.  The SMILES
# is repeated because the lambda returns it for the text box and also
# passes it to the analysis.
ibuprofen_btn.click(
    fn=lambda: ("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", *analyze_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O")),
    inputs=None,
    outputs=[smiles_input, analysis_output, molecule_image]
)
|
| 919 |
+
|
| 920 |
+
caffeine_btn.click(
|
| 921 |
+
fn=lambda: ("CN1C=NC2=C1C(=O)N(C(=O)N2C)C", *analyze_molecule("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")),
|
| 922 |
+
inputs=None,
|
| 923 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 924 |
+
)
|
| 925 |
+
|
| 926 |
+
glucose_btn.click(
|
| 927 |
+
fn=lambda: ("CO", *analyze_molecule("CO")),
|
| 928 |
+
inputs=None,
|
| 929 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 930 |
+
)
|
| 931 |
+
|
| 932 |
+
benzene_btn.click(
|
| 933 |
+
fn=lambda: ("c1ccccc1", *analyze_molecule("c1ccccc1")),
|
| 934 |
+
inputs=None,
|
| 935 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 936 |
+
)
|
| 937 |
+
|
| 938 |
+
glucose_btn2.click(
|
| 939 |
+
fn=lambda: ("CC(=O)C", *analyze_molecule("CC(=O)C")),
|
| 940 |
+
inputs=None,
|
| 941 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 942 |
+
)
|
| 943 |
+
|
| 944 |
+
ethanol_btn.click(
|
| 945 |
+
fn=lambda: ("CCO", *analyze_molecule("CCO")),
|
| 946 |
+
inputs=None,
|
| 947 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 948 |
+
)
|
| 949 |
+
|
| 950 |
+
water_btn.click(
|
| 951 |
+
fn=lambda: ("O", *analyze_molecule("O")),
|
| 952 |
+
inputs=None,
|
| 953 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 954 |
+
)
|
| 955 |
+
|
| 956 |
+
# Complex molecule examples
|
| 957 |
+
complex_btn1.click(
|
| 958 |
+
fn=lambda: ("C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O", *analyze_molecule("C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O")),
|
| 959 |
+
inputs=None,
|
| 960 |
+
outputs=[smiles_input, analysis_output, molecule_image]
|
| 961 |
+
)
|
| 962 |
+
|
| 963 |
+
# Quick example: cholesterol (C27H46O steroid skeleton with the 3-hydroxyl
# and the 5,6 double bond).  The SMILES is repeated because the lambda
# returns it for the text box and also passes it to the analysis.
complex_btn2.click(
    fn=lambda: ("C[C@H](CCCC(C)C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2CC=C4[C@@]3(CC[C@@H](C4)O)C)C", *analyze_molecule("C[C@H](CCCC(C)C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2CC=C4[C@@]3(CC[C@@H](C4)O)C)C")),
    inputs=None,
    outputs=[smiles_input, analysis_output, molecule_image]
)
|
| 968 |
+
|
| 969 |
+
# Quick example: penicillin G (benzylpenicillin).  The trailing "C" is the
# second methyl of the gem-dimethyl group on the thiazolidine ring.
complex_btn3.click(
    fn=lambda: ("CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C", *analyze_molecule("CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C")),
    inputs=None,
    outputs=[smiles_input, analysis_output, molecule_image]
)
|
| 974 |
+
|
| 975 |
+
# Real-time validation as user types
|
| 976 |
+
smiles_input.change(
|
| 977 |
+
fn=validate_smiles_realtime,
|
| 978 |
+
inputs=smiles_input,
|
| 979 |
+
outputs=[validation_status, preview_image]
|
| 980 |
+
)
|
| 981 |
+
|
| 982 |
+
analyze_btn.click(
|
| 983 |
+
fn=analyze_molecule,
|
| 984 |
+
inputs=smiles_input,
|
| 985 |
+
outputs=[analysis_output, molecule_image]
|
| 986 |
+
)
|
| 987 |
+
|
| 988 |
+
# Bookmark functionality
|
| 989 |
+
def bookmark_current_molecule(smiles, name):
    """Bookmark the current molecule and rebuild the bookmark gallery.

    Returns:
        ``(status_message, gallery_items)`` where each gallery item is a
        ``(150x150 image, "<name>: <smiles>")`` tuple.  Bookmarks RDKit
        cannot parse are left out of the gallery.
    """
    status = bookmark_molecule(smiles, name)

    # The gallery is rebuilt from scratch on every click.
    thumbnails = []
    for entry in get_bookmarked_molecules():
        parsed = Chem.MolFromSmiles(entry['smiles'])
        if not parsed:
            continue
        thumb = Draw.MolToImage(parsed, size=(150, 150), kekulize=True)
        thumbnails.append((thumb, f"{entry['name']}: {entry['smiles']}"))

    return status, thumbnails
|
| 1001 |
+
|
| 1002 |
+
bookmark_btn.click(
|
| 1003 |
+
fn=bookmark_current_molecule,
|
| 1004 |
+
inputs=[smiles_input, bookmark_name],
|
| 1005 |
+
outputs=[bookmark_status, bookmarked_gallery]
|
| 1006 |
+
)
|
| 1007 |
+
|
| 1008 |
+
# Event handlers for Chemical Diagram Variations tab
|
| 1009 |
+
generate_variations_btn.click(
|
| 1010 |
+
fn=generate_variations_for_display,
|
| 1011 |
+
inputs=[variation_smiles_input, variation_count_slider],
|
| 1012 |
+
outputs=[variations_grid, main_structure_display, selected_smiles_display, selected_style_display]
|
| 1013 |
+
)
|
| 1014 |
+
|
| 1015 |
+
clear_variations_btn.click(
|
| 1016 |
+
fn=clear_variations,
|
| 1017 |
+
inputs=None,
|
| 1018 |
+
outputs=[variations_grid, main_structure_display, selected_smiles_display, selected_style_display]
|
| 1019 |
+
)
|
| 1020 |
+
|
| 1021 |
+
# Handle grid selection
|
| 1022 |
+
variations_grid.select(
|
| 1023 |
+
fn=select_variation,
|
| 1024 |
+
inputs=None,
|
| 1025 |
+
outputs=[main_structure_display, selected_smiles_display, selected_style_display]
|
| 1026 |
+
)
|
| 1027 |
+
|
| 1028 |
+
# Update grid size when slider changes
|
| 1029 |
+
grid_size_slider.change(
|
| 1030 |
+
fn=update_grid_size,
|
| 1031 |
+
inputs=grid_size_slider,
|
| 1032 |
+
outputs=variations_grid
|
| 1033 |
+
)
|
| 1034 |
+
|
| 1035 |
+
# Navigation controls
|
| 1036 |
+
prev_page_btn.click(
|
| 1037 |
+
fn=lambda: navigate_variations("prev"),
|
| 1038 |
+
inputs=None,
|
| 1039 |
+
outputs=[variations_grid, page_info, main_structure_display, selected_smiles_display, selected_style_display]
|
| 1040 |
+
)
|
| 1041 |
+
|
| 1042 |
+
next_page_btn.click(
|
| 1043 |
+
fn=lambda: navigate_variations("next"),
|
| 1044 |
+
inputs=None,
|
| 1045 |
+
outputs=[variations_grid, page_info, main_structure_display, selected_smiles_display, selected_style_display]
|
| 1046 |
+
)
|
| 1047 |
+
|
| 1048 |
+
# Update variation count
|
| 1049 |
+
variation_count_slider.change(
|
| 1050 |
+
fn=update_variation_count,
|
| 1051 |
+
inputs=variation_count_slider,
|
| 1052 |
+
outputs=variation_count_slider
|
| 1053 |
+
)
|
| 1054 |
+
|
| 1055 |
+
# Load default variations on startup
|
| 1056 |
+
def load_default_variations():
    """Produce the start-up variations display for alanine (12 variations)."""
    alanine = "C[C@H](N)C(=O)O"
    initial_count = 12
    return generate_variations_for_display(alanine, initial_count)
|
| 1059 |
+
|
| 1060 |
+
demo.load(
|
| 1061 |
+
fn=load_default_variations,
|
| 1062 |
+
inputs=None,
|
| 1063 |
+
outputs=[variations_grid, main_structure_display, selected_smiles_display, selected_style_display]
|
| 1064 |
+
)
|
| 1065 |
+
|
| 1066 |
+
# AI Structure Generation Chat handlers
|
| 1067 |
+
structure_send_btn.click(
|
| 1068 |
+
fn=handle_structure_chat,
|
| 1069 |
+
inputs=[structure_chat_input, structure_chatbot, selected_smiles_display, chatbot.additional_inputs[4]], # hf_token
|
| 1070 |
+
outputs=[structure_chatbot, ai_generated_grid]
|
| 1071 |
+
)
|
| 1072 |
+
|
| 1073 |
+
structure_chat_input.submit(
|
| 1074 |
+
fn=handle_structure_chat,
|
| 1075 |
+
inputs=[structure_chat_input, structure_chatbot, selected_smiles_display, chatbot.additional_inputs[4]], # hf_token
|
| 1076 |
+
outputs=[structure_chatbot, ai_generated_grid]
|
| 1077 |
+
)
|
| 1078 |
+
|
| 1079 |
+
|
| 1080 |
+
if __name__ == "__main__":
|
| 1081 |
+
demo.launch()
|
main.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main Entry Point for HITL Drug Discovery Application
|
| 3 |
+
|
| 4 |
+
This is the main entry point that imports and runs the modular application.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from src.app import create_app
|
| 8 |
+
|
| 9 |
+
# Build and launch the app only when this file is run as a script, so that
# importing this module has no side effects beyond the import above.
if __name__ == "__main__":
    app = create_app()
    app.launch()
|
molecule_render_demo.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from rdkit import Chem
|
| 3 |
+
from rdkit.Chem import Draw
|
| 4 |
+
from rdkit.Chem.Draw import rdMolDraw2D
|
| 5 |
+
|
| 6 |
+
# Default SMILES strings used to pre-populate the input table.
list_smiles = [
    "C[C@H](N)C(=O)O",  # - Alanine
    "CC(=O)OC1=CC=CC=C1C(=O)O",  # - Aspirin
    "CCN(CC)CC",  # - Triethylamine
    "c1ccccc1O",  # - Phenol
    "CC(C)CC(=O)O",  # - Isovaleric acid (3-methylbutanoic acid)
    "CN1C=NC2=C1N=CN2",  # - N-methylated fused bis-imidazole; NOTE(review): not adenine (no six-membered ring or amino group) - confirm the intended molecule
    "O=C(O)C1=CC=CC=C1",  # - Benzoic acid
    "C1CCCCC1",  # - Cyclohexane
    "CC(=O)N1CCCCC1",  # - 1-Acetylpiperidine
]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _extract_smiles(smiles_input):
    """
    Flatten the Gradio input into a list of stripped, non-empty SMILES strings.

    Accepts ``None`` (gives an empty list), a multi-line string (one SMILES
    per line), or an iterable of rows where each row is either a string or
    a sequence whose first cell holds the SMILES.  Rows that are empty or
    whose first cell is not a string are ignored.
    """
    if smiles_input is None:
        return []

    # Text areas and other string-based inputs: one SMILES per line.
    if isinstance(smiles_input, str):
        stripped_lines = (raw.strip() for raw in smiles_input.splitlines())
        return [text for text in stripped_lines if text]

    collected = []
    for entry in smiles_input:
        if isinstance(entry, str):
            cell = entry
        elif entry and isinstance(entry[0], str):
            cell = entry[0]
        else:
            continue

        cleaned = cell.strip()
        if cleaned:
            collected.append(cleaned)

    return collected
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def render_images(smiles_input):
    """
    Turn the SMILES input into ``(image, smiles)`` pairs for the gallery.

    Each image is rendered at 300x300.  Entries RDKit cannot parse are
    silently left out.
    """
    parsed_pairs = (
        (text, Chem.MolFromSmiles(text))
        for text in _extract_smiles(smiles_input)
    )
    return [
        (Draw.MolToImage(molecule, size=(300, 300)), text)
        for text, molecule in parsed_pairs
        if molecule is not None
    ]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def render_svgs(smiles_input):
    """
    Converts SMILES strings into SVG representations of the molecules.

    Returns a single HTML string: a block with one captioned 300x300 SVG
    figure per valid SMILES, followed (when applicable) by a list of the
    entries RDKit could not parse.  User-supplied text is HTML-escaped
    before it is placed in the markup.
    """
    # Local import keeps this block self-contained.
    from html import escape

    smiles_strings = _extract_smiles(smiles_input)
    svg_fragments = []
    invalid_smiles = []

    for smiles in smiles_strings:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            invalid_smiles.append(smiles)
            continue

        drawer = rdMolDraw2D.MolDraw2DSVG(300, 300)
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        # Drop the "svg:" namespace prefix so the markup can be embedded
        # directly in HTML.
        svg = drawer.GetDrawingText().replace("svg:", "")
        svg_fragments.append(
            f"<figure style='display:inline-block;margin:0 16px 16px 0;'>"
            f"<figcaption style='text-align:center;font-family:monospace;margin-bottom:8px;'>{escape(smiles)}</figcaption>"
            f"{svg}"
            "</figure>"
        )

    html_parts = []
    if svg_fragments:
        html_parts.append("<div>" + "".join(svg_fragments) + "</div>")

    if invalid_smiles:
        # Invalid entries are arbitrary user text, so escaping matters most
        # here: unescaped, they would be interpreted as markup.
        invalid_list_items = "".join(f"<li>{escape(smiles)}</li>" for smiles in invalid_smiles)
        html_parts.append(
            "<div style='color:#b00020;margin-top:12px;'>"
            "<strong>Invalid SMILES strings:</strong>"
            f"<ul style='margin:4px 0 0 16px;'>{invalid_list_items}</ul>"
            "</div>"
        )

    return "".join(html_parts)
|
| 107 |
+
|
| 108 |
+
# --- Gradio Interface ---
|
| 109 |
+
with gr.Blocks() as demo:
|
| 110 |
+
gr.Markdown("## Molecule Renderer")
|
| 111 |
+
gr.Markdown("Enter one or more SMILES strings to visualize the molecules.")
|
| 112 |
+
|
| 113 |
+
with gr.Row():
|
| 114 |
+
smiles_input = gr.DataFrame(
|
| 115 |
+
value=[[smiles] for smiles in list_smiles],
|
| 116 |
+
headers=["SMILES"],
|
| 117 |
+
label="SMILES Strings",
|
| 118 |
+
row_count=(len(list_smiles), "dynamic"),
|
| 119 |
+
col_count=1,
|
| 120 |
+
type="array",
|
| 121 |
+
wrap=True,
|
| 122 |
+
)
|
| 123 |
+
render_image_button = gr.Button("Render Image")
|
| 124 |
+
render_svg_button = gr.Button("Render SVG")
|
| 125 |
+
|
| 126 |
+
gr.Markdown("#### Image")
|
| 127 |
+
image_output = gr.Gallery(label="Molecule Structures")
|
| 128 |
+
|
| 129 |
+
gr.Markdown("#### SVG")
|
| 130 |
+
svg_output = gr.HTML(label="SVG Output")
|
| 131 |
+
|
| 132 |
+
# Separate actions for independent rendering
|
| 133 |
+
render_image_button.click(
|
| 134 |
+
fn=render_images,
|
| 135 |
+
inputs=smiles_input,
|
| 136 |
+
outputs=image_output,
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
render_svg_button.click(
|
| 140 |
+
fn=render_svgs,
|
| 141 |
+
inputs=smiles_input,
|
| 142 |
+
outputs=svg_output,
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
if __name__ == "__main__":
|
| 146 |
+
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[oauth]==5.49.1
|
| 2 |
+
huggingface_hub==0.35.3
|
| 3 |
+
rdkit==2025.9.1
|
run.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
HITL Drug Discovery Application Launcher
|
| 4 |
+
|
| 5 |
+
This script launches the modular drug discovery application.
|
| 6 |
+
It handles virtual environment activation and provides clear feedback.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import sys
|
| 10 |
+
import os
|
| 11 |
+
import subprocess
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
def check_virtual_env():
    """Report whether the current interpreter looks like a virtual environment.

    Returns:
        bool: True when ``sys`` has a ``real_prefix`` attribute, or when
        ``sys.base_prefix`` exists and differs from ``sys.prefix``.
    """
    if hasattr(sys, 'real_prefix'):
        return True
    # Falling back to sys.prefix makes a missing base_prefix compare equal,
    # i.e. "not a virtual environment".
    return getattr(sys, 'base_prefix', sys.prefix) != sys.prefix
|
| 17 |
+
|
| 18 |
+
def activate_venv_and_run():
    """Run ``app.py`` with the interpreter found in the project's ``venv`` folder.

    The virtual environment is not "activated" in the shell sense; its Python
    executable is simply used to start ``app.py`` as a child process with the
    project directory as working directory.

    Returns:
        bool: True when the application ran and exited with status 0, or was
        stopped by the user with Ctrl+C. False when the ``venv`` directory or
        its interpreter is missing, the process could not be started, or the
        application exited with a non-zero status.
    """
    project_dir = Path(__file__).parent
    venv_path = project_dir / "venv"

    if not venv_path.exists():
        print("❌ Virtual environment not found!")
        print("Please run: python -m venv venv")
        print("Then: source venv/bin/activate")
        print("Finally: pip install -r requirements.txt")
        return False

    # The interpreter lives in a platform-specific subdirectory of the venv.
    if os.name == 'nt':  # Windows
        python_exe = venv_path / "Scripts" / "python.exe"
    else:  # Unix/Linux/macOS
        python_exe = venv_path / "bin" / "python"

    try:
        if not python_exe.exists():
            print("❌ Python executable not found in virtual environment!")
            return False

        print("🚀 Starting HITL Drug Discovery (Modular Version)...")
        print("📍 Using virtual environment:", venv_path)
        print("🌐 Application will be available at: http://localhost:7860")
        print("⏹️ Press Ctrl+C to stop the application")
        print("-" * 60)

        # Run the application and wait for it to finish.
        completed = subprocess.run([str(python_exe), "app.py"], cwd=project_dir)
    except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop the app, so treat it as a
        # normal shutdown instead of letting a traceback escape.
        print("\n⏹️ Application stopped")
        return True
    except (OSError, subprocess.SubprocessError) as e:
        print(f"❌ Error running application: {e}")
        return False

    if completed.returncode != 0:
        # Surface crashes of the child process instead of reporting success.
        print(f"❌ Application exited with status {completed.returncode}")
        return False
    return True
|
| 56 |
+
|
| 57 |
+
def main():
    """Launcher entry point.

    Inside a virtual environment the ``app`` module is imported directly;
    otherwise the project's ``venv`` interpreter is tried via
    ``activate_venv_and_run`` and manual setup steps are printed if that fails.
    """
    print("🧬 HITL Drug Discovery Application Launcher")
    print("=" * 50)

    if not check_virtual_env():
        print("⚠️ No virtual environment detected")
        print("🔄 Attempting to activate virtual environment...")
        if activate_venv_and_run():
            return
        # The fallback launch failed: tell the user how to set things up.
        for hint in (
            "\n💡 Manual setup required:",
            "1. python -m venv venv",
            "2. source venv/bin/activate # (or venv\\Scripts\\activate on Windows)",
            "3. pip install -r requirements.txt",
            "4. python app.py",
        ):
            print(hint)
        return

    print("✅ Virtual environment detected")
    print("🚀 Starting application...")
    try:
        # Imported for its side effects; the module object itself is unused.
        import app
        print("🌐 Application available at: http://localhost:7860")
        print("⏹️ Press Ctrl+C to stop")
    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("💡 Try running: pip install -r requirements.txt")
    except Exception as e:
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    main()
|
smiles_to_csv.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64, io, csv
|
| 2 |
+
from rdkit import Chem
|
| 3 |
+
from rdkit.Chem import Draw
|
| 4 |
+
from molecule_render_demo import list_smiles # imports SMILES list
|
| 5 |
+
|
| 6 |
+
def smiles_to_base64(smiles):
    """Render a SMILES string as a 300x300 PNG and return it base64-encoded.

    Args:
        smiles: SMILES text handed to ``Chem.MolFromSmiles``.

    Returns:
        The PNG bytes as a base64 ``str``, or ``None`` when
        ``Chem.MolFromSmiles`` returns ``None`` for the input.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None

    # Draw into an in-memory buffer so no temporary file is needed.
    png_buffer = io.BytesIO()
    Draw.MolToImage(molecule, size=(300, 300)).save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
|
| 14 |
+
|
| 15 |
+
def export_to_csv(csv_filename="molecule_gallery.csv"):
    """Write every SMILES in ``list_smiles`` with its base64 PNG to a CSV file.

    The file gets a ``SMILES,ImageBase64`` header row followed by one row per
    molecule. A SMILES for which ``smiles_to_base64`` returns ``None`` is still
    written, with the placeholder ``INVALID_SMILES`` in the image column, and
    is reported as skipped rather than as a success.

    Args:
        csv_filename: Path of the CSV file to create or overwrite.
    """
    with open(csv_filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["SMILES", "ImageBase64"])
        for smiles in list_smiles:
            encoded = smiles_to_base64(smiles)
            if encoded is None:
                # Keep the row so the CSV lines up with list_smiles, but do
                # not claim the molecule was processed successfully.
                writer.writerow([smiles, "INVALID_SMILES"])
                print(f"Skipped invalid SMILES {smiles}; wrote INVALID_SMILES to {csv_filename}")
            else:
                writer.writerow([smiles, encoded])
                print(f"Successfully processed {smiles} to {csv_filename}")


if __name__ == "__main__":
    export_to_csv()
|
src/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HITL Drug Discovery Application
|
| 3 |
+
|
| 4 |
+
A modular drug discovery application with AI-powered structure generation
|
| 5 |
+
and molecular analysis capabilities.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
__version__ = "1.0.0"
|
| 9 |
+
__author__ = "HITL Drug Discovery Team"
|
src/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (455 Bytes). View file
|
|
|
src/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (424 Bytes). View file
|
|
|
src/__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (8.11 kB). View file
|
|
|
src/__pycache__/app.cpython-314.pyc
ADDED
|
Binary file (9.01 kB). View file
|
|
|
src/ai/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Services Module
|
| 3 |
+
|
| 4 |
+
This module contains AI-powered features including
|
| 5 |
+
chat responses and structure generation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .services import (
|
| 9 |
+
respond,
|
| 10 |
+
generate_ai_structures,
|
| 11 |
+
parse_ai_structures,
|
| 12 |
+
handle_structure_chat
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
__all__ = [
|
| 16 |
+
'respond',
|
| 17 |
+
'generate_ai_structures',
|
| 18 |
+
'parse_ai_structures',
|
| 19 |
+
'handle_structure_chat'
|
| 20 |
+
]
|
src/ai/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (520 Bytes). View file
|
|
|
src/ai/__pycache__/services.cpython-313.pyc
ADDED
|
Binary file (6.02 kB). View file
|
|
|
src/ai/services.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Services Module
|
| 3 |
+
|
| 4 |
+
This module handles AI-powered features including
|
| 5 |
+
chat responses and structure generation.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from huggingface_hub import InferenceClient
|
| 9 |
+
from rdkit import Chem
|
| 10 |
+
from rdkit.Chem import Draw
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: str,
):
    """
    Stream a drug-discovery chat reply from the Hugging Face inference API.

    The caller's system message is extended with a medicinal-chemistry
    persona, the prior history and the new user message are appended, and the
    accumulated reply text is yielded once per streamed chunk.
    """
    client = InferenceClient(token=hf_token, model="openai/gpt-oss-20b")

    # Persona text appended to whatever system message the caller supplied.
    enhanced_system_message = f"""{system_message}

You are an expert medicinal chemist and drug discovery specialist. You help researchers understand:
- Molecular properties and drug-likeness
- Structure-activity relationships (SAR)
- ADMET properties (Absorption, Distribution, Metabolism, Excretion, Toxicity)
- Drug design principles and optimization strategies
- Chemical synthesis and medicinal chemistry

When discussing molecules, consider their molecular weight, LogP, hydrogen bonding, and other key properties. Reference the molecular gallery below for visual context."""

    conversation = [
        {"role": "system", "content": enhanced_system_message},
        *history,
        {"role": "user", "content": message},
    ]

    reply_so_far = ""
    stream = client.chat_completion(
        conversation,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        chunk_choices = chunk.choices
        # Chunks without choices or without text still trigger a yield of the
        # unchanged accumulated reply.
        if len(chunk_choices) > 0:
            piece = chunk_choices[0].delta.content
            if piece:
                reply_so_far += piece
        yield reply_so_far
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def generate_ai_structures(message, history, selected_smiles, hf_token: str):
    """Stream an AI reply proposing new structures for the selected molecule.

    This is a generator: it yields the accumulated reply text after every
    streamed chunk, so the last yielded value is the complete reply.

    Args:
        message: The user's request. Empty, whitespace-only or ``None``
            requests yield nothing.
        history: Earlier chat turns as ``{"role": ..., "content": ...}`` dicts;
            they are placed between the system prompt and the new message.
        selected_smiles: SMILES of the currently selected molecule; it is
            interpolated into the system prompt.
        hf_token: Hugging Face API token passed to ``InferenceClient``.

    Yields:
        str: The reply text accumulated so far.
    """
    # A generator cannot hand a value back to a for-loop consumer, so an
    # empty request simply produces no chunks.
    if not message or not message.strip():
        return

    client = InferenceClient(token=hf_token, model="openai/gpt-oss-20b")

    # Enhanced system message for structure generation
    system_message = f"""You are an expert medicinal chemist and drug discovery specialist. You help generate new chemical structures based on user requests and existing molecules.

Current selected molecule SMILES: {selected_smiles}

Your task is to:
1. Understand the user's request for structure modifications
2. Generate 3-6 new SMILES strings that meet their requirements
3. Provide brief explanations for each generated structure
4. Consider drug-likeness, ADMET properties, and medicinal chemistry principles

Format your response as:
STRUCTURE 1: [SMILES] - [Brief explanation]
STRUCTURE 2: [SMILES] - [Brief explanation]
etc.

Focus on practical, synthesizable structures that address the user's specific request."""

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=512,
        stream=True,
        temperature=0.7,
        top_p=0.9,
    ):
        choices = message_chunk.choices
        # Some chunks carry no choices or no text; they add nothing.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _smiles_candidates(line):
    """Return possible SMILES strings from a ``STRUCTURE n: ...`` line, best guess first."""
    remainder = line.partition(':')[2].strip()
    candidates = []

    # SMILES contain no whitespace, so the first token after the colon is the
    # SMILES itself. Unlike splitting on '-', this keeps charges and explicit
    # bonds such as "[O-]" or "C-C" intact.
    tokens = remainder.split()
    if tokens:
        token = tokens[0].strip('`')
        candidates.append(token)
        # The prompt's "[SMILES]" template may be echoed literally, so also
        # try the text without one enclosing pair of square brackets.
        if len(token) > 2 and token[0] == '[' and token[-1] == ']':
            candidates.append(token[1:-1])

    # Previous extraction rule, kept as a last resort for replies such as
    # "CCO-ethanol" that have no spaces around the dash.
    candidates.append(remainder.split('-')[0].strip().strip('[]()').strip())

    unique = []
    for candidate in candidates:
        if candidate and candidate not in unique:
            unique.append(candidate)
    return unique


def parse_ai_structures(ai_response, selected_smiles):
    """Parse AI response to extract SMILES strings and generate images.

    Every line containing ``STRUCTURE`` (case-insensitive) and a colon is
    scanned; the first candidate that ``Chem.MolFromSmiles`` accepts is drawn
    at 150x150. When nothing could be parsed and ``selected_smiles`` is set,
    the selected molecule is drawn at three sizes instead.

    Args:
        ai_response: Full reply text from the model; ``None`` is treated as
            an empty reply.
        selected_smiles: SMILES of the currently selected molecule, used only
            for the fallback images.

    Returns:
        list: ``(image, caption)`` tuples; empty when nothing could be drawn.
    """
    structures = []

    for line in (ai_response or "").split('\n'):
        if 'STRUCTURE' not in line.upper() or ':' not in line:
            continue
        for smiles in _smiles_candidates(line):
            try:
                mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    continue
                img = Draw.MolToImage(mol, size=(150, 150), kekulize=True)
            except Exception:
                # Best effort: a candidate that cannot be parsed or drawn
                # must not abort parsing of the rest of the reply.
                continue
            structures.append((img, f"Generated: {smiles}"))
            break

    # If no structures were parsed, generate some variations of the selected molecule
    if not structures and selected_smiles:
        try:
            mol = Chem.MolFromSmiles(selected_smiles)
            if mol is not None:
                # Same molecule rendered at a few different image sizes.
                for size in [(120, 120), (150, 150), (180, 180)]:
                    img = Draw.MolToImage(mol, size=size, kekulize=True)
                    structures.append((img, f"Variation: {selected_smiles}"))
        except Exception:
            # Best effort: keep whatever was drawn before the failure.
            pass

    return structures
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def handle_structure_chat(message, history, selected_smiles, hf_token: str):
    """Run one turn of the structure-generation chat.

    Appends the user message and the final AI reply to ``history`` in place,
    then parses the reply into structure images.

    Returns:
        tuple: ``(history, structures)``; for a blank message the history is
        returned untouched together with an empty list.
    """
    if message.strip() == "":
        return history, []

    # Snapshot of the earlier turns: the model receives the new message
    # separately, so it must not also appear in the history it is given.
    earlier_turns = history[:]
    history.append({"role": "user", "content": message})

    # The generator yields the accumulated reply; the last value is complete.
    ai_response = ""
    for ai_response in generate_ai_structures(message, earlier_turns, selected_smiles, hf_token):
        pass

    history.append({"role": "assistant", "content": ai_response})

    return history, parse_ai_structures(ai_response, selected_smiles)
|
src/app.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main Application Module
|
| 3 |
+
|
| 4 |
+
This is the main entry point for the HITL Drug Discovery application.
|
| 5 |
+
It orchestrates all the modules and creates the Gradio interface.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from .ui.components import (
|
| 10 |
+
create_molecular_analysis_tab,
|
| 11 |
+
create_chemical_variations_tab,
|
| 12 |
+
create_molecular_gallery_tab,
|
| 13 |
+
create_drug_library_tab,
|
| 14 |
+
create_new_experiment_tab
|
| 15 |
+
)
|
| 16 |
+
from .ui.handlers import VariationHandlers, BookmarkHandlers, AIHandler, create_quick_example_handlers
|
| 17 |
+
from .molecules.analysis import analyze_molecule_image_only, validate_smiles_realtime, get_molecule_properties_for_hover
|
| 18 |
+
from .molecules.variations import generate_molecule_images, generate_chemical_series_variations
|
| 19 |
+
from .ai.services import respond, handle_structure_chat
|
| 20 |
+
from .config.settings import (
|
| 21 |
+
DRUG_SMILES, COMMON_SMILES, CUSTOM_CSS, DEFAULT_SMILES,
|
| 22 |
+
DEFAULT_VARIATIONS_COUNT, AI_MODEL, AI_MAX_TOKENS, AI_TEMPERATURE, AI_TOP_P
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def create_app():
    """Build the Gradio Blocks interface and wire up all of its events.

    Returns:
        The configured ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    # Handler objects that provide the callbacks wired below.
    variation_handlers = VariationHandlers()
    bookmark_handlers = BookmarkHandlers()
    ai_handler = AIHandler()
    quick_handlers = create_quick_example_handlers()

    def page_turner(direction):
        """Return a zero-argument callback that pages the variations grid."""
        return lambda: variation_handlers.navigate_variations(direction)

    with gr.Blocks(title="HITL Drug Discovery", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
        gr.Markdown("# 🧬 Human-in-the-Loop Drug Discovery")
        gr.Markdown("Interactive molecular analysis and AI assistant for drug discovery research")

        # Tabs, each built by its own component factory.
        with gr.Tabs():
            with gr.Tab("🧬 Drug Discovery Lab"):
                lab = create_chemical_variations_tab()

            with gr.Tab("🧬 Molecular Gallery"):
                gallery_components = create_molecular_gallery_tab()

            with gr.Tab("💊 Drug Discovery Library"):
                drug_components = create_drug_library_tab()

            with gr.Tab("🧪 New Experiment"):
                # Blank workspace; its components are not wired to any event.
                create_new_experiment_tab()

        # Components referenced by several events below.
        smiles_input = lab['variation_smiles_input']
        structure_display = lab['main_structure_display']
        properties_display = lab['properties_display']
        variations_grid = lab['variations_grid']
        selected_smiles = lab['selected_smiles_display']
        selected_style = lab['selected_style_display']
        count_slider = lab['variation_count_slider']

        # --- Molecular analysis ---
        smiles_input.change(
            fn=validate_smiles_realtime,
            inputs=smiles_input,
            outputs=[lab['validation_status'], lab['preview_image']],
        )
        lab['analyze_btn'].click(
            fn=variation_handlers.analyze_molecule_with_tooltip,
            inputs=smiles_input,
            outputs=[structure_display, properties_display],
        )

        # --- Quick example buttons ---
        # A button is wired only when both the button and a handler with the
        # matching "<name>_handler" key exist.
        for btn_name in (
            'alanine_btn', 'aspirin_btn', 'ibuprofen_btn', 'caffeine_btn',
            'glucose_btn', 'benzene_btn', 'glucose_btn2', 'ethanol_btn',
            'water_btn', 'complex_btn1', 'complex_btn2', 'complex_btn3',
        ):
            if btn_name not in lab:
                continue
            handler_name = btn_name.replace('_btn', '_handler')
            if handler_name not in quick_handlers:
                continue
            lab[btn_name].click(
                fn=quick_handlers[handler_name],
                inputs=None,
                outputs=[smiles_input, structure_display, properties_display],
            )

        # --- Bookmarks ---
        lab['bookmark_btn'].click(
            fn=bookmark_handlers.bookmark_current_molecule,
            inputs=[smiles_input, lab['bookmark_name']],
            outputs=[lab['bookmark_status'], gallery_components['bookmarked_gallery']],
        )

        # --- Chemical diagram variations ---
        lab['generate_variations_btn'].click(
            fn=variation_handlers.generate_variations_for_display,
            inputs=[smiles_input, count_slider],
            outputs=[variations_grid, selected_smiles, selected_style],
        )
        lab['clear_variations_btn'].click(
            fn=variation_handlers.clear_variations,
            inputs=None,
            outputs=[variations_grid, selected_smiles, selected_style],
        )

        # Grid selection is hooked to the change event rather than select.
        variations_grid.change(
            fn=variation_handlers.select_variation,
            inputs=None,
            outputs=[structure_display, selected_smiles, selected_style, properties_display],
        )

        # --- Paging controls (previous first, then next) ---
        for button_key, direction in (('prev_page_btn', "prev"), ('next_page_btn', "next")):
            lab[button_key].click(
                fn=page_turner(direction),
                inputs=None,
                outputs=[
                    variations_grid,
                    lab['page_info'],
                    structure_display,
                    selected_smiles,
                    selected_style,
                ],
            )

        # Update grid size when slider changes
        lab['grid_size_slider'].change(
            fn=lambda cols: gr.Gallery(columns=cols),
            inputs=lab['grid_size_slider'],
            outputs=variations_grid,
        )

        # Update variation count
        count_slider.change(
            fn=variation_handlers.update_variation_count,
            inputs=count_slider,
            outputs=count_slider,
        )

        # --- Unified AI chat: the send button and Enter do the same thing ---
        for register in (lab['ai_send_btn'].click, lab['ai_chat_input'].submit):
            register(
                fn=ai_handler.handle_ai_chat,
                inputs=[
                    lab['ai_chat_input'],
                    lab['ai_chatbot'],
                    selected_smiles,
                    lab['hf_token_input'],
                    lab['ai_temperature'],
                ],
                outputs=[lab['ai_chatbot'], lab['ai_generated_grid']],
            )

        # --- Default content loaded on startup ---
        def load_default_molecule():
            return (
                analyze_molecule_image_only(DEFAULT_SMILES),
                get_molecule_properties_for_hover(DEFAULT_SMILES),
            )

        def load_default_variations():
            return variation_handlers.generate_variations_for_display(DEFAULT_SMILES, DEFAULT_VARIATIONS_COUNT)

        def load_gallery_images():
            return generate_molecule_images(COMMON_SMILES)

        def load_drug_images():
            return generate_molecule_images(DRUG_SMILES)

        for loader, targets in (
            (load_default_molecule, [structure_display, properties_display]),
            (load_default_variations, [variations_grid, selected_smiles, selected_style]),
            (load_gallery_images, gallery_components['image_components']),
            (load_drug_images, drug_components['drug_images']),
        ):
            demo.load(fn=loader, inputs=None, outputs=targets)

    return demo


if __name__ == "__main__":
    create_app().launch()
|
src/clm/__init__.py
ADDED
|
File without changes
|
src/clm/model_new_torch.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70c6b736e86fd787e5753f98490fed41ec83c13dcac42a6132c3202764cb112e
|
| 3 |
+
size 51114556
|
src/config/__init__.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration Module
|
| 3 |
+
|
| 4 |
+
This module contains configuration settings and constants
|
| 5 |
+
for the drug discovery application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .settings import (
|
| 9 |
+
DRUG_SMILES,
|
| 10 |
+
COMMON_SMILES,
|
| 11 |
+
CUSTOM_CSS,
|
| 12 |
+
DEFAULT_SMILES,
|
| 13 |
+
DEFAULT_VARIATIONS_COUNT,
|
| 14 |
+
AI_MODEL,
|
| 15 |
+
AI_MAX_TOKENS,
|
| 16 |
+
AI_TEMPERATURE,
|
| 17 |
+
AI_TOP_P
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
'DRUG_SMILES',
|
| 22 |
+
'COMMON_SMILES',
|
| 23 |
+
'CUSTOM_CSS',
|
| 24 |
+
'DEFAULT_SMILES',
|
| 25 |
+
'DEFAULT_VARIATIONS_COUNT',
|
| 26 |
+
'AI_MODEL',
|
| 27 |
+
'AI_MAX_TOKENS',
|
| 28 |
+
'AI_TEMPERATURE',
|
| 29 |
+
'AI_TOP_P'
|
| 30 |
+
]
|
src/config/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (634 Bytes). View file
|
|
|
src/config/__pycache__/settings.cpython-313.pyc
ADDED
|
Binary file (2.25 kB). View file
|
|
|
src/config/settings.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration Settings Module
|
| 3 |
+
|
| 4 |
+
This module contains configuration settings and constants
|
| 5 |
+
for the drug discovery application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
# Drug discovery molecules
# NOTE(review): all five entries share one scaffold, an
# N-[1-(4-methylphenyl)ethyl]benzamide, and differ only in the para
# substituent of the benzoyl ring. None of them contains the carboxylic acid
# of the NSAIDs the previous labels named, so each comment now describes the
# substituent and keeps the old label for reference.
DRUG_SMILES = [
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)Cl",  # para-chloro analog (previously labeled "Ibuprofen")
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)F",  # para-fluoro analog (previously labeled "Flurbiprofen")
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)Br",  # para-bromo analog (previously labeled "Bromfenac")
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)I",  # para-iodo analog (previously labeled "Iodofenac")
    "CC1=CC=C(C=C1)C(C)NC(=O)C2=CC=C(C=C2)[N+](=O)[O-]",  # para-nitro analog (previously labeled "Nitrofenac")
]
|
| 16 |
+
|
| 17 |
+
# Common drug discovery molecules
COMMON_SMILES = [
    "C[C@H](N)C(=O)O",  # Alanine (amino acid)
    "CC(=O)OC1=CC=CC=C1C(=O)O",  # Aspirin (NSAID)
    "CCN(CC)CC",  # Triethylamine (base)
    "c1ccccc1O",  # Phenol (aromatic)
    "CC(C)CC(=O)O",  # Isovaleric acid, i.e. 3-methylbutanoic acid (branched; previously labeled "Valeric acid")
    "CN1C=NC2=C1N=CN2",  # NOTE(review): previously labeled "Adenine", but this SMILES encodes two fused five-membered rings and no amino group - confirm the intended structure
    "O=C(O)C1=CC=CC=C1",  # Benzoic acid (aromatic acid)
    "C1CCCCC1",  # Cyclohexane (cycloalkane)
    "CC(=O)N1CCCCC1",  # N-acetylpiperidine (amide; previously labeled "N-methylpiperidine (amine)")
]
|
| 29 |
+
|
| 30 |
+
# AI Model Configuration
# NOTE(review): src/ai/services.py currently hard-codes the same model id and
# sampling values instead of importing these - confirm which copy is the
# source of truth.
AI_MODEL = "openai/gpt-oss-20b"
AI_MAX_TOKENS = 512
AI_TEMPERATURE = 0.7
AI_TOP_P = 0.9

# UI Configuration
DEFAULT_SMILES = "C[C@H](N)C(=O)O"  # Alanine
DEFAULT_VARIATIONS_COUNT = 12
DEFAULT_GRID_COLUMNS = 4
MAX_GRID_COLUMNS = 8
# Presumably the bounds of the variation-count slider - verify against the UI components.
MIN_VARIATIONS = 6
MAX_VARIATIONS = 24

# Image Configuration
# Sizes are two-element tuples, presumably (width, height) in pixels - TODO confirm.
MOLECULE_IMAGE_SIZE = (300, 300)
PREVIEW_IMAGE_SIZE = (200, 200)
GALLERY_IMAGE_SIZE = (200, 200)
VARIATION_IMAGE_SIZES = [(150, 150), (180, 180), (200, 200), (160, 160)]
|
| 49 |
+
|
| 50 |
+
# CSS Styling
|
| 51 |
+
CUSTOM_CSS = """
|
| 52 |
+
/* Full width layout */
|
| 53 |
+
.gradio-container {
|
| 54 |
+
max-width: 100% !important;
|
| 55 |
+
width: 100% !important;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.gradio-container .container {
|
| 59 |
+
max-width: 100% !important;
|
| 60 |
+
width: 100% !important;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
/* Ensure rows use full width */
|
| 64 |
+
.gr-row {
|
| 65 |
+
width: 100% !important;
|
| 66 |
+
max-width: 100% !important;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.gr-column {
|
| 70 |
+
width: 100% !important;
|
| 71 |
+
max-width: 100% !important;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/* Make images responsive to zoom */
|
| 75 |
+
.gr-image {
|
| 76 |
+
width: 100% !important;
|
| 77 |
+
height: auto !important;
|
| 78 |
+
max-width: 100% !important;
|
| 79 |
+
object-fit: contain !important;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
/* Responsive text scaling */
|
| 83 |
+
.gr-markdown {
|
| 84 |
+
width: 100% !important;
|
| 85 |
+
max-width: 100% !important;
|
| 86 |
+
word-wrap: break-word !important;
|
| 87 |
+
overflow-wrap: break-word !important;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
/* Left column spacing */
|
| 91 |
+
.gr-column:first-child {
|
| 92 |
+
padding-right: 20px;
|
| 93 |
+
min-width: 300px;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
/* Right column spacing */
|
| 97 |
+
.gr-column:last-child {
|
| 98 |
+
padding-left: 20px;
|
| 99 |
+
min-width: 400px;
|
| 100 |
+
}
|
| 101 |
+
#main_structure {
|
| 102 |
+
border: none;
|
| 103 |
+
border-radius: 12px;
|
| 104 |
+
background: transparent;
|
| 105 |
+
width: 100%;
|
| 106 |
+
max-width: 100%;
|
| 107 |
+
height: auto;
|
| 108 |
+
max-height: 350px;
|
| 109 |
+
overflow: hidden;
|
| 110 |
+
object-fit: contain;
|
| 111 |
+
margin: 5px 0;
|
| 112 |
+
/* Make responsive to zoom */
|
| 113 |
+
min-height: 200px;
|
| 114 |
+
resize: both;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
/* Seamless properties display */
|
| 118 |
+
.seamless-properties {
|
| 119 |
+
background: transparent;
|
| 120 |
+
border: none;
|
| 121 |
+
padding: 8px 0;
|
| 122 |
+
margin: 5px 0;
|
| 123 |
+
font-size: 13px;
|
| 124 |
+
color: #e2e8f0;
|
| 125 |
+
text-align: left;
|
| 126 |
+
line-height: 1.4;
|
| 127 |
+
/* Make responsive to zoom */
|
| 128 |
+
width: 100%;
|
| 129 |
+
max-width: 100%;
|
| 130 |
+
overflow-wrap: break-word;
|
| 131 |
+
word-wrap: break-word;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.seamless-properties h1, .seamless-properties h2, .seamless-properties h3 {
|
| 135 |
+
margin: 0 0 8px 0;
|
| 136 |
+
color: #f7fafc;
|
| 137 |
+
font-weight: bold;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.seamless-properties p {
|
| 141 |
+
margin: 4px 0;
|
| 142 |
+
color: #e2e8f0;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.seamless-properties ul {
|
| 146 |
+
margin: 4px 0;
|
| 147 |
+
padding-left: 20px;
|
| 148 |
+
color: #e2e8f0;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
.seamless-properties li {
|
| 152 |
+
color: #e2e8f0;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.seamless-properties strong {
|
| 156 |
+
color: #f7fafc;
|
| 157 |
+
font-weight: bold;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.seamless-properties em {
|
| 161 |
+
color: #a0aec0;
|
| 162 |
+
font-style: italic;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
#variations_gallery {
|
| 166 |
+
border: none;
|
| 167 |
+
border-radius: 12px;
|
| 168 |
+
background: transparent;
|
| 169 |
+
max-height: 300px;
|
| 170 |
+
overflow-y: auto;
|
| 171 |
+
margin: 5px 0;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
#variations_container {
|
| 175 |
+
margin-top: 5px;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
#variations_gallery .gallery-item {
|
| 179 |
+
border: 1px solid #4a5568;
|
| 180 |
+
border-radius: 8px;
|
| 181 |
+
margin: 3px;
|
| 182 |
+
transition: all 0.3s ease;
|
| 183 |
+
background: #1a202c;
|
| 184 |
+
/* Better aspect ratio for chemical structures */
|
| 185 |
+
aspect-ratio: 1.3 / 1;
|
| 186 |
+
min-width: 200px;
|
| 187 |
+
max-width: 250px;
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
#variations_gallery .gallery-item:hover {
|
| 191 |
+
border-color: #68d391;
|
| 192 |
+
transform: scale(1.05);
|
| 193 |
+
box-shadow: 0 4px 12px rgba(104, 211, 145, 0.3);
|
| 194 |
+
background: #2d3748;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
/* Ensure images fill the wider cells properly */
|
| 198 |
+
#variations_gallery .gallery-item img {
|
| 199 |
+
width: 100%;
|
| 200 |
+
height: 100%;
|
| 201 |
+
object-fit: contain;
|
| 202 |
+
border-radius: 6px;
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
#variations_container {
|
| 206 |
+
max-height: 600px;
|
| 207 |
+
overflow-y: auto;
|
| 208 |
+
padding: 10px;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
#selected_smiles, #selected_style {
|
| 212 |
+
background: #f5f5f5;
|
| 213 |
+
border: 1px solid #ddd;
|
| 214 |
+
border-radius: 5px;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
.page-info {
|
| 218 |
+
text-align: center;
|
| 219 |
+
font-weight: bold;
|
| 220 |
+
color: #666;
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
.grid-controls {
|
| 224 |
+
background: #f0f0f0;
|
| 225 |
+
padding: 10px;
|
| 226 |
+
border-radius: 8px;
|
| 227 |
+
margin: 10px 0;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
/* Compact properties display */
|
| 231 |
+
.compact-properties {
|
| 232 |
+
background: #2d3748;
|
| 233 |
+
border: 1px solid #4a5568;
|
| 234 |
+
border-radius: 8px;
|
| 235 |
+
padding: 15px;
|
| 236 |
+
margin: 10px 0;
|
| 237 |
+
font-size: 13px;
|
| 238 |
+
color: #e2e8f0 !important;
|
| 239 |
+
text-align: left;
|
| 240 |
+
min-height: 60px;
|
| 241 |
+
max-height: 200px;
|
| 242 |
+
overflow-y: auto;
|
| 243 |
+
line-height: 1.4;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.compact-properties h1, .compact-properties h2, .compact-properties h3 {
|
| 247 |
+
margin: 0 0 8px 0;
|
| 248 |
+
color: #f7fafc !important;
|
| 249 |
+
font-weight: bold;
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
.compact-properties p {
|
| 253 |
+
margin: 4px 0;
|
| 254 |
+
color: #e2e8f0 !important;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
.compact-properties ul {
|
| 258 |
+
margin: 4px 0;
|
| 259 |
+
padding-left: 20px;
|
| 260 |
+
color: #e2e8f0 !important;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.compact-properties li {
|
| 264 |
+
color: #e2e8f0 !important;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.compact-properties strong {
|
| 268 |
+
color: #f7fafc !important;
|
| 269 |
+
font-weight: bold;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.compact-properties em {
|
| 273 |
+
color: #a0aec0 !important;
|
| 274 |
+
font-style: italic;
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
/* Force text visibility - override any conflicting styles */
|
| 278 |
+
.compact-properties * {
|
| 279 |
+
color: #e2e8f0 !important;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.compact-properties h1, .compact-properties h2, .compact-properties h3, .compact-properties strong {
|
| 283 |
+
color: #f7fafc !important;
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
/* Ensure markdown content is visible */
|
| 287 |
+
.compact-properties .markdown {
|
| 288 |
+
color: #e2e8f0 !important;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.compact-properties .markdown h1, .compact-properties .markdown h2, .compact-properties .markdown h3 {
|
| 292 |
+
color: #f7fafc !important;
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
.compact-properties .markdown strong {
|
| 296 |
+
color: #f7fafc !important;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
/* Popup notification system */
|
| 300 |
+
.notification-popup {
|
| 301 |
+
position: fixed;
|
| 302 |
+
top: 20px;
|
| 303 |
+
right: 20px;
|
| 304 |
+
background: #2d3748;
|
| 305 |
+
color: #e2e8f0;
|
| 306 |
+
padding: 12px 20px;
|
| 307 |
+
border-radius: 8px;
|
| 308 |
+
border-left: 4px solid #68d391;
|
| 309 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
|
| 310 |
+
z-index: 1000;
|
| 311 |
+
font-size: 14px;
|
| 312 |
+
font-weight: 500;
|
| 313 |
+
max-width: 300px;
|
| 314 |
+
opacity: 0;
|
| 315 |
+
transform: translateX(100%);
|
| 316 |
+
transition: all 0.3s ease-in-out;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.notification-popup.show {
|
| 320 |
+
opacity: 1;
|
| 321 |
+
transform: translateX(0);
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
.notification-popup.error {
|
| 325 |
+
border-left-color: #f56565;
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
.notification-popup.warning {
|
| 329 |
+
border-left-color: #ed8936;
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
.notification-popup.info {
|
| 333 |
+
border-left-color: #4299e1;
|
| 334 |
+
}
|
| 335 |
+
|
| 336 |
+
/* Success notification */
|
| 337 |
+
.notification-popup.success {
|
| 338 |
+
border-left-color: #68d391;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
/* Auto-hide animation */
|
| 342 |
+
.notification-popup.auto-hide {
|
| 343 |
+
animation: slideOut 0.3s ease-in-out 2.5s forwards;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
@keyframes slideOut {
|
| 347 |
+
to {
|
| 348 |
+
opacity: 0;
|
| 349 |
+
transform: translateX(100%);
|
| 350 |
+
}
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
/* Spacer styling */
|
| 354 |
+
.spacer {
|
| 355 |
+
margin: 20px 0;
|
| 356 |
+
opacity: 0.3;
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
/* Notification container */
|
| 360 |
+
#notification-container {
|
| 361 |
+
position: fixed;
|
| 362 |
+
top: 20px;
|
| 363 |
+
right: 20px;
|
| 364 |
+
z-index: 1000;
|
| 365 |
+
pointer-events: none;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* Temporary notification display */
|
| 369 |
+
.temp-notification {
|
| 370 |
+
background: #2d3748;
|
| 371 |
+
color: #e2e8f0;
|
| 372 |
+
padding: 12px 20px;
|
| 373 |
+
border-radius: 8px;
|
| 374 |
+
border-left: 4px solid #68d391;
|
| 375 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
|
| 376 |
+
margin-bottom: 10px;
|
| 377 |
+
font-size: 14px;
|
| 378 |
+
font-weight: 500;
|
| 379 |
+
max-width: 300px;
|
| 380 |
+
opacity: 1;
|
| 381 |
+
transform: translateX(0);
|
| 382 |
+
transition: all 0.3s ease-in-out;
|
| 383 |
+
pointer-events: auto;
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
.temp-notification.error {
|
| 387 |
+
border-left-color: #f56565;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
.temp-notification.warning {
|
| 391 |
+
border-left-color: #ed8936;
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
.temp-notification.info {
|
| 395 |
+
border-left-color: #4299e1;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
.temp-notification.success {
|
| 399 |
+
border-left-color: #68d391;
|
| 400 |
+
}
|
| 401 |
+
"""
|
src/molecules/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Molecular Analysis and Variations Module
|
| 3 |
+
|
| 4 |
+
This module contains functions for molecular structure analysis,
|
| 5 |
+
property calculations, and generation of molecular variations.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .analysis import (
|
| 9 |
+
calculate_molecular_properties,
|
| 10 |
+
generate_molecule_image,
|
| 11 |
+
validate_smiles,
|
| 12 |
+
validate_smiles_realtime,
|
| 13 |
+
analyze_molecule
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
from .variations import (
|
| 17 |
+
generate_molecule_variations,
|
| 18 |
+
generate_chemical_series_variations,
|
| 19 |
+
generate_molecule_images
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
__all__ = [
|
| 23 |
+
'calculate_molecular_properties',
|
| 24 |
+
'generate_molecule_image',
|
| 25 |
+
'validate_smiles',
|
| 26 |
+
'validate_smiles_realtime',
|
| 27 |
+
'analyze_molecule',
|
| 28 |
+
'generate_molecule_variations',
|
| 29 |
+
'generate_chemical_series_variations',
|
| 30 |
+
'generate_molecule_images'
|
| 31 |
+
]
|
src/molecules/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (829 Bytes). View file
|
|
|
src/molecules/__pycache__/analysis.cpython-313.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
src/molecules/__pycache__/variations.cpython-313.pyc
ADDED
|
Binary file (3.61 kB). View file
|
|
|
src/molecules/analysis.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Molecular Analysis Module
|
| 3 |
+
|
| 4 |
+
This module contains functions for molecular structure analysis,
|
| 5 |
+
property calculations, and drug-likeness assessment.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from rdkit import Chem
|
| 9 |
+
from rdkit.Chem import Draw, Descriptors, Crippen
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def calculate_molecular_properties(smiles):
    """Compute drug-discovery descriptors for a SMILES string.

    Returns a dict keyed by human-readable property names (molecular weight,
    LogP, H-bond donors/acceptors, TPSA, rotatable bonds, aromatic rings,
    heavy atoms) plus the Lipinski rule-of-five violation count and a
    'Drug-like' flag, or None when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if not parsed:
        return None

    # The four rule-of-five inputs; weight and LogP are rounded for display
    # and the same rounded values are used for the threshold checks.
    weight = round(Descriptors.MolWt(parsed), 2)
    log_p = round(Crippen.MolLogP(parsed), 2)
    donors = Descriptors.NumHDonors(parsed)
    acceptors = Descriptors.NumHAcceptors(parsed)

    descriptors = {
        'Molecular Weight': weight,
        'LogP': log_p,
        'HBD': donors,
        'HBA': acceptors,
        'TPSA': round(Descriptors.TPSA(parsed), 2),
        'Rotatable Bonds': Descriptors.NumRotatableBonds(parsed),
        'Aromatic Rings': Descriptors.NumAromaticRings(parsed),
        'Heavy Atoms': parsed.GetNumHeavyAtoms(),
    }

    # Lipinski's Rule of Five: one flag per exceeded limit
    exceeded = (weight > 500, log_p > 5, donors > 5, acceptors > 10)
    violation_count = sum(exceeded)

    descriptors['Lipinski Violations'] = violation_count
    # At most one violation still counts as drug-like
    descriptors['Drug-like'] = violation_count <= 1
    return descriptors
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def generate_molecule_image(smiles, size=(300, 300)):
    """Render a SMILES string to an image with RDKit.

    Args:
        smiles: SMILES text to draw.
        size: size tuple forwarded to Draw.MolToImage.

    Returns:
        The image produced by Draw.MolToImage, or None when the SMILES does
        not parse, the renderer returns None, or any exception is raised
        (each failure is reported via print).
    """
    try:
        molecule = Chem.MolFromSmiles(smiles)
        if molecule:
            # Kekulized rendering of the parsed molecule
            rendered = Draw.MolToImage(molecule, size=size, kekulize=True)
            if rendered is not None:
                print(f"Generated image successfully: {size}, mode: {rendered.mode}")
                return rendered
            print(f"Draw.MolToImage returned None for SMILES: {smiles}")
        else:
            print(f"Failed to parse SMILES: {smiles}")
    except Exception as error:
        print(f"Error in generate_molecule_image: {error}")
    return None
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def validate_smiles(smiles):
    """Check a SMILES string and describe the problem when it is invalid.

    Returns:
        None when the stripped string parses with RDKit; otherwise a
        Markdown-formatted message. Empty or whitespace-only input yields a
        short prompt; unparsable input yields a longer message with a
        heuristic diagnosis, tips and example SMILES.
    """
    if not smiles or not smiles.strip():
        return "Please enter a SMILES string"

    # Parsable input needs no message
    if Chem.MolFromSmiles(smiles.strip()):
        return None

    # Heuristic diagnosis of the most common mistakes, checked in priority order
    stereo_marks = ['@', '\\', '/']
    if smiles.count('(') != smiles.count(')'):
        diagnosis = "🔍 **Issue detected:** Unmatched parentheses\n"
    elif smiles.count('[') != smiles.count(']'):
        diagnosis = "🔍 **Issue detected:** Unmatched brackets\n"
    elif 'C' not in smiles and any(mark in smiles for mark in stereo_marks):
        diagnosis = "🔍 **Issue detected:** Invalid stereochemistry notation\n"
    else:
        diagnosis = "🔍 **Issue detected:** General syntax error\n"

    sections = [
        f"❌ **Invalid SMILES string:** `{smiles}`\n\n",
        diagnosis,
        "\n**💡 Tips for complex SMILES:**\n",
        "- Complex molecules are supported! The issue is likely syntax\n",
        "- Check parentheses and brackets are balanced\n",
        "- Verify ring closure numbers (e.g., C1CCCC1)\n",
        "- Use proper stereochemistry notation (@, @@, /, \\)\n",
        "- Try breaking complex molecules into smaller parts first\n\n",
        "**🧪 Examples of complex valid SMILES:**\n",
        "- `CC(=O)OC1=CC=CC=C1C(=O)O` (Aspirin)\n",
        "- `CN1C=NC2=C1C(=O)N(C(=O)N2C)C` (Caffeine)\n",
        "- `C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O` (Glucose)\n",
        "- `CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)` (Penicillin)\n",
    ]
    return "".join(sections)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def validate_smiles_realtime(smiles):
    """Validate a SMILES string as the user types and build a small preview.

    Returns:
        A (status_text, preview_image) tuple. The image is None for empty
        input, for invalid SMILES, and when the preview cannot be rendered.
    """
    if not smiles or not smiles.strip():
        return "✅ Ready to analyze", None

    if validate_smiles(smiles):
        return "❌ Invalid SMILES", None

    # The preview is best-effort: a rendering failure must not turn a valid
    # SMILES into an error, but it is logged rather than silently swallowed.
    # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
    try:
        mol = Chem.MolFromSmiles(smiles.strip())
        if mol:
            img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)
            return "✅ Valid SMILES", img
    except Exception as exc:
        print(f"Preview rendering failed for SMILES {smiles!r}: {exc}")

    return "✅ Valid SMILES", None
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def analyze_molecule(smiles):
    """Analyze a molecule and return a Markdown report plus its structure image.

    Returns:
        A (report, image) tuple. On invalid input the report is the message
        from validate_smiles() and the image is None. When the image cannot
        be generated the report gains a warning line and the image is None.
    """
    # Validate SMILES first
    validation_error = validate_smiles(smiles)
    if validation_error:
        return validation_error, None

    # validate_smiles() checks the stripped text, so analyze and display that
    # same text instead of the raw input with surrounding whitespace.
    smiles = smiles.strip()

    # Calculate properties
    properties = calculate_molecular_properties(smiles)
    if not properties:
        return "Error calculating molecular properties", None

    # The SMILES goes inside a fenced code block so Markdown renders it verbatim
    result = "".join([
        f"**Molecular Analysis for:**\n```\n{smiles}\n```\n\n",
        "**Basic Properties:**\n",
        f"- Molecular Weight: {properties['Molecular Weight']} g/mol\n",
        f"- LogP: {properties['LogP']}\n",
        f"- TPSA: {properties['TPSA']} Ų\n",
        f"- Heavy Atoms: {properties['Heavy Atoms']}\n\n",
        "**Hydrogen Bonding:**\n",
        f"- HBD (Donors): {properties['HBD']}\n",
        f"- HBA (Acceptors): {properties['HBA']}\n\n",
        "**Structural Features:**\n",
        f"- Rotatable Bonds: {properties['Rotatable Bonds']}\n",
        f"- Aromatic Rings: {properties['Aromatic Rings']}\n\n",
        "**Drug-likeness:**\n",
        f"- Lipinski Violations: {properties['Lipinski Violations']}/4\n",
        f"- Drug-like: {'Yes' if properties['Drug-like'] else 'No'}\n",
    ])

    # Generate molecular structure image; a failure only adds a warning
    try:
        molecule_img = generate_molecule_image(smiles)
        if molecule_img is None:
            result += "\n\n⚠️ **Warning:** Could not generate molecular structure image"
    except Exception as e:
        result += f"\n\n⚠️ **Warning:** Error generating molecular structure: {str(e)}"
        molecule_img = None

    return result, molecule_img
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def analyze_molecule_image_only(smiles):
    """Return only the structure image for a SMILES string (compact UI).

    Returns:
        The rendered image; None when validation fails or both render
        attempts return None; or, if rendering raises, a white 500x400
        placeholder carrying an error message (None if even that fails).
    """
    # Validate SMILES first
    validation_error = validate_smiles(smiles)
    if validation_error:
        print(f"SMILES validation error: {validation_error}")
        return None

    # validate_smiles() checks the stripped text; render that same text
    smiles = smiles.strip()

    try:
        molecule_img = generate_molecule_image(smiles, size=(500, 400))
        if molecule_img is None:
            print(f"Failed to generate image for SMILES: {smiles}")
            # Try with a different size as fallback
            molecule_img = generate_molecule_image(smiles, size=(300, 300))
            if molecule_img is None:
                print(f"Fallback image generation also failed for SMILES: {smiles}")
                return None
        else:
            print(f"Successfully generated image for SMILES: {smiles}")
        return molecule_img
    except Exception as e:
        print(f"Error generating molecule image: {e}")

    # Rendering raised: draw a plain text placeholder instead
    try:
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (500, 400), color='white')
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.load_default()
        except Exception:
            # draw.text() accepts font=None
            font = None

        # Only show an ellipsis when the SMILES was actually truncated
        shown = smiles if len(smiles) <= 50 else f"{smiles[:50]}..."
        text = f"Error generating molecule\nSMILES: {shown}"
        draw.text((50, 200), text, fill='red', font=font)
        return img
    except Exception as fallback_error:
        print(f"Could not build fallback image: {fallback_error}")
        return None
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def get_molecule_properties_for_hover(smiles):
    """Build the bullet-list property summary shown in the hover tooltip.

    Returns a Markdown string; when validation or property calculation
    fails, the string is an '**Error:** ...' message instead.
    """
    # Reject invalid input with the validator's own message
    problem = validate_smiles(smiles)
    if problem:
        print(f"SMILES validation error in properties: {problem}")
        return f"**Error:** {problem}"

    props = calculate_molecular_properties(smiles)
    if not props:
        print(f"Failed to calculate properties for SMILES: {smiles}")
        return f"**Error:** Could not calculate molecular properties for {smiles}"

    drug_like = 'Yes' if props['Drug-like'] else 'No'

    # One entry per output fragment; joined without separators
    fragments = [
        "**Basic Properties:**\n",
        f"• Molecular Weight: {props['Molecular Weight']} g/mol\n",
        f"• LogP: {props['LogP']}\n",
        f"• TPSA: {props['TPSA']} Ų\n",
        f"• Heavy Atoms: {props['Heavy Atoms']}\n\n",
        "**Hydrogen Bonding:**\n",
        f"• HBD (Donors): {props['HBD']}\n",
        f"• HBA (Acceptors): {props['HBA']}\n\n",
        "**Structural Features:**\n",
        f"• Rotatable Bonds: {props['Rotatable Bonds']}\n",
        f"• Aromatic Rings: {props['Aromatic Rings']}\n\n",
        "**Drug-likeness:**\n",
        f"• Lipinski Violations: {props['Lipinski Violations']}/4\n",
        f"• Drug-like: {drug_like}",
    ]
    hover_text = "".join(fragments)

    print(f"Generated properties text for SMILES: {smiles}")
    return hover_text
|
src/molecules/variations.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Molecular Variations Module
|
| 3 |
+
|
| 4 |
+
This module handles generation of multiple visualizations
|
| 5 |
+
and variations of chemical structures.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from rdkit import Chem
|
| 9 |
+
from rdkit.Chem import Draw
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def generate_molecule_variations(base_smiles, num_variations=12):
    """Generate several renderings of one structure for the grid display.

    Sizes and the kekulize flag are cycled by index. If a rendering fails,
    a basic 150x150 kekulized rendering is tried; if that fails too, the
    variation is skipped.

    Args:
        base_smiles: SMILES string of the molecule to draw.
        num_variations: number of renderings to attempt.

    Returns:
        A list of dicts with keys 'image', 'smiles', 'variation_id', 'size'
        and 'kekulize'; empty when the SMILES cannot be parsed.
    """
    mol = Chem.MolFromSmiles(base_smiles)
    if not mol:
        return []

    # Rendering parameters cycled through by variation index
    sizes = [(150, 150), (180, 180), (200, 200), (160, 160)]
    styles = [True, False]  # kekulize vs non-kekulize

    variations = []
    for i in range(num_variations):
        size = sizes[i % len(sizes)]
        kekulize = styles[i % len(styles)]

        # Catch Exception, not a bare except, so KeyboardInterrupt/SystemExit
        # propagate; failures are logged instead of disappearing.
        try:
            img = Draw.MolToImage(mol, size=size, kekulize=kekulize)
        except Exception as exc:
            print(f"Variation {i + 1} failed ({exc}); falling back to basic rendering")
            size, kekulize = (150, 150), True
            try:
                img = Draw.MolToImage(mol, size=size, kekulize=kekulize)
            except Exception as fallback_exc:
                print(f"Fallback rendering for variation {i + 1} failed: {fallback_exc}")
                continue

        variations.append({
            'image': img,
            'smiles': base_smiles,
            'variation_id': i + 1,
            'size': size,
            'kekulize': kekulize,
        })

    return variations
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def generate_chemical_series_variations(base_smiles):
    """Render one molecule in twelve named visualization presets.

    Returns a list of dicts with keys 'image', 'smiles', 'variation_id',
    'style', 'size' and 'kekulize'. A preset whose rendering raises is
    reported via print and left out. An unparsable SMILES gives [].
    """
    mol = Chem.MolFromSmiles(base_smiles)
    if not mol:
        return []

    # (size, kekulize, label) for each preset, in display order
    presets = (
        ((200, 200), True, 'Standard'),
        ((180, 180), False, 'Kekulé'),
        ((220, 220), True, 'Large'),
        ((160, 160), False, 'Compact'),
        ((200, 200), True, 'Detailed'),
        ((190, 190), False, 'Minimal'),
        ((210, 210), True, 'Enhanced'),
        ((170, 170), False, 'Focused'),
        ((200, 200), True, 'Classic'),
        ((185, 185), False, 'Modern'),
        ((195, 195), True, 'Scientific'),
        ((175, 175), False, 'Clean'),
    )

    results = []
    for number, (dimensions, use_kekulize, label) in enumerate(presets, start=1):
        try:
            print(f"Generating variation {number}: {label}")
            picture = Draw.MolToImage(mol, size=dimensions, kekulize=use_kekulize)
            print(f"Successfully generated image for {label}, type: {type(picture)}")
            results.append({
                'image': picture,
                'smiles': base_smiles,
                'variation_id': number,
                'style': label,
                'size': dimensions,
                'kekulize': use_kekulize,
            })
        except Exception as error:
            print(f"Failed to generate image for {label}: {error}")

    return results
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def generate_molecule_images(smiles_list):
    """Generate one image per SMILES string, keeping the input order.

    The result always has one entry per input. An entry that cannot be
    parsed or rendered (including non-string entries that make RDKit raise)
    gets a blank white 200x200 placeholder, or None if PIL is unavailable.
    """
    images = []
    for smiles in smiles_list:
        # One bad entry must not abort the batch, so failures are treated
        # like an unparsable SMILES and reported.
        try:
            mol = Chem.MolFromSmiles(smiles)
            img = Draw.MolToImage(mol, size=(200, 200), kekulize=True) if mol else None
        except Exception as exc:
            print(f"Failed to render SMILES {smiles!r}: {exc}")
            img = None

        if img is None:
            # Placeholder keeps the output aligned with the input list
            try:
                from PIL import Image
                img = Image.new('RGB', (200, 200), color='white')
            except ImportError:
                # Fallback if PIL not available
                img = None

        images.append(img)
    return images
|
src/ui/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
UI Components and Handlers Module
|
| 3 |
+
|
| 4 |
+
This module contains UI components, layouts, and event handlers
|
| 5 |
+
for the drug discovery application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .components import (
|
| 9 |
+
create_molecular_analysis_tab,
|
| 10 |
+
create_chemical_variations_tab,
|
| 11 |
+
create_molecular_gallery_tab,
|
| 12 |
+
create_drug_library_tab
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from .handlers import (
|
| 16 |
+
VariationHandlers,
|
| 17 |
+
BookmarkHandlers,
|
| 18 |
+
create_quick_example_handlers
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
__all__ = [
|
| 22 |
+
'create_molecular_analysis_tab',
|
| 23 |
+
'create_chemical_variations_tab',
|
| 24 |
+
'create_molecular_gallery_tab',
|
| 25 |
+
'create_drug_library_tab',
|
| 26 |
+
'VariationHandlers',
|
| 27 |
+
'BookmarkHandlers',
|
| 28 |
+
'create_quick_example_handlers'
|
| 29 |
+
]
|
src/ui/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (748 Bytes). View file
|
|
|
src/ui/__pycache__/components.cpython-313.pyc
ADDED
|
Binary file (17.2 kB). View file
|
|
|
src/ui/__pycache__/handlers.cpython-313.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
src/ui/components.py
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
UI Components Module
|
| 3 |
+
|
| 4 |
+
This module contains reusable UI components and layouts
|
| 5 |
+
for the drug discovery application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from .handlers import VariationHandlers
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def create_molecular_analysis_tab():
    """Build the widgets for the molecular analysis tab.

    Lays out two equally scaled columns: the left one holds the SMILES
    input, a hidden preview image, the quick-example buttons, the analyze
    and bookmark controls and the analysis text; the right one holds the
    rendered structure image.

    Returns:
        dict: component key -> Gradio component, in creation order.
    """
    # One inner tuple per on-screen row: (returned dict key, button caption).
    # Some keys are historical and do not match their caption (e.g.
    # 'glucose_btn' is the "Methanol" button); the keys are kept as-is
    # because they are what the function returns.
    example_rows = (
        (("alanine_btn", "Alanine"), ("aspirin_btn", "Aspirin"), ("ibuprofen_btn", "Ibuprofen")),
        (("caffeine_btn", "Caffeine"), ("glucose_btn", "Methanol"), ("benzene_btn", "Benzene")),
        (("glucose_btn2", "Acetone"), ("ethanol_btn", "Ethanol"), ("water_btn", "Water")),
        (("complex_btn1", "Glucose"), ("complex_btn2", "Cholesterol"), ("complex_btn3", "Penicillin")),
    )

    widgets = {}

    with gr.Column(scale=1):
        gr.Markdown("## 🔬 Molecular Analysis")
        widgets['smiles_input'] = gr.Textbox(
            label="Enter SMILES string",
            placeholder="e.g., C[C@H](N)C(=O)O",
            value="C[C@H](N)C(=O)O",
            info="Enter a valid SMILES string for molecular analysis"
        )

        # Status line shown under the input, followed by a hidden preview slot.
        widgets['validation_status'] = gr.Markdown("✅ Ready to analyze")
        widgets['preview_image'] = gr.Image(
            label="Preview",
            show_download_button=False,
            width=200,
            height=200,
            visible=False
        )

        # One-click example molecules, three per row.
        gr.Markdown("### Quick Examples")
        for row in example_rows:
            with gr.Row():
                for key, caption in row:
                    widgets[key] = gr.Button(caption, size="sm")

        widgets['analyze_btn'] = gr.Button("🔍 Analyze Molecule", variant="primary", size="lg")

        # Bookmark controls: optional name plus the bookmark button.
        with gr.Row():
            widgets['bookmark_name'] = gr.Textbox(
                placeholder="Enter molecule name (optional)",
                label="Molecule Name",
                scale=2
            )
            widgets['bookmark_btn'] = gr.Button("🔖 Bookmark", variant="secondary", size="sm", scale=1)

        widgets['bookmark_status'] = gr.Markdown("")

        gr.Markdown("### 📊 Analysis Results")
        widgets['analysis_output'] = gr.Markdown()

    with gr.Column(scale=1):
        gr.Markdown("### 🧪 Molecular Structure")
        widgets['molecule_image'] = gr.Image(
            label="Chemical Structure",
            show_download_button=False,
            width=400,
            height=400
        )

    return widgets
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def create_chemical_variations_tab():
    """Create the consolidated drug discovery tab with molecular analysis and variations.

    The tab is a single row with two columns:

    * left (scale 3): SMILES input, generate/clear buttons, quick-example
      buttons, analyze and bookmark controls, two hidden text boxes that
      carry the selected SMILES/style, and the AI chat widgets;
    * right (scale 4): the main structure image, the properties text, the
      variations gallery, paging buttons and the grid sliders.

    Returns:
        dict: component key -> Gradio component for every widget that
        callers need to wire events to.
    """
    # Notification helper markup + script.
    # NOTE(review): Gradio may not execute <script> tags delivered through
    # gr.HTML, and this block is created with visible=False, so confirm the
    # notifications actually fire; the Blocks-level JS hooks are the
    # documented place for page scripts.
    gr.HTML("""
    <div id="notification-container"></div>
    <script>
    function showNotification(message, type = 'success', duration = 2000) {
        const container = document.getElementById('notification-container');
        // Nothing to render into (e.g. the container is not in the DOM).
        if (!container) {
            return;
        }

        // Remove existing notifications
        container.innerHTML = '';

        // Create notification element
        const notification = document.createElement('div');
        notification.className = `temp-notification ${type}`;
        notification.textContent = message;

        // Add to container
        container.appendChild(notification);

        // Auto-remove after duration
        setTimeout(() => {
            if (notification.parentNode) {
                notification.style.opacity = '0';
                notification.style.transform = 'translateX(100%)';
                setTimeout(() => {
                    if (notification.parentNode) {
                        notification.remove();
                    }
                }, 300);
            }
        }, duration);
    }

    // Listen for validation status changes
    function watchValidationStatus() {
        const validationStatus = document.getElementById('validation_status');
        if (!validationStatus) {
            return;
        }
        const observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.type === 'childList' || mutation.type === 'characterData') {
                    const text = validationStatus.textContent || validationStatus.innerText;
                    if (text && text.trim()) {
                        if (text.includes('✅') && text.includes('Ready to analyze')) {
                            showNotification('✅ Ready to analyze', 'success');
                        } else if (text.includes('✅') && text.includes('Valid SMILES')) {
                            showNotification('✅ Valid SMILES', 'success');
                        } else if (text.includes('❌')) {
                            showNotification('❌ Invalid SMILES', 'error');
                        }
                    }
                }
            });
        });
        observer.observe(validationStatus, { childList: true, characterData: true, subtree: true });
    }

    // DOMContentLoaded has already fired when this markup is injected after
    // page load, so start immediately in that case instead of never.
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', watchValidationStatus);
    } else {
        watchValidationStatus();
    }
    </script>
    """, visible=False)

    with gr.Row():
        with gr.Column(scale=3):
            # Left side: Molecular Analysis
            variation_smiles_input = gr.Textbox(
                label="Enter SMILES string",
                placeholder="e.g., C[C@H](N)C(=O)O",
                value="C[C@H](N)C(=O)O",
                info="Enter a valid SMILES string for molecular analysis"
            )

            # Hidden validation status for popup notifications; its elem_id
            # is the element the script above observes.
            validation_status = gr.Markdown(
                value="",
                visible=False,
                elem_id="validation_status"
            )
            preview_image = gr.Image(
                label="Preview",
                show_download_button=False,
                width=200,
                height=200,
                visible=False
            )

            # Generate variations controls - moved up for better UX
            with gr.Row():
                generate_variations_btn = gr.Button("🔄 Generate Variations", variant="primary")
                clear_variations_btn = gr.Button("🗑️ Clear", variant="secondary")

            # Quick access buttons for common molecules.
            # Several variable names are historical and do not match the
            # caption (glucose_btn -> "Methanol", glucose_btn2 -> "Acetone",
            # complex_btn1..3 -> Glucose/Cholesterol/Penicillin); the
            # returned dict keys are kept unchanged for callers.
            gr.Markdown("### Quick Examples")
            with gr.Row():
                alanine_btn = gr.Button("Alanine", size="sm")
                aspirin_btn = gr.Button("Aspirin", size="sm")
                ibuprofen_btn = gr.Button("Ibuprofen", size="sm")

            with gr.Row():
                caffeine_btn = gr.Button("Caffeine", size="sm")
                glucose_btn = gr.Button("Methanol", size="sm")
                benzene_btn = gr.Button("Benzene", size="sm")

            with gr.Row():
                glucose_btn2 = gr.Button("Acetone", size="sm")
                ethanol_btn = gr.Button("Ethanol", size="sm")
                water_btn = gr.Button("Water", size="sm")

            with gr.Row():
                complex_btn1 = gr.Button("Glucose", size="sm")
                complex_btn2 = gr.Button("Cholesterol", size="sm")
                complex_btn3 = gr.Button("Penicillin", size="sm")

            analyze_btn = gr.Button("🔍 Analyze Molecule", variant="primary", size="lg")

            # Bookmark functionality
            with gr.Row():
                bookmark_name = gr.Textbox(
                    placeholder="Enter molecule name (optional)",
                    label="Molecule Name",
                    scale=2
                )
                bookmark_btn = gr.Button("🔖 Bookmark", variant="secondary", size="sm", scale=1)

            bookmark_status = gr.Markdown("")

            # Hidden components for internal use (not displayed to user)
            selected_smiles_display = gr.Textbox(
                visible=False,
                elem_id="selected_smiles"
            )
            selected_style_display = gr.Textbox(
                visible=False,
                elem_id="selected_style"
            )

            # Enhanced AI Chat Interface
            gr.Markdown("#### 🤖 AI Drug Discovery Assistant")
            gr.Markdown("Ask the AI about molecular properties, generate new structures, or get drug discovery insights")

            # AI Settings
            with gr.Row():
                hf_token_input = gr.Textbox(
                    label="Hugging Face Token",
                    type="password",
                    placeholder="Enter your Hugging Face token for AI features",
                    scale=2
                )
                ai_temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="AI Temperature",
                    scale=1
                )

            # Main chat interface
            ai_chatbot = gr.Chatbot(
                label="AI Drug Discovery Chat",
                height=400,
                elem_id="ai_chatbot",
                type="messages"
            )

            with gr.Row():
                ai_chat_input = gr.Textbox(
                    placeholder="e.g., 'Explain the drug-likeness of this molecule' or 'Generate a more soluble derivative'",
                    label="Your message",
                    scale=4
                )
                ai_send_btn = gr.Button("Send", variant="primary", scale=1)

            # AI Generated Structures display
            gr.Markdown("#### 🧬 AI Generated Structures")
            ai_generated_grid = gr.Gallery(
                label="AI Generated Structures",
                show_label=False,
                elem_id="ai_generated_grid",
                columns=3,
                rows=2,
                height=200,
                object_fit="contain",
                allow_preview=True
            )

        with gr.Column(scale=4):
            # Right side: Main structure display + variations grid
            main_structure_display = gr.Image(
                show_download_button=False,
                elem_id="main_structure"
            )

            # Seamless properties display
            properties_display = gr.Markdown(
                value="*Click 'Analyze Molecule' or select a variation to see properties*",
                elem_id="properties_display",
                elem_classes="seamless-properties"
            )

            # Seamless variations section
            with gr.Column(elem_id="variations_container"):
                variations_grid = gr.Gallery(
                    show_label=False,
                    elem_id="variations_gallery",
                    columns=3,
                    rows=3,
                    height=300,
                    object_fit="contain",
                    allow_preview=True,
                    show_share_button=False,
                    show_download_button=False
                )

                # Navigation controls
                with gr.Row():
                    prev_page_btn = gr.Button("⬅️ Previous", size="sm")
                    page_info = gr.Markdown("Page 1 of 1", elem_classes="page-info")
                    next_page_btn = gr.Button("➡️ Next", size="sm")

                # Grid controls
                with gr.Row(elem_classes="grid-controls"):
                    grid_size_slider = gr.Slider(
                        minimum=4,
                        maximum=8,
                        value=4,
                        step=1,
                        label="Grid Columns",
                        elem_id="grid_size_slider"
                    )
                    variation_count_slider = gr.Slider(
                        minimum=6,
                        maximum=24,
                        value=12,
                        step=6,
                        label="Number of Variations",
                        elem_id="variation_count_slider"
                    )

    return {
        # Molecular Analysis components
        'variation_smiles_input': variation_smiles_input,
        'validation_status': validation_status,
        'preview_image': preview_image,
        'alanine_btn': alanine_btn,
        'aspirin_btn': aspirin_btn,
        'ibuprofen_btn': ibuprofen_btn,
        'caffeine_btn': caffeine_btn,
        'glucose_btn': glucose_btn,
        'benzene_btn': benzene_btn,
        'glucose_btn2': glucose_btn2,
        'ethanol_btn': ethanol_btn,
        'water_btn': water_btn,
        'complex_btn1': complex_btn1,
        'complex_btn2': complex_btn2,
        'complex_btn3': complex_btn3,
        'analyze_btn': analyze_btn,
        'bookmark_name': bookmark_name,
        'bookmark_btn': bookmark_btn,
        'bookmark_status': bookmark_status,
        'main_structure_display': main_structure_display,
        'properties_display': properties_display,

        # Variations components
        'generate_variations_btn': generate_variations_btn,
        'clear_variations_btn': clear_variations_btn,
        'selected_smiles_display': selected_smiles_display,
        'selected_style_display': selected_style_display,
        'variations_grid': variations_grid,
        'prev_page_btn': prev_page_btn,
        'page_info': page_info,
        'next_page_btn': next_page_btn,
        'grid_size_slider': grid_size_slider,
        'variation_count_slider': variation_count_slider,

        # AI components
        'hf_token_input': hf_token_input,
        'ai_temperature': ai_temperature,
        'ai_chatbot': ai_chatbot,
        'ai_chat_input': ai_chat_input,
        'ai_send_btn': ai_send_btn,
        'ai_generated_grid': ai_generated_grid
    }
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def create_molecular_gallery_tab():
    """Create the molecular gallery tab components.

    Builds nine image slots arranged as a 3x3 grid (labelled
    "Molecule 1" .. "Molecule 9", row-major) followed by a gallery for
    the user's bookmarked molecules.

    Returns:
        dict: ``'image_components'`` (list of the nine ``gr.Image``
        slots in row-major order) and ``'bookmarked_gallery'``.
    """
    gr.Markdown("### Common Drug Discovery Molecules")
    image_components = []

    # Display images in a 3x3 grid. The three rows are stacked inside a
    # Column: wrapping them in an outer Row (as before) lays the rows out
    # side by side, so the grid only appeared when the page happened to wrap.
    with gr.Column():
        for row in range(3):
            with gr.Row():
                for col in range(3):
                    idx = row * 3 + col
                    img = gr.Image(
                        show_download_button=False,
                        width=200,
                        height=200,
                        label=f"Molecule {idx + 1}",
                    )
                    image_components.append(img)

    # Bookmarked molecules section
    gr.Markdown("### 🔖 Your Bookmarked Molecules")
    bookmarked_gallery = gr.Gallery(
        label="Bookmarked Structures",
        show_label=False,
        elem_id="bookmarked_gallery",
        columns=4,
        rows=1,
        height=200,
        object_fit="contain"
    )

    return {
        'image_components': image_components,
        'bookmarked_gallery': bookmarked_gallery
    }
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def create_drug_library_tab():
    """Build the NSAID drug-series tab and return its image slots.

    Five image placeholders are created, labelled "Drug 1" .. "Drug 5",
    split over two rows (three, then two).

    Returns:
        dict: ``'drug_images'`` -> list of the five ``gr.Image`` slots in
        creation order.
    """
    gr.Markdown("### NSAID Drug Series")

    # (first drug number, number of images) for each on-screen row.
    row_layout = ((1, 3), (4, 2))

    slots = []
    for first_number, count in row_layout:
        with gr.Row():
            for number in range(first_number, first_number + count):
                slots.append(
                    gr.Image(
                        show_download_button=False,
                        width=200,
                        height=200,
                        label=f"Drug {number}",
                    )
                )

    return {'drug_images': slots}
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def create_new_experiment_tab():
    """Build the placeholder tab reserved for new experiments.

    Renders a heading, a short explanation, a checklist and a dashed
    placeholder box. No component needs wiring, so an empty dict is
    returned.
    """
    intro_text = (
        "This tab is intentionally left blank so you can prototype new "
        "ideas without affecting the existing lab, gallery, or library views."
    )
    checklist = "\n".join([
        "- Add new components here as you explore ideas",
        "- Copy elements from other tabs if needed",
        "- Wire up handlers in `src/app.py` once ready",
    ])
    placeholder_box = (
        "<div style='border:1px dashed #bbb; padding:2rem; text-align:center;'>"
        "Your custom UI goes here"
        "</div>"
    )

    with gr.Column():
        gr.Markdown("## 🧪 New Experiment Workspace")
        gr.Markdown(intro_text)
        gr.Markdown(checklist)
        gr.HTML(placeholder_box)

    return {}
|
src/ui/handlers.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
UI Handlers Module
|
| 3 |
+
|
| 4 |
+
This module contains event handlers and business logic
|
| 5 |
+
for the drug discovery application UI components.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from ..molecules.analysis import analyze_molecule_image_only, validate_smiles_realtime, get_molecule_properties_for_hover
|
| 9 |
+
from ..molecules.variations import generate_chemical_series_variations, generate_molecule_images
|
| 10 |
+
from ..ai.services import respond, handle_structure_chat, parse_ai_structures
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class VariationHandlers:
    """Handles variation-related functionality.

    Holds the most recently generated variations (a list of dicts with at
    least the keys ``'image'``, ``'smiles'`` and ``'style'``) together with
    simple paging state, and turns them into the tuples the UI callbacks
    return.
    """

    def __init__(self):
        self.current_variations = []
        self.current_page = 0
        self.variations_per_page = 12

    def _page_size(self):
        """Return the page size as a positive int (sliders may hand over floats)."""
        return max(1, int(self.variations_per_page))

    def _variation_outputs(self, variation):
        """Return ``(image, smiles, style, properties_text)`` for one variation."""
        properties_text = get_molecule_properties_for_hover(variation['smiles'])
        return variation['image'], variation['smiles'], variation['style'], properties_text

    def _first_variation_or_empty(self):
        """Fallback result: the first stored variation, or empty outputs."""
        if self.current_variations:
            return self._variation_outputs(self.current_variations[0])
        return None, "", "", ""

    def generate_variations_for_display(self, smiles, num_variations=12):
        """Generate variations and format for gallery display.

        Args:
            smiles: SMILES string the variations are generated from.
            num_variations: maximum number of variations to keep; ``None``
                keeps all of them.

        Returns:
            tuple: ``(gallery_items, smiles, style)`` where ``gallery_items``
            is a list of ``(image, caption)`` pairs and ``style`` is the
            style of the first variation (``"None"`` when there are none).
        """
        print("=== GENERATE_VARIATIONS_FOR_DISPLAY CALLED ===")
        print(f"SMILES: {smiles}")
        print(f"Num variations: {num_variations}")

        variations = generate_chemical_series_variations(smiles)
        print(f"Generated {len(variations)} variations")

        # A float slice bound raises TypeError, so normalise the count first.
        limit = None if num_variations is None else max(0, int(num_variations))
        self.current_variations = variations[:limit]
        # A fresh result set always starts on the first page; a stale page
        # number would point past (or into the wrong part of) the new list.
        self.current_page = 0
        print(f"Stored {len(self.current_variations)} variations in current_variations")

        # Format for gallery display
        gallery_items = []
        for i, var in enumerate(self.current_variations):
            print(f"Variation {i}: {var.get('style', 'Unknown')}, image type: {type(var.get('image', None))}")
            gallery_items.append((var['image'], f"Style: {var['style']}"))

        first_style = self.current_variations[0]['style'] if self.current_variations else "None"
        result = (gallery_items, smiles, first_style)
        print(f"Returning: {len(gallery_items)} gallery items, SMILES: {smiles}, style: {result[2]}")
        print("=== GENERATE_VARIATIONS_FOR_DISPLAY COMPLETE ===")

        return result

    def select_variation(self, evt):
        """Handle selection of a variation from the grid.

        Args:
            evt: an event object exposing ``.index``, a plain number, or
                ``None``. The index is interpreted relative to the page
                currently shown in the gallery.

        Returns:
            tuple: ``(image, smiles, style, properties_text)`` of the chosen
            variation. Falls back to the first stored variation when the
            event is missing, of an unexpected type or out of range, and to
            ``(None, "", "", "")`` when nothing is stored or an error occurs.
        """
        try:
            print("=== SELECT_VARIATION CALLED ===")
            print(f"Event: {evt}, type: {type(evt)}")
            print(f"Current variations count: {len(self.current_variations)}")

            # If event is None, try to get the first variation as default
            if evt is None:
                print("Event is None, trying to return first variation")
                if self.current_variations:
                    print(f"Using first variation: {self.current_variations[0].get('style', 'Unknown')}")
                else:
                    print("No variations available, returning empty")
                return self._first_variation_or_empty()

            # Handle both event object and direct index
            if hasattr(evt, 'index'):
                index = evt.index
            elif isinstance(evt, (int, float)):
                index = int(evt)
            else:
                print(f"Unexpected event type: {type(evt)}, value: {evt}")
                return self._first_variation_or_empty()

            print(f"Selected index: {index}")

            # After paging the gallery only shows one page, so the clicked
            # position has to be shifted by the page offset. On page 0 the
            # offset is zero and the index is used unchanged.
            absolute_index = self.current_page * self._page_size() + index

            # Negative positions would silently wrap around to the end of
            # the list, so they are treated as out of range too.
            if index < 0 or absolute_index >= len(self.current_variations):
                print(f"No variations available or index {index} out of range (total: {len(self.current_variations)})")
                return self._first_variation_or_empty()

            selected_var = self.current_variations[absolute_index]
            print(f"Selected variation {absolute_index}: {selected_var.get('style', 'Unknown')}")
            print(f"Selected variation image type: {type(selected_var['image'])}")
            print(f"Selected variation SMILES: {selected_var['smiles']}")

            # Also update properties for the selected variation
            print(f"Getting properties for SMILES: {selected_var['smiles']}")
            result = self._variation_outputs(selected_var)
            properties_text = result[3]
            print(f"Properties text length: {len(properties_text) if properties_text else 'None'}")
            print(f"Properties text preview: {properties_text[:100] if properties_text else 'None'}...")

            print(f"Returning result: {len(result)} items")
            print(f"Image type: {type(result[0])}")
            print(f"SMILES: {result[1]}")
            print(f"Style: {result[2]}")
            print(f"Properties length: {len(result[3]) if result[3] else 'None'}")
            print("=== SELECT_VARIATION COMPLETE ===")

            return result
        except Exception as e:
            # UI callback boundary: report the problem and hand back empty
            # outputs instead of propagating the exception.
            print(f"Error in select_variation: {e}")
            import traceback
            traceback.print_exc()
            return None, "", "", ""

    def clear_variations(self):
        """Clear all variations and reset display.

        Returns:
            tuple: ``([], "", "")`` - empty gallery, SMILES and style.
        """
        self.current_variations = []
        self.current_page = 0
        return [], "", ""

    def navigate_variations(self, direction):
        """Navigate through variations pages.

        Args:
            direction: ``"next"`` or ``"prev"``; any other value re-renders
                the current page.

        Returns:
            tuple: ``(gallery_items, page_info, image, smiles, style)``
            where the last three describe the first variation on the page.
            With no stored variations the result is
            ``([], "Page 1 of 1", None, "", "")``.
        """
        if not self.current_variations:
            return [], "Page 1 of 1", None, "", ""

        per_page = self._page_size()
        total = len(self.current_variations)
        total_pages = (total + per_page - 1) // per_page

        target_page = self.current_page
        if direction == "next":
            target_page += 1
        elif direction == "prev":
            target_page -= 1
        # Clamp in every case: the page size may have changed since the
        # page number was stored, which could leave it past the last page.
        self.current_page = min(max(target_page, 0), total_pages - 1)

        # Get variations for current page (never empty thanks to the clamp)
        start_idx = self.current_page * per_page
        page_variations = self.current_variations[start_idx:start_idx + per_page]

        # Format for gallery display
        gallery_items = [(var['image'], f"Style: {var['style']}") for var in page_variations]
        page_info = f"Page {self.current_page + 1} of {total_pages}"

        first = page_variations[0]
        return gallery_items, page_info, first['image'], first['smiles'], first['style']

    def update_variation_count(self, count):
        """Store ``count`` as the page size used for paging.

        The value is coerced to a positive int because UI sliders may hand
        over floats, which cannot be used as slice bounds.

        Returns:
            int: the value actually stored.
        """
        self.variations_per_page = max(1, int(count))
        return self.variations_per_page

    def analyze_molecule_with_tooltip(self, smiles):
        """Analyze molecule and return image with tooltip data.

        Returns:
            tuple: ``(molecule_img, tooltip_text)`` as produced by
            ``analyze_molecule_image_only`` and
            ``get_molecule_properties_for_hover``.
        """
        molecule_img = analyze_molecule_image_only(smiles)
        tooltip_text = get_molecule_properties_for_hover(smiles)

        # For now, we'll return the image and tooltip text separately
        # The tooltip will be handled by JavaScript or CSS
        return molecule_img, tooltip_text
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class BookmarkHandlers:
    """Handles bookmark functionality.

    Bookmarks live in memory only, as a list of dicts with the keys
    ``'smiles'``, ``'name'`` and ``'timestamp'`` (a simple running counter).
    """

    def __init__(self):
        self.bookmarked_molecules = []

    def bookmark_molecule(self, smiles, molecule_name=""):
        """Add a molecule to the bookmarked collection.

        Args:
            smiles: SMILES string to bookmark; surrounding whitespace is
                ignored.
            molecule_name: optional display name; a ``Bookmarked_<n>`` name
                is generated when it is empty or blank.

        Returns:
            str: a status message describing the outcome (bookmarked,
            already bookmarked, or invalid SMILES).
        """
        # Reject missing/blank input before asking RDKit: a non-string would
        # raise inside the parser, and an empty string can be parsed as an
        # empty molecule and would otherwise be stored as a blank bookmark.
        smiles = smiles.strip() if isinstance(smiles, str) else ""
        if not smiles:
            return "❌ Invalid SMILES string - cannot bookmark"

        from rdkit import Chem

        # Validate SMILES first
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return "❌ Invalid SMILES string - cannot bookmark"

        # Check if already bookmarked
        if any(bm['smiles'] == smiles for bm in self.bookmarked_molecules):
            return "⚠️ Molecule already bookmarked"

        # Generate a name if not provided (or only whitespace)
        molecule_name = molecule_name.strip() if isinstance(molecule_name, str) else molecule_name
        if not molecule_name:
            molecule_name = f"Bookmarked_{len(self.bookmarked_molecules) + 1}"

        # Add to bookmarks
        self.bookmarked_molecules.append({
            'smiles': smiles,
            'name': molecule_name,
            'timestamp': len(self.bookmarked_molecules) + 1  # Simple counter
        })

        return f"✅ Bookmarked: {molecule_name}"

    def get_bookmarked_molecules(self):
        """Get all bookmarked molecules for display."""
        return self.bookmarked_molecules

    def remove_bookmark(self, smiles):
        """Remove a molecule from bookmarks.

        Every stored entry whose SMILES equals ``smiles`` is dropped; the
        same message is returned whether or not anything matched.
        """
        self.bookmarked_molecules = [bm for bm in self.bookmarked_molecules if bm['smiles'] != smiles]
        return "🗑️ Removed from bookmarks"

    def bookmark_current_molecule(self, smiles, name):
        """Bookmark current molecule and update gallery.

        Returns:
            tuple: ``(status_message, gallery_items)`` where
            ``gallery_items`` is a list of ``(image, caption)`` pairs for
            every stored bookmark that RDKit can still parse.
        """
        from rdkit import Chem
        from rdkit.Chem import Draw

        result = self.bookmark_molecule(smiles, name)

        # Update the bookmarked gallery
        gallery_items = []
        for entry in self.get_bookmarked_molecules():
            # Generate smaller images for gallery
            mol_obj = Chem.MolFromSmiles(entry['smiles'])
            if mol_obj:
                img = Draw.MolToImage(mol_obj, size=(150, 150), kekulize=True)
                gallery_items.append((img, f"{entry['name']}: {entry['smiles']}"))
        return result, gallery_items
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class AIHandler:
    """Handles AI chat functionality with both general questions and structure generation."""

    # System prompts for the two request kinds.
    _STRUCTURE_PROMPT = (
        "You are an expert medicinal chemist. Generate new chemical structures based on user requests."
    )
    _GENERAL_PROMPT = (
        "You are an expert medicinal chemist and drug discovery specialist. Help with molecular analysis, drug design, and medicinal chemistry questions."
    )
    # A message containing any of these words is treated as a request to
    # generate structures (only when a molecule is selected).
    _STRUCTURE_KEYWORDS = ('generate', 'create', 'modify', 'derivative', 'variant', 'structure')

    def __init__(self):
        self.chat_history = []

    def handle_ai_chat(self, message, history, selected_smiles, hf_token, temperature):
        """Handle AI chat with both general questions and structure generation.

        Args:
            message: the user's text; blank or ``None`` is ignored.
            history: list of ``{"role", "content"}`` dicts; it is appended
                to in place. ``None`` is treated as an empty history.
            selected_smiles: SMILES of the currently selected molecule, if
                any; required for structure generation.
            hf_token: access token forwarded to ``respond``; blank or
                ``None`` makes the call a no-op.
            temperature: sampling temperature forwarded to ``respond``.

        Returns:
            tuple: ``(history, structures)``. ``structures`` is the result
            of ``parse_ai_structures`` for structure requests and ``[]``
            otherwise (including when nothing was sent).
        """
        # UI callbacks may hand over None for untouched inputs.
        if history is None:
            history = []
        if not (message or "").strip() or not (hf_token or "").strip():
            return history, []

        # Add user message to history
        history.append({"role": "user", "content": message})

        # Determine if this is a structure generation request
        lowered = message.lower()
        is_structure_request = any(keyword in lowered for keyword in self._STRUCTURE_KEYWORDS)
        wants_structures = bool(is_structure_request and selected_smiles)

        system_prompt = self._STRUCTURE_PROMPT if wants_structures else self._GENERAL_PROMPT

        # Only the last yielded chunk is kept as the reply; the history
        # passed along excludes the user message that was just appended.
        ai_response = ""
        for chunk in respond(message, history[:-1], system_prompt,
                             512, temperature, 0.9, hf_token):
            ai_response = chunk

        # Add AI response to history
        history.append({"role": "assistant", "content": ai_response})

        if wants_structures:
            # Parse and generate structure images
            return history, parse_ai_structures(ai_response, selected_smiles)
        return history, []
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def create_quick_example_handlers():
    """Create handlers for quick example buttons.

    Returns:
        A dict mapping ``"<name>_handler"`` to a zero-argument callable. Each
        callable returns ``(smiles, molecule_img, tooltip_text)`` for its
        example molecule, where the image comes from
        ``analyze_molecule_image_only`` and the tooltip from
        ``get_molecule_properties_for_hover``.
    """
    examples = {
        'alanine': "C[C@H](N)C(=O)O",
        'aspirin': "CC(=O)OC1=CC=CC=C1C(=O)O",
        # 2-(4-isobutylphenyl)propanoic acid; the previous entry was a
        # chlorobenzamide, not ibuprofen.
        'ibuprofen': "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O",
        'caffeine': "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
        'methanol': "CO",
        'benzene': "c1ccccc1",
        'acetone': "CC(=O)C",
        'ethanol': "CCO",
        'water': "O",
        'glucose': "C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O",
        # Full C27 steroid skeleton; the previous entry was a small bicyclic diol.
        'cholesterol': "C[C@H](CCCC(C)C)[C@H]1CC[C@@H]2[C@@]1(CC[C@H]3[C@H]2CC=C4[C@@]3(CC[C@@H](C4)O)C)C",
        # Penicillin G; the trailing C restores the second gem-dimethyl carbon.
        'penicillin': "CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C",
    }

    def make_handler(smiles):
        # The factory gives every handler its own `smiles` binding; defining
        # `handler` directly in a loop would make all of them see the last one.
        def handler():
            molecule_img = analyze_molecule_image_only(smiles)
            tooltip_text = get_molecule_properties_for_hover(smiles)
            return smiles, molecule_img, tooltip_text
        return handler

    return {f'{name}_handler': make_handler(smiles) for name, smiles in examples.items()}
|
test_error_handling.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script for SMILES error handling and validation.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from rdkit import Chem
|
| 7 |
+
from rdkit.Chem import Draw
|
| 8 |
+
|
| 9 |
+
def validate_smiles(smiles):
    """Check a SMILES string.

    Returns None when RDKit can parse the (whitespace-stripped) input,
    otherwise a user-facing error message describing the problem.
    """
    candidate = smiles.strip() if smiles else ""
    if not candidate:
        return "Please enter a SMILES string"

    if Chem.MolFromSmiles(candidate):
        return None

    # The message echoes the input exactly as the caller supplied it.
    return (
        f"Invalid SMILES string: '{smiles}'\n\n"
        "**Common issues:**\n"
        "- Unclosed parentheses or brackets\n"
        "- Invalid characters\n"
        "- Malformed ring notation\n"
        "- Incorrect stereochemistry notation\n\n"
        "**Try:**\n"
        "- Simple molecules first (e.g., 'CO' for methanol)\n"
        "- Check parentheses balance\n"
        "- Use standard SMILES notation"
    )
|
| 19 |
+
|
| 20 |
+
def test_smiles_validation():
    """Run validate_smiles over sample inputs and print the outcome of each.

    This is a print-based smoke script: it reports pass/fail per input but
    makes no assertions.
    """
    valid_cases = [
        ("CO", "Methanol - should work"),
        ("CCO", "Ethanol - should work"),
        ("c1ccccc1", "Benzene - should work"),
        ("C[C@H](N)C(=O)O", "Alanine - should work"),
    ]
    invalid_cases = [
        ("", "Empty string - should fail"),
        ("invalid", "Invalid text - should fail"),
        ("C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O", "Complex glucose - should fail"),
        ("C(C", "Unclosed parentheses - should fail"),
        ("C1C1", "Invalid ring - should fail"),
        ("C[invalid]", "Invalid bracket - should fail"),
    ]

    print("🧪 Testing SMILES Validation")
    print("=" * 50)

    for smiles, description in valid_cases + invalid_cases:
        print(f"\n📊 Testing: {description}")
        print(f"SMILES: '{smiles}'")

        error = validate_smiles(smiles)
        # Only the first 100 characters of a failure message are shown.
        outcome = f"❌ Validation failed: {error[:100]}..." if error else "✅ Validation passed"
        print(outcome)

        print("-" * 30)
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
|
| 55 |
+
test_smiles_validation()
|
test_molecule_image.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script for molecular structure image generation.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from rdkit import Chem
|
| 7 |
+
from rdkit.Chem import Draw
|
| 8 |
+
|
| 9 |
+
def generate_molecule_image(smiles):
    """Draw the molecule described by a SMILES string.

    Returns the 300x300 image produced by ``Draw.MolToImage``, or None when
    ``Chem.MolFromSmiles`` yields no molecule for the input.
    """
    parsed = Chem.MolFromSmiles(smiles)
    # A falsy parse result means there is nothing to draw.
    return Draw.MolToImage(parsed, size=(300, 300), kekulize=True) if parsed else None
|
| 18 |
+
|
| 19 |
+
def test_molecule_images():
    """Render a handful of sample molecules and save each image as a PNG.

    For every (SMILES, name) pair this prints the outcome and, on success,
    writes ``test_<name>.png`` (name lower-cased, spaces replaced by
    underscores) into the current working directory. The last entry is a
    deliberately invalid SMILES to exercise the failure branch.
    """
    test_molecules = [
        ("CC", "Ethane"),
        ("C[C@H](N)C(=O)O", "Alanine"),
        ("CC(=O)OC1=CC=CC=C1C(=O)O", "Aspirin"),
        # 2-(4-isobutylphenyl)propanoic acid; the previous SMILES under this
        # label was a chlorobenzamide, not ibuprofen.
        ("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", "Ibuprofen"),
        ("invalid_smiles", "Invalid SMILES"),
    ]

    print("🧬 Testing Molecular Structure Generation")
    print("=" * 50)

    for smiles, name in test_molecules:
        print(f"\n📊 Testing: {name}")
        print(f"SMILES: {smiles}")

        img = generate_molecule_image(smiles)
        if img:
            print(f"✅ Successfully generated {img.size[0]}x{img.size[1]} image")
            # Save the image for verification; build the filename once so the
            # saved path and the reported path cannot drift apart.
            filename = f"test_{name.replace(' ', '_').lower()}.png"
            img.save(filename)
            print(f"💾 Saved as {filename}")
        else:
            print("❌ Failed to generate image")
        print("-" * 30)
|
| 46 |
+
|
| 47 |
+
if __name__ == "__main__":
|
| 48 |
+
test_molecule_images()
|
test_variation_selection.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to debug variation selection issue
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import os
|
| 8 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 9 |
+
|
| 10 |
+
from src.molecules.variations import generate_chemical_series_variations
|
| 11 |
+
from src.ui.handlers import VariationHandlers
|
| 12 |
+
|
| 13 |
+
def test_variation_selection():
    """Debug helper: generate variations for one molecule and select the first.

    Prints the number of variations produced for a fixed SMILES, the keys and
    a few fields of the first variation, and the tuple returned by
    ``VariationHandlers.select_variation(0)``. Makes no assertions.
    """
    print("Testing variation selection...")

    # Test SMILES
    test_smiles = "C[C@H](N)C(=O)O"

    # Generate variations
    print(f"Generating variations for SMILES: {test_smiles}")
    variations = generate_chemical_series_variations(test_smiles)
    print(f"Generated {len(variations)} variations")

    # NOTE(review): the indentation of this section was lost in the copy under
    # review; the handler/selection steps are assumed to sit inside this
    # `if` since they select index 0 - confirm against the original file.
    if variations:
        # Each variation is indexed by 'image', 'smiles' and 'style' below.
        print(f"First variation structure: {variations[0].keys()}")
        print(f"First variation image type: {type(variations[0]['image'])}")
        print(f"First variation SMILES: {variations[0]['smiles']}")
        print(f"First variation style: {variations[0]['style']}")

        # Test handler
        # The handler's state is seeded directly rather than through the UI.
        handler = VariationHandlers()
        handler.current_variations = variations

        # Test selection
        print("\nTesting selection...")
        result = handler.select_variation(0)  # Select first variation
        print(f"Selection result: {result}")
        # result is indexed as (image, SMILES, style), per the labels printed below.
        print(f"Result image type: {type(result[0]) if result[0] else 'None'}")
        print(f"Result SMILES: {result[1]}")
        print(f"Result style: {result[2]}")
|
| 42 |
+
|
| 43 |
+
if __name__ == "__main__":
|
| 44 |
+
test_variation_selection()
|