added image with the background
Browse files
- GA_optimization.png +0 -0
- app.py +9 -30
GA_optimization.png
CHANGED
|
|
app.py
CHANGED
|
@@ -304,7 +304,7 @@ The general scheme of *de novo* molecular optimisation presented in the figure b
|
|
| 304 |
|
| 305 |
image = Image.open('GA_optimization.png')
|
| 306 |
st.markdown(intro_text)
|
| 307 |
-
st.image(image, caption='The protocol of
|
| 308 |
|
| 309 |
launch_text = '''
|
| 310 |
If you want to edit parameters for opimisation including GA parameters, batch size and seed, you can do it in the left
|
|
@@ -360,8 +360,7 @@ if submit_side or submit_main:
|
|
| 360 |
with st.spinner('Getting unique solutions'):
|
| 361 |
unique_solutions = list(set(tuple(s) for s in ga_instance.solutions))
|
| 362 |
|
| 363 |
-
st.
|
| 364 |
-
# st.success(f'{len(unique_solutions)} solutions were obtained')
|
| 365 |
|
| 366 |
scores = {
|
| 367 |
"rf_score": [],
|
|
@@ -402,10 +401,9 @@ if submit_side or submit_main:
|
|
| 402 |
chosen_ids = chosen_gen.index.to_list()
|
| 403 |
chosen_solutions = np.array([unique_solutions[ind] for ind in chosen_ids])
|
| 404 |
gen_frag_inds = frag_counts_to_inds(chosen_solutions, max_atoms=51)
|
| 405 |
-
st.toast(f'The number of chosen latent vectors is {gen_frag_inds.shape[0]}')
|
| 406 |
-
# st.info(f'The number of chosen solutions is {gen_frag_inds.shape[0]}', icon="ℹ️")
|
| 407 |
|
| 408 |
-
|
|
|
|
| 409 |
results = {"smiles": [], "ordering_score": [], "validity": []}
|
| 410 |
decoding_progress = 0
|
| 411 |
decoding_progress_text = "Decoding chosen solutions"
|
|
@@ -422,7 +420,6 @@ if submit_side or submit_main:
|
|
| 422 |
vqgae_model=vqgae_model,
|
| 423 |
clean_2d=False
|
| 424 |
)
|
| 425 |
-
gen_molecules.extend(molecules)
|
| 426 |
results["smiles"].extend([str(molecule) for molecule in molecules])
|
| 427 |
results["ordering_score"].extend(scores)
|
| 428 |
results["validity"].extend([1 if i else 0 for i in validity])
|
|
@@ -433,34 +430,16 @@ if submit_side or submit_main:
|
|
| 433 |
full_stats = full_stats[["smiles", "similarity_score", "rf_score", "ordering_score", "validity"]]
|
| 434 |
valid_gen_stats = full_stats[full_stats.validity == 1]
|
| 435 |
|
| 436 |
-
valid_gen_mols = []
|
| 437 |
-
for i, record in zip(list(valid_gen_stats.index), valid_gen_stats.to_dict("records")):
|
| 438 |
-
valid_gen_mols.append(gen_molecules[i])
|
| 439 |
-
|
| 440 |
-
filtered_gen_mols = []
|
| 441 |
-
filtered_indices = []
|
| 442 |
-
for mol_i, mol in enumerate(valid_gen_mols):
|
| 443 |
-
is_frag = allene < mol or peroxide_charge < mol or peroxide < mol
|
| 444 |
-
is_ring = False
|
| 445 |
-
for ring in mol.sssr:
|
| 446 |
-
if len(ring) > 8 or len(ring) < 4:
|
| 447 |
-
is_ring = True
|
| 448 |
-
break
|
| 449 |
-
if not is_frag and not is_ring:
|
| 450 |
-
filtered_gen_mols.append(mol)
|
| 451 |
-
filtered_indices.append(mol_i)
|
| 452 |
-
|
| 453 |
-
filtered_gen_stats = valid_gen_stats.iloc[filtered_indices]
|
| 454 |
time_used = time() - start_time
|
| 455 |
mins_used = int(round(time_used // 60, 0))
|
| 456 |
seconds_used = int(round(time_used % 60, 0))
|
| 457 |
-
st.success(f'{
|
| 458 |
st.balloons()
|
| 459 |
|
| 460 |
st.subheader('Generation results', divider='rainbow')
|
| 461 |
-
st.dataframe(
|
| 462 |
download_button_valid = download_button(
|
| 463 |
-
object_to_download=
|
| 464 |
download_filename='vqgae_tubulin_inhibitors_valid.csv',
|
| 465 |
button_text="Download results as CSV"
|
| 466 |
)
|
|
@@ -474,10 +453,10 @@ if submit_side or submit_main:
|
|
| 474 |
|
| 475 |
st.subheader('Examples of generated molecules')
|
| 476 |
|
| 477 |
-
examples_smiles =
|
| 478 |
by=["similarity_score"],
|
| 479 |
ascending=False
|
| 480 |
-
).iloc[:
|
| 481 |
|
| 482 |
examples = []
|
| 483 |
for smi in examples_smiles:
|
|
|
|
| 304 |
|
| 305 |
image = Image.open('GA_optimization.png')
|
| 306 |
st.markdown(intro_text)
|
| 307 |
+
st.image(image, caption='The protocol of Inverse QSAR with VQGAE and Genetic Algorithm')
|
| 308 |
|
| 309 |
launch_text = '''
|
| 310 |
If you want to edit parameters for opimisation including GA parameters, batch size and seed, you can do it in the left
|
|
|
|
| 360 |
with st.spinner('Getting unique solutions'):
|
| 361 |
unique_solutions = list(set(tuple(s) for s in ga_instance.solutions))
|
| 362 |
|
| 363 |
+
st.success(f'{len(unique_solutions)} solutions were obtained')
|
|
|
|
| 364 |
|
| 365 |
scores = {
|
| 366 |
"rf_score": [],
|
|
|
|
| 401 |
chosen_ids = chosen_gen.index.to_list()
|
| 402 |
chosen_solutions = np.array([unique_solutions[ind] for ind in chosen_ids])
|
| 403 |
gen_frag_inds = frag_counts_to_inds(chosen_solutions, max_atoms=51)
|
|
|
|
|
|
|
| 404 |
|
| 405 |
+
st.info(f'The number of chosen solutions is {gen_frag_inds.shape[0]}', icon="ℹ️")
|
| 406 |
+
|
| 407 |
results = {"smiles": [], "ordering_score": [], "validity": []}
|
| 408 |
decoding_progress = 0
|
| 409 |
decoding_progress_text = "Decoding chosen solutions"
|
|
|
|
| 420 |
vqgae_model=vqgae_model,
|
| 421 |
clean_2d=False
|
| 422 |
)
|
|
|
|
| 423 |
results["smiles"].extend([str(molecule) for molecule in molecules])
|
| 424 |
results["ordering_score"].extend(scores)
|
| 425 |
results["validity"].extend([1 if i else 0 for i in validity])
|
|
|
|
| 430 |
full_stats = full_stats[["smiles", "similarity_score", "rf_score", "ordering_score", "validity"]]
|
| 431 |
valid_gen_stats = full_stats[full_stats.validity == 1]
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
time_used = time() - start_time
|
| 434 |
mins_used = int(round(time_used // 60, 0))
|
| 435 |
seconds_used = int(round(time_used % 60, 0))
|
| 436 |
+
st.success(f'{valid_gen_stats.shape[0]} valid solutions were obtained in {mins_used} mins {seconds_used} secs')
|
| 437 |
st.balloons()
|
| 438 |
|
| 439 |
st.subheader('Generation results', divider='rainbow')
|
| 440 |
+
st.dataframe(valid_gen_stats)
|
| 441 |
download_button_valid = download_button(
|
| 442 |
+
object_to_download=valid_gen_stats,
|
| 443 |
download_filename='vqgae_tubulin_inhibitors_valid.csv',
|
| 444 |
button_text="Download results as CSV"
|
| 445 |
)
|
|
|
|
| 453 |
|
| 454 |
st.subheader('Examples of generated molecules')
|
| 455 |
|
| 456 |
+
examples_smiles = valid_gen_stats.sort_values(
|
| 457 |
by=["similarity_score"],
|
| 458 |
ascending=False
|
| 459 |
+
).iloc[:15:3].smiles.to_list()
|
| 460 |
|
| 461 |
examples = []
|
| 462 |
for smi in examples_smiles:
|