Spaces:

tagirshin
/

VQGAE

Sleeping

App Files Files Community

tagirshin committed on Oct 3, 2023

Commit

0bf1d7c

1 Parent(s): dbcf05d

added image with the background

Browse files

Files changed (2) hide show

GA_optimization.png +0 -0
app.py +9 -30

GA_optimization.png CHANGED Viewed

app.py CHANGED Viewed

@@ -304,7 +304,7 @@ The general scheme of *de novo* molecular optimisation presented in the figure b
 image = Image.open('GA_optimization.png')
 st.markdown(intro_text)
-st.image(image, caption='The protocol of Iverse QSAR with VQGAE and Genetic Algorithm')
 launch_text = '''
 If you want to edit parameters for opimisation including GA parameters, batch size and seed, you can do it in the left
@@ -360,8 +360,7 @@ if submit_side or submit_main:
     with st.spinner('Getting unique solutions'):
         unique_solutions = list(set(tuple(s) for s in ga_instance.solutions))
-    st.toast(f'{len(unique_solutions)} latent vectors were obtained')
-    # st.success(f'{len(unique_solutions)} solutions were obtained')
     scores = {
         "rf_score": [],
@@ -402,10 +401,9 @@ if submit_side or submit_main:
     chosen_ids = chosen_gen.index.to_list()
     chosen_solutions = np.array([unique_solutions[ind] for ind in chosen_ids])
     gen_frag_inds = frag_counts_to_inds(chosen_solutions, max_atoms=51)
-    st.toast(f'The number of chosen latent vectors is {gen_frag_inds.shape[0]}')
-    # st.info(f'The number of chosen solutions is {gen_frag_inds.shape[0]}', icon="ℹ️")
-    gen_molecules = []
     results = {"smiles": [], "ordering_score": [], "validity": []}
     decoding_progress = 0
     decoding_progress_text = "Decoding chosen solutions"
@@ -422,7 +420,6 @@ if submit_side or submit_main:
             vqgae_model=vqgae_model,
             clean_2d=False
         )
-        gen_molecules.extend(molecules)
         results["smiles"].extend([str(molecule) for molecule in molecules])
         results["ordering_score"].extend(scores)
         results["validity"].extend([1 if i else 0 for i in validity])
@@ -433,34 +430,16 @@ if submit_side or submit_main:
     full_stats = full_stats[["smiles", "similarity_score", "rf_score", "ordering_score", "validity"]]
     valid_gen_stats = full_stats[full_stats.validity == 1]
-    valid_gen_mols = []
-    for i, record in zip(list(valid_gen_stats.index), valid_gen_stats.to_dict("records")):
-        valid_gen_mols.append(gen_molecules[i])
-    filtered_gen_mols = []
-    filtered_indices = []
-    for mol_i, mol in enumerate(valid_gen_mols):
-        is_frag = allene < mol or peroxide_charge < mol or peroxide < mol
-        is_ring = False
-        for ring in mol.sssr:
-            if len(ring) > 8 or len(ring) < 4:
-                is_ring = True
-                break
-        if not is_frag and not is_ring:
-            filtered_gen_mols.append(mol)
-            filtered_indices.append(mol_i)
-    filtered_gen_stats = valid_gen_stats.iloc[filtered_indices]
     time_used = time() - start_time
     mins_used = int(round(time_used // 60, 0))
     seconds_used = int(round(time_used % 60, 0))
-    st.success(f'{filtered_gen_stats.shape[0]} valid solutions were obtained in {mins_used} mins {seconds_used} secs')
     st.balloons()
     st.subheader('Generation results', divider='rainbow')
-    st.dataframe(filtered_gen_stats)
     download_button_valid = download_button(
-        object_to_download=filtered_gen_stats,
         download_filename='vqgae_tubulin_inhibitors_valid.csv',
         button_text="Download results as CSV"
     )
@@ -474,10 +453,10 @@ if submit_side or submit_main:
     st.subheader('Examples of generated molecules')
-    examples_smiles = filtered_gen_stats.sort_values(
         by=["similarity_score"],
         ascending=False
-    ).iloc[:12:2].smiles.to_list()
     examples = []
     for smi in examples_smiles:

 image = Image.open('GA_optimization.png')
 st.markdown(intro_text)
+st.image(image, caption='The protocol of Inverse QSAR with VQGAE and Genetic Algorithm')
 launch_text = '''
 If you want to edit parameters for opimisation including GA parameters, batch size and seed, you can do it in the left
     with st.spinner('Getting unique solutions'):
         unique_solutions = list(set(tuple(s) for s in ga_instance.solutions))
+    st.success(f'{len(unique_solutions)} solutions were obtained')
     scores = {
         "rf_score": [],
     chosen_ids = chosen_gen.index.to_list()
     chosen_solutions = np.array([unique_solutions[ind] for ind in chosen_ids])
     gen_frag_inds = frag_counts_to_inds(chosen_solutions, max_atoms=51)
+    st.info(f'The number of chosen solutions is {gen_frag_inds.shape[0]}', icon="ℹ️")
     results = {"smiles": [], "ordering_score": [], "validity": []}
     decoding_progress = 0
     decoding_progress_text = "Decoding chosen solutions"
             vqgae_model=vqgae_model,
             clean_2d=False
         )
         results["smiles"].extend([str(molecule) for molecule in molecules])
         results["ordering_score"].extend(scores)
         results["validity"].extend([1 if i else 0 for i in validity])
     full_stats = full_stats[["smiles", "similarity_score", "rf_score", "ordering_score", "validity"]]
     valid_gen_stats = full_stats[full_stats.validity == 1]
     time_used = time() - start_time
     mins_used = int(round(time_used // 60, 0))
     seconds_used = int(round(time_used % 60, 0))
+    st.success(f'{valid_gen_stats.shape[0]} valid solutions were obtained in {mins_used} mins {seconds_used} secs')
     st.balloons()
     st.subheader('Generation results', divider='rainbow')
+    st.dataframe(valid_gen_stats)
     download_button_valid = download_button(
+        object_to_download=valid_gen_stats,
         download_filename='vqgae_tubulin_inhibitors_valid.csv',
         button_text="Download results as CSV"
     )
     st.subheader('Examples of generated molecules')
+    examples_smiles = valid_gen_stats.sort_values(
         by=["similarity_score"],
         ascending=False
+    ).iloc[:15:3].smiles.to_list()
     examples = []
     for smi in examples_smiles: