ligdis committed on
Commit
9938028
·
verified ·
1 Parent(s): c3ffab2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -21,6 +21,7 @@ from datasets import load_dataset
21
  import requests
22
  from io import BytesIO
23
  import urllib.request
 
24
 
25
  import warnings
26
  warnings.filterwarnings('ignore')
@@ -47,21 +48,21 @@ hide_streamlit_style = """
47
  #header {visibility: hidden;}
48
  </style>
49
  """
50
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
51
 
52
  dataset = load_dataset('ligdis/data', data_files={"predictions.csv"})
53
- df_predictions = dataset['train'].to_pandas()
54
 
55
  predictions_inchikeys = df_predictions["inchikey"].tolist()
56
  df_predictions = df_predictions.rename(columns={"inchikey": "InChIKey"})
57
 
58
  dataset = load_dataset('ligdis/data', data_files={"applicability.csv"})
59
- df_applicability = dataset['train'].to_pandas()
60
 
61
  df_predictions = pd.concat([df_predictions, df_applicability], axis=1)
62
 
63
  dataset = load_dataset('ligdis/data', data_files={"cemm_smiles.csv"})
64
- cemm_smiles = dataset['train'].to_pandas()
65
 
66
  fid2smi = {}
67
  for r in cemm_smiles.values:
@@ -74,7 +75,7 @@ CRF_PATTERN_0 = "C#CC"
74
  CRF_PATTERN_1 = "N=N"
75
 
76
  dataset = load_dataset('ligdis/data', data_files={"all_fff_enamine.csv"})
77
- enamine_catalog = dataset['train'].to_pandas()
78
  enamine_catalog_ids_set = set(enamine_catalog["catalog_id"])
79
  enamine_catalog_dict = {}
80
  catalog2inchikey = {}
@@ -130,11 +131,11 @@ def has_crf(mol):
130
  return True
131
 
132
  dataset = load_dataset('ligdis/data', data_files={"model_catalog.csv"})
133
- dm = dataset['train'].to_pandas()
134
  all_models = dm["model_name"].tolist()
135
 
136
  dataset = load_dataset('ligdis/data', data_files={"models_performance.tsv"})
137
- dp = dataset['train'].to_pandas()
138
 
139
  model_display = {}
140
  model_description = {}
@@ -149,8 +150,8 @@ prom_models = [x for x in dm["model_name"].tolist() if x.startswith("promiscuity
149
  sign_models = [x for x in dm["model_name"].tolist() if x.startswith("signature")]
150
 
151
  global_promiscuity_models = ["promiscuity_pxf0", "promiscuity_pxf1", "promiscuity_pxf2"]
152
- specific_promiscuity_models = ["promiscuity_fxp0_pxf0", "promiscuity_fxp1_pxf0","promiscuity_fxp2_pxf0", "promiscuity_fxp0_pxf1", "promiscuity_fxp1_pxf1", "promiscuity_fxp2_pxf1", "promiscuity_fxp0_pxf2", "promiscuity_fxp1_pxf2", "promiscuity_fxp2_pxf2"]
153
-
154
  def model_to_markdown(model_names):
155
  items = []
156
  for mn in model_names:
@@ -240,8 +241,8 @@ def get_fragment_image(smiles):
240
 
241
  st.markdown(
242
  """
243
- Explanation for Output: The results are displayed in 4 Columns.
244
- 1. **Structure** of the FFF, InChi, Enamine ID
245
  2. **Chemical space**: Displays the Molecular Weight (*MW*), Walden-Crippen *LogP* and Tanimoto Similarity to the most similar fragment (*Sim-1*) and third most similar fragment (*Sim-3*) in the training set
246
  3. **Promiscuity Predictions** based on 12 Model: 3 Global (section **A**) and 9 Specific (section **B**)
247
  4. **Ontology Predictions** based on 9 _Signature_ Models derived from protein annotations of multiple scopes - from domains and families to molecular functions and cellular localization
@@ -249,7 +250,7 @@ st.markdown(
249
  )
250
 
251
  myCol = st.columns(3)
252
-
253
  with myCol[0]:
254
  st.subheader("Promiscuity Predictions")
255
  st.markdown("**A. Global models**")
@@ -270,7 +271,7 @@ with myCol[2]:
270
  st.markdown(
271
  """
272
  - Model score (range 0 -> 1) corresponds to the mean AUROC in 10 train-test splits
273
- - Percentages in parenthesis denote the percentile of the score across the Enamine collection of FFFs (>250k compounds). for example, in "Sign-4: 0.02 (35.7%)", **35.7** is the percentile of score.
274
  - The exclamation sign (!) next to the prediction output indicates that the corresponding model has an AUROC accuracy below 0.7 (*! is a warning sign*)
275
  """
276
  )
@@ -434,4 +435,4 @@ if all_inputs_are_valid and len(R) > 0:
434
  st.download_button(
435
  "Download as CSV", csv, "predictions.csv", "text/csv", key="download-csv"
436
  )
437
-
 
21
  import requests
22
  from io import BytesIO
23
  import urllib.request
24
+ # import miniautoml
25
 
26
  import warnings
27
  warnings.filterwarnings('ignore')
 
48
  #header {visibility: hidden;}
49
  </style>
50
  """
51
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
52
 
53
  dataset = load_dataset('ligdis/data', data_files={"predictions.csv"})
54
+ df_predictions = dataset['train'].to_pandas()
55
 
56
  predictions_inchikeys = df_predictions["inchikey"].tolist()
57
  df_predictions = df_predictions.rename(columns={"inchikey": "InChIKey"})
58
 
59
  dataset = load_dataset('ligdis/data', data_files={"applicability.csv"})
60
+ df_applicability = dataset['train'].to_pandas()
61
 
62
  df_predictions = pd.concat([df_predictions, df_applicability], axis=1)
63
 
64
  dataset = load_dataset('ligdis/data', data_files={"cemm_smiles.csv"})
65
+ cemm_smiles = dataset['train'].to_pandas()
66
 
67
  fid2smi = {}
68
  for r in cemm_smiles.values:
 
75
  CRF_PATTERN_1 = "N=N"
76
 
77
  dataset = load_dataset('ligdis/data', data_files={"all_fff_enamine.csv"})
78
+ enamine_catalog = dataset['train'].to_pandas()
79
  enamine_catalog_ids_set = set(enamine_catalog["catalog_id"])
80
  enamine_catalog_dict = {}
81
  catalog2inchikey = {}
 
131
  return True
132
 
133
  dataset = load_dataset('ligdis/data', data_files={"model_catalog.csv"})
134
+ dm = dataset['train'].to_pandas()
135
  all_models = dm["model_name"].tolist()
136
 
137
  dataset = load_dataset('ligdis/data', data_files={"models_performance.tsv"})
138
+ dp = dataset['train'].to_pandas()
139
 
140
  model_display = {}
141
  model_description = {}
 
150
  sign_models = [x for x in dm["model_name"].tolist() if x.startswith("signature")]
151
 
152
  global_promiscuity_models = ["promiscuity_pxf0", "promiscuity_pxf1", "promiscuity_pxf2"]
153
+ specific_promiscuity_models = ["promiscuity_fxp0_pxf0", "promiscuity_fxp1_pxf0","promiscuity_fxp2_pxf0", "promiscuity_fxp0_pxf1", "promiscuity_fxp1_pxf1", "promiscuity_fxp2_pxf1", "promiscuity_fxp0_pxf2", "promiscuity_fxp1_pxf2", "promiscuity_fxp2_pxf2"]
154
+
155
  def model_to_markdown(model_names):
156
  items = []
157
  for mn in model_names:
 
241
 
242
  st.markdown(
243
  """
244
+ Explanation for Output: The results are displayed in 4 Columns.
245
+ 1. **Structure** of the FFF, InChi, Enamine ID
246
  2. **Chemical space**: Displays the Molecular Weight (*MW*), Walden-Crippen *LogP* and Tanimoto Similarity to the most similar fragment (*Sim-1*) and third most similar fragment (*Sim-3*) in the training set
247
  3. **Promiscuity Predictions** based on 12 Model: 3 Global (section **A**) and 9 Specific (section **B**)
248
  4. **Ontology Predictions** based on 9 _Signature_ Models derived from protein annotations of multiple scopes - from domains and families to molecular functions and cellular localization
 
250
  )
251
 
252
  myCol = st.columns(3)
253
+
254
  with myCol[0]:
255
  st.subheader("Promiscuity Predictions")
256
  st.markdown("**A. Global models**")
 
271
  st.markdown(
272
  """
273
  - Model score (range 0 -> 1) corresponds to the mean AUROC in 10 train-test splits
274
+ - Percentages in parenthesis denote the percentile of the score across the Enamine collection of FFFs (>250k compounds). for example, in "Sign-4: 0.02 (35.7%)", **35.7** is the percentile of score.
275
  - The exclamation sign (!) next to the prediction output indicates that the corresponding model has an AUROC accuracy below 0.7 (*! is a warning sign*)
276
  """
277
  )
 
435
  st.download_button(
436
  "Download as CSV", csv, "predictions.csv", "text/csv", key="download-csv"
437
  )
438
+