Spaces:
Runtime error
Runtime error
Eachan Johnson
committed on
Commit
·
028bbd0
1
Parent(s):
c35d034
Major refactor
Browse files- .gitignore +3 -2
- app.py +480 -364
- example-data/examples.json +122 -0
.gitignore
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
/cache/
|
|
|
|
|
|
| 1 |
+
/cache/duvida/models--*
|
| 2 |
+
/cache/downloads/
|
| 3 |
+
*.log
|
app.py
CHANGED
|
@@ -1,11 +1,15 @@
|
|
| 1 |
"""Gradio demo for schemist."""
|
| 2 |
|
| 3 |
from typing import Iterable, List, Optional, Union
|
|
|
|
| 4 |
from functools import partial
|
| 5 |
from io import TextIOWrapper
|
|
|
|
| 6 |
import json
|
| 7 |
import os
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
|
| 10 |
from carabiner import cast, print_err
|
| 11 |
from carabiner.pd import read_table
|
|
@@ -22,18 +26,20 @@ from schemist.converting import (
|
|
| 22 |
)
|
| 23 |
from schemist.tables import converter
|
| 24 |
import torch
|
|
|
|
| 25 |
|
|
|
|
| 26 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 27 |
|
| 28 |
CACHE = "./cache"
|
| 29 |
-
MAX_ROWS =
|
| 30 |
-
BATCH_SIZE=32
|
| 31 |
HEADER_FILE = os.path.join("sources", "header.md")
|
| 32 |
with open("repos.json", "r") as f:
|
| 33 |
MODEL_REPOS = json.load(f)
|
| 34 |
|
| 35 |
MODELBOXES = {
|
| 36 |
-
key: AutoModelBox.from_pretrained(val, cache_dir=CACHE)
|
| 37 |
for key, val in MODEL_REPOS.items()
|
| 38 |
}
|
| 39 |
[mb.to(DEVICE) for mb in MODELBOXES.values()]
|
|
@@ -45,24 +51,46 @@ EXTRA_METRICS = {
|
|
| 45 |
"Information sensitivity (approx.)": lambda modelbox, candidates: modelbox.information_sensitivity(candidates=candidates, batch_size=BATCH_SIZE, optimality_approximation=True, approximator="squared_jacobian", cache=CACHE).map(lambda x: {"information sensitivity": torch.log10(x["information sensitivity"])}),
|
| 46 |
}
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
def get_dropdown_options(df, _type = str):
|
| 49 |
if _type == str:
|
| 50 |
cols = list(df.select_dtypes(exclude=[np.number]))
|
| 51 |
else:
|
| 52 |
cols = list(df.select_dtypes([np.number]))
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
-
def load_input_data(file: Union[TextIOWrapper, str]) -> pd.DataFrame:
|
| 57 |
file = file if isinstance(file, str) else file.name
|
| 58 |
print_err(f"Loading {file}")
|
| 59 |
-
df = read_table(file)
|
| 60 |
print_err(df.head())
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
def _clean_split_input(strings: str) -> List[str]:
|
| 65 |
-
return [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
def _convert_input(
|
|
@@ -82,7 +110,7 @@ def _convert_input(
|
|
| 82 |
def convert_one(
|
| 83 |
strings: str,
|
| 84 |
input_representation: str = 'smiles',
|
| 85 |
-
output_representation: Union[Iterable[str], str] = 'smiles'
|
| 86 |
):
|
| 87 |
output_representation = cast(output_representation, to=list)
|
| 88 |
for rep in output_representation:
|
|
@@ -168,7 +196,9 @@ def predict_one(
|
|
| 168 |
strings: str,
|
| 169 |
input_representation: str = 'smiles',
|
| 170 |
predict: Union[Iterable[str], str] = 'smiles',
|
| 171 |
-
extra_metrics: Optional[Union[Iterable[str], str]] = None
|
|
|
|
|
|
|
| 172 |
):
|
| 173 |
prediction_df = convert_one(
|
| 174 |
strings=strings,
|
|
@@ -180,15 +210,26 @@ def predict_one(
|
|
| 180 |
predict=predict,
|
| 181 |
extra_metrics=extra_metrics,
|
| 182 |
)
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
def convert_file(
|
| 194 |
df: pd.DataFrame,
|
|
@@ -230,10 +271,12 @@ def predict_file(
|
|
| 230 |
input_representation: str = 'smiles',
|
| 231 |
predict: str = 'smiles',
|
| 232 |
predict2: Optional[str] = None,
|
| 233 |
-
extra_metrics: Optional[Union[Iterable[str], str]] = None
|
|
|
|
|
|
|
| 234 |
):
|
| 235 |
predict = cast(predict, to=list)
|
| 236 |
-
if predict2 is not None:
|
| 237 |
predict += cast(predict2, to=list)
|
| 238 |
if extra_metrics is None:
|
| 239 |
extra_metrics = []
|
|
@@ -266,25 +309,49 @@ def predict_file(
|
|
| 266 |
col for col in prediction_df
|
| 267 |
if col not in main_cols
|
| 268 |
]
|
| 269 |
-
|
| 270 |
['id', 'inchikey']
|
| 271 |
+ [column]
|
| 272 |
+ prediction_cols + other_cols
|
| 273 |
+ ['smiles', "mwt", "clogp"]
|
| 274 |
]
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
def draw_one(
|
| 277 |
-
|
| 278 |
-
|
|
|
|
| 279 |
):
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
gr.Info(message, duration=10)
|
| 282 |
-
_ids =
|
| 283 |
-
|
| 284 |
-
input_representation,
|
| 285 |
-
["inchikey", "id", "pubchem_name"],
|
| 286 |
-
)
|
| 287 |
-
mols = cast(_x2mol(_clean_split_input(strings), input_representation), to=list)
|
| 288 |
if isinstance(mols, Mol):
|
| 289 |
mols = [mols]
|
| 290 |
return Draw.MolsToGridImage(
|
|
@@ -294,6 +361,7 @@ def draw_one(
|
|
| 294 |
legends=["\n".join(items) for items in zip(*_ids.values())],
|
| 295 |
)
|
| 296 |
|
|
|
|
| 297 |
def log10_if_all_positive(df, col):
|
| 298 |
if np.all(df[col] > 0.):
|
| 299 |
df[col] = np.log10(df[col])
|
|
@@ -355,386 +423,434 @@ def download_table(
|
|
| 355 |
df: pd.DataFrame
|
| 356 |
) -> str:
|
| 357 |
df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
|
| 358 |
-
filename = f"predicted-{df_hash}.csv"
|
|
|
|
|
|
|
| 359 |
df.to_csv(filename, index=False)
|
| 360 |
return gr.DownloadButton(value=filename, visible=True)
|
| 361 |
|
| 362 |
|
| 363 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
-
|
| 366 |
-
header_md = f.read()
|
| 367 |
-
gr.Markdown(header_md)
|
| 368 |
|
| 369 |
-
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
label="Input string format",
|
| 372 |
choices=list(_FROM_FUNCTIONS),
|
| 373 |
value="smiles",
|
| 374 |
interactive=True,
|
| 375 |
-
)
|
| 376 |
-
|
| 377 |
-
label="Input",
|
| 378 |
-
placeholder="Paste your molecule here, one per line",
|
| 379 |
-
lines=2,
|
| 380 |
-
interactive=True,
|
| 381 |
-
submit_btn=True,
|
| 382 |
-
)
|
| 383 |
-
output_species_single = gr.CheckboxGroup(
|
| 384 |
label="Species for prediction",
|
| 385 |
choices=list(MODEL_REPOS),
|
| 386 |
value=list(MODEL_REPOS)[:1],
|
| 387 |
interactive=True,
|
| 388 |
-
)
|
| 389 |
-
|
| 390 |
label="Extra metrics (Doubscore & Information Sensitivity can increase calculation time to a couple of minutes!)",
|
| 391 |
choices=list(EXTRA_METRICS),
|
| 392 |
value=list(EXTRA_METRICS)[:2],
|
| 393 |
interactive=True,
|
| 394 |
-
)
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
"CC1=C2C=CC=C(C2=C(C3=C1C[C@H]4[C@@H](C(=O)C(=C([C@]4(C3=O)O)O)C(=O)N)N(C)C)O)O",
|
| 416 |
-
]),
|
| 417 |
-
"Staphylococcus aureus",
|
| 418 |
-
list(EXTRA_METRICS)[:2],
|
| 419 |
-
], # doxorubicin, ampicillin, amoxicillin, meropenem, tetracycline, anhydrotetracycline
|
| 420 |
-
[
|
| 421 |
-
'\n'.join([
|
| 422 |
-
"C1=C(SC(=N1)SC2=NN=C(S2)N)[N+](=O)[O-]",
|
| 423 |
-
"C1CN(CCC12C3=CC=CC=C3NC(=O)O2)CCC4=CC=C(C=C4)C(F)(F)F",
|
| 424 |
-
"COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
| 425 |
-
"CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N",
|
| 426 |
-
"C1[C@@H]([C@H]([C@@H]([C@H]([C@@H]1NC(=O)[C@H](CCN)O)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)N)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CN)O)O)O)N",
|
| 427 |
-
"C1=CN=CC=C1C(=O)NN",
|
| 428 |
-
]),
|
| 429 |
-
["Escherichia coli", "Acinetobacter baumannii"],
|
| 430 |
-
list(EXTRA_METRICS)[:2],
|
| 431 |
-
], # Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid
|
| 432 |
-
[
|
| 433 |
-
'\n'.join([
|
| 434 |
-
"CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
| 435 |
-
"C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O",
|
| 436 |
-
"CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
| 437 |
-
"C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
|
| 438 |
-
"CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC",
|
| 439 |
-
"C[C@H]1/C=C/C=C(\C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)[C@](O4)(O/C=C/[C@@H]([C@H]([C@H]([C@@H]([C@@H]([C@@H]([C@H]1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)/C=N/N5CCN(CC5)C)/C",
|
| 440 |
-
]),
|
| 441 |
-
"Acinetobacter baumannii",
|
| 442 |
-
list(EXTRA_METRICS)[:2],
|
| 443 |
-
], # murepavadin, vancomycin, zosurabalpin, plazomicin, Gentamicin, rifampicin
|
| 444 |
-
[
|
| 445 |
-
'\n'.join([
|
| 446 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)CC4)N=C3",
|
| 447 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@@H](C4)N)N=C3",
|
| 448 |
-
"CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@H](CC4)[NH3+])N=C3.[Cl-]",
|
| 449 |
-
"C1=C(C(=O)NC(=O)N1)F",
|
| 450 |
-
"CCCCCCNC(=O)N1C=C(C(=O)NC1=O)F",
|
| 451 |
-
"C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O",
|
| 452 |
-
]),
|
| 453 |
-
"Escherichia coli",
|
| 454 |
-
list(EXTRA_METRICS)[:2],
|
| 455 |
-
], # Debio1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide
|
| 456 |
-
[
|
| 457 |
-
'\n'.join([
|
| 458 |
-
"COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
| 459 |
-
"CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
| 460 |
-
"C1=CC(=CC=C1CCC2=CNC3=C2C(=O)NC(=N3)N)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
| 461 |
-
"CC1=C(C2=C(C=C1)N=C(NC2=O)N)SC3=CC=NC=C3",
|
| 462 |
-
"CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
| 463 |
-
"CC1=NC2=C(C=C(C=C2)CN(C)C3=CC=C(S3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)N1",
|
| 464 |
-
]),
|
| 465 |
-
"Klebsiella pneumoniae",
|
| 466 |
-
list(EXTRA_METRICS)[:2],
|
| 467 |
-
], # Trimethoprim, SCH79797, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed
|
| 468 |
-
[
|
| 469 |
-
'\n'.join([
|
| 470 |
-
"C[C@H]([C@@H](C(=O)NO)NC(=O)C1=CC=C(C=C1)C#CC2=CC=C(C=C2)CN3CCOCC3)O",
|
| 471 |
-
"CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
| 472 |
-
"C1=CC=C(C=C1)CNC2=NC(=NC3=CC=CC=C32)NCC4=CC=CC=C4",
|
| 473 |
-
"CC(C)(C)C1=CC=C(C=C1)C(=O)NC(=S)NC2=CC=C(C=C2)NC(=O)CCCCN(C)C",
|
| 474 |
-
"CCC1=C(C(=NC(=N1)N)N)C2=CC=C(C=C2)Cl",
|
| 475 |
-
"C1=CC(=CC=C1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCC2=CN=C3C(=N2)C(=NC(=N3)N)N",
|
| 476 |
-
]),
|
| 477 |
-
"Klebsiella pneumoniae",
|
| 478 |
-
list(EXTRA_METRICS)[:2],
|
| 479 |
-
], # CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin
|
| 480 |
-
|
| 481 |
-
],
|
| 482 |
-
example_labels=[
|
| 483 |
-
"_Y. pestis_ (plague) vs Ciprofloxacin, Ceftriaxone, Cefiderocol, Linezolid, Gepotidacin",
|
| 484 |
-
"_S. aureus_ vs Doxorubicin, Ampicillin, Amoxicillin, Meropenem, Tetracycline, Anhydrotetracycline",
|
| 485 |
-
"_E. coli_ and _A. baumannii_ vs Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid",
|
| 486 |
-
"_A. baumannii_ vs Murepavadin, Vancomycin, Zosurabalpin, Plazomicin, Gentamicin, Rifampicin",
|
| 487 |
-
"_E. coli_ vs Debio-1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide",
|
| 488 |
-
"_K. pneumoniae_ vs Trimethoprim, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed",
|
| 489 |
-
"_K. pneumoniae_ vs CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin"
|
| 490 |
-
],
|
| 491 |
-
inputs=[input_line, output_species_single, extra_metric],
|
| 492 |
-
cache_mode="eager",
|
| 493 |
-
)
|
| 494 |
-
download_single = gr.DownloadButton(
|
| 495 |
-
label="Download predictions",
|
| 496 |
-
visible=False,
|
| 497 |
-
)
|
| 498 |
-
# with gr.Row():
|
| 499 |
-
output_line = gr.DataFrame(
|
| 500 |
-
label="Predictions",
|
| 501 |
-
interactive=False,
|
| 502 |
-
visible=False,
|
| 503 |
-
)
|
| 504 |
-
drawing = gr.Image(label="Chemical structures")
|
| 505 |
-
|
| 506 |
-
gr.on(
|
| 507 |
-
[
|
| 508 |
-
input_line.submit,
|
| 509 |
-
],
|
| 510 |
-
fn=predict_one,
|
| 511 |
-
inputs=[
|
| 512 |
-
input_line,
|
| 513 |
-
input_format_single,
|
| 514 |
-
output_species_single,
|
| 515 |
-
extra_metric,
|
| 516 |
-
],
|
| 517 |
-
outputs={
|
| 518 |
-
output_line,
|
| 519 |
-
}
|
| 520 |
-
).then(
|
| 521 |
-
draw_one,
|
| 522 |
-
inputs=[
|
| 523 |
-
input_line,
|
| 524 |
-
input_format_single,
|
| 525 |
-
],
|
| 526 |
-
outputs=drawing,
|
| 527 |
-
).then(
|
| 528 |
-
download_table,
|
| 529 |
-
inputs=output_line,
|
| 530 |
-
outputs=download_single
|
| 531 |
-
)
|
| 532 |
|
| 533 |
-
|
| 534 |
-
|
| 535 |
label="Upload a table of chemical compounds here",
|
| 536 |
file_types=[".xlsx", ".csv", ".tsv", ".txt"],
|
| 537 |
-
)
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
gr.Dropdown(
|
| 554 |
label="Species 1 for prediction",
|
| 555 |
choices=list(MODEL_REPOS),
|
| 556 |
value=list(MODEL_REPOS)[0],
|
| 557 |
interactive=True,
|
|
|
|
| 558 |
),
|
| 559 |
gr.Dropdown(
|
| 560 |
label="Species 2 for prediction",
|
| 561 |
choices=list(MODEL_REPOS),
|
| 562 |
value=None,
|
| 563 |
interactive=True,
|
|
|
|
| 564 |
),
|
| 565 |
-
]
|
| 566 |
-
|
| 567 |
label="Extra metrics (Information Sensitivity can increase calculation time)",
|
| 568 |
choices=list(EXTRA_METRICS),
|
| 569 |
value=list(EXTRA_METRICS)[:2],
|
| 570 |
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
)
|
| 572 |
-
|
| 573 |
-
go_button2 = gr.Button(
|
| 574 |
-
value="Predict!",
|
| 575 |
-
)
|
| 576 |
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
)
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
|
|
|
| 584 |
visible=False,
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
)
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
)
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
"
|
| 635 |
-
"Escherichia coli: Doubtscore",
|
| 636 |
-
list(EXTRA_METRICS)[:3],
|
| 637 |
],
|
| 638 |
-
[
|
| 639 |
-
"
|
| 640 |
-
"SMILES",
|
| 641 |
-
"Acinetobacter baumannii",
|
| 642 |
-
"Mean",
|
| 643 |
-
"Acinetobacter baumannii: Doubtscore",
|
| 644 |
-
list(EXTRA_METRICS)[:3],
|
| 645 |
],
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
"
|
| 649 |
-
"
|
| 650 |
-
"
|
| 651 |
-
"
|
| 652 |
-
|
| 653 |
],
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
inputs=[input_file, input_column, output_species[0], observed_col, color_col, extra_metric_file],
|
| 661 |
-
cache_mode="eager",
|
| 662 |
-
)
|
| 663 |
-
with gr.Row():
|
| 664 |
-
pred_vs_observed = gr.ScatterPlot(
|
| 665 |
-
label="Prediction vs observed",
|
| 666 |
-
x_title="Predicted MIC (µM)",
|
| 667 |
-
y_title="Observed",
|
| 668 |
-
visible=False,
|
| 669 |
-
height=600,
|
| 670 |
)
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
)
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
outputs={
|
| 699 |
-
input_data,
|
| 700 |
-
}
|
| 701 |
-
).then(
|
| 702 |
-
download_table,
|
| 703 |
-
inputs=input_data,
|
| 704 |
-
outputs=download
|
| 705 |
-
).then(
|
| 706 |
-
lambda: gr.Button(visible=True),
|
| 707 |
-
outputs=[plot_button]
|
| 708 |
-
)
|
| 709 |
-
|
| 710 |
-
for dropdown in [observed_col, color_col, any_color_col, any_x_col, any_y_col]:
|
| 711 |
-
go2_click_event.then(
|
| 712 |
-
partial(get_dropdown_options, _type="number"),
|
| 713 |
-
inputs=[input_data],
|
| 714 |
-
outputs=[dropdown],
|
| 715 |
)
|
| 716 |
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
],
|
| 734 |
-
outputs=[plot_any_vs_any],
|
| 735 |
-
)
|
| 736 |
-
|
| 737 |
-
if __name__ == "__main__":
|
| 738 |
-
demo.queue()
|
| 739 |
-
demo.launch(share=True)
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Gradio demo for schemist."""
|
| 2 |
|
| 3 |
from typing import Iterable, List, Optional, Union
|
| 4 |
+
import csv
|
| 5 |
from functools import partial
|
| 6 |
from io import TextIOWrapper
|
| 7 |
+
import itertools
|
| 8 |
import json
|
| 9 |
import os
|
| 10 |
+
import sys
|
| 11 |
+
|
| 12 |
+
csv.field_size_limit(sys.maxsize)
|
| 13 |
|
| 14 |
from carabiner import cast, print_err
|
| 15 |
from carabiner.pd import read_table
|
|
|
|
| 26 |
)
|
| 27 |
from schemist.tables import converter
|
| 28 |
import torch
|
| 29 |
+
from duvida.stateless.config import config
|
| 30 |
|
| 31 |
+
THEME = 'd8ahazard/material_design_rd'
|
| 32 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 33 |
|
| 34 |
CACHE = "./cache"
|
| 35 |
+
MAX_ROWS = 500
|
| 36 |
+
BATCH_SIZE = 32
|
| 37 |
HEADER_FILE = os.path.join("sources", "header.md")
|
| 38 |
with open("repos.json", "r") as f:
|
| 39 |
MODEL_REPOS = json.load(f)
|
| 40 |
|
| 41 |
MODELBOXES = {
|
| 42 |
+
key: AutoModelBox.from_pretrained(val, cache_dir=os.path.join(CACHE, "duvida"))
|
| 43 |
for key, val in MODEL_REPOS.items()
|
| 44 |
}
|
| 45 |
[mb.to(DEVICE) for mb in MODELBOXES.values()]
|
|
|
|
| 51 |
"Information sensitivity (approx.)": lambda modelbox, candidates: modelbox.information_sensitivity(candidates=candidates, batch_size=BATCH_SIZE, optimality_approximation=True, approximator="squared_jacobian", cache=CACHE).map(lambda x: {"information sensitivity": torch.log10(x["information sensitivity"])}),
|
| 52 |
}
|
| 53 |
|
| 54 |
+
with open(os.path.join("example-data", "examples.json"), "r") as f:
|
| 55 |
+
EXAMPLES = json.load(f)
|
| 56 |
+
|
| 57 |
def get_dropdown_options(df, _type = str):
    """Build a Gradio dropdown offering the columns of ``df`` of one kind.

    Args:
        df: Table whose column labels become the dropdown choices.
        _type: ``str`` selects the non-numeric columns; any other value
            (e.g. ``"number"``) selects the numeric columns.

    Returns:
        A visible, interactive ``gr.Dropdown`` that also accepts custom
        values. Its default is the first column label that is not ``None``,
        or ``""`` when no such label exists.
    """
    if _type == str:
        cols = list(df.select_dtypes(exclude=[np.number]))
    else:
        cols = list(df.select_dtypes([np.number]))
    non_none = [col for col in cols if col is not None]
    # Guard on the filtered list, not on ``cols``: if every label is None,
    # ``cols`` is non-empty but ``non_none[0]`` would raise IndexError.
    default_value = non_none[0] if non_none else ""
    print_err(f"Dropdown default value is {default_value}")
    return gr.Dropdown(
        choices=cols,
        interactive=True,
        value=default_value,
        visible=True,
        allow_custom_value=True,
    )
|
| 75 |
|
| 76 |
|
| 77 |
+
def load_input_data(file: Union[TextIOWrapper, str], return_pd: bool = False) -> tuple:
    """Load an uploaded table and prepare the widgets that display it.

    Args:
        file: Path to the table, or an open file object whose ``.name`` is
            used as the path.
        return_pd: When true, the raw DataFrame is returned alongside the
            Gradio table component.

    Returns:
        A 2-tuple ``(table, dropdown)``. ``dropdown`` is the result of
        ``get_dropdown_options(df, str)``. ``table`` is a visible
        ``gr.Dataframe`` holding the loaded data, or the pair
        ``(df, gr.Dataframe)`` when ``return_pd`` is true.
    """
    path = file if isinstance(file, str) else file.name
    print_err(f"Loading {path}")
    # ``nrows=MAX_ROWS`` is forwarded to ``read_table`` to cap how much of the
    # upload is read.
    df = read_table(path, nrows=MAX_ROWS)
    print_err(df.head())
    table = gr.Dataframe(value=df, visible=True)
    column_options = get_dropdown_options(df, str)
    if return_pd:
        return (df, table), column_options
    return table, column_options
|
| 86 |
|
| 87 |
|
| 88 |
def _clean_split_input(strings: str) -> List[str]:
|
| 89 |
+
return [
|
| 90 |
+
s2.split(":")[-1].strip()
|
| 91 |
+
for s in strings.split("\n")
|
| 92 |
+
for s2 in s.split(",")
|
| 93 |
+
]
|
| 94 |
|
| 95 |
|
| 96 |
def _convert_input(
|
|
|
|
| 110 |
def convert_one(
|
| 111 |
strings: str,
|
| 112 |
input_representation: str = 'smiles',
|
| 113 |
+
output_representation: Union[Iterable[str], str] = 'smiles',
|
| 114 |
):
|
| 115 |
output_representation = cast(output_representation, to=list)
|
| 116 |
for rep in output_representation:
|
|
|
|
| 196 |
strings: str,
|
| 197 |
input_representation: str = 'smiles',
|
| 198 |
predict: Union[Iterable[str], str] = 'smiles',
|
| 199 |
+
extra_metrics: Optional[Union[Iterable[str], str]] = None,
|
| 200 |
+
return_pd: bool = False
|
| 201 |
+
# progress = gr.Progress(track_tqdm=True)
|
| 202 |
):
|
| 203 |
prediction_df = convert_one(
|
| 204 |
strings=strings,
|
|
|
|
| 210 |
predict=predict,
|
| 211 |
extra_metrics=extra_metrics,
|
| 212 |
)
|
| 213 |
+
df = prediction_df[
|
| 214 |
+
['id', 'pubchem_name', 'pubchem_id']
|
| 215 |
+
+ prediction_cols
|
| 216 |
+
+ ['smiles', 'inchikey', "mwt", "clogp"]
|
| 217 |
+
]
|
| 218 |
+
if return_pd:
|
| 219 |
+
return (
|
| 220 |
+
df,
|
| 221 |
+
gr.DataFrame(
|
| 222 |
+
df,
|
| 223 |
+
pinned_columns=3,
|
| 224 |
+
visible=True,
|
| 225 |
+
)
|
| 226 |
+
)
|
| 227 |
+
else:
|
| 228 |
+
return gr.DataFrame(
|
| 229 |
+
df,
|
| 230 |
+
pinned_columns=3,
|
| 231 |
+
visible=True,
|
| 232 |
+
)
|
| 233 |
|
| 234 |
def convert_file(
|
| 235 |
df: pd.DataFrame,
|
|
|
|
| 271 |
input_representation: str = 'smiles',
|
| 272 |
predict: str = 'smiles',
|
| 273 |
predict2: Optional[str] = None,
|
| 274 |
+
extra_metrics: Optional[Union[Iterable[str], str]] = None,
|
| 275 |
+
return_pd: bool = False
|
| 276 |
+
# progress = gr.Progress(track_tqdm=True)
|
| 277 |
):
|
| 278 |
predict = cast(predict, to=list)
|
| 279 |
+
if predict2 is not None and predict2 in MODELBOXES:
|
| 280 |
predict += cast(predict2, to=list)
|
| 281 |
if extra_metrics is None:
|
| 282 |
extra_metrics = []
|
|
|
|
| 309 |
col for col in prediction_df
|
| 310 |
if col not in main_cols
|
| 311 |
]
|
| 312 |
+
prediction_df = prediction_df[
|
| 313 |
['id', 'inchikey']
|
| 314 |
+ [column]
|
| 315 |
+ prediction_cols + other_cols
|
| 316 |
+ ['smiles', "mwt", "clogp"]
|
| 317 |
]
|
| 318 |
|
| 319 |
+
if return_pd:
|
| 320 |
+
return (
|
| 321 |
+
prediction_df,
|
| 322 |
+
gr.Dataframe(
|
| 323 |
+
label="Predictions",
|
| 324 |
+
value=prediction_df,
|
| 325 |
+
pinned_columns=3,
|
| 326 |
+
visible=True,
|
| 327 |
+
wrap=True,
|
| 328 |
+
column_widths=[75] * prediction_df.shape[1],
|
| 329 |
+
),
|
| 330 |
+
)
|
| 331 |
+
else:
|
| 332 |
+
return gr.Dataframe(
|
| 333 |
+
label="Predictions",
|
| 334 |
+
value=prediction_df,
|
| 335 |
+
pinned_columns=3,
|
| 336 |
+
visible=True,
|
| 337 |
+
wrap=True,
|
| 338 |
+
column_widths=[125] * prediction_df.shape[1],
|
| 339 |
+
)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
def draw_one(
|
| 343 |
+
df,
|
| 344 |
+
smiles_col: str = "smiles",
|
| 345 |
+
legends: Optional[Union[str, Iterable[str]]] = None
|
| 346 |
):
|
| 347 |
+
if legends is None:
|
| 348 |
+
legends = ["inchikey", "id", "pubchem_name"]
|
| 349 |
+
else:
|
| 350 |
+
legends = []
|
| 351 |
+
message = f"Drawing {df.shape[0]} molecules..."
|
| 352 |
gr.Info(message, duration=10)
|
| 353 |
+
_ids = {col: df[col].tolist() for col in legends}
|
| 354 |
+
mols = cast(_x2mol(df[smiles_col], "smiles"), to=list)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
if isinstance(mols, Mol):
|
| 356 |
mols = [mols]
|
| 357 |
return Draw.MolsToGridImage(
|
|
|
|
| 361 |
legends=["\n".join(items) for items in zip(*_ids.values())],
|
| 362 |
)
|
| 363 |
|
| 364 |
+
|
| 365 |
def log10_if_all_positive(df, col):
|
| 366 |
if np.all(df[col] > 0.):
|
| 367 |
df[col] = np.log10(df[col])
|
|
|
|
| 423 |
df: pd.DataFrame
|
| 424 |
) -> str:
|
| 425 |
df_hash = nm.hash(pd.util.hash_pandas_object(df).values)
|
| 426 |
+
filename = os.path.join(CACHE, "downloads", f"predicted-{df_hash}.csv")
|
| 427 |
+
if not os.path.exists(os.path.dirname(filename)):
|
| 428 |
+
os.makedirs(os.path.dirname(filename))
|
| 429 |
df.to_csv(filename, index=False)
|
| 430 |
return gr.DownloadButton(value=filename, visible=True)
|
| 431 |
|
| 432 |
|
| 433 |
+
def _predict_then_draw_then_download(
    strings: str,
    input_representation: str = 'smiles',
    predict: Union[Iterable[str], str] = 'smiles',
    extra_metrics: Optional[Union[Iterable[str], str]] = None,
    smiles_col: str = "smiles",
    legends: Optional[Union[str, Iterable[str]]] = None
):
    """Predict for pasted molecules, draw them, and prepare a download.

    Runs ``predict_one`` with ``return_pd=True``, draws the resulting table
    with ``draw_one``, and writes it out via ``download_table``.

    Args:
        strings: Pasted molecule text, forwarded to ``predict_one``.
        input_representation: Format of ``strings``, forwarded to
            ``predict_one``.
        predict: What to predict, forwarded to ``predict_one``.
        extra_metrics: Extra metrics to compute, forwarded to ``predict_one``.
        smiles_col: Column of the prediction table handed to ``draw_one`` as
            the structure column.
        legends: Legend columns handed to ``draw_one``; ``None`` lets
            ``draw_one`` pick its defaults.

    Returns:
        A 3-tuple: the Gradio table from ``predict_one``, the image from
        ``draw_one``, and the component returned by ``download_table``.
    """
    df, gr_df = predict_one(
        strings=strings,
        input_representation=input_representation,
        predict=predict,
        extra_metrics=extra_metrics,
        return_pd=True,
    )
    # Forward the caller's drawing options instead of silently ignoring them;
    # with the default arguments this is the same call as before.
    img = draw_one(
        df,
        smiles_col=smiles_col,
        legends=legends,
    )
    return gr_df, img, download_table(df)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def _load_then_predict_then_download_then_reveal_plot(
    file: str,
    column: str = 'smiles',
    input_representation: str = 'smiles',
    predict: str = 'smiles',
    predict2: Optional[str] = "",
    extra_metrics: Optional[Union[Iterable[str], str]] = None
):
    """Load a table, run predictions on it, and prepare a download.

    Args:
        file: Table to load, passed to ``load_input_data``.
        column: Column holding the molecule strings, passed to
            ``predict_file``.
        input_representation: Format of that column, passed to
            ``predict_file``.
        predict: First prediction target, passed to ``predict_file``.
        predict2: Optional second target; an empty string is treated as
            "not set" and passed on as ``None``.
        extra_metrics: Extra metrics to compute, passed to ``predict_file``.

    Returns:
        A 2-tuple: the Gradio table returned by ``predict_file`` and the
        component returned by ``download_table`` for the predictions.
    """
    # Only the raw DataFrame is needed from the loader; the display widget
    # and the column dropdown it also returns are discarded.
    (loaded, _), _ = load_input_data(file, return_pd=True)
    second_target = predict2 if predict2 != "" else None
    predictions, predictions_view = predict_file(
        loaded,
        column=column,
        input_representation=input_representation,
        predict=predict,
        predict2=second_target,
        extra_metrics=extra_metrics,
        return_pd=True,
    )
    print_err(predictions.head())
    return predictions_view, download_table(predictions)
|
| 482 |
+
|
| 483 |
|
| 484 |
+
def _initial_setup():
    """Create every Gradio component used by the app, without rendering it.

    The components are returned to the caller, which places them in the
    layout with ``.render()``.

    Also reports the Duvida config and the torch device via ``print_err``.

    Returns:
        An 11-tuple, in this order:
        ``line_inputs`` (dict of inputs for the paste-in tab),
        ``output_line`` (predictions table for the paste-in tab),
        ``download_single`` (download button for the paste-in tab),
        ``drawing`` (structure image),
        ``file_inputs`` (dict of inputs for the file tab; ``"species"`` is a
        list of two dropdowns),
        ``input_dataframe`` (table shown in the file tab),
        ``download`` (download button for the file tab),
        ``plot_button``,
        ``left_plot_inputs`` and ``right_plot_inputs`` (dicts of column
        dropdowns), and ``plots`` (dict with ``"left"`` and ``"right"``
        scatter plots).
    """

    def _plot_dropdown(label):
        # All plot-column selectors share this configuration; their choices
        # start empty here.
        return gr.Dropdown(
            label=label,
            choices=[],
            value=None,
            interactive=True,
            visible=True,
            allow_custom_value=True,
        )

    print_err(f"Duvida config is {config}")
    print_err(f"Default torch device is {DEVICE}")

    # Insertion order matters: the caller renders these in dict order.
    line_inputs = {
        "format": gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
        ),
        "species": gr.CheckboxGroup(
            label="Species for prediction",
            choices=list(MODEL_REPOS),
            value=list(MODEL_REPOS)[:1],
            interactive=True,
        ),
        "extras": gr.CheckboxGroup(
            label="Extra metrics (Doubtscore & Information Sensitivity can increase calculation time to a couple of minutes!)",
            choices=list(EXTRA_METRICS),
            value=list(EXTRA_METRICS)[:2],
            interactive=True,
        ),
        "strings": gr.Textbox(
            label="Input",
            placeholder="Paste your molecule here, one per line.",
            lines=2,
            interactive=True,
            submit_btn=True,
        ),
    }
    output_line = gr.DataFrame(
        label="Predictions (scroll left and right)",
        interactive=False,
        max_chars=75,
        pinned_columns=3,
        visible=True,
    )
    download_single = gr.DownloadButton(
        label="Download predictions",
        visible=False,
    )
    drawing = gr.Image(label="Chemical structures")

    file_inputs = {
        "file": gr.File(
            label="Upload a table of chemical compounds here",
            file_types=[".xlsx", ".csv", ".tsv", ".txt"],
        ),
        "column": gr.Dropdown(
            label="Input column name",
            choices=[],
            allow_custom_value=True,
            visible=True,
            interactive=True,
        ),
        "format": gr.Dropdown(
            label="Input string format",
            choices=list(_FROM_FUNCTIONS),
            value="smiles",
            interactive=True,
            visible=True,
        ),
        "species": [
            gr.Dropdown(
                label="Species 1 for prediction",
                choices=list(MODEL_REPOS),
                value=list(MODEL_REPOS)[0],
                interactive=True,
                allow_custom_value=True,
            ),
            # The second species is optional, so it starts unselected.
            gr.Dropdown(
                label="Species 2 for prediction",
                choices=list(MODEL_REPOS),
                value=None,
                interactive=True,
                allow_custom_value=True,
            ),
        ],
        "extras": gr.CheckboxGroup(
            label="Extra metrics (Information Sensitivity can increase calculation time)",
            choices=list(EXTRA_METRICS),
            value=list(EXTRA_METRICS)[:2],
            interactive=True,
        ),
    }

    input_dataframe = gr.Dataframe(
        label="Input data",
        max_height=500,
        visible=True,
        interactive=False,
        show_fullscreen_button=True,
        show_search="filter",
        max_chars=45,
    )
    download = gr.DownloadButton(
        label="Download predictions",
        visible=False,
    )
    plot_button = gr.Button(
        value="Plot!",
        visible=False,
    )

    left_plot_inputs = {
        "observed": _plot_dropdown("Observed column (y-axis) for left plot"),
        "color": _plot_dropdown("Color for left plot"),
    }
    right_plot_inputs = {
        "x": _plot_dropdown("x-axis for right plot"),
        "y": _plot_dropdown("y-axis for right plot"),
        "color": _plot_dropdown("Color for right plot"),
    }
    plots = {
        "left": gr.ScatterPlot(
            height=500,
            visible=False,
        ),
        "right": gr.ScatterPlot(
            height=500,
            visible=False,
        ),
    }

    return (
        line_inputs,
        output_line,
        download_single,
        drawing,
        file_inputs,
        input_dataframe,
        download,
        plot_button,
        left_plot_inputs,
        right_plot_inputs,
        plots,
    )
|
| 662 |
+
|
| 663 |
+
if __name__ == "__main__":
    # Components are created unrendered first so that the example galleries
    # can be declared above the widgets they fill in.
    (
        line_inputs,
        output_line,
        download_single,
        drawing,
        file_inputs,
        input_dataframe,
        download,
        plot_button,
        left_plot_inputs,
        right_plot_inputs,
        plots,
    ) = _initial_setup()

    with gr.Blocks(theme=THEME) as demo:
        with open(HEADER_FILE, 'r') as header_file:
            header_md = header_file.read()
        gr.Markdown(header_md)

        # ---- Tab 1: paste structures, one per line ----
        with gr.Tab(label="Paste one per line"):
            # The examples and the submit event share the same wiring.
            line_event_inputs = [
                line_inputs["strings"],
                line_inputs["format"],
                line_inputs["species"],
                line_inputs["extras"],
            ]
            line_event_outputs = [
                output_line,
                drawing,
                download_single,
            ]
            examples = gr.Examples(
                examples=[
                    [
                        "\n".join(example["strings"]),
                        "smiles",
                        example["species"],
                        list(EXTRA_METRICS)[:2],
                    ]
                    for example in EXAMPLES["line input examples"]
                ],
                example_labels=[
                    example["label"]
                    for example in EXAMPLES["line input examples"]
                ],
                inputs=line_event_inputs,
                fn=_predict_then_draw_then_download,
                outputs=line_event_outputs,
                cache_examples=True,
                cache_mode="lazy",
            )

            for component in line_inputs.values():
                component.render()
            output_line.render()
            download_single.render()
            drawing.render()

            line_inputs["strings"].submit(
                fn=_predict_then_draw_then_download,
                inputs=line_event_inputs,
                outputs=line_event_outputs,
            )

        # ---- Tab 2: predict on an uploaded table ----
        with gr.Tab(f"Predict on structures from a file (max. {MAX_ROWS} rows, ≤ 2 species)"):
            file_examples = gr.Examples(
                examples=[
                    [
                        example["file"],
                        example["column"],
                        "smiles",
                        example["species"],
                        "",  # no second species in the bundled examples
                        list(EXTRA_METRICS)[:2],
                    ]
                    for example in EXAMPLES["file examples"]
                ],
                example_labels=[
                    example["label"] for example in EXAMPLES["file examples"]
                ],
                fn=_load_then_predict_then_download_then_reveal_plot,
                inputs=[
                    file_inputs["file"],
                    file_inputs["column"],
                    file_inputs["format"],
                    *file_inputs["species"],
                    file_inputs["extras"],
                ],
                outputs=[
                    input_dataframe,
                    download,
                ],
                # NOTE: caching here appears to cause a CSV load error.
                cache_examples=True,
                cache_mode="lazy",
            )

            file_inputs["file"].render()
            with gr.Row():
                for field in ("column", "format"):
                    file_inputs[field].render()
            with gr.Row():
                for species_dropdown in file_inputs["species"]:
                    species_dropdown.render()
            file_inputs["extras"].render()

            go_button2 = gr.Button(value="Predict!")

            input_dataframe.render()
            download.render()
            with gr.Row():
                for component in left_plot_inputs.values():
                    component.render()
            with gr.Row():
                for component in right_plot_inputs.values():
                    component.render()
            plot_button.render()

            with gr.Row():
                for plot in plots.values():
                    plot.render()

            # Uploading a file fills the table and the column dropdown.
            file_inputs["file"].upload(
                fn=load_input_data,
                inputs=file_inputs["file"],
                outputs=[
                    input_dataframe,
                    file_inputs["column"],
                ],
            )
            go2_click_event = go_button2.click(
                predict_file,
                inputs=[
                    input_dataframe,
                    file_inputs["column"],
                    file_inputs["format"],
                    *file_inputs["species"],
                    file_inputs["extras"],
                ],
                outputs=[
                    input_dataframe,
                ],
            )

            # Any change to the table refreshes the download and then
            # reveals the plot button.
            df_change = input_dataframe.change(
                download_table,
                inputs=input_dataframe,
                outputs=download
            ).then(
                lambda: gr.Button(visible=True),
                outputs=[plot_button],
                js=True,
            )

            # Each plot dropdown gets its own follow-up on the table-change
            # chain, updated from the table's numeric columns.
            for dropdown in itertools.chain(
                left_plot_inputs.values(),
                right_plot_inputs.values(),
            ):
                df_change.then(
                    partial(get_dropdown_options, _type="number"),
                    inputs=[input_dataframe],
                    outputs=[dropdown],
                )

            # Draw the left plot first, then the right one.
            plot_button.click(
                plot_pred_vs_observed,
                inputs=[
                    input_dataframe,
                    file_inputs["species"][0],
                    left_plot_inputs["observed"],
                    left_plot_inputs["color"],
                ],
                outputs=[plots["left"]],
            ).then(
                plot_x_vs_y,
                inputs=[
                    input_dataframe,
                    right_plot_inputs["x"],
                    right_plot_inputs["y"],
                    right_plot_inputs["color"],
                ],
                outputs=[plots["right"]],
            )

    demo.queue()
    demo.launch(share=True)
|
example-data/examples.json
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"line input examples": [
|
| 3 |
+
{
|
| 4 |
+
"label": "Y. pestis (plague) vs Ciprofloxacin, Ceftriaxone, Cefiderocol, Linezolid, Gepotidacin",
|
| 5 |
+
"strings": [
|
| 6 |
+
"Ciprofloxacin: C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O",
|
| 7 |
+
"Ceftriaxone: CN1C(=NC(=O)C(=O)N1)SCC2=C(N3[C@@H]([C@@H](C3=O)NC(=O)/C(=N\\OC)/C4=CSC(=N4)N)SC2)C(=O)O",
|
| 8 |
+
"Cefiderocol: CC(C)(C(=O)O)O/N=C(/C1=CSC(=N1)N)\\C(=O)N[C@H]2[C@@H]3N(C2=O)C(=C(CS3)C[N+]4(CCCC4)CCNC(=O)C5=C(C(=C(C=C5)O)O)Cl)C(=O)[O-]",
|
| 9 |
+
"Linezolid: CC(=O)NC[C@H]1CN(C(=O)O1)C2=CC(=C(C=C2)N3CCOCC3)F",
|
| 10 |
+
"Gepotidacin: C1CC2=CC(=NC=C2OC1)CNC3CCN(CC3)C[C@@H]4CN5C(=O)C=CC6=C5N4C(=O)C=N6"
|
| 11 |
+
],
|
| 12 |
+
"species": [
|
| 13 |
+
"Yersinia pestis"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"label": "S. aureus vs Doxorubicin, Ampicillin, Amoxicillin, Meropenem, Tetracycline, Anhydrotetracycline",
|
| 18 |
+
"strings": [
|
| 19 |
+
"Doxorubicin: C[C@H]1[C@H]([C@H](C[C@@H](O1)O[C@H]2C[C@@](CC3=C2C(=C4C(=C3O)C(=O)C5=C(C4=O)C(=CC=C5)OC)O)(C(=O)CO)O)N)O",
|
| 20 |
+
"Ampicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=CC=C3)N)C(=O)O)C",
|
| 21 |
+
"Amoxicillin: CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](C3=CC=C(C=C3)O)N)C(=O)O)C",
|
| 22 |
+
"Meropenem: C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)N(C)C)C(=O)O)[C@@H](C)O",
|
| 23 |
+
"Tetracycline: C[C@@]1([C@H]2C[C@H]3[C@@H](C(=O)C(=C([C@]3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O",
|
| 24 |
+
"Anhydrotetracycline: CC1=C2C=CC=C(C2=C(C3=C1C[C@H]4[C@@H](C(=O)C(=C([C@]4(C3=O)O)O)C(=O)N)N(C)C)O)O"
|
| 25 |
+
],
|
| 26 |
+
"species": [
|
| 27 |
+
"Staphylococcus aureus"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"label": "E. coli and A. baumannii vs Halicin, Abaucin, Trimethoprim, Sulfamethoxazole, Amikacin, Isoniazid",
|
| 32 |
+
"strings": [
|
| 33 |
+
"Halicin: C1=C(SC(=N1)SC2=NN=C(S2)N)[N+](=O)[O-]",
|
| 34 |
+
"Abaucin: C1CN(CCC12C3=CC=CC=C3NC(=O)O2)CCC4=CC=C(C=C4)C(F)(F)F",
|
| 35 |
+
"Trimethoprim: COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
| 36 |
+
"Sulfamethoxazole: CC1=CC(=NO1)NS(=O)(=O)C2=CC=C(C=C2)N",
|
| 37 |
+
"Amikacin: C1[C@@H]([C@H]([C@@H]([C@H]([C@@H]1NC(=O)[C@H](CCN)O)O[C@@H]2[C@@H]([C@H]([C@@H]([C@H](O2)CO)O)N)O)O)O[C@@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CN)O)O)O)N",
|
| 38 |
+
"Isoniazid: C1=CN=CC=C1C(=O)NN"
|
| 39 |
+
],
|
| 40 |
+
"species": [
|
| 41 |
+
"Escherichia coli",
|
| 42 |
+
"Acinetobacter baumannii"
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"label": "A. baumannii vs Murepavadin, Vancomycin, Zosurabalpin, Plazomicin, Gentamicin, Rifampicin",
|
| 47 |
+
"strings": [
|
| 48 |
+
"Murepavadin: CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N2CCC[C@@H]2C(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N1)CC4=CNC5=CC=CC=C54)[C@@H](C)O)CO)C)CCN)CCN)CC6=CNC7=CC=CC=C76)CCN)CCN)CCCN)CCN",
|
| 49 |
+
"Vancomycin: C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O",
|
| 50 |
+
"Zosurabalpin: CN1[C@H](C(=O)NCC2=C(C=CC=C2SC3=C(CN[C@H](C(=O)N[C@H](C1=O)CCCCN)CCCN)C=CC=N3)C4=CC=C(C=C4)C(=O)O)CC5=CNC6=CC=CC=C65",
|
| 51 |
+
"Plazomicin: C[C@@]1(CO[C@@H]([C@@H]([C@H]1NC)O)O[C@H]2[C@@H](C[C@@H]([C@H]([C@@H]2O)O[C@@H]3[C@@H](CC=C(O3)CNCCO)N)N)NC(=O)[C@H](CCN)O)O",
|
| 52 |
+
"Gentamicin: CC(C1CCC(C(O1)OC2C(CC(C(C2O)OC3C(C(C(CO3)(C)O)NC)O)N)N)N)NC",
|
| 53 |
+
"Rifampicin: C[C@H]1/C=C/C=C(\\C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)[C@](O4)(O/C=C/[C@@H]([C@H]([C@H]([C@@H]([C@@H]([C@@H]([C@H]1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)/C=N/N5CCN(CC5)C)/C"
|
| 54 |
+
],
|
| 55 |
+
"species": [
|
| 56 |
+
"Acinetobacter baumannii"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"label": "E. coli vs Debio-1452, Debio-1452-NH3, Fabimycin, 5-FU, Carmofur, Etoposide",
|
| 61 |
+
"strings": [
|
| 62 |
+
"Debio-1452: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)CC4)N=C3",
|
| 63 |
+
"Debio-1452-NH3: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@@H](C4)N)N=C3",
|
| 64 |
+
"Fabimycin: CC1=C(OC2=CC=CC=C12)CN(C)C(=O)/C=C/C3=CC4=C(NC(=O)[C@H](CC4)[NH3+])N=C3.[Cl-]",
|
| 65 |
+
"5-FU: C1=C(C(=O)NC(=O)N1)F",
|
| 66 |
+
"Carmofur: CCCCCCNC(=O)N1C=C(C(=O)NC1=O)F",
|
| 67 |
+
"Etoposide: C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O"
|
| 68 |
+
],
|
| 69 |
+
"species": [
|
| 70 |
+
"Escherichia coli"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"label": "K. pneumoniae vs Trimethoprim, SCH-79797, Pemetrexed, Nolatrexed, Methotrexate, Raltitrexed",
|
| 75 |
+
"strings": [
|
| 76 |
+
"Trimethoprim: COC1=CC(=CC(=C1OC)OC)CC2=CN=C(N=C2N)N",
|
| 77 |
+
"SCH-79797: CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
| 78 |
+
"Pemetrexed: C1=CC(=CC=C1CCC2=CNC3=C2C(=O)NC(=N3)N)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
| 79 |
+
"Nolatrexed: CC1=C(C2=C(C=C1)N=C(NC2=O)N)SC3=CC=NC=C3",
|
| 80 |
+
"Methotrexate: CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)O)C(=O)O",
|
| 81 |
+
"Raltitrexed: CC1=NC2=C(C=C(C=C2)CN(C)C3=CC=C(S3)C(=O)N[C@@H](CCC(=O)O)C(=O)O)C(=O)N1"
|
| 82 |
+
],
|
| 83 |
+
"species": [
|
| 84 |
+
"Klebsiella pneumoniae"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"label": "K. pneumoniae vs CHIR-090, SCH79797, DBeQ, Tenovin-6, Pyrimethamine, Aminopterin",
|
| 89 |
+
"strings": [
|
| 90 |
+
"CHIR-090: C[C@H]([C@@H](C(=O)NO)NC(=O)C1=CC=C(C=C1)C#CC2=CC=C(C=C2)CN3CCOCC3)O",
|
| 91 |
+
"SCH79797: CC(C)C1=CC=C(C=C1)CN2C=CC3=C2C=CC4=C3C(=NC(=N4)NC5CC5)N",
|
| 92 |
+
"DBeQ: C1=CC=C(C=C1)CNC2=NC(=NC3=CC=CC=C32)NCC4=CC=CC=C4",
|
| 93 |
+
"Tenovin-6: CC(C)(C)C1=CC=C(C=C1)C(=O)NC(=S)NC2=CC=C(C=C2)NC(=O)CCCCN(C)C",
|
| 94 |
+
"Pyrimethamine: CCC1=C(C(=NC(=N1)N)N)C2=CC=C(C=C2)Cl",
|
| 95 |
+
"Aminopterin: C1=CC(=CC=C1C(=O)N[C@@H](CCC(=O)O)C(=O)O)NCC2=CN=C3C(=N2)C(=NC(=N3)N)N"
|
| 96 |
+
],
|
| 97 |
+
"species": [
|
| 98 |
+
"Klebsiella pneumoniae"
|
| 99 |
+
]
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"file examples": [
|
| 103 |
+
{
|
| 104 |
+
"label": "E. coli training data from Stokes J. et al., Cell (2020)",
|
| 105 |
+
"file": "example-data/stokes2020-eco.csv",
|
| 106 |
+
"column": "SMILES",
|
| 107 |
+
"species": "Escherichia coli"
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"label": "A. baumannii training data from Liu (2023)",
|
| 111 |
+
"file": "example-data/liu23-abau.csv",
|
| 112 |
+
"column": "SMILES",
|
| 113 |
+
"species": "Acinetobacter baumannii"
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"label": "S. aureus training data from Wong (2024)",
|
| 117 |
+
"file": "example-data/wong24-sau-tox-5000.csv",
|
| 118 |
+
"column": "SMILES",
|
| 119 |
+
"species": "Staphylococcus aureus"
|
| 120 |
+
}
|
| 121 |
+
]
|
| 122 |
+
}
|