| | from __future__ import annotations |
| |
|
| | import csv |
| | import os |
| | import tempfile |
| | from pathlib import Path |
| | from typing import Any |
| |
|
| | import gradio as gr |
| | import numpy as np |
| | import pandas as pd |
| |
|
| | from space_runtime import ( |
| | AssayQuery, |
| | load_compatibility_model_from_hub, |
| | molecule_ui_metrics, |
| | rank_compounds, |
| | serialize_assay_query, |
| | ) |
| |
|
# Runtime configuration; each value can be overridden via an environment variable.

# Hub repository id handed to the model loader in `_load_model`.
MODEL_REPO_ID = os.environ.get(
    "MODEL_REPO_ID",
    "lighteternal/BioAssayAlign-Qwen3-Embedding-0.6B-Compatibility",
)
# Upper bound on unique SMILES kept per run (see `_collect_smiles`).
MAX_INPUT_SMILES = int(os.environ.get("MAX_INPUT_SMILES", "3000"))
# Initial position of the "Top-K rows to display" slider.
DEFAULT_TOP_K = int(os.environ.get("DEFAULT_TOP_K", "50"))
# Only the exact string "1" switches the background warm-up thread on.
ENABLE_BACKGROUND_WARMUP = os.environ.get("ENABLE_BACKGROUND_WARMUP", "0") == "1"
| |
|
| | CSS = """ |
| | @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500&family=Fraunces:opsz,wght@9..144,600;9..144,700&display=swap'); |
| | |
| | :root { |
| | --paper: #f6f1e6; |
| | --paper-deep: #ece4d2; |
| | --ink: #17252d; |
| | --ink-soft: #5f6d75; |
| | --accent: #165b55; |
| | --accent-deep: #0c4641; |
| | --accent-soft: #dbeee8; |
| | --accent-warm: #b8643d; |
| | --accent-bright: #d06a3b; |
| | --accent-bright-deep: #b25124; |
| | --accent-warm-soft: #f3e1d5; |
| | --line: #c6cdbf; |
| | --warning: #8a4b0f; |
| | --good: #0e6b48; |
| | --card: rgba(255, 252, 246, 0.9); |
| | --card-strong: rgba(255, 255, 255, 0.96); |
| | --shadow: 0 20px 45px rgba(23, 37, 45, 0.08); |
| | } |
| | |
| | .gradio-container { |
| | --body-text-color: var(--ink); |
| | --color-text-body: var(--ink); |
| | --block-title-text-color: var(--ink); |
| | --input-text-color: var(--ink); |
| | --input-placeholder-color: #6d7c83; |
| | font-family: "IBM Plex Sans", sans-serif; |
| | background: |
| | radial-gradient(circle at 10% 0%, rgba(22,91,85,0.13), transparent 26rem), |
| | radial-gradient(circle at 92% 8%, rgba(184,100,61,0.12), transparent 24rem), |
| | linear-gradient(180deg, #fbf8f1 0%, var(--paper) 100%); |
| | color: var(--ink); |
| | } |
| | |
| | #hero { |
| | border: 1px solid var(--line); |
| | background: |
| | linear-gradient(135deg, rgba(255,255,255,0.98), rgba(241,247,245,0.9)), |
| | linear-gradient(90deg, rgba(22,91,85,0.06), rgba(184,100,61,0.04)); |
| | border-radius: 30px; |
| | padding: 1.5rem 1.65rem; |
| | box-shadow: var(--shadow); |
| | } |
| | |
| | .eyebrow { |
| | font-family: "IBM Plex Mono", monospace; |
| | font-size: 0.78rem; |
| | letter-spacing: 0.08em; |
| | text-transform: uppercase; |
| | color: var(--accent-warm); |
| | } |
| | |
| | .hero-title { |
| | font-family: "Fraunces", serif; |
| | font-size: 2.35rem; |
| | line-height: 1.05; |
| | margin: 0.2rem 0 0.5rem 0; |
| | } |
| | |
| | .hero-copy { |
| | color: var(--ink-soft); |
| | max-width: 60rem; |
| | font-size: 1rem; |
| | } |
| | |
| | .hero-grid { |
| | display: grid; |
| | grid-template-columns: minmax(0, 1.6fr) minmax(19rem, 0.9fr); |
| | gap: 1.1rem; |
| | align-items: start; |
| | } |
| | |
| | .hero-side { |
| | background: rgba(255,255,255,0.75); |
| | border: 1px solid rgba(198,205,191,0.8); |
| | border-radius: 20px; |
| | padding: 1rem 1.05rem; |
| | } |
| | |
| | .hero-side-title { |
| | font-family: "IBM Plex Mono", monospace; |
| | font-size: 0.74rem; |
| | letter-spacing: 0.08em; |
| | text-transform: uppercase; |
| | color: var(--accent-warm); |
| | margin-bottom: 0.55rem; |
| | } |
| | |
| | .hero-list { |
| | margin: 0; |
| | padding-left: 1rem; |
| | color: var(--ink); |
| | } |
| | |
| | .hero-list li + li { |
| | margin-top: 0.45rem; |
| | } |
| | |
| | .metric-strip { |
| | display: grid; |
| | grid-template-columns: minmax(0, 1.2fr) minmax(0, 1fr); |
| | gap: 0.8rem; |
| | } |
| | |
| | .metric-card { |
| | border: 1px solid var(--line); |
| | background: linear-gradient(180deg, rgba(255,255,255,0.92), rgba(248,244,236,0.9)); |
| | padding: 0.72rem 0.85rem; |
| | border-radius: 18px; |
| | min-height: 4.9rem; |
| | box-shadow: 0 8px 24px rgba(23,37,45,0.04); |
| | } |
| | |
| | .metric-card span { |
| | color: var(--ink-soft); |
| | display: block; |
| | } |
| | |
| | .metric-card strong { |
| | display: block; |
| | font-size: 1rem; |
| | margin-top: 0.15rem; |
| | color: var(--ink); |
| | } |
| | |
| | .metric-card a { |
| | color: var(--ink); |
| | text-decoration: none; |
| | } |
| | |
| | .metric-card a:hover { |
| | color: var(--accent-deep); |
| | text-decoration: underline; |
| | } |
| | |
| | .compact-spec { |
| | margin: 0.7rem 0 1rem 0; |
| | border: 1px solid var(--line); |
| | border-radius: 18px; |
| | background: rgba(255,255,255,0.82); |
| | padding: 0.78rem 0.9rem; |
| | color: var(--ink-soft); |
| | font-size: 0.92rem; |
| | line-height: 1.45; |
| | } |
| | |
| | .workspace { |
| | gap: 1rem !important; |
| | align-items: stretch; |
| | } |
| | |
| | .workspace > div { |
| | gap: 1rem !important; |
| | } |
| | |
| | .pane { |
| | background: linear-gradient(180deg, rgba(255,255,255,0.94), rgba(248,244,236,0.92)); |
| | border: 1px solid var(--line); |
| | border-radius: 24px; |
| | padding: 0.95rem 1rem 1rem 1rem; |
| | box-shadow: var(--shadow); |
| | } |
| | |
| | .pane-header { |
| | display: flex; |
| | align-items: baseline; |
| | justify-content: space-between; |
| | gap: 0.75rem; |
| | margin-bottom: 0.9rem; |
| | } |
| | |
| | .pane-title { |
| | font-family: "Fraunces", serif; |
| | font-size: 1.35rem; |
| | line-height: 1.1; |
| | } |
| | |
| | .pane-kicker { |
| | font-family: "IBM Plex Mono", monospace; |
| | font-size: 0.72rem; |
| | letter-spacing: 0.08em; |
| | text-transform: uppercase; |
| | color: var(--accent-warm); |
| | } |
| | |
| | .pane-copy { |
| | color: var(--ink-soft); |
| | font-size: 0.95rem; |
| | margin-bottom: 1rem; |
| | } |
| | |
| | .helper-row { |
| | display: grid; |
| | grid-template-columns: repeat(3, minmax(0, 1fr)); |
| | gap: 0.7rem; |
| | margin-bottom: 0.8rem; |
| | } |
| | |
| | .helper-chip { |
| | background: var(--accent-soft); |
| | border: 1px solid rgba(22,91,85,0.12); |
| | border-radius: 14px; |
| | padding: 0.75rem 0.8rem; |
| | } |
| | |
| | .helper-chip strong { |
| | display: block; |
| | margin-bottom: 0.15rem; |
| | } |
| | |
| | .section-note { |
| | color: var(--ink-soft); |
| | font-size: 0.88rem; |
| | margin: 0.1rem 0 0.32rem 0; |
| | } |
| | |
| | .action-row { |
| | display: flex; |
| | gap: 0.7rem; |
| | flex-wrap: wrap; |
| | } |
| | |
| | .summary-shell, |
| | .results-shell { |
| | background: linear-gradient(180deg, rgba(255,255,255,0.94), rgba(248,244,236,0.92)); |
| | border: 1px solid var(--line); |
| | border-radius: 24px; |
| | padding: 0.9rem 1rem; |
| | box-shadow: var(--shadow); |
| | } |
| | |
| | .results-shell, |
| | .results-shell *, |
| | .results-shell p, |
| | .results-shell li, |
| | .results-shell strong, |
| | .results-shell code { |
| | color: var(--ink) !important; |
| | } |
| | |
| | .results-shell code, |
| | .summary-shell code, |
| | .guide-card code, |
| | .footer-note code, |
| | .section-note code, |
| | .pane code { |
| | background: rgba(22, 91, 85, 0.08) !important; |
| | color: var(--ink) !important; |
| | border-radius: 8px !important; |
| | padding: 0.08rem 0.35rem !important; |
| | box-shadow: none !important; |
| | } |
| | |
| | .results-callout { |
| | padding: 0.15rem 0.1rem 0.2rem 0.1rem; |
| | } |
| | |
| | .results-callout h3 { |
| | margin: 0 0 0.55rem 0; |
| | font-family: "Fraunces", serif; |
| | font-size: 1.1rem; |
| | } |
| | |
| | .results-callout ul { |
| | margin: 0; |
| | padding-left: 1rem; |
| | } |
| | |
| | .results-callout li + li { |
| | margin-top: 0.28rem; |
| | } |
| | |
| | .results-callout p { |
| | margin: 0.7rem 0 0 0; |
| | color: var(--ink-soft) !important; |
| | line-height: 1.5; |
| | } |
| | |
| | .results-metrics { |
| | display: grid; |
| | grid-template-columns: repeat(3, minmax(0, 1fr)); |
| | gap: 0.65rem; |
| | margin-top: 0.55rem; |
| | } |
| | |
| | .results-metric { |
| | border: 1px solid var(--line); |
| | border-radius: 14px; |
| | background: rgba(255,255,255,0.9); |
| | padding: 0.65rem 0.75rem; |
| | } |
| | |
| | .results-metric strong { |
| | display: block; |
| | margin-bottom: 0.16rem; |
| | } |
| | |
| | .examples-note { |
| | color: var(--ink-soft); |
| | font-size: 0.92rem; |
| | margin-top: 0.1rem; |
| | } |
| | |
| | .footer-note { |
| | color: var(--ink-soft); |
| | font-size: 0.9rem; |
| | line-height: 1.5; |
| | } |
| | |
| | .gradio-container .prose, |
| | .gradio-container .prose *, |
| | .gradio-container label, |
| | .gradio-container .label-wrap, |
| | .gradio-container .label-wrap span, |
| | .gradio-container [data-testid="block-info"], |
| | .gradio-container .gr-form, |
| | .gradio-container .gr-button, |
| | .gradio-container input, |
| | .gradio-container textarea, |
| | .gradio-container select, |
| | .gradio-container .wrap, |
| | .gradio-container .wrap textarea, |
| | .gradio-container .wrap input, |
| | .gradio-container table, |
| | .gradio-container th, |
| | .gradio-container td { |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container .block, |
| | .gradio-container .block > label, |
| | .gradio-container .block .container, |
| | .gradio-container .block .input-container, |
| | .gradio-container .block .wrap, |
| | .gradio-container .block .wrap-inner, |
| | .gradio-container .block fieldset, |
| | .gradio-container .block .form, |
| | .gradio-container .block .inner, |
| | .gradio-container [data-testid="textbox"], |
| | .gradio-container [data-testid="dropdown"], |
| | .gradio-container [data-testid="number"], |
| | .gradio-container [data-testid="file-upload"], |
| | .gradio-container [data-testid="file"], |
| | .gradio-container [data-testid="accordion"], |
| | .gradio-container [data-testid="dataframe"] { |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container input, |
| | .gradio-container textarea, |
| | .gradio-container select, |
| | .gradio-container [role="combobox"], |
| | .gradio-container [role="listbox"], |
| | .gradio-container [role="option"], |
| | .gradio-container .choices, |
| | .gradio-container .choices__inner, |
| | .gradio-container .choices__list, |
| | .gradio-container .choices__item { |
| | background: #fffdf8 !important; |
| | border: 1px solid #e3e8e0 !important; |
| | border-radius: 14px !important; |
| | box-shadow: inset 0 1px 0 rgba(255,255,255,0.7) !important; |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container [role="listbox"], |
| | .gradio-container .choices__list, |
| | .gradio-container .choices__item { |
| | background: #fffdf8 !important; |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container [role="option"][aria-selected="true"], |
| | .gradio-container .choices__item--selectable.is-highlighted { |
| | background: #e7f0ed !important; |
| | color: #0d3d38 !important; |
| | } |
| | |
| | .gradio-container input:focus, |
| | .gradio-container textarea:focus, |
| | .gradio-container select:focus { |
| | border-color: var(--accent) !important; |
| | box-shadow: 0 0 0 3px rgba(22,91,85,0.12) !important; |
| | } |
| | |
| | .gradio-container .block > label, |
| | .gradio-container .block .container.show_textbox_border, |
| | .gradio-container .block .input-container, |
| | .gradio-container .block .wrap, |
| | .gradio-container .block .wrap-inner, |
| | .gradio-container .block fieldset, |
| | .gradio-container [data-testid="textbox"], |
| | .gradio-container [data-testid="dropdown"], |
| | .gradio-container [data-testid="number"], |
| | .gradio-container [data-testid="file-upload"] { |
| | background: #fffdf8 !important; |
| | border-color: #e3e8e0 !important; |
| | box-shadow: none !important; |
| | outline: none !important; |
| | } |
| | |
| | .gradio-container .block > label, |
| | .gradio-container .block .container.show_textbox_border, |
| | .gradio-container .block .wrap, |
| | .gradio-container .block .wrap-inner, |
| | .gradio-container .block fieldset { |
| | border: 1px solid #e3e8e0 !important; |
| | border-radius: 14px !important; |
| | } |
| | |
| | .gradio-container .block > label:focus-within, |
| | .gradio-container .block .container.show_textbox_border:focus-within, |
| | .gradio-container .block .wrap:focus-within, |
| | .gradio-container .block .wrap-inner:focus-within, |
| | .gradio-container .block fieldset:focus-within { |
| | border-color: rgba(22, 91, 85, 0.4) !important; |
| | box-shadow: 0 0 0 3px rgba(22, 91, 85, 0.08) !important; |
| | } |
| | |
| | .gradio-container .form, |
| | .gradio-container .form.svelte-d5xbca, |
| | .gradio-container .form > .block, |
| | .gradio-container .form > .block.padded, |
| | .gradio-container .form > .block.auto-margin { |
| | background: transparent !important; |
| | border: none !important; |
| | box-shadow: none !important; |
| | } |
| | |
| | .gradio-container .block:has(textarea[data-testid="textbox"]), |
| | .gradio-container .block:has(input), |
| | .gradio-container .block:has(select), |
| | .gradio-container .block:has([data-testid="file-upload"]), |
| | .gradio-container .block:has(.svelte-1xfsv4t.container) { |
| | border-color: transparent !important; |
| | background: transparent !important; |
| | box-shadow: none !important; |
| | } |
| | |
| | .gradio-container button { |
| | border-radius: 14px !important; |
| | font-weight: 600 !important; |
| | transition: transform 0.18s ease, box-shadow 0.18s ease, background 0.18s ease !important; |
| | } |
| | |
| | .gradio-container button:hover { |
| | transform: translateY(-1px); |
| | } |
| | |
| | .gradio-container button.primary { |
| | background: linear-gradient(180deg, var(--accent-bright), var(--accent-bright-deep)) !important; |
| | color: #f7fbfa !important; |
| | border: none !important; |
| | box-shadow: 0 14px 26px rgba(184,100,61,0.24) !important; |
| | } |
| | |
| | .gradio-container button.secondary { |
| | background: linear-gradient(180deg, #f6e7da, #f0ddcd) !important; |
| | color: var(--ink) !important; |
| | border: 1px solid #dbc4b4 !important; |
| | } |
| | |
| | .gradio-container .tab-nav button { |
| | color: var(--ink) !important; |
| | font-weight: 600 !important; |
| | border-radius: 999px !important; |
| | padding: 0.55rem 1rem !important; |
| | background: rgba(255,255,255,0.62) !important; |
| | border: 1px solid rgba(198,205,191,0.7) !important; |
| | } |
| | |
| | .gradio-container .tab-nav button.selected, |
| | .gradio-container button[role="tab"][aria-selected="true"], |
| | .gradio-container [role="tab"][aria-selected="true"] { |
| | background: linear-gradient(180deg, #d9ece8, #cde4de) !important; |
| | color: #0d3d38 !important; |
| | box-shadow: 0 6px 16px rgba(22,91,85,0.12); |
| | border-color: rgba(22,91,85,0.18) !important; |
| | } |
| | |
| | .gradio-container .gradio-dataframe table, |
| | .gradio-container .gradio-dataframe th, |
| | .gradio-container .gradio-dataframe td { |
| | background: #fffdf7 !important; |
| | } |
| | |
| | .gradio-container .gradio-dataframe, |
| | .gradio-container .gradio-dataframe .wrap, |
| | .gradio-container .gradio-dataframe .table-wrap, |
| | .gradio-container .gradio-dataframe .scrollable, |
| | .gradio-container .gradio-dataframe .table-container, |
| | .gradio-container .gradio-dataframe .cell-wrap, |
| | .gradio-container .gradio-dataframe .cell-input, |
| | .gradio-container .gradio-dataframe .cell-output, |
| | .gradio-container [data-testid="dataframe"], |
| | .gradio-container [data-testid="dataframe"] .wrap, |
| | .gradio-container [data-testid="dataframe"] .table-wrap, |
| | .gradio-container [data-testid="dataframe"] .scrollable, |
| | .gradio-container [data-testid="dataframe"] .cell-wrap, |
| | .gradio-container [data-testid="dataframe"] .cell-input, |
| | .gradio-container [data-testid="dataframe"] .cell-output, |
| | .result-frame, |
| | .result-frame *, |
| | .result-frame .wrap, |
| | .result-frame .table-wrap, |
| | .result-frame .table-container, |
| | .result-frame .scrollable { |
| | background: #fffdf7 !important; |
| | border-color: var(--line) !important; |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container .gradio-dataframe th { |
| | background: #eaf2ef !important; |
| | } |
| | |
| | .gradio-container .gradio-dataframe tr:nth-child(even) td, |
| | .gradio-container [data-testid="dataframe"] tr:nth-child(even) td { |
| | background: #fcfaf4 !important; |
| | } |
| | |
| | .gradio-container .gr-accordion, |
| | .gradio-container .gr-accordion *, |
| | .gradio-container [data-testid="accordion"], |
| | .gradio-container [data-testid="accordion"] * { |
| | color: var(--ink) !important; |
| | } |
| | |
| | .result-accordion, |
| | .result-accordion *, |
| | .result-accordion [data-testid="accordion"], |
| | .result-accordion [data-testid="accordion"] * { |
| | color: var(--ink) !important; |
| | background: #fffdf7 !important; |
| | } |
| | |
| | .result-file, |
| | .result-file *, |
| | .result-file .wrap, |
| | .result-file .file-preview, |
| | .result-file .file-preview-holder, |
| | .result-file [data-testid="file"] { |
| | background: #fffdf7 !important; |
| | color: var(--ink) !important; |
| | border-color: var(--line) !important; |
| | } |
| | |
| | .result-file button { |
| | background: linear-gradient(180deg, #f6e7da, #f0ddcd) !important; |
| | color: var(--ink) !important; |
| | border: 1px solid #dbc4b4 !important; |
| | } |
| | |
| | .gradio-container .choices__list--dropdown, |
| | .gradio-container .choices__list[aria-expanded], |
| | .gradio-container .choices__list--single, |
| | .gradio-container .choices__list--multiple { |
| | background: #fffdf8 !important; |
| | color: var(--ink) !important; |
| | } |
| | |
| | .gradio-container .gr-box, |
| | .gradio-container .gr-group, |
| | .gradio-container .gr-accordion { |
| | border-color: var(--line) !important; |
| | } |
| | |
| | .gradio-container [data-testid="file"] button, |
| | .gradio-container [data-testid="download-button"], |
| | .gradio-container .download-button { |
| | background: linear-gradient(180deg, #f6e7da, #f0ddcd) !important; |
| | color: var(--ink) !important; |
| | border: 1px solid #dbc4b4 !important; |
| | } |
| | |
| | @media (max-width: 980px) { |
| | .hero-grid, |
| | .helper-row, |
| | .results-metrics, |
| | .metric-strip { |
| | grid-template-columns: 1fr; |
| | } |
| | |
| | .hero-title { |
| | font-size: 1.95rem; |
| | } |
| | } |
| | """ |
| |
|
# Gradio theme passed to `gr.Blocks(...)` alongside the `CSS` string.
# The font lists name the IBM Plex families first, followed by generic fallbacks.
THEME = gr.themes.Soft(
    primary_hue="emerald",
    secondary_hue="stone",
    neutral_hue="slate",
    font=["IBM Plex Sans", "ui-sans-serif", "system-ui", "sans-serif"],
    font_mono=["IBM Plex Mono", "ui-monospace", "monospace"],
)
| |
|
# Preset assays offered by the "Example assay" dropdown. Each entry carries the
# assay text fields plus a newline-separated candidate SMILES list.
EXAMPLES = {
    "JAK2 cell assay": {
        "title": "JAK2 inhibition assay",
        "description": "Cell-based luminescence assay measuring JAK2 inhibition in HEK293 cells.",
        "organism": "Homo sapiens",
        "readout": "luminescence",
        "assay_format": "cell-based",
        "assay_type": "inhibition",
        "target_uniprot": "O60674",
        "smiles": (
            "CC1=CC(=O)N(C)C(=O)N1\n"
            "CC(=O)Nc1ncc(C#N)c(Nc2ccc(F)c(Cl)c2)n1\n"
            "CCOc1ccc2nc(N3CCN(C)CC3)n(C)c(=O)c2c1\n"
            "CCO"
        ),
    },
    "ALDH1A1 fluorescence": {
        "title": "ALDH1A1 inhibition assay",
        "description": "Cell-based fluorescence assay measuring ALDH1A1 inhibition in human cells.",
        "organism": "Homo sapiens",
        "readout": "fluorescence",
        "assay_format": "cell-based",
        "assay_type": "inhibition",
        "target_uniprot": "P00352",
        "smiles": (
            "CCOC1=CC=CC=C1\n"
            "CC1=CC(=O)N(C)C(=O)N1\n"
            "CCN(CC)CCOC1=CC=CC=C1\n"
            "CCO"
        ),
    },
    "BTK binding quick check": {
        "title": "BTK kinase inhibitor binding assay",
        "description": "In vitro kinase-domain binding assay for Bruton's tyrosine kinase inhibitor ranking.",
        "organism": "Homo sapiens",
        "readout": "binding",
        "assay_format": "biochemical",
        "assay_type": "binding",
        "target_uniprot": "Q06187",
        "smiles": (
            "CC1=NC(=O)N(C)C(=O)N1\n"
            "c1ccccc1\n"
            "CCO"
        ),
    },
}

# The example whose values pre-populate the form when the UI is first built.
DEFAULT_EXAMPLE_NAME = "JAK2 cell assay"
DEFAULT_EXAMPLE = EXAMPLES[DEFAULT_EXAMPLE_NAME]
| |
|
| |
|
| | def _parse_smiles_text(value: str | None) -> list[str]: |
| | if not value: |
| | return [] |
| | lines = [line.strip() for line in value.replace(",", "\n").splitlines()] |
| | return [line for line in lines if line] |
| |
|
| |
|
def _read_uploaded_smiles(file_obj: Any) -> list[str]:
    """Extract SMILES strings from an uploaded file.

    Args:
        file_obj: ``None``, an object exposing a ``name`` attribute holding the
            file path, or anything whose ``str()`` is the file path.

    Returns:
        Stripped, non-empty entries in file order. Text-like files
        (``.txt``/``.smi``/``.smiles``) contribute one entry per line. CSV files
        contribute the first of the ``smiles``, ``canonical_smiles`` or
        ``SMILES`` columns that exists, falling back to the first column.
        ``None`` and CSV files without any parsable content yield ``[]``.

    Raises:
        gr.Error: If the file extension is not one of the supported types.
    """
    if file_obj is None:
        return []
    path = Path(file_obj.name if hasattr(file_obj, "name") else str(file_obj))
    suffix = path.suffix.lower()
    if suffix in {".txt", ".smi", ".smiles"}:
        # "utf-8-sig" drops a leading byte-order mark that would otherwise be
        # glued onto the first SMILES; undecodable bytes must not abort the read.
        text = path.read_text(encoding="utf-8-sig", errors="replace")
        return [line.strip() for line in text.splitlines() if line.strip()]
    if suffix == ".csv":
        try:
            frame = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte upload simply carries no candidates.
            return []
        column = next(
            (name for name in ("smiles", "canonical_smiles", "SMILES") if name in frame.columns),
            frame.columns[0],
        )
        # Blank cells are parsed as NaN, and str(NaN) is the non-empty string
        # "nan" -- drop them so they are not submitted as bogus SMILES.
        cleaned = (str(item).strip() for item in frame[column].dropna().tolist())
        return [item for item in cleaned if item]
    raise gr.Error("Upload a .csv, .txt, .smi, or .smiles file.")
| |
|
| |
|
def _collect_smiles(smiles_text: str, upload_file: Any) -> tuple[list[str], str | None]:
    """Merge pasted and uploaded SMILES into one de-duplicated list.

    Pasted entries come first, followed by uploaded ones; only the first
    occurrence of each exact string is kept. When more than
    ``MAX_INPUT_SMILES`` unique entries remain, the list is cut to that length
    and a warning message is returned alongside it; otherwise the warning is
    ``None``.
    """
    combined = _parse_smiles_text(smiles_text) + _read_uploaded_smiles(upload_file)
    # dict keys keep insertion order, so this preserves first-seen ordering.
    unique = list(dict.fromkeys(combined))
    if len(unique) <= MAX_INPUT_SMILES:
        return unique, None
    notice = f"Input truncated to the first {MAX_INPUT_SMILES} unique SMILES for interactive use."
    return unique[:MAX_INPUT_SMILES], notice
| |
|
| |
|
def _load_model():
    """Return the compatibility model for the configured ``MODEL_REPO_ID``.

    Thin wrapper around ``load_compatibility_model_from_hub``. Whether repeated
    calls reuse an already-loaded model is decided by that loader, not here.
    """
    model = load_compatibility_model_from_hub(MODEL_REPO_ID)
    return model
| |
|
| |
|
def _warm_model_background() -> None:
    """Best-effort model warm-up, intended to run on a background thread.

    Calls ``_load_model`` and never propagates an exception: a failed warm-up
    must not take the app down, since ``run_ranking`` loads the model itself.
    Failures are logged with their traceback instead of being discarded.
    """
    # Imported locally because nothing else in this module needs logging.
    import logging

    try:
        _load_model()
    except Exception:
        logging.getLogger(__name__).warning(
            "Background model warm-up failed; the model will be loaded on first request.",
            exc_info=True,
        )
| |
|
| |
|
def _priority_band(relative_score: float, rank: int, total: int) -> str:
    """Map a candidate to a shortlist label.

    For lists of at most three candidates the label depends on rank alone
    (first, second, everything else). For longer lists it depends on the
    0-100 relative score, using fixed cut-offs at 85, 60 and 35.
    """
    if total <= 3:
        if rank == 1:
            return "Screen first"
        if rank == 2:
            return "Worth a look"
        return "Low priority"

    # Ordered highest cut-off first; the first floor reached decides the label.
    cutoffs = (
        (85, "Screen first"),
        (60, "Worth a look"),
        (35, "Middle pack"),
    )
    for floor, label in cutoffs:
        if relative_score >= floor:
            return label
    return "Low priority"
| |
|
| |
|
def _relative_scores(scores: list[float]) -> list[float]:
    """Rescale raw scores to the 0-100 range within this list only.

    If all scores are (numerically) identical, the first entry gets 100 and
    every other entry gets 50. Otherwise the minimum maps to 0 and the maximum
    to 100. Results are clamped to [0, 100].
    """
    if not scores:
        return []
    lowest = min(scores)
    spread = max(scores) - lowest
    if spread <= 1e-8:
        return [100.0 if position == 0 else 50.0 for position in range(len(scores))]
    # max(0.0, x) also turns a possible -0.0 into +0.0, so the UI never shows "-0.0".
    return [min(100.0, max(0.0, 100.0 * (value - lowest) / spread)) for value in scores]


def _decorate_valid_rows(valid_rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Attach display fields to each successfully scored row.

    Args:
        valid_rows: Rows carrying at least ``score`` and ``canonical_smiles``.
            The position in the list is used as the rank (first row = rank 1),
            so the caller is expected to pass them best-first.

    Returns:
        New dicts containing every original key plus ``relative_score``
        (0-100, one decimal), ``priority_band``, and the rounded descriptors
        ``mol_wt``, ``logp``, ``tpsa`` and ``heavy_atoms`` taken from
        ``molecule_ui_metrics``. An empty input yields an empty list.
    """
    if not valid_rows:
        return []
    # Min/max and the per-row score are all kept as Python floats. Taking the
    # minimum from a float32 array and comparing it against float64 scores
    # could push the lowest row slightly below zero (rendered as "-0.0").
    relative = _relative_scores([float(row["score"]) for row in valid_rows])
    total = len(valid_rows)
    decorated: list[dict[str, Any]] = []
    for rank, (row, relative_score) in enumerate(zip(valid_rows, relative), start=1):
        metrics = molecule_ui_metrics(row["canonical_smiles"])
        decorated.append(
            {
                **row,
                "relative_score": round(relative_score, 1),
                "priority_band": _priority_band(relative_score, rank, total),
                "mol_wt": round(float(metrics["mol_wt"]), 1),
                "logp": round(float(metrics["logp"]), 2),
                "tpsa": round(float(metrics["tpsa"]), 1),
                "heavy_atoms": int(metrics["heavy_atoms"]),
            }
        )
    return decorated
| |
|
| |
|
| | def _build_summary(query_text: str, valid_rows: list[dict[str, Any]], invalid_rows: list[dict[str, Any]], warning: str | None) -> str: |
| | best = valid_rows[0] if valid_rows else None |
| | bullets = [f"<li><strong>Ranked:</strong> {len(valid_rows)}</li>"] |
| | if invalid_rows: |
| | bullets.append(f"<li><strong>Rejected:</strong> {len(invalid_rows)}</li>") |
| | if best is not None: |
| | bullets.append( |
| | f"<li><strong>Top candidate:</strong> <code>{best['canonical_smiles']}</code> · {best['priority_band']} · relative score {best['relative_score']:.1f}/100</li>" |
| | ) |
| | if warning: |
| | bullets.append(f"<li><strong>Warning:</strong> {warning}</li>") |
| | return ( |
| | "<div class='results-callout'>" |
| | "<h3>Ranking summary</h3>" |
| | f"<ul>{''.join(bullets)}</ul>" |
| | "<div class='results-metrics'>" |
| | "<div class='results-metric'><strong>Priority</strong>Shortlist cue derived from the within-list ranking.</div>" |
| | "<div class='results-metric'><strong>Relative score</strong>0–100 rescaling inside this submitted list only.</div>" |
| | "<div class='results-metric'><strong>Model score</strong>Internal logit retained for debugging and reproducibility.</div>" |
| | "</div>" |
| | "</div>" |
| | ) |
| |
|
| |
|
| | def _results_to_csv(valid_rows: list[dict[str, Any]], invalid_rows: list[dict[str, Any]]) -> str | None: |
| | rows = valid_rows + invalid_rows |
| | if not rows: |
| | return None |
| | handle = tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False, newline="") |
| | writer = csv.DictWriter( |
| | handle, |
| | fieldnames=[ |
| | "rank", |
| | "priority_band", |
| | "relative_score_100", |
| | "input_smiles", |
| | "canonical_smiles", |
| | "smiles_hash", |
| | "mol_wt", |
| | "logp", |
| | "tpsa", |
| | "heavy_atoms", |
| | "model_score", |
| | "valid", |
| | "error", |
| | ], |
| | ) |
| | writer.writeheader() |
| | rank = 1 |
| | for row in valid_rows: |
| | writer.writerow( |
| | { |
| | "rank": rank, |
| | "priority_band": row["priority_band"], |
| | "relative_score_100": row["relative_score"], |
| | "input_smiles": row["input_smiles"], |
| | "canonical_smiles": row["canonical_smiles"], |
| | "smiles_hash": row["smiles_hash"], |
| | "mol_wt": row["mol_wt"], |
| | "logp": row["logp"], |
| | "tpsa": row["tpsa"], |
| | "heavy_atoms": row["heavy_atoms"], |
| | "model_score": row["score"], |
| | "valid": True, |
| | "error": "", |
| | } |
| | ) |
| | rank += 1 |
| | for row in invalid_rows: |
| | writer.writerow( |
| | { |
| | "rank": "", |
| | "input_smiles": row["input_smiles"], |
| | "canonical_smiles": "", |
| | "smiles_hash": "", |
| | "mol_wt": "", |
| | "logp": "", |
| | "tpsa": "", |
| | "heavy_atoms": "", |
| | "model_score": "", |
| | "valid": False, |
| | "error": row.get("error", "invalid_smiles"), |
| | } |
| | ) |
| | handle.close() |
| | return handle.name |
| |
|
| |
|
def run_ranking(
    title: str,
    description: str,
    organism: str,
    readout: str,
    assay_format: str,
    assay_type: str,
    target_uniprot: str,
    smiles_text: str,
    upload_file: Any,
    top_k: int,
):
    """Rank the submitted candidates against the assay described in the form.

    Args:
        title, description, organism, readout, assay_format, assay_type:
            Free-text assay fields; ``None`` or empty values are sent as "".
        target_uniprot: Comma-separated UniProt accessions; ``None`` or blank
            yields an empty target list.
        smiles_text: Pasted candidate SMILES.
        upload_file: Optional uploaded candidate file.
        top_k: Value forwarded to ``rank_compounds`` as ``top_k``; a falsy
            value is forwarded as ``None``.

    Returns:
        A 6-tuple matching the click handler's outputs: summary HTML, the
        serialized assay text, the ranked-candidates DataFrame, visibility
        update for the "Rejected inputs" heading, value/visibility update for
        the rejected-inputs table, and the export CSV path (or ``None``).

    Raises:
        gr.Error: If no SMILES were provided by paste or upload.
    """
    smiles_values, warning = _collect_smiles(smiles_text, upload_file)
    if not smiles_values:
        raise gr.Error("Provide at least one SMILES entry by paste or file upload.")
    # Guard against None like every other field: a cleared textbox may not
    # deliver an empty string.
    uniprot_ids = [token.strip() for token in (target_uniprot or "").split(",") if token.strip()]
    query = AssayQuery(
        title=title or "",
        description=description or "",
        organism=organism or "",
        readout=readout or "",
        assay_format=assay_format or "",
        assay_type=assay_type or "",
        target_uniprot=uniprot_ids,
    )
    assay_text = serialize_assay_query(query)
    model = _load_model()
    # The slider may hand over a float; pass a clean integer count downstream.
    limit = int(top_k) if top_k else None
    ranked = rank_compounds(model, assay_text=assay_text, smiles_list=smiles_values, top_k=limit)
    invalid_rows = [row for row in ranked if not row["valid"]]
    valid_rows = _decorate_valid_rows([row for row in ranked if row["valid"]])

    display_columns = [
        "rank",
        "priority",
        "relative_score_100",
        "canonical_smiles",
        "mol_wt",
        "logp",
        "tpsa",
        "heavy_atoms",
        "model_score",
    ]
    display_rows = [
        {
            "rank": idx + 1,
            "priority": row["priority_band"],
            "relative_score_100": row["relative_score"],
            "canonical_smiles": row["canonical_smiles"],
            "mol_wt": row["mol_wt"],
            "logp": row["logp"],
            "tpsa": row["tpsa"],
            "heavy_atoms": row["heavy_atoms"],
            "model_score": round(float(row["score"]), 4),
        }
        for idx, row in enumerate(valid_rows)
    ]
    invalid_display = [
        {"input_smiles": row["input_smiles"], "error": row.get("error", "invalid_smiles")}
        for row in invalid_rows
    ]
    summary = _build_summary(assay_text, valid_rows, invalid_rows, warning)
    csv_path = _results_to_csv(valid_rows, invalid_rows)
    # Explicit columns keep the table headers when a frame has no rows.
    ranked_frame = pd.DataFrame(display_rows, columns=display_columns)
    invalid_frame = pd.DataFrame(invalid_display, columns=["input_smiles", "error"])
    has_invalid = bool(invalid_display)
    invalid_title = gr.update(visible=has_invalid)
    invalid_table = gr.update(value=invalid_frame, visible=has_invalid)
    return summary, assay_text, ranked_frame, invalid_title, invalid_table, csv_path
| |
|
| |
|
def load_example(example_name: str):
    """Return the field values of a preset example.

    The tuple order matches the outputs wired to the example dropdown: title,
    description, organism, readout, assay format, assay type, target UniProt
    IDs, and the candidate SMILES text. Raises ``KeyError`` for an unknown
    example name.
    """
    preset = EXAMPLES[example_name]
    field_order = (
        "title",
        "description",
        "organism",
        "readout",
        "assay_format",
        "assay_type",
        "target_uniprot",
        "smiles",
    )
    return tuple(preset[field] for field in field_order)
| |
|
| |
|
# Declarative UI definition. Components are created in display order; the two
# event bindings at the bottom connect them to `load_example` and `run_ranking`.
with gr.Blocks(title="BioAssayAlign Compatibility Explorer", analytics_enabled=False, css=CSS, theme=THEME) as demo:
    # Hero banner: static HTML styled by the #hero / .hero-* rules in CSS.
    gr.HTML(
        """
        <div id="hero">
        <div class="hero-grid">
        <div>
        <div class="eyebrow">BioAssayAlign · assay-conditioned compound ranking</div>
        <div class="hero-title">Rank a compound list against one assay definition</div>
        <div class="hero-copy">
        Define one assay, submit a candidate molecule list, and obtain an assay-conditioned ranking for that specific list.
        </div>
        </div>
        <div class="hero-side">
        <div class="hero-side-title">Operational scope</div>
        <ul class="hero-list">
        <li>One assay at a time</li>
        <li>Relative ranking within the submitted candidate set</li>
        <li>Immediate rejection of malformed SMILES</li>
        </ul>
        </div>
        </div>
        </div>
        """
    )

    # Info strip: shows the configured model repository (as a link) and the input limit.
    with gr.Row():
        with gr.Column(scale=9):
            gr.HTML(
                f"""
                <div class="metric-strip">
                <div class="metric-card"><span>Model</span><strong><a href="https://huggingface.co/{MODEL_REPO_ID}" target="_blank" rel="noopener">{MODEL_REPO_ID}</a></strong></div>
                <div class="metric-card"><span>Input</span><strong>One assay definition and up to {MAX_INPUT_SMILES} candidate SMILES. Scores are only meaningful within the submitted list.</strong></div>
                </div>
                """
            )
            gr.HTML("<div class='compact-spec'>Write your own assay directly or start from a live example. Include target, readout, organism, and format when known. Provide candidates as one SMILES per line or as a CSV with a <code>smiles</code> column.</div>")

    with gr.Tab("Rank Compounds"):
        with gr.Row(elem_classes="workspace"):
            # Left pane: assay definition fields, pre-filled from DEFAULT_EXAMPLE.
            with gr.Column(scale=6, elem_classes="pane"):
                gr.HTML(
                    """
                    <div class="pane-header">
                    <div>
                    <div class="pane-kicker">Assay definition</div>
                    <div class="pane-title">Define the assay context</div>
                    </div>
                    </div>
                    <div class="pane-copy">Edit the fields directly. The example selector only pre-populates a realistic starting point.</div>
                    <div class="helper-row">
                    <div class="helper-chip"><strong>Protocol signal</strong>State readout, assay system, target biology, and measurement context.</div>
                    <div class="helper-chip"><strong>Target identifiers</strong>UniProt IDs usually improve separation between plausible and implausible candidates.</div>
                    <div class="helper-chip"><strong>Ranking scope</strong>Scores are only comparable within the candidate list submitted here.</div>
                    </div>
                    """
                )
                example_name = gr.Dropdown(choices=list(EXAMPLES.keys()), value=DEFAULT_EXAMPLE_NAME, label="Example assay")
                gr.HTML("<div class='examples-note'>Choose a live example or overwrite the fields directly with your own assay and candidate list.</div>")
                assay_title = gr.Textbox(label="Assay title", value=DEFAULT_EXAMPLE["title"])
                description = gr.Textbox(label="Description", value=DEFAULT_EXAMPLE["description"], lines=6, placeholder="Describe the assay in practical lab language.")
                with gr.Row():
                    organism = gr.Textbox(label="Organism", value=DEFAULT_EXAMPLE["organism"], placeholder="Homo sapiens")
                    readout = gr.Textbox(label="Readout", value=DEFAULT_EXAMPLE["readout"], placeholder="binding / fluorescence / luminescence")
                with gr.Row():
                    assay_format = gr.Textbox(label="Assay format", value=DEFAULT_EXAMPLE["assay_format"], placeholder="biochemical / cell-based")
                    assay_type = gr.Textbox(label="Assay type", value=DEFAULT_EXAMPLE["assay_type"], placeholder="binding / inhibition / activation")
                target_uniprot = gr.Textbox(label="Target UniProt IDs", value=DEFAULT_EXAMPLE["target_uniprot"], placeholder="Q06187, P52333")

            # Right pane: candidate molecules (pasted and/or uploaded) and the run controls.
            with gr.Column(scale=5, elem_classes="pane"):
                gr.HTML(
                    """
                    <div class="pane-header">
                    <div>
                    <div class="pane-kicker">Candidate set</div>
                    <div class="pane-title">Submit the candidate molecules</div>
                    </div>
                    </div>
                    <div class="pane-copy">The model ranks molecules relative to the exact candidate set submitted in this run.</div>
                    """
                )
                smiles_text = gr.Textbox(
                    label="Candidate SMILES",
                    value=DEFAULT_EXAMPLE["smiles"],
                    lines=14,
                    placeholder="Paste one candidate molecule per line. Example: CCO",
                )
                upload_file = gr.File(label="Upload CSV / TXT / SMI", file_count="single", file_types=[".csv", ".txt", ".smi", ".smiles"])
                top_k = gr.Slider(label="Top-K rows to display", minimum=5, maximum=200, step=5, value=DEFAULT_TOP_K)
                gr.HTML("<div class='section-note'>Use one SMILES per row. Invalid or non-standardizable structures are flagged separately and excluded from ranking.</div>")
                with gr.Row(elem_classes="action-row"):
                    run_btn = gr.Button("Run assay-conditioned ranking", variant="primary")
                    # Clears every input field; the example dropdown and the slider are not in its component list.
                    clear_btn = gr.ClearButton(value="Clear inputs", components=[assay_title, description, organism, readout, assay_format, assay_type, target_uniprot, smiles_text, upload_file])

        # Output area, filled by `run_ranking`. The rejected-inputs heading and
        # table start hidden; `run_ranking` returns updates that toggle them.
        summary = gr.HTML(elem_classes="results-shell")
        with gr.Accordion("Serialized assay text used by the model", open=False, elem_classes="result-accordion"):
            assay_preview = gr.Textbox(lines=12, label="Model-facing assay text")
        gr.HTML("<div class='section-note'><strong>Ranked candidates</strong></div>")
        ranked_df = gr.Dataframe(label=None, show_label=False, interactive=False, wrap=True, elem_classes="result-frame")
        invalid_title = gr.HTML("<div class='section-note'><strong>Rejected inputs</strong></div>", visible=False)
        invalid_df = gr.Dataframe(label=None, show_label=False, interactive=False, wrap=True, elem_classes="result-frame", visible=False)
        download_file = gr.File(label="Export CSV", elem_classes="result-file")

    # Selecting an example overwrites the assay fields and the candidate list.
    # The output order must match the tuple returned by `load_example`.
    example_name.change(
        load_example,
        inputs=[example_name],
        outputs=[assay_title, description, organism, readout, assay_format, assay_type, target_uniprot, smiles_text],
    )
    # Input order must match the `run_ranking` parameters; output order must
    # match the tuple it returns.
    run_btn.click(
        run_ranking,
        inputs=[assay_title, description, organism, readout, assay_format, assay_type, target_uniprot, smiles_text, upload_file, top_k],
        outputs=[summary, assay_preview, ranked_df, invalid_title, invalid_df, download_file],
    )
| |
|
# Script entry point: optionally start the model warm-up, then serve the UI.
if __name__ == "__main__":
    if ENABLE_BACKGROUND_WARMUP:
        # Imported here because threading is only needed for the optional warm-up.
        import threading
        # Daemon thread: the warm-up runs alongside the launch below and does
        # not keep the process alive on its own.
        threading.Thread(target=_warm_model_background, daemon=True).start()
    # Enable the request queue with a default concurrency limit of 4, then launch.
    # NOTE(review): `footer_links` looks like a recent Gradio `launch()` option --
    # confirm against the pinned Gradio version.
    demo.queue(default_concurrency_limit=4).launch(
        show_error=True,
        quiet=True,
        footer_links=["gradio"],
    )
| |
|