File size: 9,593 Bytes
a60d40d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import datetime as dt
import logging
import os
from typing import List

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset

# Hub dataset repository that every tab loads its submissions from and pushes them to.
repo = "hugging-science/m-boltz-submissions"

# Tab key -> dataset config name handed to load_dataset / push_to_hub for that tab.
CONFIGS = {
    "antibody": "Antibody–Antigen",
    "ligand": "Allosteric–Orthosteric",
    "final": "final",
}

# Per-tab column schema, keyed like CONFIGS. Each list fixes both the set and
# the order of columns in the DataFrame shown for that tab, and is what an
# empty table is built from when nothing can be loaded.
COLUMNS = {
    # Antibody–Antigen tab
    "antibody": [
        "timestamp", "user", "model_name",
        "antibody_id", "antigen_id", "predicted_affinity",
        "notes",
    ],
    # Allosteric–Orthosteric ligand tab
    "ligand": [
        "timestamp", "user", "model_name",
        "protein_id", "ligand_type", "predicted_kd",
        "notes",
    ],
    # Final submission tab
    "final": [
        "timestamp", "team_name", "archive_url",
        "results_summary", "contact_email",
    ],
}

def _hf_token() -> str:
    """Return the Hugging Face token stored in the ``HF_TOKEN`` environment variable.

    The Space is expected to expose a write-enabled token as a secret named
    HF_TOKEN (Settings → Repository secrets).

    Raises:
        RuntimeError: if HF_TOKEN is unset or empty, with a message telling
            the operator how to fix it.
    """
    value = os.environ.get("HF_TOKEN")
    if value:
        return value
    raise RuntimeError(
        "Missing HF_TOKEN. Add a write-enabled token in your Space secrets."
    )


def _empty_df(columns: List[str]) -> pd.DataFrame:
    """Build a zero-row DataFrame whose columns are ``columns``, in the given order."""
    blank = pd.DataFrame(columns=columns)
    return blank


def load_df(config: str, columns: List[str]) -> pd.DataFrame:
    """Load the 'train' split of one config of the Hub dataset as a DataFrame.

    Args:
        config: Dataset config name passed to ``load_dataset``.
        columns: Expected column names. The result has exactly these columns
            in this order: columns missing from the stored data are added and
            filled with ``pd.NA``; stored columns not listed are dropped.

    Returns:
        The stored rows, or an empty DataFrame with ``columns`` when loading
        fails for any reason (for example the dataset/config does not exist
        yet, or HF_TOKEN is missing).

    Note:
        Loading is deliberately best-effort so the UI can still render on a
        fresh repo, which means a failed load and "no data yet" look the same
        to the caller. The failure is logged with its traceback so that an
        unexpectedly empty table can be diagnosed; callers that write back
        what they read should keep this in mind.
    """
    try:
        ds = load_dataset(repo, config, split="train", token=_hf_token())
        df = ds.to_pandas()
    except Exception:
        # Keep the best-effort contract, but do not hide the cause: a missing
        # token or a network/auth failure would otherwise be indistinguishable
        # from a first run.
        logging.getLogger(__name__).warning(
            "Could not load config %r from %s; returning an empty table.",
            config,
            repo,
            exc_info=True,
        )
        return _empty_df(columns)

    # Ensure all expected columns exist, then project to the expected order.
    for c in columns:
        if c not in df.columns:
            df[c] = pd.NA
    return df[columns]


def push_df(config: str, df: pd.DataFrame) -> None:
    """Upload ``df`` to the Hub dataset under the given config name.

    The frame's index is discarded before conversion, so only its columns are
    stored. No split is passed, so ``push_to_hub`` uses its default; the
    original author's note says this overwrites the existing 'train' data and
    creates the repo if it does not exist yet.

    Args:
        config: Dataset config name to write to.
        df: Full table to store.

    Raises:
        RuntimeError: if HF_TOKEN is not available (from ``_hf_token``).
    """
    # Drop the pandas index so it does not become a stored column.
    indexless = df.reset_index(drop=True)
    hub_ds = Dataset.from_pandas(indexless, preserve_index=False)
    hub_ds.push_to_hub(repo, config_name=config, token=_hf_token())


# --- Tab logic --------------------------------------------------------------
# Antibody–Antigen

def submit_antibody(user, model_name, antibody_id, antigen_id, predicted_affinity, notes):
    """Append one antibody–antigen submission to the Hub and return the stored table.

    Falsy text fields are stored as ""; ``predicted_affinity`` is converted
    with ``float`` unless it is None. The row is stamped with the current UTC
    time in ISO format. The table is re-read after pushing, so the returned
    DataFrame comes from a fresh load rather than the local copy.
    """
    hub_config = CONFIGS["antibody"]
    schema = COLUMNS["antibody"]
    existing = load_df(hub_config, schema)

    stamp = dt.datetime.now(dt.timezone.utc).isoformat()
    affinity = None
    if predicted_affinity is not None:
        affinity = float(predicted_affinity)

    entry = {
        "timestamp": stamp,
        "user": user or "",
        "model_name": model_name or "",
        "antibody_id": antibody_id or "",
        "antigen_id": antigen_id or "",
        "predicted_affinity": affinity,
        "notes": notes or "",
    }
    new_row = pd.DataFrame([entry])
    combined = pd.concat([existing, new_row], ignore_index=True)
    push_df(hub_config, combined)
    # Re-load so the table shown is read back from the Hub after the push.
    return load_df(hub_config, schema)


def refresh_antibody():
    """Reload the antibody–antigen submissions table from the Hub."""
    tab = "antibody"
    return load_df(CONFIGS[tab], COLUMNS[tab])


# Allosteric–Orthosteric

def submit_ligand(user, model_name, protein_id, ligand_type, predicted_kd, notes):
    """Append one ligand-binding submission to the Hub and return the stored table.

    Falsy text fields are stored as ""; ``predicted_kd`` is converted with
    ``float`` unless it is None. The row is stamped with the current UTC time
    in ISO format, and the table is re-read after pushing.
    """
    hub_config = CONFIGS["ligand"]
    schema = COLUMNS["ligand"]
    existing = load_df(hub_config, schema)

    stamp = dt.datetime.now(dt.timezone.utc).isoformat()
    kd_value = None
    if predicted_kd is not None:
        kd_value = float(predicted_kd)

    entry = {
        "timestamp": stamp,
        "user": user or "",
        "model_name": model_name or "",
        "protein_id": protein_id or "",
        "ligand_type": ligand_type or "",
        "predicted_kd": kd_value,
        "notes": notes or "",
    }
    new_row = pd.DataFrame([entry])
    combined = pd.concat([existing, new_row], ignore_index=True)
    push_df(hub_config, combined)
    return load_df(hub_config, schema)


def refresh_ligand():
    """Reload the ligand-binding submissions table from the Hub."""
    tab = "ligand"
    return load_df(CONFIGS[tab], COLUMNS[tab])


# Final Submission

def submit_final(team_name, archive_url, results_summary, contact_email):
    """Append one final submission to the Hub and return the stored table.

    Falsy fields are stored as "". The row is stamped with the current UTC
    time in ISO format, and the table is re-read after pushing.
    """
    hub_config = CONFIGS["final"]
    schema = COLUMNS["final"]
    existing = load_df(hub_config, schema)

    stamp = dt.datetime.now(dt.timezone.utc).isoformat()
    entry = {
        "timestamp": stamp,
        "team_name": team_name or "",
        "archive_url": archive_url or "",
        "results_summary": results_summary or "",
        "contact_email": contact_email or "",
    }
    new_row = pd.DataFrame([entry])
    combined = pd.concat([existing, new_row], ignore_index=True)
    push_df(hub_config, combined)
    return load_df(hub_config, schema)


def refresh_final():
    """Reload the final-submissions table from the Hub."""
    tab = "final"
    return load_df(CONFIGS[tab], COLUMNS[tab])


# --- UI ---------------------------------------------------------------------
# Three tabs, one per challenge. Each tab has a small form, a Submit button
# that appends a row to the Hub dataset, a Refresh button, and a read-only
# table of stored submissions.
#
# The tables take a *callable* as their initial value rather than a DataFrame
# computed here: a DataFrame would be a snapshot taken once when the app is
# built, so later visitors would see stale data until they pressed Refresh.
# Gradio calls a callable `value` when the page loads.
with gr.Blocks(title="Binding Challenges") as app:
    gr.Markdown("# Binding Challenges\nMinimal demo: submit → write to HF dataset → re-hydrate DataFrame from the same dataset.")

    with gr.Tab("Antibody–Antigen Binding Challenge"):
        gr.Markdown("""
            # Antibody-Antigen Binding Challenge
            The goal of this challenge is to improve Boltz-2 accuracy for predicting the correct poses of a VHH binding to an antigen.\n
            Accuracy will be measured through the Capri-Q docking assessment classification scores and the final winner will be determined based on the number of successful top-1 predictions on our *internal* test set. However, you are encouraged to submit results on the training set during the hack to see where you stack up.\n
            A prediction is deemed successful if the Capri-Q classification is either “high”, “medium”, or “acceptable”.
            If multiple entries reach the same number of successful predictions, ties are broken by looking at the number of predictions with “High” classification, then with “Medium” classification and finally with “Acceptable” classification.
            If there is still a tie then, we will look at the mean RMSD across all successful predictions.
        """)
        with gr.Row():
            aa_user = gr.Textbox(label="User / Team", placeholder="Your name or team")
            aa_model = gr.Textbox(label="Model Name")
        with gr.Row():
            aa_antibody = gr.Textbox(label="Antibody ID")
            aa_antigen = gr.Textbox(label="Antigen ID")
            aa_aff = gr.Number(label="Predicted affinity (nM)")
        aa_notes = gr.Textbox(label="Notes", lines=3)
        with gr.Row():
            aa_submit = gr.Button("Submit")
            aa_refresh = gr.Button("Refresh table")
        aa_df = gr.Dataframe(
            # Callable: re-read from the Hub on every page load.
            value=refresh_antibody,
            label="Submissions (Antibody–Antigen)",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_antibody's parameter order.
        aa_submit.click(
            submit_antibody,
            inputs=[aa_user, aa_model, aa_antibody, aa_antigen, aa_aff, aa_notes],
            outputs=aa_df,
        )
        aa_refresh.click(refresh_antibody, outputs=aa_df)

    with gr.Tab("Allosteric–Orthosteric Ligand Binding Challenge"):
        gr.Markdown("""
            # Allosteric-Orthosteric Ligand Binding Challenge
            The goal of this challenge is to improve Boltz-2 accuracy for predicting the binding poses of either allosteric or orthosteric ligands.\n 
            The winner will be determined by accuracy measured on our *internal* test set by calculating the RMSD between the top-1 prediction and the experimental pose. However, submit your intermediate results here to see where you stack up!
                    """)
        with gr.Row():
            li_user = gr.Textbox(label="User / Team")
            li_model = gr.Textbox(label="Model Name")
        with gr.Row():
            li_protein = gr.Textbox(label="Protein ID")
            li_type = gr.Radio(["allosteric", "orthosteric"], label="Ligand type")
            li_kd = gr.Number(label="Predicted Kd (nM)")
        li_notes = gr.Textbox(label="Notes", lines=3)
        with gr.Row():
            li_submit = gr.Button("Submit")
            li_refresh = gr.Button("Refresh table")
        li_df = gr.Dataframe(
            # Callable: re-read from the Hub on every page load.
            value=refresh_ligand,
            label="Submissions (Ligand Binding)",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_ligand's parameter order.
        li_submit.click(
            submit_ligand,
            inputs=[li_user, li_model, li_protein, li_type, li_kd, li_notes],
            outputs=li_df,
        )
        li_refresh.click(refresh_ligand, outputs=li_df)

    with gr.Tab("3) Final Submission"):
        fs_team = gr.Textbox(label="Team name")
        fs_archive = gr.Textbox(label="Archive URL (e.g., model artifacts)")
        fs_summary = gr.Textbox(label="Results summary", lines=4)
        fs_email = gr.Textbox(label="Contact email")
        with gr.Row():
            fs_submit = gr.Button("Submit")
            fs_refresh = gr.Button("Refresh table")
        fs_df = gr.Dataframe(
            # Callable: re-read from the Hub on every page load.
            value=refresh_final,
            label="Final submissions",
            interactive=False,
            wrap=True,
        )
        # Input order must match submit_final's parameter order.
        fs_submit.click(
            submit_final,
            inputs=[fs_team, fs_archive, fs_summary, fs_email],
            outputs=fs_df,
        )
        fs_refresh.click(refresh_final, outputs=fs_df)

# Entry point for local development (`python app.py`): launch the app only
# when this file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app.launch()