hmacdope Claude Sonnet 4.6 committed on
Commit
efb1001
·
1 Parent(s): 018c186

feat: Add proprietary data disclosure checkbox to submission form

Browse files

Adds a checkbox for participants to declare whether proprietary (non-public)
data was used in training. The flag is stored in the submission metadata,
propagated through the leaderboard pipeline, and displayed as a
"Proprietary Data" (Yes/No) column on both leaderboards.

The backend handles a missing field gracefully via COALESCE in DuckDB queries
and explicit column defaulting in the validation metadata builders, ensuring
backwards compatibility with existing submissions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +27 -0
  2. models.py +2 -0
app.py CHANGED
@@ -83,6 +83,7 @@ _ACTIVITY_EMPTY = pd.DataFrame(
83
  "username",
84
  "Submitted",
85
  "model_report_link",
 
86
  "MAE",
87
  "RAE",
88
  "R2",
@@ -96,6 +97,7 @@ _STRUCTURE_EMPTY = pd.DataFrame(
96
  "username",
97
  "Submitted",
98
  "model_report_link",
 
99
  "LDDT-PLI",
100
  "BiSyRMSD",
101
  "LDDT-LP",
@@ -118,6 +120,12 @@ def _prepare_activity_df(df: pd.DataFrame) -> pd.DataFrame:
118
  df["Submitted"] = pd.to_datetime(df["Submitted"], utc=True).dt.strftime(
119
  "%Y-%m-%d %H:%M UTC"
120
  )
 
 
 
 
 
 
121
  return df
122
 
123
 
@@ -134,6 +142,12 @@ def _prepare_structure_df(df: pd.DataFrame) -> pd.DataFrame:
134
  "%Y-%m-%d %H:%M UTC"
135
  )
136
  df["model_report_link"] = df["model_report_link"].fillna("")
 
 
 
 
 
 
137
  return df
138
 
139
 
@@ -247,6 +261,7 @@ def submit_predictions(
247
  affiliation,
248
  model_tag,
249
  paper_checkbox,
 
250
  track_select,
251
  file_input,
252
  ):
@@ -264,6 +279,7 @@ def submit_predictions(
264
  affiliation: Institutional affiliation (optional).
265
  model_tag: Link to method report (optional).
266
  paper_checkbox: Opt-in for future publication inclusion.
 
267
  track_select: "Activity Prediction" or "Structure Prediction".
268
  file_input: Path to the uploaded submission file.
269
 
@@ -387,6 +403,7 @@ def submit_predictions(
387
  affiliation=affiliation.strip(),
388
  model_report_link=model_tag.strip(),
389
  include_in_publication=paper_checkbox,
 
390
  track=track_select,
391
  filename=file_path.name,
392
  )
@@ -588,6 +605,7 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
588
  "username",
589
  "Submitted",
590
  "model_report_link",
 
591
  "MAE",
592
  "RAE",
593
  "R2",
@@ -605,6 +623,7 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
605
  "str",
606
  "str",
607
  "str",
 
608
  ],
609
  )
610
  with gr.TabItem("Structure Prediction (Pose)"):
@@ -620,6 +639,7 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
620
  "username",
621
  "Submitted",
622
  "model_report_link",
 
623
  "LDDT-PLI",
624
  "BiSyRMSD",
625
  "LDDT-LP",
@@ -635,6 +655,7 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
635
  "str",
636
  "str",
637
  "str",
 
638
  ],
639
  )
640
  leaderboard_timer.tick(fn=load_activity_leaderboard, outputs=[activity_lb])
@@ -700,6 +721,11 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
700
  label="Include me in a future Challenge publication",
701
  value=False,
702
  )
 
 
 
 
 
703
 
704
  # --- Column 3: Track & file ---
705
  with gr.Column(scale=1):
@@ -739,6 +765,7 @@ We thank the experimentalists at **Octant** and **UCSF** (Fraser Lab) for all th
739
  affiliation,
740
  model_tag,
741
  paper_checkbox,
 
742
  track_select,
743
  file_input,
744
  ],
 
83
  "username",
84
  "Submitted",
85
  "model_report_link",
86
+ "Proprietary Data",
87
  "MAE",
88
  "RAE",
89
  "R2",
 
97
  "username",
98
  "Submitted",
99
  "model_report_link",
100
+ "Proprietary Data",
101
  "LDDT-PLI",
102
  "BiSyRMSD",
103
  "LDDT-LP",
 
120
  df["Submitted"] = pd.to_datetime(df["Submitted"], utc=True).dt.strftime(
121
  "%Y-%m-%d %H:%M UTC"
122
  )
123
+ df["Proprietary Data"] = (
124
+ df["used_proprietary_data"].fillna(False).map({True: "Yes", False: "No"})
125
+ if "used_proprietary_data" in df.columns
126
+ else "No"
127
+ )
128
+ df = df.drop(columns=["used_proprietary_data"], errors="ignore")
129
  return df
130
 
131
 
 
142
  "%Y-%m-%d %H:%M UTC"
143
  )
144
  df["model_report_link"] = df["model_report_link"].fillna("")
145
+ df["Proprietary Data"] = (
146
+ df["used_proprietary_data"].fillna(False).map({True: "Yes", False: "No"})
147
+ if "used_proprietary_data" in df.columns
148
+ else "No"
149
+ )
150
+ df = df.drop(columns=["used_proprietary_data"], errors="ignore")
151
  return df
152
 
153
 
 
261
  affiliation,
262
  model_tag,
263
  paper_checkbox,
264
+ proprietary_data_checkbox,
265
  track_select,
266
  file_input,
267
  ):
 
279
  affiliation: Institutional affiliation (optional).
280
  model_tag: Link to method report (optional).
281
  paper_checkbox: Opt-in for future publication inclusion.
282
+ proprietary_data_checkbox: Whether proprietary data was used in training.
283
  track_select: "Activity Prediction" or "Structure Prediction".
284
  file_input: Path to the uploaded submission file.
285
 
 
403
  affiliation=affiliation.strip(),
404
  model_report_link=model_tag.strip(),
405
  include_in_publication=paper_checkbox,
406
+ used_proprietary_data=proprietary_data_checkbox,
407
  track=track_select,
408
  filename=file_path.name,
409
  )
 
605
  "username",
606
  "Submitted",
607
  "model_report_link",
608
+ "Proprietary Data",
609
  "MAE",
610
  "RAE",
611
  "R2",
 
623
  "str",
624
  "str",
625
  "str",
626
+ "str",
627
  ],
628
  )
629
  with gr.TabItem("Structure Prediction (Pose)"):
 
639
  "username",
640
  "Submitted",
641
  "model_report_link",
642
+ "Proprietary Data",
643
  "LDDT-PLI",
644
  "BiSyRMSD",
645
  "LDDT-LP",
 
655
  "str",
656
  "str",
657
  "str",
658
+ "str",
659
  ],
660
  )
661
  leaderboard_timer.tick(fn=load_activity_leaderboard, outputs=[activity_lb])
 
721
  label="Include me in a future Challenge publication",
722
  value=False,
723
  )
724
+ proprietary_data_checkbox = gr.Checkbox(
725
+ label="I used proprietary data (not publicly available) in training my model",
726
+ value=False,
727
+ info="Displayed publicly on the leaderboard.",
728
+ )
729
 
730
  # --- Column 3: Track & file ---
731
  with gr.Column(scale=1):
 
765
  affiliation,
766
  model_tag,
767
  paper_checkbox,
768
+ proprietary_data_checkbox,
769
  track_select,
770
  file_input,
771
  ],
models.py CHANGED
@@ -37,6 +37,7 @@ class Submission(BaseModel):
37
  affiliation: Institutional affiliation — stored privately.
38
  model_report_link: URL to method report (required before deadline).
39
  include_in_publication: Opt-in for Challenge publication authorship.
 
40
  track: Competition track.
41
  filename: Original uploaded filename.
42
  s3_key: Full S3 object key for the uploaded prediction file.
@@ -60,6 +61,7 @@ class Submission(BaseModel):
60
  affiliation: str = ""
61
  model_report_link: str = ""
62
  include_in_publication: bool = False
 
63
 
64
  # --- submission ---
65
  track: Literal["Activity Prediction", "Structure Prediction"]
 
37
  affiliation: Institutional affiliation — stored privately.
38
  model_report_link: URL to method report (required before deadline).
39
  include_in_publication: Opt-in for Challenge publication authorship.
40
+ used_proprietary_data: Whether proprietary data was used in training.
41
  track: Competition track.
42
  filename: Original uploaded filename.
43
  s3_key: Full S3 object key for the uploaded prediction file.
 
61
  affiliation: str = ""
62
  model_report_link: str = ""
63
  include_in_publication: bool = False
64
+ used_proprietary_data: bool = False
65
 
66
  # --- submission ---
67
  track: Literal["Activity Prediction", "Structure Prediction"]