Spaces:
Running
Running
feat(hf-space): enforce submission cooldown and switch leaderboard reads to boto3
Browse files
- add 8-hour per-track submission cooldown checks in submit flow
- add helper to fetch last submission timestamp from S3 metadata objects
- replace pandas+s3fs leaderboard reads with boto3 get_object + in-memory CSV parsing (Avoids 403 Forbidden error)
- change leaderboard config paths to S3 keys (bucket configured separately)
- remove s3fs dependency from hf_space requirements
- grant hf-space IAM user s3:ListBucket on submissions prefixes for cooldown lookup
- app.py +116 -18
- config.py +3 -3
- requirements.txt +1 -2
- submission_store.py +52 -1
app.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
|
|
| 1 |
import tempfile
|
| 2 |
import zipfile
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
import numpy as np
|
| 7 |
import pandas as pd
|
|
@@ -9,14 +12,18 @@ from config import (
|
|
| 9 |
ACTIVITY_DATASET_SIZE,
|
| 10 |
ACTIVITY_LEADERBOARD_S3,
|
| 11 |
REQUIRED_ACTIVITY_COLUMNS,
|
|
|
|
| 12 |
STRUCTURE_DATASET_SIZE,
|
| 13 |
STRUCTURE_LEADERBOARD_S3,
|
|
|
|
| 14 |
)
|
| 15 |
from gradio.themes.utils import sizes
|
| 16 |
from gradio_leaderboard import Leaderboard
|
| 17 |
from loguru import logger
|
| 18 |
-
from models import Submission
|
| 19 |
-
from submission_store import upload_submission
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def make_user_clickable(name: str) -> str:
|
|
@@ -48,6 +55,7 @@ def _collapse_mean_std(df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
|
|
| 48 |
|
| 49 |
Returns:
|
| 50 |
DataFrame with combined columns replacing the original pairs.
|
|
|
|
| 51 |
"""
|
| 52 |
df = df.copy()
|
| 53 |
for metric in metrics:
|
|
@@ -55,19 +63,35 @@ def _collapse_mean_std(df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
|
|
| 55 |
std_col = f"{metric}_std"
|
| 56 |
if mean_col in df.columns and std_col in df.columns:
|
| 57 |
df[metric] = (
|
| 58 |
-
df[mean_col].map(_fmt_metric)
|
| 59 |
-
+ "±"
|
| 60 |
-
+ df[std_col].map(_fmt_metric)
|
| 61 |
)
|
| 62 |
df = df.drop(columns=[mean_col, std_col])
|
| 63 |
return df
|
| 64 |
|
| 65 |
|
| 66 |
_ACTIVITY_EMPTY = pd.DataFrame(
|
| 67 |
-
columns=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
)
|
| 69 |
_STRUCTURE_EMPTY = pd.DataFrame(
|
| 70 |
-
columns=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
)
|
| 72 |
|
| 73 |
|
|
@@ -75,12 +99,16 @@ def _prepare_activity_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 75 |
"""Sort, collapse, and rename activity leaderboard columns (no HTML)."""
|
| 76 |
df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
|
| 77 |
df = _collapse_mean_std(df, ["MAE", "RAE", "R2", "Spearman_R", "Kendall's_Tau"])
|
| 78 |
-
df = df.rename(
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
return df
|
| 85 |
|
| 86 |
|
|
@@ -89,7 +117,9 @@ def _prepare_structure_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 89 |
df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
|
| 90 |
df = _collapse_mean_std(df, ["LDDT-PLI", "BiSyRMSD", "Ligand_RMSD", "LDDT-LP"])
|
| 91 |
df = df.rename(columns={"Ligand_RMSD": "Ligand RMSD", "submitted_at": "Submitted"})
|
| 92 |
-
df["Submitted"] = pd.to_datetime(df["Submitted"], utc=True).dt.strftime(
|
|
|
|
|
|
|
| 93 |
df["model_report_link"] = df["model_report_link"].fillna("")
|
| 94 |
return df
|
| 95 |
|
|
@@ -98,7 +128,11 @@ def load_activity_leaderboard() -> pd.DataFrame:
|
|
| 98 |
"""Load the activity leaderboard from S3."""
|
| 99 |
logger.info("Refreshing activity leaderboard...")
|
| 100 |
try:
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
except Exception as exc:
|
| 103 |
logger.warning("Could not load activity leaderboard: {}", exc)
|
| 104 |
return _ACTIVITY_EMPTY
|
|
@@ -112,7 +146,11 @@ def load_structure_leaderboard() -> pd.DataFrame:
|
|
| 112 |
"""Load the structure leaderboard from S3."""
|
| 113 |
logger.info("Refreshing structure leaderboard...")
|
| 114 |
try:
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
except Exception as exc:
|
| 117 |
logger.warning("Could not load structure leaderboard: {}", exc)
|
| 118 |
return _STRUCTURE_EMPTY
|
|
@@ -126,7 +164,11 @@ def load_structure_leaderboard() -> pd.DataFrame:
|
|
| 126 |
def download_activity_leaderboard() -> str:
|
| 127 |
"""Write the activity leaderboard to a temp CSV and return the file path."""
|
| 128 |
try:
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
except Exception as exc:
|
| 131 |
logger.warning("Could not load activity leaderboard for download: {}", exc)
|
| 132 |
df = _ACTIVITY_EMPTY
|
|
@@ -141,7 +183,11 @@ def download_activity_leaderboard() -> str:
|
|
| 141 |
def download_structure_leaderboard() -> str:
|
| 142 |
"""Write the structure leaderboard to a temp CSV and return the file path."""
|
| 143 |
try:
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
except Exception as exc:
|
| 146 |
logger.warning("Could not load structure leaderboard for download: {}", exc)
|
| 147 |
df = _STRUCTURE_EMPTY
|
|
@@ -153,6 +199,24 @@ def download_structure_leaderboard() -> str:
|
|
| 153 |
return f.name
|
| 154 |
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def submit_predictions(
|
| 157 |
username,
|
| 158 |
user_alias,
|
|
@@ -239,6 +303,23 @@ def submit_predictions(
|
|
| 239 |
return gr.update(
|
| 240 |
value="Error: pEC50 column contains infinite values.", visible=True
|
| 241 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
elif track_select == "Structure Prediction":
|
| 244 |
if file_path.suffix.lower() != ".zip":
|
|
@@ -257,6 +338,23 @@ def submit_predictions(
|
|
| 257 |
value=f"Error: Expected {STRUCTURE_DATASET_SIZE} files in zip, got {n_files}.",
|
| 258 |
visible=True,
|
| 259 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
# --- build submission model and persist to S3 ---
|
| 262 |
submission = Submission(
|
|
|
|
| 1 |
+
import io
|
| 2 |
import tempfile
|
| 3 |
import zipfile
|
| 4 |
+
from datetime import datetime, timezone
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
+
import boto3
|
| 8 |
import gradio as gr
|
| 9 |
import numpy as np
|
| 10 |
import pandas as pd
|
|
|
|
| 12 |
ACTIVITY_DATASET_SIZE,
|
| 13 |
ACTIVITY_LEADERBOARD_S3,
|
| 14 |
REQUIRED_ACTIVITY_COLUMNS,
|
| 15 |
+
S3_BUCKET,
|
| 16 |
STRUCTURE_DATASET_SIZE,
|
| 17 |
STRUCTURE_LEADERBOARD_S3,
|
| 18 |
+
TIME_BETWEEN_SUBMISSIONS,
|
| 19 |
)
|
| 20 |
from gradio.themes.utils import sizes
|
| 21 |
from gradio_leaderboard import Leaderboard
|
| 22 |
from loguru import logger
|
| 23 |
+
from models import Submission, _safeify_username
|
| 24 |
+
from submission_store import _fetch_last_submission_date, upload_submission
|
| 25 |
+
|
| 26 |
+
s3_client = boto3.client("s3", region_name="us-east-1")
|
| 27 |
|
| 28 |
|
| 29 |
def make_user_clickable(name: str) -> str:
|
|
|
|
| 55 |
|
| 56 |
Returns:
|
| 57 |
DataFrame with combined columns replacing the original pairs.
|
| 58 |
+
|
| 59 |
"""
|
| 60 |
df = df.copy()
|
| 61 |
for metric in metrics:
|
|
|
|
| 63 |
std_col = f"{metric}_std"
|
| 64 |
if mean_col in df.columns and std_col in df.columns:
|
| 65 |
df[metric] = (
|
| 66 |
+
df[mean_col].map(_fmt_metric) + "±" + df[std_col].map(_fmt_metric)
|
|
|
|
|
|
|
| 67 |
)
|
| 68 |
df = df.drop(columns=[mean_col, std_col])
|
| 69 |
return df
|
| 70 |
|
| 71 |
|
| 72 |
_ACTIVITY_EMPTY = pd.DataFrame(
|
| 73 |
+
columns=[
|
| 74 |
+
"rank",
|
| 75 |
+
"username",
|
| 76 |
+
"Submitted",
|
| 77 |
+
"MAE",
|
| 78 |
+
"RAE",
|
| 79 |
+
"R2",
|
| 80 |
+
"Spearman ρ",
|
| 81 |
+
"Kendall's τ",
|
| 82 |
+
]
|
| 83 |
)
|
| 84 |
_STRUCTURE_EMPTY = pd.DataFrame(
|
| 85 |
+
columns=[
|
| 86 |
+
"rank",
|
| 87 |
+
"username",
|
| 88 |
+
"Submitted",
|
| 89 |
+
"model_report_link",
|
| 90 |
+
"LDDT-PLI",
|
| 91 |
+
"BiSyRMSD",
|
| 92 |
+
"Ligand RMSD",
|
| 93 |
+
"LDDT-LP",
|
| 94 |
+
]
|
| 95 |
)
|
| 96 |
|
| 97 |
|
|
|
|
| 99 |
"""Sort, collapse, and rename activity leaderboard columns (no HTML)."""
|
| 100 |
df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
|
| 101 |
df = _collapse_mean_std(df, ["MAE", "RAE", "R2", "Spearman_R", "Kendall's_Tau"])
|
| 102 |
+
df = df.rename(
|
| 103 |
+
columns={
|
| 104 |
+
"Spearman_R": "Spearman ρ",
|
| 105 |
+
"Kendall's_Tau": "Kendall's τ",
|
| 106 |
+
"submitted_at": "Submitted",
|
| 107 |
+
}
|
| 108 |
+
)
|
| 109 |
+
df["Submitted"] = pd.to_datetime(df["Submitted"], utc=True).dt.strftime(
|
| 110 |
+
"%Y-%m-%d %H:%M UTC"
|
| 111 |
+
)
|
| 112 |
return df
|
| 113 |
|
| 114 |
|
|
|
|
| 117 |
df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
|
| 118 |
df = _collapse_mean_std(df, ["LDDT-PLI", "BiSyRMSD", "Ligand_RMSD", "LDDT-LP"])
|
| 119 |
df = df.rename(columns={"Ligand_RMSD": "Ligand RMSD", "submitted_at": "Submitted"})
|
| 120 |
+
df["Submitted"] = pd.to_datetime(df["Submitted"], utc=True).dt.strftime(
|
| 121 |
+
"%Y-%m-%d %H:%M UTC"
|
| 122 |
+
)
|
| 123 |
df["model_report_link"] = df["model_report_link"].fillna("")
|
| 124 |
return df
|
| 125 |
|
|
|
|
| 128 |
"""Load the activity leaderboard from S3."""
|
| 129 |
logger.info("Refreshing activity leaderboard...")
|
| 130 |
try:
|
| 131 |
+
obj = s3_client.get_object(
|
| 132 |
+
Bucket=S3_BUCKET,
|
| 133 |
+
Key=ACTIVITY_LEADERBOARD_S3,
|
| 134 |
+
)
|
| 135 |
+
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
|
| 136 |
except Exception as exc:
|
| 137 |
logger.warning("Could not load activity leaderboard: {}", exc)
|
| 138 |
return _ACTIVITY_EMPTY
|
|
|
|
| 146 |
"""Load the structure leaderboard from S3."""
|
| 147 |
logger.info("Refreshing structure leaderboard...")
|
| 148 |
try:
|
| 149 |
+
obj = s3_client.get_object(
|
| 150 |
+
Bucket=S3_BUCKET,
|
| 151 |
+
Key=STRUCTURE_LEADERBOARD_S3,
|
| 152 |
+
)
|
| 153 |
+
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
|
| 154 |
except Exception as exc:
|
| 155 |
logger.warning("Could not load structure leaderboard: {}", exc)
|
| 156 |
return _STRUCTURE_EMPTY
|
|
|
|
| 164 |
def download_activity_leaderboard() -> str:
|
| 165 |
"""Write the activity leaderboard to a temp CSV and return the file path."""
|
| 166 |
try:
|
| 167 |
+
obj = s3_client.get_object(
|
| 168 |
+
Bucket=S3_BUCKET,
|
| 169 |
+
Key=ACTIVITY_LEADERBOARD_S3,
|
| 170 |
+
)
|
| 171 |
+
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
|
| 172 |
except Exception as exc:
|
| 173 |
logger.warning("Could not load activity leaderboard for download: {}", exc)
|
| 174 |
df = _ACTIVITY_EMPTY
|
|
|
|
| 183 |
def download_structure_leaderboard() -> str:
|
| 184 |
"""Write the structure leaderboard to a temp CSV and return the file path."""
|
| 185 |
try:
|
| 186 |
+
obj = s3_client.get_object(
|
| 187 |
+
Bucket=S3_BUCKET,
|
| 188 |
+
Key=STRUCTURE_LEADERBOARD_S3,
|
| 189 |
+
)
|
| 190 |
+
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
|
| 191 |
except Exception as exc:
|
| 192 |
logger.warning("Could not load structure leaderboard for download: {}", exc)
|
| 193 |
df = _STRUCTURE_EMPTY
|
|
|
|
| 199 |
return f.name
|
| 200 |
|
| 201 |
|
| 202 |
+
def _format_submission_time_message(last_submission: datetime, track: str) -> str:
    """Build the error text shown when a user resubmits during the cooldown.

    Args:
        last_submission: Timestamp of the user's previous submission. It is
            compared against ``datetime.now(timezone.utc)``, so it must be
            timezone-aware.
        track: Track identifier. ``"activity"`` selects the wording
            "an activity"; any other value is worded as "a structure".

    Returns:
        A two-line message stating when the previous submission was made and
        how long (``HH:MM:SS``, clamped at zero) the user still has to wait.
    """
    described_track = "a structure"
    if track == "activity":
        described_track = "an activity"

    # The cooldown window opens at the last submission and lasts
    # TIME_BETWEEN_SUBMISSIONS seconds.
    cooldown = pd.Timedelta(seconds=TIME_BETWEEN_SUBMISSIONS)
    unlock_at = last_submission + cooldown
    remaining = int((unlock_at - datetime.now(timezone.utc)).total_seconds())
    if remaining < 0:
        # Cooldown already elapsed: report a zero wait, never a negative one.
        remaining = 0

    total_minutes, secs = divmod(remaining, 60)
    hrs, mins = divmod(total_minutes, 60)
    submitted_on = last_submission.strftime("%Y-%m-%d %H:%M:%S (UTC)")

    return (
        f"Error: You submitted {described_track} prediction on "
        f"{submitted_on}.\n"
        f"Please wait {hrs:02d}:{mins:02d}:{secs:02d} before submitting again."
    )
|
| 218 |
+
|
| 219 |
+
|
| 220 |
def submit_predictions(
|
| 221 |
username,
|
| 222 |
user_alias,
|
|
|
|
| 303 |
return gr.update(
|
| 304 |
value="Error: pEC50 column contains infinite values.", visible=True
|
| 305 |
)
|
| 306 |
+
last_submission = _fetch_last_submission_date(
|
| 307 |
+
"activity", _safeify_username(username.strip())
|
| 308 |
+
)
|
| 309 |
+
logger.info(
|
| 310 |
+
f"Last submission date for user {username.strip()!r}: {last_submission}"
|
| 311 |
+
)
|
| 312 |
+
if (
|
| 313 |
+
last_submission
|
| 314 |
+
and (datetime.now(timezone.utc) - last_submission).total_seconds()
|
| 315 |
+
< TIME_BETWEEN_SUBMISSIONS
|
| 316 |
+
):
|
| 317 |
+
return gr.update(
|
| 318 |
+
value=_format_submission_time_message(
|
| 319 |
+
last_submission, track="activity"
|
| 320 |
+
),
|
| 321 |
+
visible=True,
|
| 322 |
+
)
|
| 323 |
|
| 324 |
elif track_select == "Structure Prediction":
|
| 325 |
if file_path.suffix.lower() != ".zip":
|
|
|
|
| 338 |
value=f"Error: Expected {STRUCTURE_DATASET_SIZE} files in zip, got {n_files}.",
|
| 339 |
visible=True,
|
| 340 |
)
|
| 341 |
+
last_submission = _fetch_last_submission_date(
|
| 342 |
+
"structure", _safeify_username(username.strip())
|
| 343 |
+
)
|
| 344 |
+
logger.info(
|
| 345 |
+
f"Last submission date for user {username.strip()!r}: {last_submission}"
|
| 346 |
+
)
|
| 347 |
+
if (
|
| 348 |
+
last_submission
|
| 349 |
+
and (datetime.now(timezone.utc) - last_submission).total_seconds()
|
| 350 |
+
< TIME_BETWEEN_SUBMISSIONS
|
| 351 |
+
):
|
| 352 |
+
return gr.update(
|
| 353 |
+
value=_format_submission_time_message(
|
| 354 |
+
last_submission, track="structure"
|
| 355 |
+
),
|
| 356 |
+
visible=True,
|
| 357 |
+
)
|
| 358 |
|
| 359 |
# --- build submission model and persist to S3 ---
|
| 360 |
submission = Submission(
|
config.py
CHANGED
|
@@ -5,8 +5,8 @@ import os
|
|
| 5 |
ACTIVITY_DATASET_SIZE = 531
|
| 6 |
STRUCTURE_DATASET_SIZE = 125
|
| 7 |
REQUIRED_ACTIVITY_COLUMNS = {"SMILES", "Molecule Name", "pEC50"}
|
|
|
|
| 8 |
|
| 9 |
S3_BUCKET: str = os.environ.get("S3_BUCKET", "")
|
| 10 |
-
ACTIVITY_LEADERBOARD_S3 =
|
| 11 |
-
|
| 12 |
-
STRUCTURE_LEADERBOARD_S3 = f"s3://{S3_BUCKET}/leaderboard/interim/structure/leaderboard_latest.csv"
|
|
|
|
| 5 |
ACTIVITY_DATASET_SIZE = 531
|
| 6 |
STRUCTURE_DATASET_SIZE = 125
|
| 7 |
REQUIRED_ACTIVITY_COLUMNS = {"SMILES", "Molecule Name", "pEC50"}
|
| 8 |
+
TIME_BETWEEN_SUBMISSIONS = 28800 # 8 hours in seconds
|
| 9 |
|
| 10 |
S3_BUCKET: str = os.environ.get("S3_BUCKET", "")
|
| 11 |
+
ACTIVITY_LEADERBOARD_S3 = "leaderboard/interim/activity/leaderboard_latest.csv"
|
| 12 |
+
STRUCTURE_LEADERBOARD_S3 = "leaderboard/interim/structure/leaderboard_latest.csv"
|
|
|
requirements.txt
CHANGED
|
@@ -8,5 +8,4 @@ scikit-learn
|
|
| 8 |
loguru
|
| 9 |
statsmodels
|
| 10 |
tqdm
|
| 11 |
-
boto3
|
| 12 |
-
s3fs
|
|
|
|
| 8 |
loguru
|
| 9 |
statsmodels
|
| 10 |
tqdm
|
| 11 |
+
boto3
|
|
|
submission_store.py
CHANGED
|
@@ -19,8 +19,8 @@ AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_DEFAULT_REGION
|
|
| 19 |
Standard boto3 credentials — set via HuggingFace Space secrets.
|
| 20 |
"""
|
| 21 |
|
| 22 |
-
import json
|
| 23 |
import os
|
|
|
|
| 24 |
from pathlib import Path
|
| 25 |
|
| 26 |
import boto3
|
|
@@ -89,3 +89,54 @@ def upload_submission(submission: Submission, file_path: Path) -> Submission:
|
|
| 89 |
)
|
| 90 |
|
| 91 |
return submission
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
Standard boto3 credentials — set via HuggingFace Space secrets.
|
| 20 |
"""
|
| 21 |
|
|
|
|
| 22 |
import os
|
| 23 |
+
from datetime import datetime, timezone
|
| 24 |
from pathlib import Path
|
| 25 |
|
| 26 |
import boto3
|
|
|
|
| 89 |
)
|
| 90 |
|
| 91 |
return submission
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _fetch_last_submission_date(track: str, user_id: str) -> datetime | None:
    """Fetch the time of the most recent submission for a track and user.

    The submission time is taken from the S3 ``LastModified`` timestamp of the
    ``metadata.json`` objects stored under ``submissions/{track}/{user_id}/``.
    All result pages are walked, so the answer stays correct even when the
    prefix holds more objects than a single ListObjectsV2 response returns.

    Args:
        track (str): The track name (e.g., "activity" or "structure").
        user_id (str): The user ID to check for previous submissions.

    Returns:
        datetime | None: The UTC time of the most recent submission, or None
        if no previous submission is found, ``S3_BUCKET`` is not configured,
        or the S3 lookup fails (the failure is logged, not raised).

    """
    bucket = os.environ.get("S3_BUCKET")
    if not bucket:
        logger.warning(
            "S3_BUCKET not set — cannot fetch last submission date. "
            "Set S3_BUCKET and AWS credentials as Space secrets to enable this feature."
        )
        return None

    s3 = boto3.client("s3")
    prefix = f"submissions/{track}/{user_id}/"
    try:
        # Keys come back in lexicographic order, one page at a time; the newest
        # metadata.json may sit on a later page, so every page is inspected.
        paginator = s3.get_paginator("list_objects_v2")
        object_count = 0
        latest = None
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            # "Contents" is absent from a page that lists no objects.
            for obj in page.get("Contents", []):
                object_count += 1
                if not obj["Key"].endswith("metadata.json"):
                    continue
                modified = obj["LastModified"]
                if latest is None or modified > latest:
                    latest = modified

        if object_count == 0:
            return None  # No submissions found

        logger.info(f"Found {object_count} objects under prefix {prefix!r}.")

        if latest is None:  # Objects exist, but none is a metadata.json
            logger.warning(f"No metadata.json files found under prefix {prefix!r}.")
            return None

        return latest.astimezone(timezone.utc)

    except Exception as exc:
        # Best-effort lookup: report the failure and let the caller proceed
        # as if no previous submission exists.
        logger.error(f"Failed to fetch last submission date for {user_id!r}: {exc}")
        return None
|