pxr-challenge / models.py
hmacdope's picture
feat: Add proprietary data disclosure checkbox to submission form
efb1001
"""Pydantic models for PXR Challenge submission data."""
import uuid
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field
def _safeify_username(username: str) -> str:
    """Return *username* in a form that is safe to embed in S3 keys and paths.

    Surrounding whitespace is stripped first; after that every ``/`` and
    every space is turned into ``_``. The slash is replaced because
    organisation accounts are written as ``org/user``, which would otherwise
    add an unintended level of nesting to the S3 path.
    """
    # One translation table covers both characters in a single pass.
    unsafe_to_underscore = str.maketrans({"/": "_", " ": "_"})
    trimmed = str(username.strip())
    return trimmed.translate(unsafe_to_underscore)
class Submission(BaseModel):
    """A single competition submission.

    Serialisable to JSON for storage in S3 alongside the uploaded prediction
    file. All fields are stored; only non-private fields are ever surfaced on
    the leaderboard.

    Attributes:
        submission_id: Auto-generated UUID, used as the S3 key component.
        submitted_at: UTC timestamp of submission.
        username: HuggingFace username (required, used for deduplication).
        safe_username: Sanitised username for use in S3 keys and file paths.
            Always derived from ``username`` when the model is created; any
            value supplied by the caller is overwritten.
        user_alias: Optional display alias for anonymous submissions.
        anonymous: If True, display user_alias on leaderboard instead of username.
        participant_name: Real name — stored privately, never displayed.
        discord_username: Discord handle — stored privately.
        email: Contact email — stored privately.
        affiliation: Institutional affiliation — stored privately.
        model_report_link: URL to method report (required before deadline).
        include_in_publication: Opt-in for Challenge publication authorship.
        used_proprietary_data: Whether proprietary data was used in training.
        track: Competition track.
        filename: Original uploaded filename.
        s3_key: Full S3 object key for the uploaded prediction file.
            Populated by submission_store.upload_submission() after upload.
    """

    # Clear pydantic's protected namespaces: the ``model_report_link`` field
    # starts with the ``model_`` prefix that pydantic protects by default.
    model_config = ConfigDict(protected_namespaces=())

    # --- generated ---
    submission_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    # NOTE(review): datetime.utcnow() returns a *naive* datetime and is
    # deprecated since Python 3.12. Left unchanged because switching to an
    # aware timestamp would alter the serialised format and could break
    # comparisons against existing naive values -- confirm before migrating.
    submitted_at: datetime = Field(default_factory=datetime.utcnow)

    # --- public identity ---
    username: str
    safe_username: str = ""
    user_alias: str = ""
    anonymous: bool = False

    # --- private contact ---
    participant_name: str = ""
    discord_username: str = ""
    email: str = ""
    affiliation: str = ""
    model_report_link: str = ""
    include_in_publication: bool = False
    used_proprietary_data: bool = False

    # --- submission ---
    track: Literal["Activity Prediction", "Structure Prediction"]
    filename: str
    s3_key: str = ""

    def model_post_init(self, __context: object) -> None:
        """Derive ``safe_username`` as soon as the model has been validated.

        Without this hook the field stayed ``""`` until ``s3_prefix`` was
        first read, so metadata serialised before that access carried an
        empty ``safe_username``.
        """
        super().model_post_init(__context)
        self.safe_username = _safeify_username(self.username)

    @property
    def display_name(self) -> str:
        """Name to show on the leaderboard.

        Returns ``user_alias`` when the submission is anonymous and an alias
        was provided; otherwise returns ``username``.
        """
        # NOTE(review): an anonymous submission with an empty alias falls
        # through to the real username. Presumably the submission form
        # enforces an alias for anonymous entries -- verify against caller.
        if self.anonymous and self.user_alias:
            return self.user_alias
        return self.username

    @property
    def s3_prefix(self) -> str:
        """S3 prefix for all objects belonging to this submission.

        Layout::

            submissions/
                activity/{safe_username}/{submission_id}/
                    metadata.json
                    predictions.parquet   # or .csv
                structure/{safe_username}/{submission_id}/
                    metadata.json
                    structures.zip

        Reading this property also refreshes ``safe_username`` from the
        current ``username``.
        """
        track_slug = "activity" if self.track == "Activity Prediction" else "structure"
        safe_username = _safeify_username(self.username)
        # Kept for backward compatibility: keeps the stored field in sync if
        # ``username`` was reassigned after construction.
        self.safe_username = safe_username
        return f"submissions/{track_slug}/{safe_username}/{self.submission_id}"