Spaces:
Runtime error
Runtime error
Commit
·
b3d34ad
1
Parent(s):
9d4d10e
slight refactor, doc
Browse files
- src/submission/submit.py +10 -8
src/submission/submit.py
CHANGED
|
@@ -35,10 +35,6 @@ def add_new_solutions(
|
|
| 35 |
is_warmup_dataset: bool,
|
| 36 |
ensure_all_present: bool = False,
|
| 37 |
):
|
| 38 |
-
# Rate limits:
|
| 39 |
-
# 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
|
| 40 |
-
# 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions RATE_LIMIT_WINDOW_HRS hours overall.
|
| 41 |
-
|
| 42 |
try:
|
| 43 |
submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
|
| 44 |
except (DatasetNotFoundError, FileNotFoundError):
|
|
@@ -46,14 +42,19 @@ def add_new_solutions(
|
|
| 46 |
|
| 47 |
logger.info(f"Found {len(submitted_ids)} submissions")
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
sub_df = pd.DataFrame.from_dict(
|
| 50 |
{
|
| 51 |
"submission_id": submitted_ids,
|
| 52 |
-
"user_id": map(
|
| 53 |
-
"timestamp": map(
|
| 54 |
}
|
| 55 |
)
|
| 56 |
|
|
|
|
| 57 |
now = datetime.now(timezone.utc)
|
| 58 |
cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
|
| 59 |
user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
|
|
@@ -66,6 +67,7 @@ def add_new_solutions(
|
|
| 66 |
f"Remaining wait time: {remaining_hrs:.2f} hours"
|
| 67 |
)
|
| 68 |
|
|
|
|
| 69 |
cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
|
| 70 |
if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
|
| 71 |
logger.info(
|
|
@@ -174,14 +176,14 @@ def _validate_all_submissions_present(
|
|
| 174 |
return ValueError("Duplicate problem IDs exist in uploaded file")
|
| 175 |
|
| 176 |
|
| 177 |
-
def
|
| 178 |
"""
|
| 179 |
Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
| 180 |
"""
|
| 181 |
return submission_id.rsplit("_", 1)[-1]
|
| 182 |
|
| 183 |
|
| 184 |
-
def
|
| 185 |
"""
|
| 186 |
Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
| 187 |
"""
|
|
|
|
| 35 |
is_warmup_dataset: bool,
|
| 36 |
ensure_all_present: bool = False,
|
| 37 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
try:
|
| 39 |
submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
|
| 40 |
except (DatasetNotFoundError, FileNotFoundError):
|
|
|
|
| 42 |
|
| 43 |
logger.info(f"Found {len(submitted_ids)} submissions")
|
| 44 |
|
| 45 |
+
# Rate limits:
|
| 46 |
+
# 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
|
| 47 |
+
# 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions RATE_LIMIT_WINDOW_HRS hours overall.
|
| 48 |
+
|
| 49 |
sub_df = pd.DataFrame.from_dict(
|
| 50 |
{
|
| 51 |
"submission_id": submitted_ids,
|
| 52 |
+
"user_id": map(_submission_id_to_user_id, submitted_ids),
|
| 53 |
+
"timestamp": map(_submission_id_to_timestamp, submitted_ids),
|
| 54 |
}
|
| 55 |
)
|
| 56 |
|
| 57 |
+
# Per user limit
|
| 58 |
now = datetime.now(timezone.utc)
|
| 59 |
cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
|
| 60 |
user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
|
|
|
|
| 67 |
f"Remaining wait time: {remaining_hrs:.2f} hours"
|
| 68 |
)
|
| 69 |
|
| 70 |
+
# Overall limit
|
| 71 |
cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
|
| 72 |
if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
|
| 73 |
logger.info(
|
|
|
|
| 176 |
return ValueError("Duplicate problem IDs exist in uploaded file")
|
| 177 |
|
| 178 |
|
| 179 |
+
def _submission_id_to_user_id(submission_id: str) -> str:
|
| 180 |
"""
|
| 181 |
Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
| 182 |
"""
|
| 183 |
return submission_id.rsplit("_", 1)[-1]
|
| 184 |
|
| 185 |
|
| 186 |
+
def _submission_id_to_timestamp(submission_id: str) -> datetime:
|
| 187 |
"""
|
| 188 |
Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
| 189 |
"""
|