Spaces:
Sleeping
Sleeping
Nicolas Wagner committed on
Commit ·
dcb04e7
1
Parent(s): a2556f7
textual update
Browse files
- src/about.py +24 -70
- src/populate.py +15 -2
- src/submission/submit_csv.py +3 -11
- src/submission/validate_csv.py +5 -5
src/about.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
TITLE = """<h1 id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
|
| 2 |
|
| 3 |
INTRODUCTION_TEXT = """
|
| 4 |
-
Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the best
|
| 5 |
"""
|
| 6 |
|
| 7 |
LLM_BENCHMARKS_TEXT = """
|
|
@@ -9,12 +9,12 @@ LLM_BENCHMARKS_TEXT = """
|
|
| 9 |
|
| 10 |
### 1. Register Your Team
|
| 11 |
- Go to the "Register Team" tab
|
| 12 |
-
- Enter your team name and number of teammates
|
| 13 |
-
- **Save your token
|
| 14 |
-
- You won't be able to see your token again after registration
|
| 15 |
|
| 16 |
-
### 2.
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
### 3. Prepare Your Predictions
|
| 20 |
Create a CSV file with two columns:
|
|
@@ -24,80 +24,34 @@ Create a CSV file with two columns:
|
|
| 24 |
Example CSV format:
|
| 25 |
```csv
|
| 26 |
id,label
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
```
|
| 32 |
|
|
|
|
|
|
|
| 33 |
### 4. Submit Your Predictions
|
| 34 |
- Go to the "Submit Predictions" tab
|
| 35 |
-
- Enter your team token
|
| 36 |
-
-
|
| 37 |
-
-
|
| 38 |
-
|
| 39 |
-
### 5. Evaluation Metrics
|
| 40 |
-
Your predictions are evaluated on:
|
| 41 |
-
- **Accuracy**: Percentage of correct predictions
|
| 42 |
-
- **F1 Score**: Harmonic mean of precision and recall
|
| 43 |
-
- **Precision**: True positives / (True positives + False positives)
|
| 44 |
-
- **Recall**: True positives / (True positives + False negatives)
|
| 45 |
-
|
| 46 |
-
### 6. Leaderboard Updates
|
| 47 |
-
- Only your **best** scores are displayed on the leaderboard
|
| 48 |
-
- A submission is accepted only if it improves your accuracy or F1 score
|
| 49 |
-
- The leaderboard is sorted by best F1 score (primary metric)
|
| 50 |
-
- If F1 score is tied, earlier submission date is used as a tiebreaker
|
| 51 |
-
- **Rate Limit**: You can submit once every 15 minutes
|
| 52 |
|
| 53 |
-
## 🏆 Prize Distribution
|
| 54 |
|
| 55 |
Prizes are awarded based on the **F1 Score** metric:
|
| 56 |
|
| 57 |
-
- **1st Prize**:
|
| 58 |
-
- **2nd Prize**:
|
| 59 |
-
- **
|
| 60 |
|
| 61 |
-
The final rankings will be
|
| 62 |
-
|
| 63 |
-
## Important Notes
|
| 64 |
-
- True labels are kept private and not accessible to participants
|
| 65 |
-
- You can submit once every **15 minutes** - plan your submissions carefully
|
| 66 |
-
- Only your best scores count on the leaderboard
|
| 67 |
-
- Make sure your CSV file format is correct before submitting
|
| 68 |
-
- **All IDs from the test set must be present in your submission**
|
| 69 |
-
"""
|
| 70 |
|
| 71 |
-
|
| 72 |
-
## Submission Guidelines
|
| 73 |
-
|
| 74 |
-
### CSV File Requirements
|
| 75 |
-
- Must contain exactly two columns: `id` and `label`
|
| 76 |
-
- `id` must be UUID strings matching the test set exactly
|
| 77 |
-
- `label` must be exactly `0.0` (real) or `1.0` (fake)
|
| 78 |
-
- No missing values allowed
|
| 79 |
-
- **All IDs from the test set must be included** in your submission
|
| 80 |
-
- No unknown IDs are allowed (only IDs from the test set)
|
| 81 |
-
|
| 82 |
-
### Label Format
|
| 83 |
-
Accepted formats for labels:
|
| 84 |
-
- **Only**: `0.0` (real) or `1.0` (fake)
|
| 85 |
-
- Any other format will be rejected
|
| 86 |
-
|
| 87 |
-
### Scoring
|
| 88 |
-
- Submissions are evaluated immediately upon upload
|
| 89 |
-
- Scores are computed using accuracy, F1 score, precision, and recall
|
| 90 |
-
- Only submissions that improve your best accuracy or F1 score are accepted
|
| 91 |
-
- Rejected submissions are logged but don't update the leaderboard
|
| 92 |
-
- **Rate Limit**: Teams can submit once every 15 minutes
|
| 93 |
-
|
| 94 |
-
## 🏆 Prize Distribution & Evaluation Criteria
|
| 95 |
-
|
| 96 |
-
Prizes are awarded based on the **F1 Score** metric:
|
| 97 |
|
| 98 |
-
- **
|
| 99 |
-
- **2nd Prize**: Team with the second highest F1 score
|
| 100 |
-
- **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
|
| 101 |
|
| 102 |
-
|
| 103 |
"""
|
|
|
|
| 1 |
TITLE = """<h1 id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
|
| 2 |
|
| 3 |
INTRODUCTION_TEXT = """
|
| 4 |
+
Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the submission with the best F1 scores for each team.
|
| 5 |
"""
|
| 6 |
|
| 7 |
LLM_BENCHMARKS_TEXT = """
|
|
|
|
| 9 |
|
| 10 |
### 1. Register Your Team
|
| 11 |
- Go to the "Register Team" tab
|
| 12 |
+
- Enter your team name and the total number of teammates
|
| 13 |
+
- **Save your token** - you'll need it to submit predictions and you won't be able to see your token again after registration
|
|
|
|
| 14 |
|
| 15 |
+
### 2. Exploratory Notebook
|
| 16 |
+
To get you started quickly, we have prepared an [Exploratory Notebook](https://colab.research.google.com/drive/16O_P901xLdjkka8Xi4CfysF6h8l8q28H?usp=sharing)
|
| 17 |
+
Feel free to use your computer instead of Google Colab to run the notebook
|
| 18 |
|
| 19 |
### 3. Prepare Your Predictions
|
| 20 |
Create a CSV file with two columns:
|
|
|
|
| 24 |
Example CSV format:
|
| 25 |
```csv
|
| 26 |
id,label
|
| 27 |
+
f7e3a2c1,0.0
|
| 28 |
+
8b1c4d2e,1.0
|
| 29 |
+
7f5b9e8a,0.0
|
| 30 |
+
c2fa163b,1.0
|
| 31 |
```
|
| 32 |
|
| 33 |
+
- True labels are kept private and not accessible to participants
|
| 34 |
+
|
| 35 |
### 4. Submit Your Predictions
|
| 36 |
- Go to the "Submit Predictions" tab
|
| 37 |
+
- Enter your team token, upload your CSV file and submit
|
| 38 |
+
- Your submission will be automatically evaluated - There is a **rate limit** of 1 valid submission per 15 minutes per team
|
| 39 |
+
- Only your **best** scores, selected based on F1 score, are displayed on the leaderboard
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
## 🏆 Prize Distribution
|
| 42 |
|
| 43 |
Prizes are awarded based on the **F1 Score** metric:
|
| 44 |
|
| 45 |
+
- 🥇 **1st Prize**: 75 CHF digitec giftcard per team member
|
| 46 |
+
- 🥈 **2nd Prize**: 20 CHF digitec giftcard per team member
|
| 47 |
+
- 🥉 **3rd Prize**: 20 CHF digitec giftcard per team member
|
| 48 |
|
| 49 |
+
The final rankings will be set at the end of the hackathon, any submissions after the deadline won't count towards the prizes.
|
| 50 |
+
**Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
We will also award a **Creative Prize** to the team that submits the most creative solution:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
+
- 🎨 **Creative Prize**: 20 CHF digitec giftcard per team member
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
To select the teams, we sadly do not have the time to evaluate each solution, so we will ask only the 8 teams with the highest F1 scores to present.
|
| 57 |
"""
|
src/populate.py
CHANGED
|
@@ -19,6 +19,15 @@ def get_leaderboard_df(results_path: str, cols: list) -> pd.DataFrame:
|
|
| 19 |
by=[TeamColumn.best_f1.name, TeamColumn.best_submission_date.name],
|
| 20 |
ascending=[False, True],
|
| 21 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
df = df[cols].round(decimals=4)
|
| 23 |
return df
|
| 24 |
|
|
@@ -59,8 +68,12 @@ def get_submission_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
| 59 |
except Exception:
|
| 60 |
continue
|
| 61 |
|
| 62 |
-
accepted_list = [
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
df_accepted = (
|
| 66 |
pd.DataFrame.from_records(accepted_list, columns=cols) if accepted_list else pd.DataFrame(columns=cols)
|
|
|
|
| 19 |
by=[TeamColumn.best_f1.name, TeamColumn.best_submission_date.name],
|
| 20 |
ascending=[False, True],
|
| 21 |
)
|
| 22 |
+
|
| 23 |
+
team_name_col = TeamColumn.team_name.name
|
| 24 |
+
if team_name_col in df.columns and len(df) > 0:
|
| 25 |
+
medals = ["🥇", "🥈", "🥉"]
|
| 26 |
+
for idx in range(min(3, len(df))):
|
| 27 |
+
current_name = str(df.iloc[idx][team_name_col])
|
| 28 |
+
if not any(current_name.startswith(medal) for medal in medals):
|
| 29 |
+
df.iloc[idx, df.columns.get_loc(team_name_col)] = f"{medals[idx]} {current_name}"
|
| 30 |
+
|
| 31 |
df = df[cols].round(decimals=4)
|
| 32 |
return df
|
| 33 |
|
|
|
|
| 68 |
except Exception:
|
| 69 |
continue
|
| 70 |
|
| 71 |
+
accepted_list = [
|
| 72 |
+
s for s in all_submissions if s[SubmissionQueueColumn.status.name] in ["ACCEPTED", "ACCEPTED, BUT WORST"]
|
| 73 |
+
]
|
| 74 |
+
rejected_list = [
|
| 75 |
+
s for s in all_submissions if s[SubmissionQueueColumn.status.name] not in ["ACCEPTED", "ACCEPTED, BUT WORST"]
|
| 76 |
+
]
|
| 77 |
|
| 78 |
df_accepted = (
|
| 79 |
pd.DataFrame.from_records(accepted_list, columns=cols) if accepted_list else pd.DataFrame(columns=cols)
|
src/submission/submit_csv.py
CHANGED
|
@@ -87,18 +87,10 @@ def should_update_scores(new_scores: dict, best_scores: dict | None) -> bool:
|
|
| 87 |
if best_scores is None:
|
| 88 |
return True
|
| 89 |
|
| 90 |
-
new_accuracy = new_scores.get("accuracy", 0.0)
|
| 91 |
new_f1 = new_scores.get("f1", 0.0)
|
| 92 |
-
|
| 93 |
-
best_accuracy = best_scores.get("best_accuracy", 0.0)
|
| 94 |
best_f1 = best_scores.get("best_f1", 0.0)
|
| 95 |
|
| 96 |
-
|
| 97 |
-
return True
|
| 98 |
-
if new_accuracy == best_accuracy and new_f1 > best_f1:
|
| 99 |
-
return True
|
| 100 |
-
|
| 101 |
-
return False
|
| 102 |
|
| 103 |
|
| 104 |
def check_rate_limit(team_name: str) -> tuple[bool, str]:
|
|
@@ -173,10 +165,10 @@ def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
|
|
| 173 |
status = "ACCEPTED"
|
| 174 |
message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
|
| 175 |
else:
|
| 176 |
-
status = "
|
| 177 |
best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
|
| 178 |
best_f1 = best_scores.get("best_f1", 0.0) if best_scores else 0.0
|
| 179 |
-
message = f"Submission
|
| 180 |
|
| 181 |
save_submission(team_name, token_hash, csv_content, scores, status)
|
| 182 |
|
|
|
|
| 87 |
if best_scores is None:
|
| 88 |
return True
|
| 89 |
|
|
|
|
| 90 |
new_f1 = new_scores.get("f1", 0.0)
|
|
|
|
|
|
|
| 91 |
best_f1 = best_scores.get("best_f1", 0.0)
|
| 92 |
|
| 93 |
+
return new_f1 > best_f1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
def check_rate_limit(team_name: str) -> tuple[bool, str]:
|
|
|
|
| 165 |
status = "ACCEPTED"
|
| 166 |
message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
|
| 167 |
else:
|
| 168 |
+
status = "ACCEPTED, BUT WORST"
|
| 169 |
best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
|
| 170 |
best_f1 = best_scores.get("best_f1", 0.0) if best_scores else 0.0
|
| 171 |
+
message = f"Submission accepted but did not improve your best score. Your scores (Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}) vs. your best scores (Accuracy={best_acc:.4f}, F1={best_f1:.4f})."
|
| 172 |
|
| 173 |
save_submission(team_name, token_hash, csv_content, scores, status)
|
| 174 |
|
src/submission/validate_csv.py
CHANGED
|
@@ -8,13 +8,13 @@ def normalize_label(label: any) -> float | None:
|
|
| 8 |
return None
|
| 9 |
|
| 10 |
if isinstance(label, (int, float)):
|
| 11 |
-
if label
|
| 12 |
return float(label)
|
| 13 |
return None
|
| 14 |
|
| 15 |
if isinstance(label, str):
|
| 16 |
label_stripped = label.strip()
|
| 17 |
-
if label_stripped in ["0.0", "1.0"]:
|
| 18 |
return float(label_stripped)
|
| 19 |
return None
|
| 20 |
|
|
@@ -39,14 +39,14 @@ def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool,
|
|
| 39 |
if df.empty:
|
| 40 |
return False, "CSV is empty", None
|
| 41 |
|
| 42 |
-
df["id"] = df["id"].astype(str).str.strip()
|
| 43 |
-
|
| 44 |
if df["id"].isna().any():
|
| 45 |
return False, "id column contains missing values", None
|
| 46 |
|
| 47 |
if df["label"].isna().any():
|
| 48 |
return False, "label column contains missing values", None
|
| 49 |
|
|
|
|
|
|
|
| 50 |
normalized_labels = []
|
| 51 |
invalid_labels = []
|
| 52 |
|
|
@@ -55,7 +55,7 @@ def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool,
|
|
| 55 |
label = normalize_label(row["label"])
|
| 56 |
|
| 57 |
if label is None:
|
| 58 |
-
invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0.0 or 1.0)")
|
| 59 |
else:
|
| 60 |
normalized_labels.append(label)
|
| 61 |
|
|
|
|
| 8 |
return None
|
| 9 |
|
| 10 |
if isinstance(label, (int, float)):
|
| 11 |
+
if label in [0, 1, 0.0, 1.0]:
|
| 12 |
return float(label)
|
| 13 |
return None
|
| 14 |
|
| 15 |
if isinstance(label, str):
|
| 16 |
label_stripped = label.strip()
|
| 17 |
+
if label_stripped in ["0", "1", "0.0", "1.0"]:
|
| 18 |
return float(label_stripped)
|
| 19 |
return None
|
| 20 |
|
|
|
|
| 39 |
if df.empty:
|
| 40 |
return False, "CSV is empty", None
|
| 41 |
|
|
|
|
|
|
|
| 42 |
if df["id"].isna().any():
|
| 43 |
return False, "id column contains missing values", None
|
| 44 |
|
| 45 |
if df["label"].isna().any():
|
| 46 |
return False, "label column contains missing values", None
|
| 47 |
|
| 48 |
+
df["id"] = df["id"].astype(str).str.strip()
|
| 49 |
+
|
| 50 |
normalized_labels = []
|
| 51 |
invalid_labels = []
|
| 52 |
|
|
|
|
| 55 |
label = normalize_label(row["label"])
|
| 56 |
|
| 57 |
if label is None:
|
| 58 |
+
invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0, 1, 0.0, or 1.0)")
|
| 59 |
else:
|
| 60 |
normalized_labels.append(label)
|
| 61 |
|