Nicolas Wagner committed on
Commit
dcb04e7
·
1 Parent(s): a2556f7

textual update

Browse files
src/about.py CHANGED
@@ -1,7 +1,7 @@
1
  TITLE = """<h1 id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
2
 
3
  INTRODUCTION_TEXT = """
4
- Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the best performance metrics for each team.
5
  """
6
 
7
  LLM_BENCHMARKS_TEXT = """
@@ -9,12 +9,12 @@ LLM_BENCHMARKS_TEXT = """
9
 
10
  ### 1. Register Your Team
11
  - Go to the "Register Team" tab
12
- - Enter your team name and number of teammates
13
- - **Save your token immediately** - you'll need it to submit predictions
14
- - You won't be able to see your token again after registration
15
 
16
- ### 2. Explore the Data
17
- Check out this [Exploratory Notebook](https://colab.research.google.com/drive/16O_P901xLdjkka8Xi4CfysF6h8l8q28H?usp=sharing) to understand the dataset and get started with your analysis.
 
18
 
19
  ### 3. Prepare Your Predictions
20
  Create a CSV file with two columns:
@@ -24,80 +24,34 @@ Create a CSV file with two columns:
24
  Example CSV format:
25
  ```csv
26
  id,label
27
- 550e8400-e29b-41d4-a716-446655440000,0.0
28
- 550e8400-e29b-41d4-a716-446655440001,1.0
29
- 550e8400-e29b-41d4-a716-446655440002,0.0
30
- 550e8400-e29b-41d4-a716-446655440003,1.0
31
  ```
32
 
 
 
33
  ### 4. Submit Your Predictions
34
  - Go to the "Submit Predictions" tab
35
- - Enter your team token
36
- - Upload your CSV file
37
- - Your submission will be automatically evaluated
38
-
39
- ### 5. Evaluation Metrics
40
- Your predictions are evaluated on:
41
- - **Accuracy**: Percentage of correct predictions
42
- - **F1 Score**: Harmonic mean of precision and recall
43
- - **Precision**: True positives / (True positives + False positives)
44
- - **Recall**: True positives / (True positives + False negatives)
45
-
46
- ### 6. Leaderboard Updates
47
- - Only your **best** scores are displayed on the leaderboard
48
- - A submission is accepted only if it improves your accuracy or F1 score
49
- - The leaderboard is sorted by best F1 score (primary metric)
50
- - If F1 score is tied, earlier submission date is used as a tiebreaker
51
- - **Rate Limit**: You can submit once every 15 minutes
52
 
53
- ## 🏆 Prize Distribution & Evaluation Criteria
54
 
55
  Prizes are awarded based on the **F1 Score** metric:
56
 
57
- - **1st Prize**: Team with the highest F1 score
58
- - **2nd Prize**: Team with the second highest F1 score
59
- - **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
60
 
61
- The final rankings will be determined at the end of the hackathon based on each team's best F1 score.
62
-
63
- ## Important Notes
64
- - True labels are kept private and not accessible to participants
65
- - You can submit once every **15 minutes** - plan your submissions carefully
66
- - Only your best scores count on the leaderboard
67
- - Make sure your CSV file format is correct before submitting
68
- - **All IDs from the test set must be present in your submission**
69
- """
70
 
71
- EVALUATION_QUEUE_TEXT = """
72
- ## Submission Guidelines
73
-
74
- ### CSV File Requirements
75
- - Must contain exactly two columns: `id` and `label`
76
- - `id` must be UUID strings matching the test set exactly
77
- - `label` must be exactly `0.0` (real) or `1.0` (fake)
78
- - No missing values allowed
79
- - **All IDs from the test set must be included** in your submission
80
- - No unknown IDs are allowed (only IDs from the test set)
81
-
82
- ### Label Format
83
- Accepted formats for labels:
84
- - **Only**: `0.0` (real) or `1.0` (fake)
85
- - Any other format will be rejected
86
-
87
- ### Scoring
88
- - Submissions are evaluated immediately upon upload
89
- - Scores are computed using accuracy, F1 score, precision, and recall
90
- - Only submissions that improve your best accuracy or F1 score are accepted
91
- - Rejected submissions are logged but don't update the leaderboard
92
- - **Rate Limit**: Teams can submit once every 15 minutes
93
-
94
- ## 🏆 Prize Distribution & Evaluation Criteria
95
-
96
- Prizes are awarded based on the **F1 Score** metric:
97
 
98
- - **1st Prize**: Team with the highest F1 score
99
- - **2nd Prize**: Team with the second highest F1 score
100
- - **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
101
 
102
- The final rankings will be determined at the end of the hackathon based on each team's best F1 score.
103
  """
 
1
  TITLE = """<h1 id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
2
 
3
  INTRODUCTION_TEXT = """
4
+ Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the submission with the best F1 score for each team.
5
  """
6
 
7
  LLM_BENCHMARKS_TEXT = """
 
9
 
10
  ### 1. Register Your Team
11
  - Go to the "Register Team" tab
12
+ - Enter your team name and the total number of teammates
13
+ - **Save your token** - you'll need it to submit predictions and you won't be able to see your token again after registration
 
14
 
15
+ ### 2. Exploratory Notebook
16
+ To get you started quickly, we have prepared an [Exploratory Notebook](https://colab.research.google.com/drive/16O_P901xLdjkka8Xi4CfysF6h8l8q28H?usp=sharing).
17
+ Feel free to use your computer instead of Google Colab to run the notebook
18
 
19
  ### 3. Prepare Your Predictions
20
  Create a CSV file with two columns:
 
24
  Example CSV format:
25
  ```csv
26
  id,label
27
+ f7e3a2c1,0.0
28
+ 8b1c4d2e,1.0
29
+ 7f5b9e8a,0.0
30
+ c2fa163b,1.0
31
  ```
32
 
33
+ - True labels are kept private and not accessible to participants
34
+
35
  ### 4. Submit Your Predictions
36
  - Go to the "Submit Predictions" tab
37
+ - Enter your team token, upload your CSV file and submit
38
+ - Your submission will be automatically evaluated - There is a **rate limit** of 1 valid submission per 15 minutes per team
39
+ - Only your **best** scores, selected based on F1 score, are displayed on the leaderboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ ## 🏆 Prize Distribution
42
 
43
  Prizes are awarded based on the **F1 Score** metric:
44
 
45
+ - 🥇 **1st Prize**: 75 CHF digitec giftcard per team member
46
+ - 🥈 **2nd Prize**: 20 CHF digitec giftcard per team member
47
+ - 🥉 **3rd Prize**: 20 CHF digitec giftcard per team member
48
 
49
+ The final rankings will be set at the end of the hackathon; any submissions after the deadline won't count towards the prizes.
50
+ **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
 
 
 
 
 
 
 
51
 
52
+ We will also award a **Creative Prize** to the team that submits the most creative solution:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ - 🎨 **Creative Prize**: 20 CHF digitec giftcard per team member
 
 
55
 
56
+ To select the teams, we sadly do not have the time to evaluate each solution, so we will ask only the 8 teams with the highest F1 scores to present.
57
  """
src/populate.py CHANGED
@@ -19,6 +19,15 @@ def get_leaderboard_df(results_path: str, cols: list) -> pd.DataFrame:
19
  by=[TeamColumn.best_f1.name, TeamColumn.best_submission_date.name],
20
  ascending=[False, True],
21
  )
 
 
 
 
 
 
 
 
 
22
  df = df[cols].round(decimals=4)
23
  return df
24
 
@@ -59,8 +68,12 @@ def get_submission_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
59
  except Exception:
60
  continue
61
 
62
- accepted_list = [s for s in all_submissions if s[SubmissionQueueColumn.status.name] == "ACCEPTED"]
63
- rejected_list = [s for s in all_submissions if s[SubmissionQueueColumn.status.name] == "REJECTED"]
 
 
 
 
64
 
65
  df_accepted = (
66
  pd.DataFrame.from_records(accepted_list, columns=cols) if accepted_list else pd.DataFrame(columns=cols)
 
19
  by=[TeamColumn.best_f1.name, TeamColumn.best_submission_date.name],
20
  ascending=[False, True],
21
  )
22
+
23
+ team_name_col = TeamColumn.team_name.name
24
+ if team_name_col in df.columns and len(df) > 0:
25
+ medals = ["🥇", "🥈", "🥉"]
26
+ for idx in range(min(3, len(df))):
27
+ current_name = str(df.iloc[idx][team_name_col])
28
+ if not any(current_name.startswith(medal) for medal in medals):
29
+ df.iloc[idx, df.columns.get_loc(team_name_col)] = f"{medals[idx]} {current_name}"
30
+
31
  df = df[cols].round(decimals=4)
32
  return df
33
 
 
68
  except Exception:
69
  continue
70
 
71
+ accepted_list = [
72
+ s for s in all_submissions if s[SubmissionQueueColumn.status.name] in ["ACCEPTED", "ACCEPTED, BUT WORST"]
73
+ ]
74
+ rejected_list = [
75
+ s for s in all_submissions if s[SubmissionQueueColumn.status.name] not in ["ACCEPTED", "ACCEPTED, BUT WORST"]
76
+ ]
77
 
78
  df_accepted = (
79
  pd.DataFrame.from_records(accepted_list, columns=cols) if accepted_list else pd.DataFrame(columns=cols)
src/submission/submit_csv.py CHANGED
@@ -87,18 +87,10 @@ def should_update_scores(new_scores: dict, best_scores: dict | None) -> bool:
87
  if best_scores is None:
88
  return True
89
 
90
- new_accuracy = new_scores.get("accuracy", 0.0)
91
  new_f1 = new_scores.get("f1", 0.0)
92
-
93
- best_accuracy = best_scores.get("best_accuracy", 0.0)
94
  best_f1 = best_scores.get("best_f1", 0.0)
95
 
96
- if new_accuracy > best_accuracy:
97
- return True
98
- if new_accuracy == best_accuracy and new_f1 > best_f1:
99
- return True
100
-
101
- return False
102
 
103
 
104
  def check_rate_limit(team_name: str) -> tuple[bool, str]:
@@ -173,10 +165,10 @@ def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
173
  status = "ACCEPTED"
174
  message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
175
  else:
176
- status = "REJECTED"
177
  best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
178
  best_f1 = best_scores.get("best_f1", 0.0) if best_scores else 0.0
179
- message = f"Submission rejected. Your scores (Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}) did not improve your best scores (Accuracy={best_acc:.4f}, F1={best_f1:.4f})."
180
 
181
  save_submission(team_name, token_hash, csv_content, scores, status)
182
 
 
87
  if best_scores is None:
88
  return True
89
 
 
90
  new_f1 = new_scores.get("f1", 0.0)
 
 
91
  best_f1 = best_scores.get("best_f1", 0.0)
92
 
93
+ return new_f1 > best_f1
 
 
 
 
 
94
 
95
 
96
  def check_rate_limit(team_name: str) -> tuple[bool, str]:
 
165
  status = "ACCEPTED"
166
  message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
167
  else:
168
+ status = "ACCEPTED, BUT WORST"
169
  best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
170
  best_f1 = best_scores.get("best_f1", 0.0) if best_scores else 0.0
171
+ message = f"Submission accepted but did not improve your best score. Your scores (Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}) vs. your best scores (Accuracy={best_acc:.4f}, F1={best_f1:.4f})."
172
 
173
  save_submission(team_name, token_hash, csv_content, scores, status)
174
 
src/submission/validate_csv.py CHANGED
@@ -8,13 +8,13 @@ def normalize_label(label: any) -> float | None:
8
  return None
9
 
10
  if isinstance(label, (int, float)):
11
- if label == 0.0 or label == 1.0:
12
  return float(label)
13
  return None
14
 
15
  if isinstance(label, str):
16
  label_stripped = label.strip()
17
- if label_stripped in ["0.0", "1.0"]:
18
  return float(label_stripped)
19
  return None
20
 
@@ -39,14 +39,14 @@ def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool,
39
  if df.empty:
40
  return False, "CSV is empty", None
41
 
42
- df["id"] = df["id"].astype(str).str.strip()
43
-
44
  if df["id"].isna().any():
45
  return False, "id column contains missing values", None
46
 
47
  if df["label"].isna().any():
48
  return False, "label column contains missing values", None
49
 
 
 
50
  normalized_labels = []
51
  invalid_labels = []
52
 
@@ -55,7 +55,7 @@ def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool,
55
  label = normalize_label(row["label"])
56
 
57
  if label is None:
58
- invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0.0 or 1.0)")
59
  else:
60
  normalized_labels.append(label)
61
 
 
8
  return None
9
 
10
  if isinstance(label, (int, float)):
11
+ if label in [0, 1, 0.0, 1.0]:
12
  return float(label)
13
  return None
14
 
15
  if isinstance(label, str):
16
  label_stripped = label.strip()
17
+ if label_stripped in ["0", "1", "0.0", "1.0"]:
18
  return float(label_stripped)
19
  return None
20
 
 
39
  if df.empty:
40
  return False, "CSV is empty", None
41
 
 
 
42
  if df["id"].isna().any():
43
  return False, "id column contains missing values", None
44
 
45
  if df["label"].isna().any():
46
  return False, "label column contains missing values", None
47
 
48
+ df["id"] = df["id"].astype(str).str.strip()
49
+
50
  normalized_labels = []
51
  invalid_labels = []
52
 
 
55
  label = normalize_label(row["label"])
56
 
57
  if label is None:
58
+ invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0, 1, 0.0, or 1.0)")
59
  else:
60
  normalized_labels.append(label)
61