Spaces:

aurigin
/

Hackathon_Truth_Vs_Machine

Sleeping

App Files Community

Nicolas Wagner committed on Nov 25, 2025

Commit

30f0c04

1 Parent(s): bc714de

proper metric evaluation

Browse files

Files changed (6) hide show

app.py +3 -3
src/about.py +51 -25
src/display/utils.py +3 -3
src/leaderboard/read_team_results.py +3 -3
src/populate.py +4 -1
src/submission/submit_csv.py +1 -1

app.py CHANGED Viewed

@@ -84,15 +84,15 @@ def init_leaderboard(dataframe):
     if not valid_cols:
         valid_cols = [
             "Team Name",
-            "Best Accuracy ⬆️",
             "Best F1 Score ⬆️",
             "Best Precision ⬆️",
             "Best Recall ⬆️",
             "Best TP ⬆️",
             "Best FP ⬇️",
             "Best FN ⬇️",
             "Best TN ⬆️",
-            "Last Submission",
         ]
     if dataframe is None or dataframe.empty:
@@ -290,7 +290,7 @@ with demo:
                 gr.Markdown("## Submit Your Predictions", elem_classes="markdown-text")
                 gr.Markdown(
                     "Upload a CSV file with your predictions. The CSV must have two columns: "
-                    "`index` and `prediction`. Predictions should be binary (0/1 or 'real'/'fake').",
                     elem_classes="markdown-text",
                 )

     if not valid_cols:
         valid_cols = [
             "Team Name",
             "Best F1 Score ⬆️",
+            "Best Accuracy ⬆️",
             "Best Precision ⬆️",
             "Best Recall ⬆️",
             "Best TP ⬆️",
             "Best FP ⬇️",
             "Best FN ⬇️",
             "Best TN ⬆️",
+            "Submission Date",
         ]
     if dataframe is None or dataframe.empty:
                 gr.Markdown("## Submit Your Predictions", elem_classes="markdown-text")
                 gr.Markdown(
                     "Upload a CSV file with your predictions. The CSV must have two columns: "
+                    "`id` (UUID) and `label` (must be exactly `0.0` or `1.0`). All IDs from the test set must be included.",
                     elem_classes="markdown-text",
                 )

src/about.py CHANGED Viewed

@@ -13,62 +13,88 @@ LLM_BENCHMARKS_TEXT = """
 - **Save your token immediately** - you'll need it to submit predictions
 - You won't be able to see your token again after registration
-### 2. Prepare Your Predictions
 Create a CSV file with two columns:
-- `index`: The index of the test sample (must match the test set)
-- `prediction`: Your prediction (binary: 0/1, or "real"/"fake")
 Example CSV format:
 ```csv
-index,prediction
-43555,0
-43556,1
-43557,real
-43558,fake
 ```
-### 3. Submit Your Predictions
 - Go to the "Submit Predictions" tab
 - Enter your team token
 - Upload your CSV file
 - Your submission will be automatically evaluated
-### 4. Evaluation Metrics
 Your predictions are evaluated on:
 - **Accuracy**: Percentage of correct predictions
 - **F1 Score**: Harmonic mean of precision and recall
-- **Error Rate**: Percentage of incorrect predictions
-### 5. Leaderboard Updates
 - Only your **best** scores are displayed on the leaderboard
-- A submission is accepted only if it improves at least one metric
-- The leaderboard is sorted by best accuracy (primary metric)
-- If accuracy is tied, F1 score is used as a tiebreaker
 ## Important Notes
 - True labels are kept private and not accessible to participants
 - You can submit multiple times - only your best scores count
 - Make sure your CSV file format is correct before submitting
-- Indices in your CSV must exactly match the test set indices
 """
 EVALUATION_QUEUE_TEXT = """
 ## Submission Guidelines
 ### CSV File Requirements
-- Must contain exactly two columns: `index` and `prediction`
-- `index` must match the test set indices exactly
-- `prediction` must be binary: 0/1 or "real"/"fake"
 - No missing values allowed
-### Prediction Format
-Accepted formats for predictions:
-- Numeric: `0` (real) or `1` (fake)
-- String: `"real"` or `"fake"` (case-insensitive)
 ### Scoring
 - Submissions are evaluated immediately upon upload
-- Scores are computed using accuracy, F1 score, and error rate
-- Only submissions that improve your best scores are accepted
 - Rejected submissions are logged but don't update the leaderboard
 """

 - **Save your token immediately** - you'll need it to submit predictions
 - You won't be able to see your token again after registration
+### 2. Explore the Data
+Check out this [Exploratory Notebook](https://colab.research.google.com/drive/16O_P901xLdjkka8Xi4CfysF6h8l8q28H?usp=sharing) to understand the dataset and get started with your analysis.
+### 3. Prepare Your Predictions
 Create a CSV file with two columns:
+- `id`: The UUID identifier of the test sample (must match the test set)
+- `label`: Your prediction (must be exactly `0.0` for real or `1.0` for fake)
 Example CSV format:
 ```csv
+id,label
+550e8400-e29b-41d4-a716-446655440000,0.0
+550e8400-e29b-41d4-a716-446655440001,1.0
+550e8400-e29b-41d4-a716-446655440002,0.0
+550e8400-e29b-41d4-a716-446655440003,1.0
 ```
+### 4. Submit Your Predictions
 - Go to the "Submit Predictions" tab
 - Enter your team token
 - Upload your CSV file
 - Your submission will be automatically evaluated
+### 5. Evaluation Metrics
 Your predictions are evaluated on:
 - **Accuracy**: Percentage of correct predictions
 - **F1 Score**: Harmonic mean of precision and recall
+- **Precision**: True positives / (True positives + False positives)
+- **Recall**: True positives / (True positives + False negatives)
+### 6. Leaderboard Updates
 - Only your **best** scores are displayed on the leaderboard
+- A submission is accepted only if it improves your accuracy or F1 score
+- The leaderboard is sorted by best F1 score (primary metric)
+- If F1 score is tied, earlier submission date is used as a tiebreaker
+## 🏆 Prize Distribution & Evaluation Criteria
+Prizes are awarded based on the **F1 Score** metric:
+- **1st Prize**: Team with the highest F1 score
+- **2nd Prize**: Team with the second highest F1 score
+- **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
+The final rankings will be determined at the end of the hackathon based on each team's best F1 score.
 ## Important Notes
 - True labels are kept private and not accessible to participants
 - You can submit multiple times - only your best scores count
 - Make sure your CSV file format is correct before submitting
+- **All IDs from the test set must be present in your submission**
 """
 EVALUATION_QUEUE_TEXT = """
 ## Submission Guidelines
 ### CSV File Requirements
+- Must contain exactly two columns: `id` and `label`
+- `id` must be UUID strings matching the test set exactly
+- `label` must be exactly `0.0` (real) or `1.0` (fake)
 - No missing values allowed
+- **All IDs from the test set must be included** in your submission
+- No unknown IDs are allowed (only IDs from the test set)
+### Label Format
+Accepted formats for labels:
+- **Only**: `0.0` (real) or `1.0` (fake)
+- Any other format will be rejected
 ### Scoring
 - Submissions are evaluated immediately upon upload
+- Scores are computed using accuracy, F1 score, precision, and recall
+- Only submissions that improve your best accuracy or F1 score are accepted
 - Rejected submissions are logged but don't update the leaderboard
+## 🏆 Prize Distribution & Evaluation Criteria
+Prizes are awarded based on the **F1 Score** metric:
+- **1st Prize**: Team with the highest F1 score
+- **2nd Prize**: Team with the second highest F1 score
+- **Tiebreaker**: In case of equal F1 scores, the team that submitted their winning score **earlier** will be ranked higher
+The final rankings will be determined at the end of the hackathon based on each team's best F1 score.
 """

src/display/utils.py CHANGED Viewed

@@ -24,23 +24,23 @@ class ColumnContent:
 @dataclass(frozen=True)
 class TeamColumn:
     team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
-    best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
     best_f1 = ColumnContent("Best F1 Score ⬆️", "number", True)
     best_precision = ColumnContent("Best Precision ⬆️", "number", True)
     best_recall = ColumnContent("Best Recall ⬆️", "number", True)
     best_tp = ColumnContent("Best TP ⬆️", "number", True)
     best_fp = ColumnContent("Best FP ⬇️", "number", True)
     best_fn = ColumnContent("Best FN ⬇️", "number", True)
     best_tn = ColumnContent("Best TN ⬆️", "number", True)
-    last_submission_date = ColumnContent("Last Submission", "str", True)
 @dataclass(frozen=True)
 class SubmissionQueueColumn:
     team_name = ColumnContent("Team Name", "str", True)
     submission_date = ColumnContent("Submission Date", "str", True)
-    accuracy = ColumnContent("Accuracy ⬆️", "number", True)
     f1 = ColumnContent("F1 Score ⬆️", "number", True)
     precision = ColumnContent("Precision ⬆️", "number", True)
     recall = ColumnContent("Recall ⬆️", "number", True)
     tp = ColumnContent("TP ⬆️", "number", True)

 @dataclass(frozen=True)
 class TeamColumn:
     team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
     best_f1 = ColumnContent("Best F1 Score ⬆️", "number", True)
+    best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
     best_precision = ColumnContent("Best Precision ⬆️", "number", True)
     best_recall = ColumnContent("Best Recall ⬆️", "number", True)
     best_tp = ColumnContent("Best TP ⬆️", "number", True)
     best_fp = ColumnContent("Best FP ⬇️", "number", True)
     best_fn = ColumnContent("Best FN ⬇️", "number", True)
     best_tn = ColumnContent("Best TN ⬆️", "number", True)
+    best_submission_date = ColumnContent("Submission Date", "str", True)
 @dataclass(frozen=True)
 class SubmissionQueueColumn:
     team_name = ColumnContent("Team Name", "str", True)
     submission_date = ColumnContent("Submission Date", "str", True)
     f1 = ColumnContent("F1 Score ⬆️", "number", True)
+    accuracy = ColumnContent("Accuracy ⬆️", "number", True)
     precision = ColumnContent("Precision ⬆️", "number", True)
     recall = ColumnContent("Recall ⬆️", "number", True)
     tp = ColumnContent("TP ⬆️", "number", True)

src/leaderboard/read_team_results.py CHANGED Viewed

@@ -16,7 +16,7 @@ class TeamResult:
     best_fp: int
     best_fn: int
     best_tn: int
-    last_submission_date: str
     def to_dict(self):
         return {
@@ -29,7 +29,7 @@ class TeamResult:
             TeamColumn.best_fp.name: self.best_fp,
             TeamColumn.best_fn.name: self.best_fn,
             TeamColumn.best_tn.name: self.best_tn,
-            TeamColumn.last_submission_date.name: self.last_submission_date,
         }
@@ -58,7 +58,7 @@ def get_team_results(results_path: str) -> list[TeamResult]:
                     best_fp=data.get("best_fp", 0),
                     best_fn=data.get("best_fn", 0),
                     best_tn=data.get("best_tn", 0),
-                    last_submission_date=data.get("last_submission_date", ""),
                 )
                 results.append(result)
         except Exception:

     best_fp: int
     best_fn: int
     best_tn: int
+    best_submission_date: str
     def to_dict(self):
         return {
             TeamColumn.best_fp.name: self.best_fp,
             TeamColumn.best_fn.name: self.best_fn,
             TeamColumn.best_tn.name: self.best_tn,
+            TeamColumn.best_submission_date.name: self.best_submission_date,
         }
                     best_fp=data.get("best_fp", 0),
                     best_fn=data.get("best_fn", 0),
                     best_tn=data.get("best_tn", 0),
+                    best_submission_date=data.get("best_submission_date", ""),
                 )
                 results.append(result)
         except Exception:

src/populate.py CHANGED Viewed

@@ -15,7 +15,10 @@ def get_leaderboard_df(results_path: str, cols: list) -> pd.DataFrame:
         return pd.DataFrame(columns=cols)
     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[TeamColumn.best_accuracy.name], ascending=False)
     df = df[cols].round(decimals=4)
     return df

         return pd.DataFrame(columns=cols)
     df = pd.DataFrame.from_records(all_data_json)
+    df = df.sort_values(
+        by=[TeamColumn.best_f1.name, TeamColumn.best_submission_date.name],
+        ascending=[False, True],
+    )
     df = df[cols].round(decimals=4)
     return df

src/submission/submit_csv.py CHANGED Viewed

@@ -162,7 +162,7 @@ def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
             "best_fp": scores["fp"],
             "best_fn": scores["fn"],
             "best_tn": scores["tn"],
-            "last_submission_date": timestamp,
         }
         save_team_best_scores(team_name, updated_scores)
         status = "ACCEPTED"

             "best_fp": scores["fp"],
             "best_fn": scores["fn"],
             "best_tn": scores["tn"],
+            "best_submission_date": timestamp,
         }
         save_team_best_scores(team_name, updated_scores)
         status = "ACCEPTED"