Spaces:

aurigin
/

Hackathon_Truth_Vs_Machine

Sleeping

App Files Community

Nicolas Wagner committed on Nov 25, 2025

Commit

bc714de

1 Parent(s): 741106a

update for correct metric and label

Browse files

Files changed (8) hide show

app.py +12 -1
src/display/utils.py +15 -5
src/evaluation/compute_metrics.py +22 -9
src/evaluation/load_labels.py +11 -21
src/leaderboard/read_team_results.py +18 -3
src/populate.py +6 -1
src/submission/submit_csv.py +7 -6
src/submission/validate_csv.py +47 -40

app.py CHANGED Viewed

@@ -82,7 +82,18 @@ def init_leaderboard(dataframe):
     valid_cols = [col for col in COLS if col is not None and isinstance(col, str) and col.strip() != ""]
     if not valid_cols:
-        valid_cols = ["Team Name", "Best Accuracy ⬆️", "Best F1 Score", "Best Error Rate", "Last Submission"]
     if dataframe is None or dataframe.empty:
         empty_df = pd.DataFrame(columns=valid_cols)

     valid_cols = [col for col in COLS if col is not None and isinstance(col, str) and col.strip() != ""]
     if not valid_cols:
+        valid_cols = [
+            "Team Name",
+            "Best Accuracy ⬆️",
+            "Best F1 Score ⬆️",
+            "Best Precision ⬆️",
+            "Best Recall ⬆️",
+            "Best TP ⬆️",
+            "Best FP ⬇️",
+            "Best FN ⬇️",
+            "Best TN ⬆️",
+            "Last Submission",
+        ]
     if dataframe is None or dataframe.empty:
         empty_df = pd.DataFrame(columns=valid_cols)

src/display/utils.py CHANGED Viewed

@@ -25,8 +25,13 @@ class ColumnContent:
 class TeamColumn:
     team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
     best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
-    best_f1 = ColumnContent("Best F1 Score", "number", True)
-    best_error_rate = ColumnContent("Best Error Rate", "number", True)
     last_submission_date = ColumnContent("Last Submission", "str", True)
@@ -34,9 +39,14 @@ class TeamColumn:
 class SubmissionQueueColumn:
     team_name = ColumnContent("Team Name", "str", True)
     submission_date = ColumnContent("Submission Date", "str", True)
-    accuracy = ColumnContent("Accuracy", "number", True)
-    f1 = ColumnContent("F1 Score", "number", True)
-    error_rate = ColumnContent("Error Rate", "number", True)
     status = ColumnContent("Status", "str", True)

 class TeamColumn:
     team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
     best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
+    best_f1 = ColumnContent("Best F1 Score ⬆️", "number", True)
+    best_precision = ColumnContent("Best Precision ⬆️", "number", True)
+    best_recall = ColumnContent("Best Recall ⬆️", "number", True)
+    best_tp = ColumnContent("Best TP ⬆️", "number", True)
+    best_fp = ColumnContent("Best FP ⬇️", "number", True)
+    best_fn = ColumnContent("Best FN ⬇️", "number", True)
+    best_tn = ColumnContent("Best TN ⬆️", "number", True)
     last_submission_date = ColumnContent("Last Submission", "str", True)
 class SubmissionQueueColumn:
     team_name = ColumnContent("Team Name", "str", True)
     submission_date = ColumnContent("Submission Date", "str", True)
+    accuracy = ColumnContent("Accuracy ⬆️", "number", True)
+    f1 = ColumnContent("F1 Score ⬆️", "number", True)
+    precision = ColumnContent("Precision ⬆️", "number", True)
+    recall = ColumnContent("Recall ⬆️", "number", True)
+    tp = ColumnContent("TP ⬆️", "number", True)
+    fp = ColumnContent("FP ⬇️", "number", True)
+    fn = ColumnContent("FN ⬇️", "number", True)
+    tn = ColumnContent("TN ⬆️", "number", True)
     status = ColumnContent("Status", "str", True)

src/evaluation/compute_metrics.py CHANGED Viewed

@@ -1,18 +1,18 @@
 import pandas as pd
-from sklearn.metrics import accuracy_score, f1_score
-def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, int]) -> dict[str, float]:
     y_true = []
     y_pred = []
     for _, row in predictions_df.iterrows():
-        index_val = str(row["index"]).strip()
-        if index_val not in true_labels:
             continue
-        true_label = true_labels[index_val]
-        pred_label = int(row["prediction"])
         y_true.append(true_label)
         y_pred.append(pred_label)
@@ -21,15 +21,28 @@ def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, int]) -
         return {
             "accuracy": 0.0,
             "f1": 0.0,
-            "error_rate": 1.0,
         }
     accuracy = accuracy_score(y_true, y_pred)
     f1 = f1_score(y_true, y_pred, zero_division=0.0)
-    error_rate = 1.0 - accuracy
     return {
         "accuracy": float(accuracy),
         "f1": float(f1),
-        "error_rate": float(error_rate),
     }

 import pandas as pd
+from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
+def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, float]) -> dict[str, float]:
     y_true = []
     y_pred = []
     for _, row in predictions_df.iterrows():
+        id_val = str(row["id"]).strip()
+        if id_val not in true_labels:
             continue
+        true_label = int(true_labels[id_val])
+        pred_label = int(row["label"])
         y_true.append(true_label)
         y_pred.append(pred_label)
         return {
             "accuracy": 0.0,
             "f1": 0.0,
+            "precision": 0.0,
+            "recall": 0.0,
+            "tp": 0,
+            "fp": 0,
+            "fn": 0,
+            "tn": 0,
         }
     accuracy = accuracy_score(y_true, y_pred)
     f1 = f1_score(y_true, y_pred, zero_division=0.0)
+    precision = precision_score(y_true, y_pred, zero_division=0.0)
+    recall = recall_score(y_true, y_pred, zero_division=0.0)
+    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
     return {
         "accuracy": float(accuracy),
         "f1": float(f1),
+        "precision": float(precision),
+        "recall": float(recall),
+        "tp": int(tp),
+        "fp": int(fp),
+        "fn": int(fn),
+        "tn": int(tn),
     }

src/evaluation/load_labels.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import json
 import os
 from huggingface_hub import snapshot_download
@@ -6,7 +5,7 @@ from huggingface_hub import snapshot_download
 from src.envs import TOKEN, TRUE_LABELS_PATH, TRUE_LABELS_REPO
-def load_true_labels() -> dict[str, int]:
     os.makedirs(TRUE_LABELS_PATH, exist_ok=True)
     try:
@@ -24,30 +23,21 @@ def load_true_labels() -> dict[str, int]:
     labels = {}
     for root, _, files in os.walk(TRUE_LABELS_PATH):
         for file in files:
-            if file.endswith(".json"):
                 filepath = os.path.join(root, file)
                 try:
-                    with open(filepath, "r") as f:
-                        data = json.load(f)
-                        if isinstance(data, dict):
-                            labels.update(data)
-                        elif isinstance(data, list):
-                            for item in data:
-                                if isinstance(item, dict) and "file_name" in item and "label" in item:
-                                    labels[item["file_name"]] = item["label"]
-                except Exception:
-                    continue
-            elif file.endswith(".csv"):
-                import pandas as pd
-                try:
-                    df = pd.read_csv(os.path.join(root, file))
-                    if "index" in df.columns and "label" in df.columns:
                         for _, row in df.iterrows():
-                            labels[str(row["index"])] = int(row["label"])
-                except Exception:
                     continue
     return labels

 import os
 from huggingface_hub import snapshot_download
 from src.envs import TOKEN, TRUE_LABELS_PATH, TRUE_LABELS_REPO
+def load_true_labels() -> dict[str, float]:
     os.makedirs(TRUE_LABELS_PATH, exist_ok=True)
     try:
     labels = {}
+    import pandas as pd
     for root, _, files in os.walk(TRUE_LABELS_PATH):
         for file in files:
+            if file == "true_label.csv":
                 filepath = os.path.join(root, file)
                 try:
+                    df = pd.read_csv(filepath)
+                    if "id" in df.columns and "label" in df.columns:
                         for _, row in df.iterrows():
+                            label_val = float(row["label"])
+                            if label_val in [0.0, 1.0]:
+                                labels[str(row["id"])] = label_val
+                except Exception as e:
+                    print(f"Error loading true_label.csv: {e}")
                     continue
     return labels

src/leaderboard/read_team_results.py CHANGED Viewed

@@ -10,7 +10,12 @@ class TeamResult:
     team_name: str
     best_accuracy: float
     best_f1: float
-    best_error_rate: float
     last_submission_date: str
     def to_dict(self):
@@ -18,7 +23,12 @@ class TeamResult:
             TeamColumn.team_name.name: self.team_name,
             TeamColumn.best_accuracy.name: self.best_accuracy,
             TeamColumn.best_f1.name: self.best_f1,
-            TeamColumn.best_error_rate.name: self.best_error_rate,
             TeamColumn.last_submission_date.name: self.last_submission_date,
         }
@@ -42,7 +52,12 @@ def get_team_results(results_path: str) -> list[TeamResult]:
                     team_name=data.get("team_name", ""),
                     best_accuracy=data.get("best_accuracy", 0.0),
                     best_f1=data.get("best_f1", 0.0),
-                    best_error_rate=data.get("best_error_rate", 1.0),
                     last_submission_date=data.get("last_submission_date", ""),
                 )
                 results.append(result)

     team_name: str
     best_accuracy: float
     best_f1: float
+    best_precision: float
+    best_recall: float
+    best_tp: int
+    best_fp: int
+    best_fn: int
+    best_tn: int
     last_submission_date: str
     def to_dict(self):
             TeamColumn.team_name.name: self.team_name,
             TeamColumn.best_accuracy.name: self.best_accuracy,
             TeamColumn.best_f1.name: self.best_f1,
+            TeamColumn.best_precision.name: self.best_precision,
+            TeamColumn.best_recall.name: self.best_recall,
+            TeamColumn.best_tp.name: self.best_tp,
+            TeamColumn.best_fp.name: self.best_fp,
+            TeamColumn.best_fn.name: self.best_fn,
+            TeamColumn.best_tn.name: self.best_tn,
             TeamColumn.last_submission_date.name: self.last_submission_date,
         }
                     team_name=data.get("team_name", ""),
                     best_accuracy=data.get("best_accuracy", 0.0),
                     best_f1=data.get("best_f1", 0.0),
+                    best_precision=data.get("best_precision", 0.0),
+                    best_recall=data.get("best_recall", 0.0),
+                    best_tp=data.get("best_tp", 0),
+                    best_fp=data.get("best_fp", 0),
+                    best_fn=data.get("best_fn", 0),
+                    best_tn=data.get("best_tn", 0),
                     last_submission_date=data.get("last_submission_date", ""),
                 )
                 results.append(result)

src/populate.py CHANGED Viewed

@@ -44,7 +44,12 @@ def get_submission_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
                     SubmissionQueueColumn.submission_date.name: data.get("timestamp", ""),
                     SubmissionQueueColumn.accuracy.name: data.get("scores", {}).get("accuracy", 0.0),
                     SubmissionQueueColumn.f1.name: data.get("scores", {}).get("f1", 0.0),
-                    SubmissionQueueColumn.error_rate.name: data.get("scores", {}).get("error_rate", 1.0),
                     SubmissionQueueColumn.status.name: data.get("status", "UNKNOWN"),
                 }
                 all_submissions.append(submission_data)

                     SubmissionQueueColumn.submission_date.name: data.get("timestamp", ""),
                     SubmissionQueueColumn.accuracy.name: data.get("scores", {}).get("accuracy", 0.0),
                     SubmissionQueueColumn.f1.name: data.get("scores", {}).get("f1", 0.0),
+                    SubmissionQueueColumn.precision.name: data.get("scores", {}).get("precision", 0.0),
+                    SubmissionQueueColumn.recall.name: data.get("scores", {}).get("recall", 0.0),
+                    SubmissionQueueColumn.tp.name: data.get("scores", {}).get("tp", 0),
+                    SubmissionQueueColumn.fp.name: data.get("scores", {}).get("fp", 0),
+                    SubmissionQueueColumn.fn.name: data.get("scores", {}).get("fn", 0),
+                    SubmissionQueueColumn.tn.name: data.get("scores", {}).get("tn", 0),
                     SubmissionQueueColumn.status.name: data.get("status", "UNKNOWN"),
                 }
                 all_submissions.append(submission_data)

src/submission/submit_csv.py CHANGED Viewed

@@ -89,18 +89,14 @@ def should_update_scores(new_scores: dict, best_scores: dict | None) -> bool:
     new_accuracy = new_scores.get("accuracy", 0.0)
     new_f1 = new_scores.get("f1", 0.0)
-    new_error = new_scores.get("error_rate", 1.0)
     best_accuracy = best_scores.get("best_accuracy", 0.0)
     best_f1 = best_scores.get("best_f1", 0.0)
-    best_error = best_scores.get("best_error_rate", 1.0)
     if new_accuracy > best_accuracy:
         return True
     if new_accuracy == best_accuracy and new_f1 > best_f1:
         return True
-    if new_accuracy == best_accuracy and new_f1 == best_f1 and new_error < best_error:
-        return True
     return False
@@ -160,12 +156,17 @@ def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
             "team_name": team_name,
             "best_accuracy": scores["accuracy"],
             "best_f1": scores["f1"],
-            "best_error_rate": scores["error_rate"],
             "last_submission_date": timestamp,
         }
         save_team_best_scores(team_name, updated_scores)
         status = "ACCEPTED"
-        message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Error Rate={scores['error_rate']:.4f}"
     else:
         status = "REJECTED"
         best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0

     new_accuracy = new_scores.get("accuracy", 0.0)
     new_f1 = new_scores.get("f1", 0.0)
     best_accuracy = best_scores.get("best_accuracy", 0.0)
     best_f1 = best_scores.get("best_f1", 0.0)
     if new_accuracy > best_accuracy:
         return True
     if new_accuracy == best_accuracy and new_f1 > best_f1:
         return True
     return False
             "team_name": team_name,
             "best_accuracy": scores["accuracy"],
             "best_f1": scores["f1"],
+            "best_precision": scores["precision"],
+            "best_recall": scores["recall"],
+            "best_tp": scores["tp"],
+            "best_fp": scores["fp"],
+            "best_fn": scores["fn"],
+            "best_tn": scores["tn"],
             "last_submission_date": timestamp,
         }
         save_team_best_scores(team_name, updated_scores)
         status = "ACCEPTED"
+        message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
     else:
         status = "REJECTED"
         best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0

src/submission/validate_csv.py CHANGED Viewed

@@ -3,30 +3,25 @@ from io import StringIO
 import pandas as pd
-def normalize_prediction(pred: any) -> int | None:
-    if pd.isna(pred):
         return None
-    if isinstance(pred, (int, float)):
-        if pred == 0 or pred == 1:
-            return int(pred)
-        if pred == 0.0 or pred == 1.0:
-            return int(pred)
         return None
-    if isinstance(pred, str):
-        pred_lower = pred.strip().lower()
-        if pred_lower in ["0", "1", "real", "fake"]:
-            if pred_lower in ["0", "real"]:
-                return 0
-            else:
-                return 1
         return None
     return None
-def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, str, pd.DataFrame | None]:
     if not csv_content or not csv_content.strip():
         return False, "CSV content is empty", None
@@ -35,49 +30,61 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
     except Exception as e:
         return False, f"Invalid CSV format: {str(e)}", None
-    if "index" not in df.columns:
-        return False, "CSV must contain 'index' column", None
-    if "prediction" not in df.columns:
-        return False, "CSV must contain 'prediction' column", None
     if df.empty:
         return False, "CSV is empty", None
-    df["index"] = df["index"].astype(float).astype(str)
-    if df["index"].isna().any():
-        return False, "index column contains missing values", None
-    if df["prediction"].isna().any():
-        return False, "prediction column contains missing values", None
-    normalized_predictions = []
-    invalid_predictions = []
     for idx, row in df.iterrows():
-        index_val = str(row["index"]).strip()
-        pred = normalize_prediction(row["prediction"])
-        if pred is None:
-            invalid_predictions.append(f"Row {idx + 1}: invalid prediction value '{row['prediction']}'")
         else:
-            normalized_predictions.append(pred)
-    if invalid_predictions:
-        return False, "Invalid predictions found:\n" + "\n".join(invalid_predictions[:5]), None
-    df["prediction"] = normalized_predictions
-    missing_indices = []
-    for index_val in df["index"]:
-        if str(index_val) not in true_labels:
-            missing_indices.append(str(index_val))
-    if missing_indices:
         return (
             False,
-            f"Unknown indices found: {', '.join(missing_indices[:5])}{'...' if len(missing_indices) > 5 else ''}",
             None,
         )

 import pandas as pd
+def normalize_label(label: any) -> float | None:
+    if pd.isna(label):
         return None
+    if isinstance(label, (int, float)):
+        if label == 0.0 or label == 1.0:
+            return float(label)
         return None
+    if isinstance(label, str):
+        label_stripped = label.strip()
+        if label_stripped in ["0.0", "1.0"]:
+            return float(label_stripped)
         return None
     return None
+def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool, str, pd.DataFrame | None]:
     if not csv_content or not csv_content.strip():
         return False, "CSV content is empty", None
     except Exception as e:
         return False, f"Invalid CSV format: {str(e)}", None
+    if "id" not in df.columns:
+        return False, "CSV must contain 'id' column", None
+    if "label" not in df.columns:
+        return False, "CSV must contain 'label' column", None
     if df.empty:
         return False, "CSV is empty", None
+    df["id"] = df["id"].astype(str).str.strip()
+    if df["id"].isna().any():
+        return False, "id column contains missing values", None
+    if df["label"].isna().any():
+        return False, "label column contains missing values", None
+    normalized_labels = []
+    invalid_labels = []
     for idx, row in df.iterrows():
+        id_val = str(row["id"]).strip()
+        label = normalize_label(row["label"])
+        if label is None:
+            invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0.0 or 1.0)")
         else:
+            normalized_labels.append(label)
+    if invalid_labels:
+        return False, "Invalid labels found:\n" + "\n".join(invalid_labels[:5]), None
+    df["label"] = normalized_labels
+    unknown_ids = []
+    for id_val in df["id"]:
+        if str(id_val) not in true_labels:
+            unknown_ids.append(str(id_val))
+    if unknown_ids:
         return (
             False,
+            f"Unknown IDs found: {', '.join(unknown_ids[:5])}{'...' if len(unknown_ids) > 5 else ''}",
+            None,
+        )
+    missing_ids = []
+    for true_id in true_labels.keys():
+        if true_id not in df["id"].values:
+            missing_ids.append(true_id)
+    if missing_ids:
+        return (
+            False,
+            f"Missing IDs from true labels: {', '.join(missing_ids[:5])}{'...' if len(missing_ids) > 5 else ''} (total: {len(missing_ids)})",
             None,
         )