Nicolas Wagner committed on
Commit
bc714de
·
1 Parent(s): 741106a

update for correct metric and label

Browse files
app.py CHANGED
@@ -82,7 +82,18 @@ def init_leaderboard(dataframe):
82
 
83
  valid_cols = [col for col in COLS if col is not None and isinstance(col, str) and col.strip() != ""]
84
  if not valid_cols:
85
- valid_cols = ["Team Name", "Best Accuracy ⬆️", "Best F1 Score", "Best Error Rate", "Last Submission"]
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  if dataframe is None or dataframe.empty:
88
  empty_df = pd.DataFrame(columns=valid_cols)
 
82
 
83
  valid_cols = [col for col in COLS if col is not None and isinstance(col, str) and col.strip() != ""]
84
  if not valid_cols:
85
+ valid_cols = [
86
+ "Team Name",
87
+ "Best Accuracy ⬆️",
88
+ "Best F1 Score ⬆️",
89
+ "Best Precision ⬆️",
90
+ "Best Recall ⬆️",
91
+ "Best TP ⬆️",
92
+ "Best FP ⬇️",
93
+ "Best FN ⬇️",
94
+ "Best TN ⬆️",
95
+ "Last Submission",
96
+ ]
97
 
98
  if dataframe is None or dataframe.empty:
99
  empty_df = pd.DataFrame(columns=valid_cols)
src/display/utils.py CHANGED
@@ -25,8 +25,13 @@ class ColumnContent:
25
  class TeamColumn:
26
  team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
27
  best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
28
- best_f1 = ColumnContent("Best F1 Score", "number", True)
29
- best_error_rate = ColumnContent("Best Error Rate", "number", True)
 
 
 
 
 
30
  last_submission_date = ColumnContent("Last Submission", "str", True)
31
 
32
 
@@ -34,9 +39,14 @@ class TeamColumn:
34
  class SubmissionQueueColumn:
35
  team_name = ColumnContent("Team Name", "str", True)
36
  submission_date = ColumnContent("Submission Date", "str", True)
37
- accuracy = ColumnContent("Accuracy", "number", True)
38
- f1 = ColumnContent("F1 Score", "number", True)
39
- error_rate = ColumnContent("Error Rate", "number", True)
 
 
 
 
 
40
  status = ColumnContent("Status", "str", True)
41
 
42
 
 
25
  class TeamColumn:
26
  team_name = ColumnContent("Team Name", "str", True, never_hidden=True)
27
  best_accuracy = ColumnContent("Best Accuracy ⬆️", "number", True)
28
+ best_f1 = ColumnContent("Best F1 Score ⬆️", "number", True)
29
+ best_precision = ColumnContent("Best Precision ⬆️", "number", True)
30
+ best_recall = ColumnContent("Best Recall ⬆️", "number", True)
31
+ best_tp = ColumnContent("Best TP ⬆️", "number", True)
32
+ best_fp = ColumnContent("Best FP ⬇️", "number", True)
33
+ best_fn = ColumnContent("Best FN ⬇️", "number", True)
34
+ best_tn = ColumnContent("Best TN ⬆️", "number", True)
35
  last_submission_date = ColumnContent("Last Submission", "str", True)
36
 
37
 
 
39
  class SubmissionQueueColumn:
40
  team_name = ColumnContent("Team Name", "str", True)
41
  submission_date = ColumnContent("Submission Date", "str", True)
42
+ accuracy = ColumnContent("Accuracy ⬆️", "number", True)
43
+ f1 = ColumnContent("F1 Score ⬆️", "number", True)
44
+ precision = ColumnContent("Precision ⬆️", "number", True)
45
+ recall = ColumnContent("Recall ⬆️", "number", True)
46
+ tp = ColumnContent("TP ⬆️", "number", True)
47
+ fp = ColumnContent("FP ⬇️", "number", True)
48
+ fn = ColumnContent("FN ⬇️", "number", True)
49
+ tn = ColumnContent("TN ⬆️", "number", True)
50
  status = ColumnContent("Status", "str", True)
51
 
52
 
src/evaluation/compute_metrics.py CHANGED
@@ -1,18 +1,18 @@
1
  import pandas as pd
2
- from sklearn.metrics import accuracy_score, f1_score
3
 
4
 
5
- def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, int]) -> dict[str, float]:
6
  y_true = []
7
  y_pred = []
8
 
9
  for _, row in predictions_df.iterrows():
10
- index_val = str(row["index"]).strip()
11
- if index_val not in true_labels:
12
  continue
13
 
14
- true_label = true_labels[index_val]
15
- pred_label = int(row["prediction"])
16
 
17
  y_true.append(true_label)
18
  y_pred.append(pred_label)
@@ -21,15 +21,28 @@ def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, int]) -
21
  return {
22
  "accuracy": 0.0,
23
  "f1": 0.0,
24
- "error_rate": 1.0,
 
 
 
 
 
25
  }
26
 
27
  accuracy = accuracy_score(y_true, y_pred)
28
  f1 = f1_score(y_true, y_pred, zero_division=0.0)
29
- error_rate = 1.0 - accuracy
 
 
 
30
 
31
  return {
32
  "accuracy": float(accuracy),
33
  "f1": float(f1),
34
- "error_rate": float(error_rate),
 
 
 
 
 
35
  }
 
1
  import pandas as pd
2
+ from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
3
 
4
 
5
+ def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, float]) -> dict[str, float]:
6
  y_true = []
7
  y_pred = []
8
 
9
  for _, row in predictions_df.iterrows():
10
+ id_val = str(row["id"]).strip()
11
+ if id_val not in true_labels:
12
  continue
13
 
14
+ true_label = int(true_labels[id_val])
15
+ pred_label = int(row["label"])
16
 
17
  y_true.append(true_label)
18
  y_pred.append(pred_label)
 
21
  return {
22
  "accuracy": 0.0,
23
  "f1": 0.0,
24
+ "precision": 0.0,
25
+ "recall": 0.0,
26
+ "tp": 0,
27
+ "fp": 0,
28
+ "fn": 0,
29
+ "tn": 0,
30
  }
31
 
32
  accuracy = accuracy_score(y_true, y_pred)
33
  f1 = f1_score(y_true, y_pred, zero_division=0.0)
34
+ precision = precision_score(y_true, y_pred, zero_division=0.0)
35
+ recall = recall_score(y_true, y_pred, zero_division=0.0)
36
+
37
+ tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
38
 
39
  return {
40
  "accuracy": float(accuracy),
41
  "f1": float(f1),
42
+ "precision": float(precision),
43
+ "recall": float(recall),
44
+ "tp": int(tp),
45
+ "fp": int(fp),
46
+ "fn": int(fn),
47
+ "tn": int(tn),
48
  }
src/evaluation/load_labels.py CHANGED
@@ -1,4 +1,3 @@
1
- import json
2
  import os
3
 
4
  from huggingface_hub import snapshot_download
@@ -6,7 +5,7 @@ from huggingface_hub import snapshot_download
6
  from src.envs import TOKEN, TRUE_LABELS_PATH, TRUE_LABELS_REPO
7
 
8
 
9
- def load_true_labels() -> dict[str, int]:
10
  os.makedirs(TRUE_LABELS_PATH, exist_ok=True)
11
 
12
  try:
@@ -24,30 +23,21 @@ def load_true_labels() -> dict[str, int]:
24
 
25
  labels = {}
26
 
 
 
27
  for root, _, files in os.walk(TRUE_LABELS_PATH):
28
  for file in files:
29
- if file.endswith(".json"):
30
  filepath = os.path.join(root, file)
31
  try:
32
- with open(filepath, "r") as f:
33
- data = json.load(f)
34
- if isinstance(data, dict):
35
- labels.update(data)
36
- elif isinstance(data, list):
37
- for item in data:
38
- if isinstance(item, dict) and "file_name" in item and "label" in item:
39
- labels[item["file_name"]] = item["label"]
40
- except Exception:
41
- continue
42
- elif file.endswith(".csv"):
43
- import pandas as pd
44
-
45
- try:
46
- df = pd.read_csv(os.path.join(root, file))
47
- if "index" in df.columns and "label" in df.columns:
48
  for _, row in df.iterrows():
49
- labels[str(row["index"])] = int(row["label"])
50
- except Exception:
 
 
 
51
  continue
52
 
53
  return labels
 
 
1
  import os
2
 
3
  from huggingface_hub import snapshot_download
 
5
  from src.envs import TOKEN, TRUE_LABELS_PATH, TRUE_LABELS_REPO
6
 
7
 
8
+ def load_true_labels() -> dict[str, float]:
9
  os.makedirs(TRUE_LABELS_PATH, exist_ok=True)
10
 
11
  try:
 
23
 
24
  labels = {}
25
 
26
+ import pandas as pd
27
+
28
  for root, _, files in os.walk(TRUE_LABELS_PATH):
29
  for file in files:
30
+ if file == "true_label.csv":
31
  filepath = os.path.join(root, file)
32
  try:
33
+ df = pd.read_csv(filepath)
34
+ if "id" in df.columns and "label" in df.columns:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  for _, row in df.iterrows():
36
+ label_val = float(row["label"])
37
+ if label_val in [0.0, 1.0]:
38
+ labels[str(row["id"])] = label_val
39
+ except Exception as e:
40
+ print(f"Error loading true_label.csv: {e}")
41
  continue
42
 
43
  return labels
src/leaderboard/read_team_results.py CHANGED
@@ -10,7 +10,12 @@ class TeamResult:
10
  team_name: str
11
  best_accuracy: float
12
  best_f1: float
13
- best_error_rate: float
 
 
 
 
 
14
  last_submission_date: str
15
 
16
  def to_dict(self):
@@ -18,7 +23,12 @@ class TeamResult:
18
  TeamColumn.team_name.name: self.team_name,
19
  TeamColumn.best_accuracy.name: self.best_accuracy,
20
  TeamColumn.best_f1.name: self.best_f1,
21
- TeamColumn.best_error_rate.name: self.best_error_rate,
 
 
 
 
 
22
  TeamColumn.last_submission_date.name: self.last_submission_date,
23
  }
24
 
@@ -42,7 +52,12 @@ def get_team_results(results_path: str) -> list[TeamResult]:
42
  team_name=data.get("team_name", ""),
43
  best_accuracy=data.get("best_accuracy", 0.0),
44
  best_f1=data.get("best_f1", 0.0),
45
- best_error_rate=data.get("best_error_rate", 1.0),
 
 
 
 
 
46
  last_submission_date=data.get("last_submission_date", ""),
47
  )
48
  results.append(result)
 
10
  team_name: str
11
  best_accuracy: float
12
  best_f1: float
13
+ best_precision: float
14
+ best_recall: float
15
+ best_tp: int
16
+ best_fp: int
17
+ best_fn: int
18
+ best_tn: int
19
  last_submission_date: str
20
 
21
  def to_dict(self):
 
23
  TeamColumn.team_name.name: self.team_name,
24
  TeamColumn.best_accuracy.name: self.best_accuracy,
25
  TeamColumn.best_f1.name: self.best_f1,
26
+ TeamColumn.best_precision.name: self.best_precision,
27
+ TeamColumn.best_recall.name: self.best_recall,
28
+ TeamColumn.best_tp.name: self.best_tp,
29
+ TeamColumn.best_fp.name: self.best_fp,
30
+ TeamColumn.best_fn.name: self.best_fn,
31
+ TeamColumn.best_tn.name: self.best_tn,
32
  TeamColumn.last_submission_date.name: self.last_submission_date,
33
  }
34
 
 
52
  team_name=data.get("team_name", ""),
53
  best_accuracy=data.get("best_accuracy", 0.0),
54
  best_f1=data.get("best_f1", 0.0),
55
+ best_precision=data.get("best_precision", 0.0),
56
+ best_recall=data.get("best_recall", 0.0),
57
+ best_tp=data.get("best_tp", 0),
58
+ best_fp=data.get("best_fp", 0),
59
+ best_fn=data.get("best_fn", 0),
60
+ best_tn=data.get("best_tn", 0),
61
  last_submission_date=data.get("last_submission_date", ""),
62
  )
63
  results.append(result)
src/populate.py CHANGED
@@ -44,7 +44,12 @@ def get_submission_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
44
  SubmissionQueueColumn.submission_date.name: data.get("timestamp", ""),
45
  SubmissionQueueColumn.accuracy.name: data.get("scores", {}).get("accuracy", 0.0),
46
  SubmissionQueueColumn.f1.name: data.get("scores", {}).get("f1", 0.0),
47
- SubmissionQueueColumn.error_rate.name: data.get("scores", {}).get("error_rate", 1.0),
 
 
 
 
 
48
  SubmissionQueueColumn.status.name: data.get("status", "UNKNOWN"),
49
  }
50
  all_submissions.append(submission_data)
 
44
  SubmissionQueueColumn.submission_date.name: data.get("timestamp", ""),
45
  SubmissionQueueColumn.accuracy.name: data.get("scores", {}).get("accuracy", 0.0),
46
  SubmissionQueueColumn.f1.name: data.get("scores", {}).get("f1", 0.0),
47
+ SubmissionQueueColumn.precision.name: data.get("scores", {}).get("precision", 0.0),
48
+ SubmissionQueueColumn.recall.name: data.get("scores", {}).get("recall", 0.0),
49
+ SubmissionQueueColumn.tp.name: data.get("scores", {}).get("tp", 0),
50
+ SubmissionQueueColumn.fp.name: data.get("scores", {}).get("fp", 0),
51
+ SubmissionQueueColumn.fn.name: data.get("scores", {}).get("fn", 0),
52
+ SubmissionQueueColumn.tn.name: data.get("scores", {}).get("tn", 0),
53
  SubmissionQueueColumn.status.name: data.get("status", "UNKNOWN"),
54
  }
55
  all_submissions.append(submission_data)
src/submission/submit_csv.py CHANGED
@@ -89,18 +89,14 @@ def should_update_scores(new_scores: dict, best_scores: dict | None) -> bool:
89
 
90
  new_accuracy = new_scores.get("accuracy", 0.0)
91
  new_f1 = new_scores.get("f1", 0.0)
92
- new_error = new_scores.get("error_rate", 1.0)
93
 
94
  best_accuracy = best_scores.get("best_accuracy", 0.0)
95
  best_f1 = best_scores.get("best_f1", 0.0)
96
- best_error = best_scores.get("best_error_rate", 1.0)
97
 
98
  if new_accuracy > best_accuracy:
99
  return True
100
  if new_accuracy == best_accuracy and new_f1 > best_f1:
101
  return True
102
- if new_accuracy == best_accuracy and new_f1 == best_f1 and new_error < best_error:
103
- return True
104
 
105
  return False
106
 
@@ -160,12 +156,17 @@ def submit_csv(token: str, csv_content: str) -> tuple[bool, str]:
160
  "team_name": team_name,
161
  "best_accuracy": scores["accuracy"],
162
  "best_f1": scores["f1"],
163
- "best_error_rate": scores["error_rate"],
 
 
 
 
 
164
  "last_submission_date": timestamp,
165
  }
166
  save_team_best_scores(team_name, updated_scores)
167
  status = "ACCEPTED"
168
- message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Error Rate={scores['error_rate']:.4f}"
169
  else:
170
  status = "REJECTED"
171
  best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
 
89
 
90
  new_accuracy = new_scores.get("accuracy", 0.0)
91
  new_f1 = new_scores.get("f1", 0.0)
 
92
 
93
  best_accuracy = best_scores.get("best_accuracy", 0.0)
94
  best_f1 = best_scores.get("best_f1", 0.0)
 
95
 
96
  if new_accuracy > best_accuracy:
97
  return True
98
  if new_accuracy == best_accuracy and new_f1 > best_f1:
99
  return True
 
 
100
 
101
  return False
102
 
 
156
  "team_name": team_name,
157
  "best_accuracy": scores["accuracy"],
158
  "best_f1": scores["f1"],
159
+ "best_precision": scores["precision"],
160
+ "best_recall": scores["recall"],
161
+ "best_tp": scores["tp"],
162
+ "best_fp": scores["fp"],
163
+ "best_fn": scores["fn"],
164
+ "best_tn": scores["tn"],
165
  "last_submission_date": timestamp,
166
  }
167
  save_team_best_scores(team_name, updated_scores)
168
  status = "ACCEPTED"
169
+ message = f"Submission accepted! Your scores: Accuracy={scores['accuracy']:.4f}, F1={scores['f1']:.4f}, Precision={scores['precision']:.4f}, Recall={scores['recall']:.4f}, TP={scores['tp']}, FP={scores['fp']}, FN={scores['fn']}, TN={scores['tn']}"
170
  else:
171
  status = "REJECTED"
172
  best_acc = best_scores.get("best_accuracy", 0.0) if best_scores else 0.0
src/submission/validate_csv.py CHANGED
@@ -3,30 +3,25 @@ from io import StringIO
3
  import pandas as pd
4
 
5
 
6
- def normalize_prediction(pred: any) -> int | None:
7
- if pd.isna(pred):
8
  return None
9
 
10
- if isinstance(pred, (int, float)):
11
- if pred == 0 or pred == 1:
12
- return int(pred)
13
- if pred == 0.0 or pred == 1.0:
14
- return int(pred)
15
  return None
16
 
17
- if isinstance(pred, str):
18
- pred_lower = pred.strip().lower()
19
- if pred_lower in ["0", "1", "real", "fake"]:
20
- if pred_lower in ["0", "real"]:
21
- return 0
22
- else:
23
- return 1
24
  return None
25
 
26
  return None
27
 
28
 
29
- def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, str, pd.DataFrame | None]:
30
  if not csv_content or not csv_content.strip():
31
  return False, "CSV content is empty", None
32
 
@@ -35,49 +30,61 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
35
  except Exception as e:
36
  return False, f"Invalid CSV format: {str(e)}", None
37
 
38
- if "index" not in df.columns:
39
- return False, "CSV must contain 'index' column", None
40
 
41
- if "prediction" not in df.columns:
42
- return False, "CSV must contain 'prediction' column", None
43
 
44
  if df.empty:
45
  return False, "CSV is empty", None
46
 
47
- df["index"] = df["index"].astype(float).astype(str)
48
 
49
- if df["index"].isna().any():
50
- return False, "index column contains missing values", None
51
 
52
- if df["prediction"].isna().any():
53
- return False, "prediction column contains missing values", None
54
 
55
- normalized_predictions = []
56
- invalid_predictions = []
57
 
58
  for idx, row in df.iterrows():
59
- index_val = str(row["index"]).strip()
60
- pred = normalize_prediction(row["prediction"])
61
 
62
- if pred is None:
63
- invalid_predictions.append(f"Row {idx + 1}: invalid prediction value '{row['prediction']}'")
64
  else:
65
- normalized_predictions.append(pred)
66
 
67
- if invalid_predictions:
68
- return False, "Invalid predictions found:\n" + "\n".join(invalid_predictions[:5]), None
69
 
70
- df["prediction"] = normalized_predictions
71
 
72
- missing_indices = []
73
- for index_val in df["index"]:
74
- if str(index_val) not in true_labels:
75
- missing_indices.append(str(index_val))
76
 
77
- if missing_indices:
78
  return (
79
  False,
80
- f"Unknown indices found: {', '.join(missing_indices[:5])}{'...' if len(missing_indices) > 5 else ''}",
 
 
 
 
 
 
 
 
 
 
 
 
81
  None,
82
  )
83
 
 
3
  import pandas as pd
4
 
5
 
6
+ def normalize_label(label: any) -> float | None:
7
+ if pd.isna(label):
8
  return None
9
 
10
+ if isinstance(label, (int, float)):
11
+ if label == 0.0 or label == 1.0:
12
+ return float(label)
 
 
13
  return None
14
 
15
+ if isinstance(label, str):
16
+ label_stripped = label.strip()
17
+ if label_stripped in ["0.0", "1.0"]:
18
+ return float(label_stripped)
 
 
 
19
  return None
20
 
21
  return None
22
 
23
 
24
+ def validate_csv(csv_content: str, true_labels: dict[str, float]) -> tuple[bool, str, pd.DataFrame | None]:
25
  if not csv_content or not csv_content.strip():
26
  return False, "CSV content is empty", None
27
 
 
30
  except Exception as e:
31
  return False, f"Invalid CSV format: {str(e)}", None
32
 
33
+ if "id" not in df.columns:
34
+ return False, "CSV must contain 'id' column", None
35
 
36
+ if "label" not in df.columns:
37
+ return False, "CSV must contain 'label' column", None
38
 
39
  if df.empty:
40
  return False, "CSV is empty", None
41
 
42
+ df["id"] = df["id"].astype(str).str.strip()
43
 
44
+ if df["id"].isna().any():
45
+ return False, "id column contains missing values", None
46
 
47
+ if df["label"].isna().any():
48
+ return False, "label column contains missing values", None
49
 
50
+ normalized_labels = []
51
+ invalid_labels = []
52
 
53
  for idx, row in df.iterrows():
54
+ id_val = str(row["id"]).strip()
55
+ label = normalize_label(row["label"])
56
 
57
+ if label is None:
58
+ invalid_labels.append(f"Row {idx + 1}: invalid label value '{row['label']}' (must be 0.0 or 1.0)")
59
  else:
60
+ normalized_labels.append(label)
61
 
62
+ if invalid_labels:
63
+ return False, "Invalid labels found:\n" + "\n".join(invalid_labels[:5]), None
64
 
65
+ df["label"] = normalized_labels
66
 
67
+ unknown_ids = []
68
+ for id_val in df["id"]:
69
+ if str(id_val) not in true_labels:
70
+ unknown_ids.append(str(id_val))
71
 
72
+ if unknown_ids:
73
  return (
74
  False,
75
+ f"Unknown IDs found: {', '.join(unknown_ids[:5])}{'...' if len(unknown_ids) > 5 else ''}",
76
+ None,
77
+ )
78
+
79
+ missing_ids = []
80
+ for true_id in true_labels.keys():
81
+ if true_id not in df["id"].values:
82
+ missing_ids.append(true_id)
83
+
84
+ if missing_ids:
85
+ return (
86
+ False,
87
+ f"Missing IDs from true labels: {', '.join(missing_ids[:5])}{'...' if len(missing_ids) > 5 else ''} (total: {len(missing_ids)})",
88
  None,
89
  )
90