Nicolas Wagner committed on
Commit
ebff827
·
1 Parent(s): 4be69f1

Update UI with centered layout and move assets to LFS

Browse files
.gitattributes CHANGED
@@ -37,3 +37,4 @@ scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
37
  *.avif filter=lfs diff=lfs merge=lfs -text
38
  background.png filter=lfs diff=lfs merge=lfs -text
39
  logo.avif filter=lfs diff=lfs merge=lfs -text
 
 
37
  *.avif filter=lfs diff=lfs merge=lfs -text
38
  background.png filter=lfs diff=lfs merge=lfs -text
39
  logo.avif filter=lfs diff=lfs merge=lfs -text
40
+ assets/*.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -12,3 +12,9 @@ eval-results/
12
  eval-queue-bk/
13
  eval-results-bk/
14
  logs/
 
 
 
 
 
 
 
12
  eval-queue-bk/
13
  eval-results-bk/
14
  logs/
15
+
16
+ submissions/
17
+ teams/
18
+ true-labels/
19
+
20
+ .DS_Store
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
@@ -5,8 +7,6 @@ from gradio_leaderboard import Leaderboard, SelectColumns
5
  from huggingface_hub import snapshot_download
6
 
7
  from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
  INTRODUCTION_TEXT,
11
  LLM_BENCHMARKS_TEXT,
12
  TITLE,
@@ -78,54 +78,75 @@ LEADERBOARD_DF = get_leaderboard_df(SUBMISSIONS_PATH, COLS)
78
 
79
 
80
  def init_leaderboard(dataframe):
81
- team_columns = [c for c in fields(TeamColumn) if c is not None and hasattr(c, "name") and c.name is not None]
 
 
 
 
82
 
83
  if dataframe is None or dataframe.empty:
84
- empty_df = pd.DataFrame(columns=COLS)
85
  column_to_type = {c.name: c.type for c in team_columns}
86
- datatypes = [column_to_type.get(col, "str") for col in COLS]
 
 
 
 
 
 
 
 
 
 
 
87
  return Leaderboard(
88
  value=empty_df,
89
  datatype=datatypes,
 
 
 
 
 
 
 
 
90
  interactive=False,
91
  )
92
 
93
- missing_cols = [col for col in COLS if col not in dataframe.columns]
94
- if missing_cols:
95
- for col in missing_cols:
96
- dataframe[col] = None
 
 
 
 
 
 
 
 
97
 
98
  column_to_type = {c.name: c.type for c in team_columns}
99
- datatypes = [column_to_type.get(col, "str") for col in dataframe.columns]
 
 
 
 
 
100
 
101
  default_selection = [
102
- str(c.name)
103
- for c in team_columns
104
- if getattr(c, "displayed_by_default", False) and c.name is not None and str(c.name) in dataframe.columns
105
  ]
106
 
107
- cant_deselect = [
108
- str(c.name)
109
- for c in team_columns
110
- if getattr(c, "never_hidden", False) and c.name is not None and str(c.name) in dataframe.columns
111
- ]
112
 
113
- hide_cols = [
114
- str(c.name)
115
- for c in team_columns
116
- if getattr(c, "hidden", False) and c.name is not None and str(c.name) in dataframe.columns
117
- ]
118
 
119
  search_cols = []
120
  if hasattr(TeamColumn, "team_name") and hasattr(TeamColumn.team_name, "name"):
121
  search_col_name = TeamColumn.team_name.name
122
- if search_col_name is not None and str(search_col_name) in dataframe.columns:
123
- search_cols = [str(search_col_name)]
124
-
125
- default_selection = [c for c in default_selection if c is not None]
126
- cant_deselect = [c for c in cant_deselect if c is not None]
127
- hide_cols = [c for c in hide_cols if c is not None]
128
- search_cols = [c for c in search_cols if c is not None]
129
 
130
  return Leaderboard(
131
  value=dataframe,
@@ -163,29 +184,49 @@ def register_team_ui(team_name: str, num_teammates: int):
163
 
164
 
165
  def submit_csv_ui(token: str, csv_file):
 
 
166
  if not token or not token.strip():
167
- return styled_error("Please provide your team token.")
168
 
169
  if csv_file is None:
170
- return styled_error("Please upload a CSV file.")
171
 
172
  try:
173
  with open(csv_file.name, "r") as f:
174
  csv_content = f.read()
175
  except Exception as e:
176
- return styled_error(f"Could not read CSV file: {str(e)}")
177
 
178
  success, message = submit_csv(token, csv_content)
179
 
 
 
180
  if success:
181
- return styled_message(message)
182
  else:
183
- return styled_error(message)
 
184
 
 
 
185
 
186
- demo = gr.Blocks(css=custom_css)
187
  with demo:
188
- gr.HTML(TITLE)
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
190
 
191
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
@@ -233,7 +274,7 @@ with demo:
233
  gr.Markdown("## Submit Your Predictions", elem_classes="markdown-text")
234
  gr.Markdown(
235
  "Upload a CSV file with your predictions. The CSV must have two columns: "
236
- "`file_name` and `prediction`. Predictions should be binary (0/1 or 'real'/'fake').",
237
  elem_classes="markdown-text",
238
  )
239
 
@@ -256,7 +297,7 @@ with demo:
256
  submit_button.click(
257
  submit_csv_ui,
258
  [token_input, csv_file_input],
259
- submission_result,
260
  )
261
 
262
  with gr.Accordion("📊 Submission History", open=False):
@@ -264,36 +305,25 @@ with demo:
264
  with gr.TabItem("✅ Accepted Submissions"):
265
  accepted_table = gr.components.Dataframe(
266
  value=accepted_submissions_df,
267
- headers=SUBMISSION_COLS,
268
  datatype=SUBMISSION_TYPES,
269
  row_count=10,
270
  )
271
  with gr.TabItem("❌ Rejected Submissions"):
272
  rejected_table = gr.components.Dataframe(
273
  value=rejected_submissions_df,
274
- headers=SUBMISSION_COLS,
275
  datatype=SUBMISSION_TYPES,
276
  row_count=10,
277
  )
278
  with gr.TabItem("📋 All Submissions"):
279
  all_table = gr.components.Dataframe(
280
  value=all_submissions_df,
281
- headers=SUBMISSION_COLS,
282
  datatype=SUBMISSION_TYPES,
283
  row_count=10,
284
  )
285
 
286
- with gr.Row():
287
- with gr.Accordion("📙 Citation", open=False):
288
- citation_button = gr.Textbox(
289
- value=CITATION_BUTTON_TEXT,
290
- label=CITATION_BUTTON_LABEL,
291
- lines=20,
292
- elem_id="citation-button",
293
- show_copy_button=True,
294
- )
295
-
296
  scheduler = BackgroundScheduler()
297
  scheduler.add_job(restart_space, "interval", seconds=1800)
298
  scheduler.start()
299
- demo.queue(default_concurrency_limit=40).launch()
 
 
 
1
+ import os
2
+
3
  import gradio as gr
4
  import pandas as pd
5
  from apscheduler.schedulers.background import BackgroundScheduler
 
7
  from huggingface_hub import snapshot_download
8
 
9
  from src.about import (
 
 
10
  INTRODUCTION_TEXT,
11
  LLM_BENCHMARKS_TEXT,
12
  TITLE,
 
78
 
79
 
80
  def init_leaderboard(dataframe):
81
+ team_columns = [c for c in fields(TeamColumn) if isinstance(c, type(TeamColumn.team_name))]
82
+
83
+ valid_cols = [col for col in COLS if col is not None and isinstance(col, str) and col.strip() != ""]
84
+ if not valid_cols:
85
+ valid_cols = ["Team Name", "Best Accuracy ⬆️", "Best F1 Score", "Best Error Rate", "Last Submission"]
86
 
87
  if dataframe is None or dataframe.empty:
88
+ empty_df = pd.DataFrame(columns=valid_cols)
89
  column_to_type = {c.name: c.type for c in team_columns}
90
+ datatypes = []
91
+ for col in valid_cols:
92
+ dtype = column_to_type.get(col, "str")
93
+ if not dtype or dtype == "":
94
+ dtype = "str"
95
+ datatypes.append(dtype)
96
+
97
+ print(empty_df)
98
+ print(datatypes)
99
+
100
+ search_col = TeamColumn.team_name.name if TeamColumn.team_name.name in valid_cols else valid_cols[0]
101
+
102
  return Leaderboard(
103
  value=empty_df,
104
  datatype=datatypes,
105
+ search_columns=[search_col],
106
+ select_columns=SelectColumns(
107
+ default_selection=valid_cols,
108
+ cant_deselect=[search_col],
109
+ label="Select Columns to Display:",
110
+ ),
111
+ filter_columns=[],
112
+ hide_columns=[],
113
  interactive=False,
114
  )
115
 
116
+ dataframe = dataframe[
117
+ [col for col in dataframe.columns if col is not None and isinstance(col, str) and col.strip() != ""]
118
+ ]
119
+
120
+ if dataframe.empty or len(dataframe.columns) == 0:
121
+ dataframe = pd.DataFrame(columns=valid_cols)
122
+
123
+ missing_cols = [col for col in valid_cols if col not in dataframe.columns]
124
+ for col in missing_cols:
125
+ dataframe[col] = None
126
+
127
+ dataframe = dataframe[valid_cols]
128
 
129
  column_to_type = {c.name: c.type for c in team_columns}
130
+ datatypes = []
131
+ for col in dataframe.columns:
132
+ dtype = column_to_type.get(col, "str")
133
+ if not dtype or dtype == "":
134
+ dtype = "str"
135
+ datatypes.append(dtype)
136
 
137
  default_selection = [
138
+ c.name for c in team_columns if getattr(c, "displayed_by_default", False) and c.name in dataframe.columns
 
 
139
  ]
140
 
141
+ cant_deselect = [c.name for c in team_columns if getattr(c, "never_hidden", False) and c.name in dataframe.columns]
 
 
 
 
142
 
143
+ hide_cols = [c.name for c in team_columns if getattr(c, "hidden", False) and c.name in dataframe.columns]
 
 
 
 
144
 
145
  search_cols = []
146
  if hasattr(TeamColumn, "team_name") and hasattr(TeamColumn.team_name, "name"):
147
  search_col_name = TeamColumn.team_name.name
148
+ if search_col_name and search_col_name in dataframe.columns:
149
+ search_cols = [search_col_name]
 
 
 
 
 
150
 
151
  return Leaderboard(
152
  value=dataframe,
 
184
 
185
 
186
  def submit_csv_ui(token: str, csv_file):
187
+ updated_leaderboard_df = get_leaderboard_df(SUBMISSIONS_PATH, COLS)
188
+
189
  if not token or not token.strip():
190
+ return styled_error("Please provide your team token."), updated_leaderboard_df
191
 
192
  if csv_file is None:
193
+ return styled_error("Please upload a CSV file."), updated_leaderboard_df
194
 
195
  try:
196
  with open(csv_file.name, "r") as f:
197
  csv_content = f.read()
198
  except Exception as e:
199
+ return styled_error(f"Could not read CSV file: {str(e)}"), updated_leaderboard_df
200
 
201
  success, message = submit_csv(token, csv_content)
202
 
203
+ updated_leaderboard_df = get_leaderboard_df(SUBMISSIONS_PATH, COLS)
204
+
205
  if success:
206
+ return styled_message(message), updated_leaderboard_df
207
  else:
208
+ return styled_error(message), updated_leaderboard_df
209
+
210
 
211
+ background_image_path = os.path.abspath("assets/background.png")
212
+ logo_image_path = os.path.abspath("assets/logo.png")
213
 
214
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
215
  with demo:
216
+ with gr.Row(elem_id="title-row"):
217
+ with gr.Column(scale=0, min_width=120):
218
+ gr.Image(
219
+ value=logo_image_path,
220
+ show_label=False,
221
+ container=False,
222
+ height=120,
223
+ width=120,
224
+ show_download_button=False,
225
+ show_fullscreen_button=False,
226
+ interactive=False,
227
+ )
228
+ with gr.Column(scale=1):
229
+ gr.HTML(TITLE)
230
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
231
 
232
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
 
274
  gr.Markdown("## Submit Your Predictions", elem_classes="markdown-text")
275
  gr.Markdown(
276
  "Upload a CSV file with your predictions. The CSV must have two columns: "
277
+ "`index` and `prediction`. Predictions should be binary (0/1 or 'real'/'fake').",
278
  elem_classes="markdown-text",
279
  )
280
 
 
297
  submit_button.click(
298
  submit_csv_ui,
299
  [token_input, csv_file_input],
300
+ [submission_result, leaderboard],
301
  )
302
 
303
  with gr.Accordion("📊 Submission History", open=False):
 
305
  with gr.TabItem("✅ Accepted Submissions"):
306
  accepted_table = gr.components.Dataframe(
307
  value=accepted_submissions_df,
 
308
  datatype=SUBMISSION_TYPES,
309
  row_count=10,
310
  )
311
  with gr.TabItem("❌ Rejected Submissions"):
312
  rejected_table = gr.components.Dataframe(
313
  value=rejected_submissions_df,
 
314
  datatype=SUBMISSION_TYPES,
315
  row_count=10,
316
  )
317
  with gr.TabItem("📋 All Submissions"):
318
  all_table = gr.components.Dataframe(
319
  value=all_submissions_df,
 
320
  datatype=SUBMISSION_TYPES,
321
  row_count=10,
322
  )
323
 
 
 
 
 
 
 
 
 
 
 
324
  scheduler = BackgroundScheduler()
325
  scheduler.add_job(restart_space, "interval", seconds=1800)
326
  scheduler.start()
327
+ demo.queue(default_concurrency_limit=40).launch(
328
+ allowed_paths=[background_image_path, logo_image_path],
329
+ )
background.png → assets/background.png RENAMED
File without changes
logo.avif → assets/logo.png RENAMED
File without changes
src/about.py CHANGED
@@ -1,4 +1,4 @@
1
- TITLE = """<h1 align="center" id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
2
 
3
  INTRODUCTION_TEXT = """
4
  Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the best performance metrics for each team.
@@ -15,16 +15,16 @@ LLM_BENCHMARKS_TEXT = """
15
 
16
  ### 2. Prepare Your Predictions
17
  Create a CSV file with two columns:
18
- - `file_name`: The name of the audio file (must match the test set)
19
  - `prediction`: Your prediction (binary: 0/1, or "real"/"fake")
20
 
21
  Example CSV format:
22
  ```csv
23
- file_name,prediction
24
- audio_001.wav,0
25
- audio_002.wav,1
26
- audio_003.wav,real
27
- audio_004.wav,fake
28
  ```
29
 
30
  ### 3. Submit Your Predictions
@@ -49,15 +49,15 @@ Your predictions are evaluated on:
49
  - True labels are kept private and not accessible to participants
50
  - You can submit multiple times - only your best scores count
51
  - Make sure your CSV file format is correct before submitting
52
- - File names in your CSV must exactly match the test set file names
53
  """
54
 
55
  EVALUATION_QUEUE_TEXT = """
56
  ## Submission Guidelines
57
 
58
  ### CSV File Requirements
59
- - Must contain exactly two columns: `file_name` and `prediction`
60
- - `file_name` must match the test set file names exactly
61
  - `prediction` must be binary: 0/1 or "real"/"fake"
62
  - No missing values allowed
63
 
@@ -72,7 +72,3 @@ Accepted formats for predictions:
72
  - Only submissions that improve your best scores are accepted
73
  - Rejected submissions are logged but don't update the leaderboard
74
  """
75
-
76
- CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
77
- CITATION_BUTTON_TEXT = r"""
78
- """
 
1
+ TITLE = """<h1 id="space-title">Truth vs. Machine Hackathon Leaderboard</h1>"""
2
 
3
  INTRODUCTION_TEXT = """
4
  Welcome to the Truth vs. Machine Hackathon Leaderboard! This leaderboard tracks teams competing in an audio deepfake detection challenge. Teams submit predictions on audio samples to determine whether they are real or fake, and the leaderboard displays the best performance metrics for each team.
 
15
 
16
  ### 2. Prepare Your Predictions
17
  Create a CSV file with two columns:
18
+ - `index`: The index of the test sample (must match the test set)
19
  - `prediction`: Your prediction (binary: 0/1, or "real"/"fake")
20
 
21
  Example CSV format:
22
  ```csv
23
+ index,prediction
24
+ 43555,0
25
+ 43556,1
26
+ 43557,real
27
+ 43558,fake
28
  ```
29
 
30
  ### 3. Submit Your Predictions
 
49
  - True labels are kept private and not accessible to participants
50
  - You can submit multiple times - only your best scores count
51
  - Make sure your CSV file format is correct before submitting
52
+ - Indices in your CSV must exactly match the test set indices
53
  """
54
 
55
  EVALUATION_QUEUE_TEXT = """
56
  ## Submission Guidelines
57
 
58
  ### CSV File Requirements
59
+ - Must contain exactly two columns: `index` and `prediction`
60
+ - `index` must match the test set indices exactly
61
  - `prediction` must be binary: 0/1 or "real"/"fake"
62
  - No missing values allowed
63
 
 
72
  - Only submissions that improve your best scores are accepted
73
  - Rejected submissions are logged but don't update the leaderboard
74
  """
 
 
 
 
src/display/css_html_js.py CHANGED
@@ -1,7 +1,32 @@
1
  custom_css = """
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  .markdown-text {
4
  font-size: 16px !important;
 
5
  }
6
 
7
  #models-to-add-text {
 
1
  custom_css = """
2
 
3
+ body {background-image: file("./assets/background.png");}
4
+
5
+ .gradio-container {
6
+ max-width: 1400px !important;
7
+ margin: 0 auto !important;
8
+ }
9
+
10
+ #title-row {
11
+ display: flex;
12
+ align-items: center;
13
+ justify-content: center;
14
+ margin: 20px auto;
15
+ gap: 15px;
16
+ }
17
+
18
+ #title-row img {
19
+ object-fit: contain;
20
+ }
21
+
22
+ #space-title {
23
+ text-align: center;
24
+ margin: 0;
25
+ }
26
+
27
  .markdown-text {
28
  font-size: 16px !important;
29
+ text-align: center;
30
  }
31
 
32
  #models-to-add-text {
src/display/utils.py CHANGED
@@ -2,7 +2,14 @@ from dataclasses import dataclass
2
 
3
 
4
  def fields(raw_class):
5
- return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
 
 
 
 
 
6
 
7
 
8
  @dataclass
@@ -33,7 +40,7 @@ class SubmissionQueueColumn:
33
  status = ColumnContent("Status", "str", True)
34
 
35
 
36
- COLS = [c.name for c in fields(TeamColumn) if not c.hidden]
37
 
38
- SUBMISSION_COLS = [c.name for c in fields(SubmissionQueueColumn)]
39
- SUBMISSION_TYPES = [c.type for c in fields(SubmissionQueueColumn)]
 
2
 
3
 
4
  def fields(raw_class):
5
+ from dataclasses import fields as dataclass_fields
6
+ from dataclasses import is_dataclass
7
+
8
+ if is_dataclass(raw_class):
9
+ df = dataclass_fields(raw_class)
10
+ if df:
11
+ return [getattr(raw_class, field.name) for field in df]
12
+ return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__" and not callable(v)]
13
 
14
 
15
  @dataclass
 
40
  status = ColumnContent("Status", "str", True)
41
 
42
 
43
+ COLS = [c.name for c in fields(TeamColumn) if hasattr(c, "hidden") and not c.hidden]
44
 
45
+ SUBMISSION_COLS = [c.name for c in fields(SubmissionQueueColumn) if hasattr(c, "name")]
46
+ SUBMISSION_TYPES = [c.type for c in fields(SubmissionQueueColumn) if hasattr(c, "type")]
src/evaluation/compute_metrics.py CHANGED
@@ -7,11 +7,11 @@ def compute_metrics(predictions_df: pd.DataFrame, true_labels: dict[str, int]) -
7
  y_pred = []
8
 
9
  for _, row in predictions_df.iterrows():
10
- file_name = str(row["file_name"]).strip()
11
- if file_name not in true_labels:
12
  continue
13
 
14
- true_label = true_labels[file_name]
15
  pred_label = int(row["prediction"])
16
 
17
  y_true.append(true_label)
 
7
  y_pred = []
8
 
9
  for _, row in predictions_df.iterrows():
10
+ index_val = str(row["index"]).strip()
11
+ if index_val not in true_labels:
12
  continue
13
 
14
+ true_label = true_labels[index_val]
15
  pred_label = int(row["prediction"])
16
 
17
  y_true.append(true_label)
src/evaluation/load_labels.py CHANGED
@@ -44,9 +44,9 @@ def load_true_labels() -> dict[str, int]:
44
 
45
  try:
46
  df = pd.read_csv(os.path.join(root, file))
47
- if "file_name" in df.columns and "label" in df.columns:
48
  for _, row in df.iterrows():
49
- labels[str(row["file_name"])] = int(row["label"])
50
  except Exception:
51
  continue
52
 
 
44
 
45
  try:
46
  df = pd.read_csv(os.path.join(root, file))
47
+ if "index" in df.columns and "label" in df.columns:
48
  for _, row in df.iterrows():
49
+ labels[str(row["index"])] = int(row["label"])
50
  except Exception:
51
  continue
52
 
src/submission/validate_csv.py CHANGED
@@ -35,8 +35,8 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
35
  except Exception as e:
36
  return False, f"Invalid CSV format: {str(e)}", None
37
 
38
- if "file_name" not in df.columns:
39
- return False, "CSV must contain 'file_name' column", None
40
 
41
  if "prediction" not in df.columns:
42
  return False, "CSV must contain 'prediction' column", None
@@ -44,8 +44,10 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
44
  if df.empty:
45
  return False, "CSV is empty", None
46
 
47
- if df["file_name"].isna().any():
48
- return False, "file_name column contains missing values", None
 
 
49
 
50
  if df["prediction"].isna().any():
51
  return False, "prediction column contains missing values", None
@@ -54,7 +56,7 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
54
  invalid_predictions = []
55
 
56
  for idx, row in df.iterrows():
57
- file_name = str(row["file_name"]).strip()
58
  pred = normalize_prediction(row["prediction"])
59
 
60
  if pred is None:
@@ -67,15 +69,15 @@ def validate_csv(csv_content: str, true_labels: dict[str, int]) -> tuple[bool, s
67
 
68
  df["prediction"] = normalized_predictions
69
 
70
- missing_files = []
71
- for file_name in df["file_name"]:
72
- if str(file_name) not in true_labels:
73
- missing_files.append(str(file_name))
74
 
75
- if missing_files:
76
  return (
77
  False,
78
- f"Unknown file names found: {', '.join(missing_files[:5])}{'...' if len(missing_files) > 5 else ''}",
79
  None,
80
  )
81
 
 
35
  except Exception as e:
36
  return False, f"Invalid CSV format: {str(e)}", None
37
 
38
+ if "index" not in df.columns:
39
+ return False, "CSV must contain 'index' column", None
40
 
41
  if "prediction" not in df.columns:
42
  return False, "CSV must contain 'prediction' column", None
 
44
  if df.empty:
45
  return False, "CSV is empty", None
46
 
47
+ df["index"] = df["index"].astype(float).astype(str)
48
+
49
+ if df["index"].isna().any():
50
+ return False, "index column contains missing values", None
51
 
52
  if df["prediction"].isna().any():
53
  return False, "prediction column contains missing values", None
 
56
  invalid_predictions = []
57
 
58
  for idx, row in df.iterrows():
59
+ index_val = str(row["index"]).strip()
60
  pred = normalize_prediction(row["prediction"])
61
 
62
  if pred is None:
 
69
 
70
  df["prediction"] = normalized_predictions
71
 
72
+ missing_indices = []
73
+ for index_val in df["index"]:
74
+ if str(index_val) not in true_labels:
75
+ missing_indices.append(str(index_val))
76
 
77
+ if missing_indices:
78
  return (
79
  False,
80
+ f"Unknown indices found: {', '.join(missing_indices[:5])}{'...' if len(missing_indices) > 5 else ''}",
81
  None,
82
  )
83