Maria Castellanos committed on
Commit
a095a04
·
1 Parent(s): d16313e

fix duplicated usernames

Browse files
Files changed (2) hide show
  1. app.py +4 -0
  2. final_lb.py +12 -2
app.py CHANGED
@@ -136,6 +136,9 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
136
 
137
  Go to the **Leaderboard** to check out how the challenge is going.
138
  To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
 
 
 
139
 
140
  """
141
  )
@@ -241,6 +244,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
241
  - **October 14:** Second announcement and sample data release
242
  - **October 27:** Challenge starts
243
  - **October-November:** Online Q&A sessions and support via the Discord channel
 
244
  - **January 19, 2026:** Submission closes
245
  - **January 26, 2026:** Winners announced
246
  ## Acknowledgements
 
136
 
137
  Go to the **Leaderboard** to check out how the challenge is going.
138
  To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
139
+
140
+ **We are releasing an intermediate leaderboard on December 1st so stay tuned!
141
+ The submission cutoff for this leaderboard will be November 30th at 6pm EST.**
142
 
143
  """
144
  )
 
244
  - **October 14:** Second announcement and sample data release
245
  - **October 27:** Challenge starts
246
  - **October-November:** Online Q&A sessions and support via the Discord channel
247
+ - **December 1st:** Intermediate leaderboard release
248
  - **January 19, 2026:** Submission closes
249
  - **January 26, 2026:** Winners announced
250
  ## Acknowledgements
final_lb.py CHANGED
@@ -31,10 +31,13 @@ def build_leaderboard(df_results, df_results_raw):
31
  # MA-RAE is the average of the RAE per endpoint
32
  df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
33
  "std_RAE": "std_MA-RAE"})
 
 
 
 
 
34
  sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
35
  sorted_df = map_metric_to_stats(sorted_df, average=True)
36
- # Add ranking column
37
- sorted_df['rank'] = np.arange(1, len(sorted_df) + 1)
38
  avg_leaderboard = sorted_df.copy()
39
  avg_cols = LB_AVG
40
  # Add CLD
@@ -58,9 +61,16 @@ def build_leaderboard(df_results, df_results_raw):
58
  avg_leaderboard['user_real'] = avg_leaderboard['hf_username'].apply(validate_hf_username)
59
  avg_leaderboard_clean = avg_leaderboard[avg_leaderboard['user_real']]
60
 
 
 
 
61
  per_ep[ep] = avg_leaderboard_clean[avg_cols]
62
 
63
  else:
 
 
 
 
64
  sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
65
  sorted_df = map_metric_to_stats(sorted_df)
66
  # Make sure Hugging Face username exists, if not, delete the row
 
31
  # MA-RAE is the average of the RAE per endpoint
32
  df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
33
  "std_RAE": "std_MA-RAE"})
34
+ # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
35
+ df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
36
+ df = df.sort_values(by="submission time", ascending=False, kind="stable")
37
+ df = df.drop_duplicates(subset=['hf_username'], keep='first')
38
+ # Sort by MA-RAE
39
  sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
40
  sorted_df = map_metric_to_stats(sorted_df, average=True)
 
 
41
  avg_leaderboard = sorted_df.copy()
42
  avg_cols = LB_AVG
43
  # Add CLD
 
61
  avg_leaderboard['user_real'] = avg_leaderboard['hf_username'].apply(validate_hf_username)
62
  avg_leaderboard_clean = avg_leaderboard[avg_leaderboard['user_real']]
63
 
64
+ # Add ranking column
65
+ avg_leaderboard_clean['rank'] = np.arange(1, len(avg_leaderboard_clean) + 1)
66
+
67
  per_ep[ep] = avg_leaderboard_clean[avg_cols]
68
 
69
  else:
70
+ # Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
71
+ df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
72
+ df = df.sort_values(by="submission time", ascending=False, kind="stable")
73
+ df = df.drop_duplicates(subset=['hf_username'], keep='first')
74
  sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
75
  sorted_df = map_metric_to_stats(sorted_df)
76
  # Make sure Hugging Face username exists, if not, delete the row