Maria Castellanos committed on
Commit ·
a095a04
1
Parent(s): d16313e
fix duplicated usernames
Browse files- app.py +4 -0
- final_lb.py +12 -2
app.py
CHANGED
|
@@ -136,6 +136,9 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
|
| 136 |
|
| 137 |
Go to the **Leaderboard** to check out how the challenge is going.
|
| 138 |
To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
"""
|
| 141 |
)
|
|
@@ -241,6 +244,7 @@ with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
|
|
| 241 |
- **October 14:** Second announcement and sample data release
|
| 242 |
- **October 27:** Challenge starts
|
| 243 |
- **October-November:** Online Q&A sessions and support via the Discord channel
|
|
|
|
| 244 |
- **January 19, 2026:** Submission closes
|
| 245 |
- **January 26, 2026:** Winners announced
|
| 246 |
## Acknowledgements
|
|
|
|
| 136 |
|
| 137 |
Go to the **Leaderboard** to check out how the challenge is going.
|
| 138 |
To participate, head out to the **Submit** tab and upload your results as a `CSV` file.
|
| 139 |
+
|
| 140 |
+
**We are releasing an intermediate leaderboard on December 1st so stay tuned!
|
| 141 |
+
The submission cutoff for this leaderboard will be November 30th at 6pm EST. **
|
| 142 |
|
| 143 |
"""
|
| 144 |
)
|
|
|
|
| 244 |
- **October 14:** Second announcement and sample data release
|
| 245 |
- **October 27:** Challenge starts
|
| 246 |
- **October-November:** Online Q&A sessions and support via the Discord channel
|
| 247 |
+
- **December 1st:** Intermediate leaderboard release
|
| 248 |
- **January 19, 2026:** Submission closes
|
| 249 |
- **January 26, 2026:** Winners announced
|
| 250 |
## Acknowledgements
|
final_lb.py
CHANGED
|
@@ -31,10 +31,13 @@ def build_leaderboard(df_results, df_results_raw):
|
|
| 31 |
# MA-RAE is the average of the RAE per endpoint
|
| 32 |
df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
|
| 33 |
"std_RAE": "std_MA-RAE"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
|
| 35 |
sorted_df = map_metric_to_stats(sorted_df, average=True)
|
| 36 |
-
# Add ranking column
|
| 37 |
-
sorted_df['rank'] = np.arange(1, len(sorted_df) + 1)
|
| 38 |
avg_leaderboard = sorted_df.copy()
|
| 39 |
avg_cols = LB_AVG
|
| 40 |
# Add CLD
|
|
@@ -58,9 +61,16 @@ def build_leaderboard(df_results, df_results_raw):
|
|
| 58 |
avg_leaderboard['user_real'] = avg_leaderboard['hf_username'].apply(validate_hf_username)
|
| 59 |
avg_leaderboard_clean = avg_leaderboard[avg_leaderboard['user_real']]
|
| 60 |
|
|
|
|
|
|
|
|
|
|
| 61 |
per_ep[ep] = avg_leaderboard_clean[avg_cols]
|
| 62 |
|
| 63 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
|
| 65 |
sorted_df = map_metric_to_stats(sorted_df)
|
| 66 |
# Make sure Hugging Face username exists, if not, delete the row
|
|
|
|
| 31 |
# MA-RAE is the average of the RAE per endpoint
|
| 32 |
df = df.rename(columns={"mean_RAE": "mean_MA-RAE",
|
| 33 |
"std_RAE": "std_MA-RAE"})
|
| 34 |
+
# Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
|
| 35 |
+
df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
|
| 36 |
+
df = df.sort_values(by="submission time", ascending=False, kind="stable")
|
| 37 |
+
df = df.drop_duplicates(subset=['hf_username'], keep='first')
|
| 38 |
+
# Sort by MA-RAE
|
| 39 |
sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
|
| 40 |
sorted_df = map_metric_to_stats(sorted_df, average=True)
|
|
|
|
|
|
|
| 41 |
avg_leaderboard = sorted_df.copy()
|
| 42 |
avg_cols = LB_AVG
|
| 43 |
# Add CLD
|
|
|
|
| 61 |
avg_leaderboard['user_real'] = avg_leaderboard['hf_username'].apply(validate_hf_username)
|
| 62 |
avg_leaderboard_clean = avg_leaderboard[avg_leaderboard['user_real']]
|
| 63 |
|
| 64 |
+
# Add ranking column
|
| 65 |
+
avg_leaderboard_clean['rank'] = np.arange(1, len(avg_leaderboard_clean) + 1)
|
| 66 |
+
|
| 67 |
per_ep[ep] = avg_leaderboard_clean[avg_cols]
|
| 68 |
|
| 69 |
else:
|
| 70 |
+
# Delete duplicate entries before sorting (fixing case-sensitive duplicate check)
|
| 71 |
+
df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
|
| 72 |
+
df = df.sort_values(by="submission time", ascending=False, kind="stable")
|
| 73 |
+
df = df.drop_duplicates(subset=['hf_username'], keep='first')
|
| 74 |
sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
|
| 75 |
sorted_df = map_metric_to_stats(sorted_df)
|
| 76 |
# Make sure Hugging Face username exists, if not, delete the row
|