File size: 5,136 Bytes
3070e58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from helpers.excel_processor import ExcelProcessor
from helpers.reorganize_indices import reorganize_indices
import json
def update_ranks():
final_leaderboard_paths = [
"/Users/kevinxie/Desktop/projects/BRIDGE-Medical-Leaderboard/leaderboards/CoT_leaderboard.json",
"/Users/kevinxie/Desktop/projects/BRIDGE-Medical-Leaderboard/leaderboards/Few-Shot_leaderboard.json",
"/Users/kevinxie/Desktop/projects/BRIDGE-Medical-Leaderboard/leaderboards/Zero-Shot_leaderboard.json"
]
for leaderboard_path in final_leaderboard_paths:
with open(leaderboard_path, 'r') as f:
data = json.load(f)
avg_performance_dict = data['Average Performance']
# Tuples of the original index (key) and the performance score
tps = []
for idx, value in avg_performance_dict.items():
tps.append((idx, value))
# Sort the tuples by the performance score in descending order
tps.sort(key=lambda x: float(x[1]), reverse=True)
for rank, tp in enumerate(tps):
original_idx = tp[0]
data['T'][original_idx] = rank + 1 # Rank starts from 1
with open(leaderboard_path, 'w') as f:
json.dump(data, f, indent=4, ensure_ascii=False)
def create_leaderboards(
excel_path: str,
output_path: str,
sheet_names_list: list,
invalid_models=None
):
"""
Function that updates a singular leaderboard (JSON).
Args:
excel_path: Path to the excel file
output_path: Path to the output file
sheet_names_list: List of sheet names to create leaderboards from
invalid_models: List of models to exclude from the leaderboards
"""
excel_processor = ExcelProcessor(excel_path, invalid_models)
# Create leaderboards (JSON)
excel_processor.create_leaderboards(sheet_names_list=sheet_names_list, output_path=output_path)
# Reorganize the leaderboard inices
reorganize_indices(output_path)
# Create task information JSON
excel_processor.create_task_information('task_information.json')
def create_all_leaderboards(
excel_path: str,
leaderboard_configs: list,
invalid_models=None
):
"""
Loops through each leaderboard's configs to update all leaderboards
(calls the above function multiple times)
Args:
excel_path: Path to the excel file
leaderboard_configs: List of leaderboard configs
invalid_models: List of models to exclude from the leaderboards
"""
for config in leaderboard_configs:
print(f"Creating {config['name']} leaderboard...")
create_leaderboards(
excel_path,
config['output_path'],
config['sheet_names'],
invalid_models=invalid_models
)
print(f"{config['name']} leaderboard created successfully!")
if __name__ == "__main__":
print("***" * 50)
print("Starting script...")
# # ######################################################### #
# # ######################################################### #
# HOW TO UPDATE LEADERBOARDS
# 1. Download the new excel sheet and/or update the path to the excel sheet
# 2. Specify which models to exclude from the leaderboard in "invalid_models" list
# 3. Run scripts/main.py
# 4. Done! All leaderboards and task information have been updated.
# 5. Push to GitHub and deploy to Hugging Face Spaces.
# # ######################################################### #
# # ######################################################### #
# excel_path --> path to the Google Sheet version you want to use (Clinical Benchmark and LLM)
excel_path = "/Users/kevinxie/Desktop/projects/BRIDGE-Leaderboard-INTERNAL/Clinical Benchmark and LLM.xlsx"
# Configuration for all leaderboards
leaderboard_configs = [
{
'name': 'Zero-Shot',
'output_path': 'leaderboards/Zero-Shot_leaderboard.json',
'sheet_names': ["B-CLF", "B-EXT", "B-GEN"]
},
{
'name': 'Few-Shot',
'output_path': 'leaderboards/Few-Shot_leaderboard.json',
'sheet_names': ["B-CLF-5shot", "B-EXT-5shot", "B-GEN-5shot"]
},
{
'name': 'CoT',
'output_path': 'leaderboards/CoT_leaderboard.json',
'sheet_names': ["B-CLF-CoT", "B-EXT-CoT", "B-GEN-CoT"]
}
]
invalid_models = [
"gemma-3-27b-pt",
"gemma-3-12b-pt",
"gemma-3-12b-pt-ylab-4-1-1",
"gemma-3-12b-pt-ylab-8-1-1",
"gemma-3-12b-pt-ylab-16-1-1"
]
# Create all leaderboards with a single function call
create_all_leaderboards(excel_path, leaderboard_configs, invalid_models)
print("***" * 50)
print("Leaderboards created successfully!")
# Update the ranks of the leaderboards (leftmost column)
update_ranks()
print("***" * 50)
print("Ranks updated successfully!")
print("***" * 50)
print("Complete!")
|