Commit
·
444cb2e
1
Parent(s):
0030c6c
Add base LLM and modelling framework to submission metadata; update leaderboard columns
Browse files- src/config.py +1 -1
- src/hf_utils.py +29 -7
- src/ui.py +14 -7
src/config.py
CHANGED
|
@@ -7,7 +7,7 @@ DS_RESULTS_PATH = "results"
|
|
| 7 |
|
| 8 |
# leaderboard
|
| 9 |
# LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
|
| 10 |
-
LDB_COLS = ["Name", "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
|
| 11 |
|
| 12 |
|
| 13 |
# modelling frameworks
|
|
|
|
| 7 |
|
| 8 |
# leaderboard
|
| 9 |
# LDB_COLS = ["Submission Name", "Solution Found (%)", "Consistency (%)", "Final Solution Accuracy (%)", "# of Models submitted"]
|
| 10 |
+
LDB_COLS = ["Name", 'Modelling Framework', 'Base LLM', "Models Submitted (%)", "Accuracy (%)", "Runtime Errors (%)"]
|
| 11 |
|
| 12 |
|
| 13 |
# modelling frameworks
|
src/hf_utils.py
CHANGED
|
@@ -43,17 +43,38 @@ def load_leaderboard_data():
|
|
| 43 |
if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
| 44 |
]
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
# for file_path in summary_files:
|
| 47 |
for file_path in submissions:
|
| 48 |
dir_name = Path(file_path).parent.name
|
| 49 |
if dir_name in processed_result_dirs:
|
| 50 |
continue
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
processed_result_dirs.add(dir_name)
|
| 53 |
entry = {LDB_COLS[0]: dir_name,
|
| 54 |
-
LDB_COLS[1]:
|
| 55 |
-
LDB_COLS[2]:
|
| 56 |
-
LDB_COLS[3]: '*Calculating...*'
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# check if summary file exists, otherwise skip
|
| 59 |
if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
|
|
@@ -72,11 +93,11 @@ def load_leaderboard_data():
|
|
| 72 |
with open(local_summary_path, "r", encoding="utf-8") as f:
|
| 73 |
for line in f:
|
| 74 |
if 'Error perc' in line:
|
| 75 |
-
entry[LDB_COLS[
|
| 76 |
if 'Final Solution Accuracy' in line:
|
| 77 |
-
entry[LDB_COLS[
|
| 78 |
if 'Submission coverage perc' in line:
|
| 79 |
-
entry[LDB_COLS[
|
| 80 |
os.remove(local_summary_path)
|
| 81 |
else:
|
| 82 |
print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
|
|
@@ -92,7 +113,7 @@ def load_leaderboard_data():
|
|
| 92 |
return pd.DataFrame(leaderboard_entries)
|
| 93 |
|
| 94 |
|
| 95 |
-
def upload_submission(uploaded_file, dir_name, report_file, model_framework):
|
| 96 |
"""Upload submission to Hugging Face Dataset."""
|
| 97 |
if not HF_API:
|
| 98 |
return False, "Hugging Face API not initialized"
|
|
@@ -119,6 +140,7 @@ def upload_submission(uploaded_file, dir_name, report_file, model_framework):
|
|
| 119 |
metadata = {
|
| 120 |
"submission_name": dir_name,
|
| 121 |
"modelling_framework": model_framework,
|
|
|
|
| 122 |
}
|
| 123 |
HF_API.upload_file(
|
| 124 |
path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
|
|
|
|
| 43 |
if f.endswith("submission.jsonl") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
| 44 |
]
|
| 45 |
|
| 46 |
+
metadata_files = [
|
| 47 |
+
f for f in repo_files
|
| 48 |
+
if f.endswith("metadata.json") and f.startswith(DS_SUBMISSIONS_PATH + "/")
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
# for file_path in summary_files:
|
| 52 |
for file_path in submissions:
|
| 53 |
dir_name = Path(file_path).parent.name
|
| 54 |
if dir_name in processed_result_dirs:
|
| 55 |
continue
|
| 56 |
|
| 57 |
+
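# download and parse this submission's metadata.json (if one exists), then delete the local copy
# NOTE(review): `metadata` is only assigned inside the `if metadata_file:` branch but is read below unconditionally — confirm a default (e.g. an empty dict) is set for submissions without a metadata file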
|
| 58 |
+
metadata_file = next((f for f in metadata_files if f.startswith(f"{DS_SUBMISSIONS_PATH}/{dir_name}/")), None)
|
| 59 |
+
if metadata_file:
|
| 60 |
+
local_metadata_path = hf_hub_download(
|
| 61 |
+
repo_id=DATASET_REPO_ID,
|
| 62 |
+
filename=metadata_file,
|
| 63 |
+
repo_type="dataset",
|
| 64 |
+
local_dir=os.path.join("local_hf_downloads", dir_name),
|
| 65 |
+
)
|
| 66 |
+
with open(local_metadata_path, "r", encoding="utf-8") as f:
|
| 67 |
+
metadata = json.load(f)
|
| 68 |
+
os.remove(local_metadata_path)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
processed_result_dirs.add(dir_name)
|
| 72 |
entry = {LDB_COLS[0]: dir_name,
|
| 73 |
+
LDB_COLS[1]: metadata.get("modelling_framework", "Unknown"),
|
| 74 |
+
LDB_COLS[2]: metadata.get("base_llm", "Unknown"),
|
| 75 |
+
LDB_COLS[3]: '*Calculating...*',
|
| 76 |
+
LDB_COLS[4]: '*Calculating...*',
|
| 77 |
+
LDB_COLS[5]: '*Calculating...*'}
|
| 78 |
|
| 79 |
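# NOTE(review): the metric placeholders above use LDB_COLS[3..5]; confirm the summary parsing further down writes to those same indices
# check if summary file exists, otherwise skip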
|
| 80 |
if f"{DS_RESULTS_PATH}/{dir_name}/summary.txt" not in repo_files:
|
|
|
|
| 93 |
with open(local_summary_path, "r", encoding="utf-8") as f:
|
| 94 |
for line in f:
|
| 95 |
if 'Error perc' in line:
|
| 96 |
+
entry[LDB_COLS[4]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 97 |
if 'Final Solution Accuracy' in line:
|
| 98 |
+
entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 99 |
if 'Submission coverage perc' in line:
|
| 100 |
+
entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 101 |
os.remove(local_summary_path)
|
| 102 |
else:
|
| 103 |
print(f"Warning: Summary file {local_summary_path} does not exist or is empty.")
|
|
|
|
| 113 |
return pd.DataFrame(leaderboard_entries)
|
| 114 |
|
| 115 |
|
| 116 |
+
def upload_submission(uploaded_file, dir_name, report_file, model_framework, base_llm):
|
| 117 |
"""Upload submission to Hugging Face Dataset."""
|
| 118 |
if not HF_API:
|
| 119 |
return False, "Hugging Face API not initialized"
|
|
|
|
| 140 |
metadata = {
|
| 141 |
"submission_name": dir_name,
|
| 142 |
"modelling_framework": model_framework,
|
| 143 |
+
"base_llm": base_llm,
|
| 144 |
}
|
| 145 |
HF_API.upload_file(
|
| 146 |
path_or_fileobj=io.BytesIO(json.dumps(metadata, indent=4).encode('utf-8')),
|
src/ui.py
CHANGED
|
@@ -8,7 +8,7 @@ from src.hf_utils import load_leaderboard_data, upload_submission, check_name_ex
|
|
| 8 |
from src.eval import start_background_evaluation
|
| 9 |
|
| 10 |
|
| 11 |
-
def handle_upload(submission_name, uploaded_file, report_file, model_framework, progress=gr.Progress()):
|
| 12 |
"""Handle file upload and start evaluation."""
|
| 13 |
if model_framework not in SUPPORTED_FRAMEWORKS:
|
| 14 |
return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
|
|
@@ -22,13 +22,14 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
|
|
| 22 |
# normalize the submission name
|
| 23 |
submission_name = submission_name.strip().replace(" ", "_").lower()
|
| 24 |
# keep only alphanumeric characters and underscores, restrict to 30 characters
|
| 25 |
-
submission_name = "".join(
|
| 26 |
-
c for c in submission_name if c.isalnum() or c == "_"
|
| 27 |
-
)[:30]
|
| 28 |
|
| 29 |
if not submission_name or submission_name.strip() == "":
|
| 30 |
return "Submission name is required"
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
if check_name_exists(submission_name):
|
| 33 |
return f"Submission name '{submission_name}' already exists. Please choose a different name."
|
| 34 |
|
|
@@ -50,7 +51,7 @@ def handle_upload(submission_name, uploaded_file, report_file, model_framework,
|
|
| 50 |
if not found_one:
|
| 51 |
return "Empty file. Please upload a valid JSONL file."
|
| 52 |
|
| 53 |
-
success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework)
|
| 54 |
if not success:
|
| 55 |
return f"Upload failed: {result}"
|
| 56 |
|
|
@@ -116,7 +117,7 @@ def create_ui():
|
|
| 116 |
label="Submission Name (required)",
|
| 117 |
placeholder="Enter a unique name for your submission",
|
| 118 |
interactive=True,
|
| 119 |
-
info="This name will appear on the leaderboard"
|
| 120 |
)
|
| 121 |
model_framework = gr.Dropdown(
|
| 122 |
label="Modelling Framework (required)",
|
|
@@ -128,6 +129,12 @@ def create_ui():
|
|
| 128 |
allow_custom_value=False,
|
| 129 |
filterable=False,
|
| 130 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
with gr.Row():
|
| 133 |
report_file = gr.File(
|
|
@@ -153,7 +160,7 @@ def create_ui():
|
|
| 153 |
# Event handlers
|
| 154 |
upload_button.click(
|
| 155 |
fn=handle_upload,
|
| 156 |
-
inputs=[submission_name, submission_file, report_file, model_framework],
|
| 157 |
outputs=[status_box],
|
| 158 |
show_progress="full",
|
| 159 |
)
|
|
|
|
| 8 |
from src.eval import start_background_evaluation
|
| 9 |
|
| 10 |
|
| 11 |
+
def handle_upload(submission_name, uploaded_file, report_file, model_framework, base_llm, progress=gr.Progress()):
|
| 12 |
"""Handle file upload and start evaluation."""
|
| 13 |
if model_framework not in SUPPORTED_FRAMEWORKS:
|
| 14 |
return f"Unsupported modelling framework: {model_framework}. Supported frameworks are: {', '.join(SUPPORTED_FRAMEWORKS)}"
|
|
|
|
| 22 |
# normalize the submission name
|
| 23 |
submission_name = submission_name.strip().replace(" ", "_").lower()
|
| 24 |
# keep only alphanumeric characters and underscores, restrict to 30 characters
|
| 25 |
+
submission_name = "".join(c for c in submission_name if c.isalnum() or c == "_")[:30]
|
|
|
|
|
|
|
| 26 |
|
| 27 |
if not submission_name or submission_name.strip() == "":
|
| 28 |
return "Submission name is required"
|
| 29 |
|
| 30 |
+
if not base_llm or base_llm.strip() == "":
|
| 31 |
+
return "Base LLM is required. Please specify the base language model used for generating the models."
|
| 32 |
+
|
| 33 |
if check_name_exists(submission_name):
|
| 34 |
return f"Submission name '{submission_name}' already exists. Please choose a different name."
|
| 35 |
|
|
|
|
| 51 |
if not found_one:
|
| 52 |
return "Empty file. Please upload a valid JSONL file."
|
| 53 |
|
| 54 |
+
success, result = upload_submission(uploaded_file, submission_name, report_file, model_framework, base_llm)
|
| 55 |
if not success:
|
| 56 |
return f"Upload failed: {result}"
|
| 57 |
|
|
|
|
| 117 |
label="Submission Name (required)",
|
| 118 |
placeholder="Enter a unique name for your submission",
|
| 119 |
interactive=True,
|
| 120 |
+
info="This name will appear on the leaderboard. It is recommended that it represents the approach you used to generate the models (e.g. 'smart_prompting')",
|
| 121 |
)
|
| 122 |
model_framework = gr.Dropdown(
|
| 123 |
label="Modelling Framework (required)",
|
|
|
|
| 129 |
allow_custom_value=False,
|
| 130 |
filterable=False,
|
| 131 |
)
|
| 132 |
+
base_llm = gr.Textbox(
|
| 133 |
+
label="Base LLM (required)",
|
| 134 |
+
placeholder="Enter the base LLM used for generating the models (e.g., GPT-4, Llama-3.3)",
|
| 135 |
+
interactive=True,
|
| 136 |
+
info="The base LLM used for generating the models."
|
| 137 |
+
)
|
| 138 |
|
| 139 |
with gr.Row():
|
| 140 |
report_file = gr.File(
|
|
|
|
| 160 |
# Event handlers
|
| 161 |
upload_button.click(
|
| 162 |
fn=handle_upload,
|
| 163 |
+
inputs=[submission_name, submission_file, report_file, model_framework, base_llm],
|
| 164 |
outputs=[status_box],
|
| 165 |
show_progress="full",
|
| 166 |
)
|