Spaces:
Sleeping
Sleeping
xeon27
committed on
Commit
·
954d8ee
1
Parent(s):
8471f6d
Change model names to reflect version
Browse files
- refactor_eval_results.py +18 -1
- src/leaderboard/read_evals.py +3 -1
refactor_eval_results.py
CHANGED
|
@@ -30,7 +30,7 @@ METRIC_NAME = {
|
|
| 30 |
|
| 31 |
MODEL_SHA_MAP = {
|
| 32 |
# open source models
|
| 33 |
-
"c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
|
| 34 |
"Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
| 35 |
"Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
|
| 36 |
"Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
|
@@ -44,6 +44,22 @@ MODEL_SHA_MAP = {
|
|
| 44 |
"o1": "https://openai.com/o1",
|
| 45 |
}
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
AGENTIC_LOG_MODEL_NAME_MAP = {
|
| 48 |
"claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
|
| 49 |
"gemini-1.5-pro": "gemini-1.5-pro-002",
|
|
@@ -150,6 +166,7 @@ def main():
|
|
| 150 |
requests = {
|
| 151 |
"model": model_name,
|
| 152 |
"model_sha": MODEL_SHA_MAP[model_name],
|
|
|
|
| 153 |
"base_model": "",
|
| 154 |
"revision": "main",
|
| 155 |
"private": False,
|
|
|
|
| 30 |
|
| 31 |
MODEL_SHA_MAP = {
|
| 32 |
# open source models
|
| 33 |
+
"c4ai-command-r-plus": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
|
| 34 |
"Meta-Llama-3.1-70B-Instruct": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
| 35 |
"Mistral-Large-Instruct-2407": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
|
| 36 |
"Qwen2.5-72B-Instruct": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
|
|
|
| 44 |
"o1": "https://openai.com/o1",
|
| 45 |
}
|
| 46 |
|
| 47 |
+
MODEL_VERSION_MAP = {
|
| 48 |
+
# open source models
|
| 49 |
+
"c4ai-command-r-plus": "c4ai-command-r-plus",
|
| 50 |
+
"Meta-Llama-3.1-70B-Instruct": "Llama-3.1-70B-Instruct",
|
| 51 |
+
"Mistral-Large-Instruct-2407": "Mistral-Large-Instruct-2407",
|
| 52 |
+
"Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct",
|
| 53 |
+
|
| 54 |
+
# closed source models
|
| 55 |
+
"claude-3-5-sonnet-20241022": "Claude-3.5-Sonnet-20241022",
|
| 56 |
+
"gemini-1.5-flash": "Gemini-1.5-Flash",
|
| 57 |
+
"gemini-1.5-pro": "Gemini-1.5-Pro-002",
|
| 58 |
+
"gpt-4o": "GPT-4o-20240806",
|
| 59 |
+
"gpt-4o-mini": "GPT-4o-mini-20240718",
|
| 60 |
+
"o1": "o1-20241217",
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
AGENTIC_LOG_MODEL_NAME_MAP = {
|
| 64 |
"claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
|
| 65 |
"gemini-1.5-pro": "gemini-1.5-pro-002",
|
|
|
|
| 166 |
requests = {
|
| 167 |
"model": model_name,
|
| 168 |
"model_sha": MODEL_SHA_MAP[model_name],
|
| 169 |
+
"model_version": MODEL_VERSION_MAP[model_name],
|
| 170 |
"base_model": "",
|
| 171 |
"revision": "main",
|
| 172 |
"private": False,
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -20,6 +20,7 @@ class EvalResult:
|
|
| 20 |
full_model: str # org/model (path on hub)
|
| 21 |
org: str
|
| 22 |
model: str
|
|
|
|
| 23 |
revision: str # commit hash, "" if main
|
| 24 |
results: dict
|
| 25 |
precision: Precision = Precision.Unknown
|
|
@@ -103,6 +104,7 @@ class EvalResult:
|
|
| 103 |
with open(request_file, "r") as f:
|
| 104 |
request = json.load(f)
|
| 105 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
|
|
|
| 106 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
| 107 |
self.license = request.get("license", "?")
|
| 108 |
self.likes = request.get("likes", 0)
|
|
@@ -115,7 +117,7 @@ class EvalResult:
|
|
| 115 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 116 |
data_dict = {
|
| 117 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 118 |
-
AutoEvalColumn.model.name: make_clickable_model(self.
|
| 119 |
}
|
| 120 |
|
| 121 |
for task in Tasks:
|
|
|
|
| 20 |
full_model: str # org/model (path on hub)
|
| 21 |
org: str
|
| 22 |
model: str
|
| 23 |
+
model_version: str
|
| 24 |
revision: str # commit hash, "" if main
|
| 25 |
results: dict
|
| 26 |
precision: Precision = Precision.Unknown
|
|
|
|
| 104 |
with open(request_file, "r") as f:
|
| 105 |
request = json.load(f)
|
| 106 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
| 107 |
+
self.model_version = request.get("model_version", "")
|
| 108 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
| 109 |
self.license = request.get("license", "?")
|
| 110 |
self.likes = request.get("likes", 0)
|
|
|
|
| 117 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 118 |
data_dict = {
|
| 119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 120 |
+
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
| 121 |
}
|
| 122 |
|
| 123 |
for task in Tasks:
|