Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Gregor Betz
committed on
add dashboard
Browse files- src/display/formatting.py +8 -6
- src/display/utils.py +21 -18
- src/envs.py +4 -2
- src/leaderboard/read_evals.py +4 -4
src/display/formatting.py
CHANGED
|
@@ -1,12 +1,8 @@
|
|
| 1 |
-
|
| 2 |
-
from datetime import datetime, timezone
|
| 3 |
|
| 4 |
-
from
|
| 5 |
-
from huggingface_hub.hf_api import ModelInfo
|
| 6 |
|
| 7 |
|
| 8 |
-
API = HfApi()
|
| 9 |
-
|
| 10 |
def model_hyperlink(link, model_name):
|
| 11 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
| 12 |
|
|
@@ -16,6 +12,12 @@ def make_clickable_model(model_name):
|
|
| 16 |
return model_hyperlink(link, model_name)
|
| 17 |
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
def styled_error(error):
|
| 20 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
|
| 21 |
|
|
|
|
| 1 |
+
# utility functions for formatting text and data for display in the leaderboard
|
|
|
|
| 2 |
|
| 3 |
+
from src.envs import DASHBOARD_LINK
|
|
|
|
| 4 |
|
| 5 |
|
|
|
|
|
|
|
| 6 |
def model_hyperlink(link, model_name):
|
| 7 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
| 8 |
|
|
|
|
| 12 |
return model_hyperlink(link, model_name)
|
| 13 |
|
| 14 |
|
| 15 |
+
def model_dashboard_hyperlink(model_name):
|
| 16 |
+
link = DASHBOARD_LINK.format(model_id=model_name)
|
| 17 |
+
html_link = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">show in 📊</a>'
|
| 18 |
+
return html_link
|
| 19 |
+
|
| 20 |
+
|
| 21 |
def styled_error(error):
|
| 22 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
|
| 23 |
|
src/display/utils.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from dataclasses import dataclass, make_dataclass
|
| 2 |
from enum import Enum
|
|
|
|
| 3 |
|
| 4 |
-
import pandas as pd
|
| 5 |
|
| 6 |
from src.display.about import Tasks
|
| 7 |
|
|
@@ -22,26 +23,28 @@ class ColumnContent:
|
|
| 22 |
dummy: bool = False
|
| 23 |
|
| 24 |
## Leaderboard columns
|
| 25 |
-
auto_eval_column_dict = []
|
| 26 |
# Init
|
| 27 |
-
auto_eval_column_dict.append(
|
| 28 |
-
auto_eval_column_dict.append(
|
| 29 |
-
#Scores
|
| 30 |
-
auto_eval_column_dict.append(
|
| 31 |
for task in Tasks:
|
| 32 |
-
auto_eval_column_dict.append(
|
|
|
|
|
|
|
| 33 |
# Model information
|
| 34 |
-
auto_eval_column_dict.append(
|
| 35 |
-
auto_eval_column_dict.append(
|
| 36 |
-
auto_eval_column_dict.append(
|
| 37 |
-
auto_eval_column_dict.append(
|
| 38 |
-
auto_eval_column_dict.append(
|
| 39 |
-
auto_eval_column_dict.append(
|
| 40 |
-
auto_eval_column_dict.append(
|
| 41 |
-
auto_eval_column_dict.append(
|
| 42 |
-
auto_eval_column_dict.append(
|
| 43 |
# Dummy column for the search bar (hidden by the custom CSS)
|
| 44 |
-
auto_eval_column_dict.append(
|
| 45 |
|
| 46 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 47 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
@@ -53,7 +56,7 @@ class EvalQueueColumn: # Queue column
|
|
| 53 |
revision = ColumnContent("revision", "str", True)
|
| 54 |
private = ColumnContent("private", "bool", True)
|
| 55 |
precision = ColumnContent("precision", "str", True)
|
| 56 |
-
weight_type = ColumnContent("weight_type", "str",
|
| 57 |
status = ColumnContent("status", "str", True)
|
| 58 |
|
| 59 |
## All the model information that we might need
|
|
|
|
| 1 |
from dataclasses import dataclass, make_dataclass
|
| 2 |
from enum import Enum
|
| 3 |
+
from typing import Any
|
| 4 |
|
| 5 |
+
import pandas as pd # type: ignore
|
| 6 |
|
| 7 |
from src.display.about import Tasks
|
| 8 |
|
|
|
|
| 23 |
dummy: bool = False
|
| 24 |
|
| 25 |
## Leaderboard columns
|
| 26 |
+
auto_eval_column_dict: list[tuple[str, type, Any]] = []
|
| 27 |
# Init
|
| 28 |
+
auto_eval_column_dict.append(("model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)))
|
| 29 |
+
auto_eval_column_dict.append(("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)))
|
| 30 |
+
# Scores
|
| 31 |
+
auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
|
| 32 |
for task in Tasks:
|
| 33 |
+
auto_eval_column_dict.append((task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)))
|
| 34 |
+
# Dashboard
|
| 35 |
+
auto_eval_column_dict.append(("dashboard_link", ColumnContent, ColumnContent("Dashboard", "markdown", False)))
|
| 36 |
# Model information
|
| 37 |
+
auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
|
| 38 |
+
auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
|
| 39 |
+
auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
|
| 40 |
+
auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
|
| 41 |
+
auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
|
| 42 |
+
auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
|
| 43 |
+
auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
|
| 44 |
+
auto_eval_column_dict.append(("still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)))
|
| 45 |
+
auto_eval_column_dict.append(("revision", ColumnContent, ColumnContent("Model sha", "str", False, False)))
|
| 46 |
# Dummy column for the search bar (hidden by the custom CSS)
|
| 47 |
+
auto_eval_column_dict.append(("dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)))
|
| 48 |
|
| 49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
| 50 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
|
| 56 |
revision = ColumnContent("revision", "str", True)
|
| 57 |
private = ColumnContent("private", "bool", True)
|
| 58 |
precision = ColumnContent("precision", "str", True)
|
| 59 |
+
weight_type = ColumnContent("weight_type", "str", True)
|
| 60 |
status = ColumnContent("status", "str", True)
|
| 61 |
|
| 62 |
## All the model information that we might need
|
src/envs.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
from huggingface_hub import HfApi
|
| 4 |
|
| 5 |
# clone / pull the lmeh eval data
|
| 6 |
TOKEN = os.environ.get("TOKEN", None)
|
|
@@ -11,9 +11,11 @@ REPO_ID = f"{OWNER}/open_cot_leaderboard"
|
|
| 11 |
QUEUE_REPO = f"{DATA_OWNER}/cot-leaderboard-requests"
|
| 12 |
RESULTS_REPO = f"{DATA_OWNER}/cot-leaderboard-results"
|
| 13 |
|
| 14 |
-
|
| 15 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
# Local caches
|
| 18 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
| 19 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
+
from huggingface_hub import HfApi # type: ignore
|
| 4 |
|
| 5 |
# clone / pull the lmeh eval data
|
| 6 |
TOKEN = os.environ.get("TOKEN", None)
|
|
|
|
| 11 |
QUEUE_REPO = f"{DATA_OWNER}/cot-leaderboard-requests"
|
| 12 |
RESULTS_REPO = f"{DATA_OWNER}/cot-leaderboard-results"
|
| 13 |
|
|
|
|
| 14 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
| 15 |
|
| 16 |
+
# Dashboard
|
| 17 |
+
DASHBOARD_LINK = "https://huggingface.co/cot-leaderboard/open-cot-dashboard?model={model_id}"
|
| 18 |
+
|
| 19 |
# Local caches
|
| 20 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
| 21 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
import glob
|
| 2 |
import json
|
| 3 |
-
import math
|
| 4 |
import os
|
| 5 |
from dataclasses import dataclass
|
| 6 |
|
| 7 |
-
import dateutil
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
-
from src.display.formatting import make_clickable_model
|
| 11 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
| 12 |
from src.submission.check_validity import is_model_on_hub
|
| 13 |
from src.envs import TOKEN
|
|
@@ -117,6 +116,7 @@ class EvalResult:
|
|
| 117 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 118 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 119 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
|
|
|
| 120 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 121 |
AutoEvalColumn.revision.name: self.revision,
|
| 122 |
AutoEvalColumn.average.name: average,
|
|
@@ -172,7 +172,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
| 172 |
for file in files:
|
| 173 |
model_result_filepaths.append(os.path.join(root, file))
|
| 174 |
|
| 175 |
-
eval_results = {}
|
| 176 |
for model_result_filepath in model_result_filepaths:
|
| 177 |
# Creation of result
|
| 178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
|
|
|
| 1 |
import glob
|
| 2 |
import json
|
|
|
|
| 3 |
import os
|
| 4 |
from dataclasses import dataclass
|
| 5 |
|
| 6 |
+
import dateutil # type: ignore
|
| 7 |
import numpy as np
|
| 8 |
|
| 9 |
+
from src.display.formatting import make_clickable_model, model_dashboard_hyperlink
|
| 10 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
| 11 |
from src.submission.check_validity import is_model_on_hub
|
| 12 |
from src.envs import TOKEN
|
|
|
|
| 116 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 117 |
AutoEvalColumn.architecture.name: self.architecture,
|
| 118 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
| 119 |
+
AutoEvalColumn.dashboard_link.name: model_dashboard_hyperlink(self.full_model),
|
| 120 |
AutoEvalColumn.dummy.name: self.full_model,
|
| 121 |
AutoEvalColumn.revision.name: self.revision,
|
| 122 |
AutoEvalColumn.average.name: average,
|
|
|
|
| 172 |
for file in files:
|
| 173 |
model_result_filepaths.append(os.path.join(root, file))
|
| 174 |
|
| 175 |
+
eval_results: dict[str, EvalResult] = {}
|
| 176 |
for model_result_filepath in model_result_filepaths:
|
| 177 |
# Creation of result
|
| 178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|