Spaces:
Sleeping
Sleeping
Alvaro Romo committed on
Commit ·
bec5baa
1
Parent(s): 6aaf516
Modified the dataframe to adapt to the new format with real data. Fixed the HTML code.
Browse files- app.py +49 -31
- assets/html/02_technical_detail.html +2 -1
app.py
CHANGED
|
@@ -18,21 +18,24 @@ st.set_page_config(page_title="IVACE Leaderboard", layout="wide")
|
|
| 18 |
request_file = Path("user_request/") / f"data_{uuid.uuid4()}.json"
|
| 19 |
request_folder = request_file.parent
|
| 20 |
|
| 21 |
-
# column order
|
| 22 |
-
columns = [
|
| 23 |
-
"eval_name",
|
| 24 |
-
"Model",
|
| 25 |
-
"Type",
|
| 26 |
-
"Average ⬆️",
|
| 27 |
-
"IFEval",
|
| 28 |
-
"MMLU-PRO",
|
| 29 |
-
"GPQA",
|
| 30 |
-
"MUSR",
|
| 31 |
-
"CO₂ cost (kg)",
|
| 32 |
-
]
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# languages
|
| 35 |
-
lang_list = ["
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
scheduler = CommitScheduler(
|
| 38 |
repo_id="iberbench/ivace-user-request",
|
|
@@ -55,33 +58,43 @@ def log_submission(input_dict: dict) -> None:
|
|
| 55 |
f.write("\n")
|
| 56 |
|
| 57 |
|
| 58 |
-
def get_url(html_content: str) -> str:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
@st.cache_data
|
| 68 |
-
def load_data() -> pd.DataFrame:
|
| 69 |
try:
|
| 70 |
data = (
|
| 71 |
-
load_dataset("
|
| 72 |
.to_pandas()
|
| 73 |
-
.head(10)
|
| 74 |
)
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
data
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# add column to apply filtering
|
| 80 |
data["Active"] = False
|
| 81 |
|
| 82 |
return data
|
| 83 |
except FileNotFoundError:
|
| 84 |
-
st.error("
|
| 85 |
return pd.DataFrame()
|
| 86 |
|
| 87 |
|
|
@@ -129,12 +142,17 @@ def create_search_per_language(lang: str, search_dict: dict):
|
|
| 129 |
"Active",
|
| 130 |
] = True
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
edited_data = st.data_editor(
|
| 133 |
active_data(lang),
|
| 134 |
column_order=columns,
|
| 135 |
key=f"edited_data_{lang}",
|
| 136 |
hide_index=False,
|
| 137 |
-
column_config={"Model": st.column_config.LinkColumn("Model")},
|
|
|
|
| 138 |
)
|
| 139 |
else:
|
| 140 |
st.write("No data found to display on leaderboard.")
|
|
@@ -142,8 +160,8 @@ def create_search_per_language(lang: str, search_dict: dict):
|
|
| 142 |
|
| 143 |
# streamlit UI
|
| 144 |
for lang in lang_list:
|
| 145 |
-
# todo: load a different dataset per language
|
| 146 |
-
leaderboard_data = load_data()
|
| 147 |
if f"leaderboard_data_{lang}" not in st.session_state:
|
| 148 |
st.session_state[f"leaderboard_data_{lang}"] = leaderboard_data
|
| 149 |
|
|
|
|
| 18 |
request_file = Path("user_request/") / f"data_{uuid.uuid4()}.json"
|
| 19 |
request_folder = request_file.parent
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
# columns = [
|
| 23 |
+
# "eval_name",
|
| 24 |
+
# "Model",
|
| 25 |
+
# "Type",
|
| 26 |
+
# "Average ⬆️",
|
| 27 |
+
# "IFEval",
|
| 28 |
+
# "MMLU-PRO",
|
| 29 |
+
# "GPQA",
|
| 30 |
+
# "MUSR",
|
| 31 |
+
# "CO₂ cost (kg)",
|
| 32 |
+
# ]
|
| 33 |
# languages: the Streamlit UI loop iterates over this list and loads data per entry
lang_list = ["Spain", "Costa Rica", "Mexico", "Peru", "Uruguay"]

# column order: the model columns come first, followed by the task columns
model_columns = ["model_name", "url", "type"]
# one task column per language, named "tass_<language>" with the language
# lower-cased and spaces replaced by underscores (e.g. "tass_costa_rica")
task_columns = [f"tass_{lang.lower().replace(' ', '_')}" for lang in lang_list]
|
| 39 |
|
| 40 |
scheduler = CommitScheduler(
|
| 41 |
repo_id="iberbench/ivace-user-request",
|
|
|
|
| 58 |
f.write("\n")
|
| 59 |
|
| 60 |
|
| 61 |
+
# def get_url(html_content: str) -> str:
|
| 62 |
+
# match = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
|
| 63 |
+
# if match:
|
| 64 |
+
# url = match.group(1)
|
| 65 |
+
# return url
|
| 66 |
+
# else:
|
| 67 |
+
# raise ValueError("Url not found in the link")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_lang_columns(columns: list, lang: str) -> list:
    """Filter columns per language.

    The language name is normalised (surrounding whitespace removed,
    lower-cased, spaces replaced by underscores), so ``"Costa Rica"`` becomes
    ``"costa_rica"``. A column is kept when the normalised name appears in it
    as a whole underscore-delimited segment: ``"Spain"`` selects
    ``"tass_spain"`` and ``"tass_spain_f1"`` but not ``"tass_spainish"``.

    Args:
        columns: Candidate column names.
        lang: Human-readable language name, e.g. ``"Costa Rica"``.

    Returns:
        The matching column names, in their original order. Empty when
        ``lang`` is blank or nothing matches.
    """
    lang_norm = lang.strip().lower().replace(" ", "_")
    if not lang_norm:
        # An empty needle is a substring of every string; a blank language
        # must not select every column.
        return []

    # Wrapping both sides in underscores turns a plain substring test into a
    # segment-boundary test without needing a regular expression.
    needle = f"_{lang_norm}_"
    return [col for col in columns if needle in f"_{col}_"]
|
| 75 |
|
| 76 |
|
| 77 |
@st.cache_data
def load_data(lang: str) -> pd.DataFrame:
    """Load the evaluation results restricted to one language.

    Loads the ``train`` split of ``iberbench/lm-eval-results-ac`` through
    ``load_dataset`` (authenticated with the ``HF_TOKEN`` secret), keeps the
    model columns plus the task columns matching ``lang``, and adds a boolean
    ``Active`` column initialised to ``False``.

    Args:
        lang: Language name used to select the task columns.

    Returns:
        The filtered results. If ``FileNotFoundError`` is raised while
        loading, an error is shown through ``st.error`` and an empty
        DataFrame is returned instead.
    """
    try:
        data = load_dataset(
            "iberbench/lm-eval-results-ac", token=st.secrets["HF_TOKEN"]
        )["train"].to_pandas()

        # filter lang columns
        task_lang_columns = get_lang_columns(task_columns, lang)
        # Take an explicit copy: adding a column to a bare column selection
        # can raise pandas' SettingWithCopyWarning (pandas without
        # copy-on-write).
        data = data[model_columns + task_lang_columns].copy()

        # data["Model"] = data["Model"].apply(get_url)
        # data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
        # data.reset_index(drop=True, inplace=True)

        # add column to apply filtering
        data["Active"] = False

        return data
    except FileNotFoundError:
        st.error("iberbench/lm-eval-results-ac was not found in the hub")
        return pd.DataFrame()
|
| 99 |
|
| 100 |
|
|
|
|
| 142 |
"Active",
|
| 143 |
] = True
|
| 144 |
|
| 145 |
+
# select columns to display
|
| 146 |
+
task_lang_columns = get_lang_columns(task_columns, lang)
|
| 147 |
+
columns = model_columns + task_lang_columns
|
| 148 |
+
|
| 149 |
edited_data = st.data_editor(
|
| 150 |
active_data(lang),
|
| 151 |
column_order=columns,
|
| 152 |
key=f"edited_data_{lang}",
|
| 153 |
hide_index=False,
|
| 154 |
+
# column_config={"Model": st.column_config.LinkColumn("Model")},
|
| 155 |
+
column_config={"url": st.column_config.LinkColumn("url")},
|
| 156 |
)
|
| 157 |
else:
|
| 158 |
st.write("No data found to display on leaderboard.")
|
|
|
|
| 160 |
|
| 161 |
# streamlit UI
|
| 162 |
for lang in lang_list:
|
| 163 |
+
# todo: load a different dataset per language or load different columns per language
|
| 164 |
+
leaderboard_data = load_data(lang)
|
| 165 |
if f"leaderboard_data_{lang}" not in st.session_state:
|
| 166 |
st.session_state[f"leaderboard_data_{lang}"] = leaderboard_data
|
| 167 |
|
assets/html/02_technical_detail.html
CHANGED
|
@@ -5,11 +5,12 @@
|
|
| 5 |
</p>
|
| 6 |
<div style="background-color: #f5f5f5; padding: 1rem; border-radius: 5px; font-family: monospace; color: #212529;">
|
| 7 |
<pre style="margin: 0; padding: 0; font-size: 1rem; white-space: pre-wrap; word-wrap: break-word;">
|
|
|
|
| 8 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
| 9 |
-
|
| 10 |
config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
|
| 11 |
model = AutoModel.from_pretrained("your-username/your-model", revision="main")
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
|
|
|
|
| 13 |
</pre>
|
| 14 |
</div>
|
| 15 |
<a href="https://huggingface.co/docs/transformers/installation" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Transformers documentation →</a>
|
|
|
|
| 5 |
</p>
|
| 6 |
<div style="background-color: #f5f5f5; padding: 1rem; border-radius: 5px; font-family: monospace; color: #212529;">
|
| 7 |
<pre style="margin: 0; padding: 0; font-size: 1rem; white-space: pre-wrap; word-wrap: break-word;">
|
| 8 |
+
<code>
|
| 9 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
|
|
|
| 10 |
config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
|
| 11 |
model = AutoModel.from_pretrained("your-username/your-model", revision="main")
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
|
| 13 |
+
</code>
|
| 14 |
</pre>
|
| 15 |
</div>
|
| 16 |
<a href="https://huggingface.co/docs/transformers/installation" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Transformers documentation →</a>
|