Alvaro Romo committed on
Commit
bec5baa
·
1 Parent(s): 6aaf516

Modified the dataframe to adapt to the new format with real data. Fixed the HTML code.

Browse files
Files changed (2) hide show
  1. app.py +49 -31
  2. assets/html/02_technical_detail.html +2 -1
app.py CHANGED
@@ -18,21 +18,24 @@ st.set_page_config(page_title="IVACE Leaderboard", layout="wide")
18
  request_file = Path("user_request/") / f"data_{uuid.uuid4()}.json"
19
  request_folder = request_file.parent
20
 
21
- # column order
22
- columns = [
23
- "eval_name",
24
- "Model",
25
- "Type",
26
- "Average ⬆️",
27
- "IFEval",
28
- "MMLU-PRO",
29
- "GPQA",
30
- "MUSR",
31
- "CO₂ cost (kg)",
32
- ]
33
 
 
 
 
 
 
 
 
 
 
 
 
34
  # languages
35
- lang_list = ["Spanish", "Galician", "Basque", "Argentinian", "Chilean"]
 
 
 
 
36
 
37
  scheduler = CommitScheduler(
38
  repo_id="iberbench/ivace-user-request",
@@ -55,33 +58,43 @@ def log_submission(input_dict: dict) -> None:
55
  f.write("\n")
56
 
57
 
58
- def get_url(html_content: str) -> str:
59
- match = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
60
- if match:
61
- url = match.group(1)
62
- return url
63
- else:
64
- raise ValueError("Url not found in the link")
 
 
 
 
 
 
 
65
 
66
 
67
  @st.cache_data
68
- def load_data() -> pd.DataFrame:
69
  try:
70
  data = (
71
- load_dataset("open-llm-leaderboard/contents")["train"]
72
  .to_pandas()
73
- .head(10)
74
  )
75
- data = data[columns]
76
- data["Model"] = data["Model"].apply(get_url)
77
- data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
78
- data.reset_index(drop=True, inplace=True)
 
 
 
 
79
  # add column to apply filtering
80
  data["Active"] = False
81
 
82
  return data
83
  except FileNotFoundError:
84
- st.error("open-llm-leaderboard/contents was not found in the hub")
85
  return pd.DataFrame()
86
 
87
 
@@ -129,12 +142,17 @@ def create_search_per_language(lang: str, search_dict: dict):
129
  "Active",
130
  ] = True
131
 
 
 
 
 
132
  edited_data = st.data_editor(
133
  active_data(lang),
134
  column_order=columns,
135
  key=f"edited_data_{lang}",
136
  hide_index=False,
137
- column_config={"Model": st.column_config.LinkColumn("Model")},
 
138
  )
139
  else:
140
  st.write("No data found to display on leaderboard.")
@@ -142,8 +160,8 @@ def create_search_per_language(lang: str, search_dict: dict):
142
 
143
  # streamlit UI
144
  for lang in lang_list:
145
- # todo: load a different dataset per language
146
- leaderboard_data = load_data()
147
  if f"leaderboard_data_{lang}" not in st.session_state:
148
  st.session_state[f"leaderboard_data_{lang}"] = leaderboard_data
149
 
 
18
  request_file = Path("user_request/") / f"data_{uuid.uuid4()}.json"
19
  request_folder = request_file.parent
20
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # columns = [
23
+ # "eval_name",
24
+ # "Model",
25
+ # "Type",
26
+ # "Average ⬆️",
27
+ # "IFEval",
28
+ # "MMLU-PRO",
29
+ # "GPQA",
30
+ # "MUSR",
31
+ # "CO₂ cost (kg)",
32
+ # ]
33
  # languages
34
+ lang_list = ["Spain", "Costa Rica", "Mexico", "Peru", "Uruguay"]
35
+
36
+ # column order
37
+ model_columns = ["model_name", "url", "type"]
38
+ task_columns = [f"tass_{lang.lower().replace(' ', '_')}" for lang in lang_list]
39
 
40
  scheduler = CommitScheduler(
41
  repo_id="iberbench/ivace-user-request",
 
58
  f.write("\n")
59
 
60
 
61
+ # def get_url(html_content: str) -> str:
62
+ # match = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
63
+ # if match:
64
+ # url = match.group(1)
65
+ # return url
66
+ # else:
67
+ # raise ValueError("Url not found in the link")
68
+
69
+
70
+ def get_lang_columns(columns: list, lang: str):
71
+ """Filter columns per language"""
72
+ lang_norm = lang.lower().replace(" ", "_")
73
+
74
+ return [col for col in columns if lang_norm in col]
75
 
76
 
77
  @st.cache_data
78
+ def load_data(lang) -> pd.DataFrame:
79
  try:
80
  data = (
81
+ load_dataset("iberbench/lm-eval-results-ac", token=st.secrets["HF_TOKEN"])["train"]
82
  .to_pandas()
 
83
  )
84
+ # filter lang columns
85
+ task_lang_columns = get_lang_columns(task_columns, lang)
86
+ data = data[model_columns + task_lang_columns]
87
+
88
+ # data["Model"] = data["Model"].apply(get_url)
89
+ # data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
90
+ # data.reset_index(drop=True, inplace=True)
91
+
92
  # add column to apply filtering
93
  data["Active"] = False
94
 
95
  return data
96
  except FileNotFoundError:
97
+ st.error("iberbench/lm-eval-results-ac was not found in the hub")
98
  return pd.DataFrame()
99
 
100
 
 
142
  "Active",
143
  ] = True
144
 
145
+ # select columns to display
146
+ task_lang_columns = get_lang_columns(task_columns, lang)
147
+ columns = model_columns + task_lang_columns
148
+
149
  edited_data = st.data_editor(
150
  active_data(lang),
151
  column_order=columns,
152
  key=f"edited_data_{lang}",
153
  hide_index=False,
154
+ # column_config={"Model": st.column_config.LinkColumn("Model")},
155
+ column_config={"url": st.column_config.LinkColumn("url")},
156
  )
157
  else:
158
  st.write("No data found to display on leaderboard.")
 
160
 
161
  # streamlit UI
162
  for lang in lang_list:
163
+ # todo: load a different dataset per language or load different columns per lang
164
+ leaderboard_data = load_data(lang)
165
  if f"leaderboard_data_{lang}" not in st.session_state:
166
  st.session_state[f"leaderboard_data_{lang}"] = leaderboard_data
167
 
assets/html/02_technical_detail.html CHANGED
@@ -5,11 +5,12 @@
5
  </p>
6
  <div style="background-color: #f5f5f5; padding: 1rem; border-radius: 5px; font-family: monospace; color: #212529;">
7
  <pre style="margin: 0; padding: 0; font-size: 1rem; white-space: pre-wrap; word-wrap: break-word;">
 
8
  from transformers import AutoConfig, AutoModel, AutoTokenizer
9
-
10
  config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
11
  model = AutoModel.from_pretrained("your-username/your-model", revision="main")
12
  tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
 
13
  </pre>
14
  </div>
15
  <a href="https://huggingface.co/docs/transformers/installation" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Transformers documentation →</a>
 
5
  </p>
6
  <div style="background-color: #f5f5f5; padding: 1rem; border-radius: 5px; font-family: monospace; color: #212529;">
7
  <pre style="margin: 0; padding: 0; font-size: 1rem; white-space: pre-wrap; word-wrap: break-word;">
8
+ <code>
9
  from transformers import AutoConfig, AutoModel, AutoTokenizer
 
10
  config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
11
  model = AutoModel.from_pretrained("your-username/your-model", revision="main")
12
  tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
13
+ </code>
14
  </pre>
15
  </div>
16
  <a href="https://huggingface.co/docs/transformers/installation" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Transformers documentation →</a>