saleh-alibrahim committed on
Commit
f797f3e
·
verified ·
1 Parent(s): 95fd2b2

Remove Elm/STC from providers, add Eval method column (API vs HuggingFace)

Browse files
app.py CHANGED
@@ -60,6 +60,7 @@ def build_leaderboard_df(results: list[dict]) -> pd.DataFrame:
60
  "Rank": i,
61
  "Model": r["model_name"],
62
  "Provider": r.get("provider", ""),
 
63
  "Parameters": r.get("parameters", ""),
64
  "Score": r["average_score"],
65
  "Tier": score_to_tier(r["average_score"]),
@@ -201,7 +202,7 @@ def make_bar_chart(model_name: str) -> go.Figure:
201
  def build_leaderboard_display() -> pd.DataFrame:
202
  if LEADERBOARD_DF.empty:
203
  return pd.DataFrame()
204
- df = LEADERBOARD_DF[["Rank", "Model", "Provider", "Parameters", "Score", "Tier"]].copy()
205
  df["Score"] = df["Score"].apply(lambda x: f"**{x:.2f}**")
206
  df["Tier"] = df["Tier"].apply(lambda t: f"**{t}**")
207
  return df
@@ -401,11 +402,11 @@ with demo:
401
  with gr.TabItem("Leaderboard", id=0):
402
  gr.Dataframe(
403
  value=DISPLAY_DF,
404
- datatype=["number", "markdown", "str", "str", "markdown", "markdown"],
405
  interactive=False,
406
  wrap=True,
407
  show_search="filter",
408
- column_widths=[60, 280, 160, 100, 100, 60],
409
  )
410
 
411
  # ── Tab 2: Skills Breakdown ──────────────────────────────────────
 
60
  "Rank": i,
61
  "Model": r["model_name"],
62
  "Provider": r.get("provider", ""),
63
+ "Eval": r.get("eval_method", ""),
64
  "Parameters": r.get("parameters", ""),
65
  "Score": r["average_score"],
66
  "Tier": score_to_tier(r["average_score"]),
 
202
  def build_leaderboard_display() -> pd.DataFrame:
203
  if LEADERBOARD_DF.empty:
204
  return pd.DataFrame()
205
+ df = LEADERBOARD_DF[["Rank", "Model", "Provider", "Eval", "Parameters", "Score", "Tier"]].copy()
206
  df["Score"] = df["Score"].apply(lambda x: f"**{x:.2f}**")
207
  df["Tier"] = df["Tier"].apply(lambda t: f"**{t}**")
208
  return df
 
402
  with gr.TabItem("Leaderboard", id=0):
403
  gr.Dataframe(
404
  value=DISPLAY_DF,
405
+ datatype=["number", "markdown", "str", "str", "str", "markdown", "markdown"],
406
  interactive=False,
407
  wrap=True,
408
  show_search="filter",
409
+ column_widths=[60, 250, 130, 110, 80, 90, 60],
410
  )
411
 
412
  # ── Tab 2: Skills Breakdown ──────────────────────────────────────
results/allam-7b-instruct.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "model_name": "ALLaM-7B-Instruct",
3
  "model_id": "humain-ai/ALLaM-7B-Instruct-preview",
4
- "provider": "Humain (STC)",
 
5
  "size_category": "Small",
6
  "parameters": "7B",
7
  "open_source": true,
 
1
  {
2
  "model_name": "ALLaM-7B-Instruct",
3
  "model_id": "humain-ai/ALLaM-7B-Instruct-preview",
4
+ "provider": "Humain",
5
+ "eval_method": "HuggingFace",
6
  "size_category": "Small",
7
  "parameters": "7B",
8
  "open_source": true,
results/nuha_llama-3.3-70b-versatile.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "model_name": "Nuha (llama-3.3-70b-versatile)",
3
  "model_id": "llama-3.3-70b-versatile",
4
- "provider": "Nuha API (Elm)",
 
5
  "size_category": "Large",
6
  "parameters": "70B",
7
  "open_source": true,
 
1
  {
2
  "model_name": "Nuha (llama-3.3-70b-versatile)",
3
  "model_id": "llama-3.3-70b-versatile",
4
+ "provider": "Nuha API",
5
+ "eval_method": "API",
6
  "size_category": "Large",
7
  "parameters": "70B",
8
  "open_source": true,