davidlf-hp committed on
Commit
2164614
·
verified ·
1 Parent(s): 396c4dc

Fix: read from leaderboard.json instead of leaderboard_space.json

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit app to display the NPU Arabic leaderboard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import List, Sequence
9
+
10
+ import streamlit as st
11
+
12
# Source file for the table: the aggregated leaderboard JSON (the variant that
# carries ``score`` and ``quality_overall``), uploaded to the Space under the
# name ``leaderboard.json``.
_DATA_PATH = Path("leaderboard.json")

# Columns that describe a row rather than measure it; they are never treated
# as numeric metrics.
_NON_METRIC_COLUMNS = frozenset({"model_name", "status", "timestamp"})

# Display order of the table columns. ``score`` and ``quality_overall`` come
# right after the identifying columns so they are the most visible numbers.
_COLUMNS: Sequence[str] = (
    "model_name",
    "status",
    "score",
    "quality_overall",
    "avg_tps",
    "mlqa_ar_ar_f1",
    "xquad_ar_f1",
    "iwslt2017-en-ar_sacrebleu",
    "xlsum_title_ar_rougeL",
    "xlsum_summary_ar_rougeLsum",
    "arabic_mmlu_acc",
    "timestamp",
)

# Numeric columns, in display order: everything in ``_COLUMNS`` that is not a
# descriptive column.
_METRIC_COLUMNS: Sequence[str] = tuple(
    name for name in _COLUMNS if name not in _NON_METRIC_COLUMNS
)
34
+
35
+
36
def _load_rows() -> List[dict]:
    """Load the leaderboard rows from ``_DATA_PATH``, ready for display.

    The JSON root may be a single object (treated as one row) or a list, in
    which case non-dict entries are dropped. Each row is reduced to the keys
    in ``_COLUMNS``; keys absent from the file become ``None``. A missing
    ``status`` defaults to ``"Completed"``, and for any other status every
    column in ``_METRIC_COLUMNS`` is overwritten with NaN.

    Returns:
        The rows sorted by ``score`` (highest first), ties broken by the most
        recent ``timestamp``. Rows without a usable score (missing, NaN or
        non-numeric) are ordered as if their score were ``-1.0``. An empty
        list is returned when the file is missing, unreadable, or not valid
        JSON.
    """
    # Function-scope import so this block does not depend on the file header.
    import math

    try:
        raw = json.loads(_DATA_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. The page then falls
        # back to its "no data" state instead of crashing.
        return []

    if isinstance(raw, dict):
        data = [raw]
    elif isinstance(raw, list):
        data = [item for item in raw if isinstance(item, dict)]
    else:
        data = []

    # Keep only the displayed columns and normalise the status.
    filtered: List[dict] = []
    for row in data:
        compact = {key: row.get(key) for key in _COLUMNS}
        if compact.get("status") is None:
            compact["status"] = "Completed"
        if compact["status"] != "Completed":
            # Metrics of an unfinished/failed run are not meaningful.
            for metric_col in _METRIC_COLUMNS:
                compact[metric_col] = float("nan")
        filtered.append(compact)

    fallback_stamp = datetime.min.replace(tzinfo=timezone.utc)

    def _sort_key(item: dict) -> tuple:
        """Return ``(score, utc_timestamp)`` with safe fallbacks for bad values."""
        try:
            score_val = float(item.get("score"))
        except (TypeError, ValueError):
            # None or a non-numeric value.
            score_val = -1.0
        if math.isnan(score_val):
            # NaN compares false against everything, which would make the
            # sort order inconsistent; treat it like a missing score.
            score_val = -1.0

        text = str(item.get("timestamp"))
        if text.endswith("Z"):
            # datetime.fromisoformat only accepts the "Z" suffix from
            # Python 3.11 onwards; "+00:00" is equivalent everywhere.
            text = text[:-1] + "+00:00"
        try:
            parsed = datetime.fromisoformat(text)
            if parsed.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                parsed = parsed.replace(tzinfo=timezone.utc)
            else:
                parsed = parsed.astimezone(timezone.utc)
        except (ValueError, OverflowError):
            # Unparseable (or out-of-range) timestamps sort as the oldest.
            parsed = fallback_stamp
        return (score_val, parsed)

    filtered.sort(key=_sort_key, reverse=True)
    return filtered
81
+
82
+
83
# Header text shown in the table for each raw column key.
_COLUMN_LABELS = {
    "model_name": "Model",
    "status": "Status",
    "score": "Score",
    "quality_overall": "Quality",
    "avg_tps": "Tokens/sec",
    "mlqa_ar_ar_f1": "MLQA F1",
    "xquad_ar_f1": "XQuAD F1",
    "iwslt2017-en-ar_sacrebleu": "IWSLT BLEU",
    "xlsum_title_ar_rougeL": "XLSum Title",
    "xlsum_summary_ar_rougeLsum": "XLSum Summary",
    "arabic_mmlu_acc": "MMLU Acc",
    "timestamp": "Last Updated",
}


def _build_column_config() -> dict:
    """Map every displayed column to its Streamlit column definition.

    Metric columns are rendered as numbers with two decimals; all other
    columns are rendered as text. Labels fall back to the raw column key.
    """
    config = {}
    for col in _COLUMNS:
        label = _COLUMN_LABELS.get(col, col)
        if col in _METRIC_COLUMNS:
            config[col] = st.column_config.NumberColumn(label, format="%.2f")
        else:
            config[col] = st.column_config.TextColumn(label)
    return config


# --- Page layout -----------------------------------------------------------
st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide")
st.title("🏆 Intel® NPU Arabic Leaderboard")

st.markdown("""
**Score** = √(Quality × Speed) - balances model quality with inference speed on Intel NPU.

**Quality** = Average of all benchmark scores (0-100 scale).
""")

rows = _load_rows()
if rows:
    st.write(
        "Submit your model for evaluation by emailing: **model=your-hf-model-id**"
    )
    st.dataframe(
        rows,
        column_config=_build_column_config(),
        hide_index=True,
    )
else:
    # Nothing to show yet: the data file is absent, empty or unreadable.
    st.info("No evaluations uploaded yet. Trigger a run to populate the leaderboard.")

st.caption("Data auto-synced from leaderboard.json produced by the evaluation pipeline.")