davidlf-hp committed on
Commit
dd971cb
·
verified ·
1 Parent(s): 4669125

Update leaderboard for Mistral-7B-Instruct-v0.3-int4-cw-ov (NPU)

Browse files
Files changed (3) hide show
  1. leaderboard.csv +1 -1
  2. leaderboard.json +6 -6
  3. src/app.py +84 -69
leaderboard.csv CHANGED
@@ -1,2 +1,2 @@
1
  model_name,status,avg_tps,iwslt2017-en-ar_sacrebleu,mlqa_ar_ar_f1,xquad_ar_f1,timestamp
2
- OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov,Evaluating,,,,,2025-11-13T08:15:11.662244+00:00
 
1
  model_name,status,avg_tps,iwslt2017-en-ar_sacrebleu,mlqa_ar_ar_f1,xquad_ar_f1,timestamp
2
+ OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov,Completed,15.048454525898997,2.572647590716642,36.82539682539683,16.5158371040724,2025-11-13T08:21:26.093702+00:00
leaderboard.json CHANGED
@@ -1,11 +1,11 @@
1
  [
2
  {
3
  "model_name": "OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov",
4
- "status": "Evaluating",
5
- "avg_tps": null,
6
- "iwslt2017-en-ar_sacrebleu": null,
7
- "mlqa_ar_ar_f1": null,
8
- "xquad_ar_f1": null,
9
- "timestamp": "2025-11-13T08:15:11.662244+00:00"
10
  }
11
  ]
 
1
  [
2
  {
3
  "model_name": "OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov",
4
+ "status": "Completed",
5
+ "avg_tps": 15.048454525898997,
6
+ "iwslt2017-en-ar_sacrebleu": 2.572647590716642,
7
+ "mlqa_ar_ar_f1": 36.82539682539683,
8
+ "xquad_ar_f1": 16.5158371040724,
9
+ "timestamp": "2025-11-13T08:21:26.093702+00:00"
10
  }
11
  ]
src/app.py CHANGED
@@ -1,69 +1,84 @@
1
- """Streamlit app to display the NPU Arabic leaderboard."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- from datetime import datetime
7
- from pathlib import Path
8
- from typing import List, Sequence
9
-
10
- import streamlit as st
11
-
12
- _DATA_PATH = Path("leaderboard.json")
13
- _COLUMNS: Sequence[str] = (
14
- "model_name",
15
- "status",
16
- "avg_tps",
17
- "iwslt2017-en-ar_sacrebleu",
18
- "mlqa_ar_ar_f1",
19
- "xquad_ar_f1",
20
- "timestamp",
21
- )
22
-
23
-
24
- def _load_rows() -> List[dict]:
25
- if not _DATA_PATH.exists():
26
- return []
27
- try:
28
- raw = json.loads(_DATA_PATH.read_text(encoding="utf-8"))
29
- except json.JSONDecodeError:
30
- return []
31
-
32
- if isinstance(raw, dict):
33
- data = [raw]
34
- elif isinstance(raw, list):
35
- data = [item for item in raw if isinstance(item, dict)]
36
- else:
37
- data = []
38
-
39
- # Filter to desired columns and sort newest-first.
40
- filtered: List[dict] = []
41
- for row in data:
42
- compact = {key: row.get(key) for key in _COLUMNS}
43
- filtered.append(compact)
44
-
45
- def _sort_key(item: dict) -> tuple:
46
- stamp = item.get("timestamp")
47
- try:
48
- return (datetime.fromisoformat(str(stamp)),)
49
- except Exception:
50
- return (datetime.min,)
51
-
52
- filtered.sort(key=_sort_key, reverse=True)
53
- return filtered
54
-
55
-
56
- st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide")
57
- st.title("Intel® NPU Arabic Leaderboard")
58
-
59
- rows = _load_rows()
60
- if not rows:
61
- st.info("No evaluations uploaded yet. Trigger a run to populate the leaderboard.")
62
- else:
63
- st.write(
64
- "Latest evaluation per model. Add new results by emailing the evaluation endpoint "
65
- "or running the CLI with the Hugging Face publishing flags."
66
- )
67
- st.dataframe(rows, column_config={col: st.column_config.Column(col) for col in _COLUMNS})
68
-
69
- st.caption("Data auto-synced from leaderboard.json produced by the evaluation pipeline.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit app to display the NPU Arabic leaderboard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import List, Sequence
9
+
10
+ import streamlit as st
11
+
12
+ _DATA_PATH = Path("leaderboard.json")
13
+ _COLUMNS: Sequence[str] = (
14
+ "model_name",
15
+ "status",
16
+ "avg_tps",
17
+ "iwslt2017-en-ar_sacrebleu",
18
+ "mlqa_ar_ar_f1",
19
+ "xquad_ar_f1",
20
+ "timestamp",
21
+ )
22
+ _METRIC_COLUMNS: Sequence[str] = tuple(
23
+ col for col in _COLUMNS if col not in {"model_name", "status", "timestamp"}
24
+ )
25
+
26
+
27
+ def _load_rows() -> List[dict]:
28
+ if not _DATA_PATH.exists():
29
+ return []
30
+ try:
31
+ raw = json.loads(_DATA_PATH.read_text(encoding="utf-8"))
32
+ except json.JSONDecodeError:
33
+ return []
34
+
35
+ if isinstance(raw, dict):
36
+ data = [raw]
37
+ elif isinstance(raw, list):
38
+ data = [item for item in raw if isinstance(item, dict)]
39
+ else:
40
+ data = []
41
+
42
+ # Filter to desired columns and sort newest-first.
43
+ filtered: List[dict] = []
44
+ for row in data:
45
+ compact = {key: row.get(key) for key in _COLUMNS}
46
+ status = compact.get("status")
47
+ if status is None:
48
+ status = "Completed"
49
+ compact["status"] = status
50
+ if status != "Completed":
51
+ for metric_col in _METRIC_COLUMNS:
52
+ compact[metric_col] = float("nan")
53
+ filtered.append(compact)
54
+
55
+ def _sort_key(item: dict) -> tuple:
56
+ stamp = item.get("timestamp")
57
+ try:
58
+ parsed = datetime.fromisoformat(str(stamp))
59
+ if parsed.tzinfo is None:
60
+ parsed = parsed.replace(tzinfo=timezone.utc)
61
+ else:
62
+ parsed = parsed.astimezone(timezone.utc)
63
+ except Exception:
64
+ parsed = datetime.min.replace(tzinfo=timezone.utc)
65
+ return (parsed,)
66
+
67
+ filtered.sort(key=_sort_key, reverse=True)
68
+ return filtered
69
+
70
+
71
+ st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide")
72
+ st.title("Intel® NPU Arabic Leaderboard")
73
+
74
+ rows = _load_rows()
75
+ if not rows:
76
+ st.info("No evaluations uploaded yet. Trigger a run to populate the leaderboard.")
77
+ else:
78
+ st.write(
79
+ "Latest evaluation per model. Add new results by emailing the evaluation endpoint "
80
+ "or running the CLI with the Hugging Face publishing flags."
81
+ )
82
+ st.dataframe(rows, column_config={col: st.column_config.Column(col) for col in _COLUMNS})
83
+
84
+ st.caption("Data auto-synced from leaderboard.json produced by the evaluation pipeline.")