Spaces:

KFUPM-JRCAI
/

intel_npu_arabic

Running

App Files Community

davidlf-hp committed on Nov 13, 2025

Commit

dd971cb

verified ·

1 Parent(s): 4669125

Update leaderboard for Mistral-7B-Instruct-v0.3-int4-cw-ov (NPU)

Browse files

Files changed (3) hide show

leaderboard.csv +1 -1
leaderboard.json +6 -6
src/app.py +84 -69

leaderboard.csv CHANGED Viewed

	@@ -1,2 +1,2 @@
1	model_name,status,avg_tps,iwslt2017-en-ar_sacrebleu,mlqa_ar_ar_f1,xquad_ar_f1,timestamp
2	- OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov,Evaluating,,,,,2025-11-13T08:15:11.662244+00:00


1	model_name,status,avg_tps,iwslt2017-en-ar_sacrebleu,mlqa_ar_ar_f1,xquad_ar_f1,timestamp
2	+ OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov,Completed,15.048454525898997,2.572647590716642,36.82539682539683,16.5158371040724,2025-11-13T08:21:26.093702+00:00

leaderboard.json CHANGED Viewed

@@ -1,11 +1,11 @@
 [
   {
     "model_name": "OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov",
-    "status": "Evaluating",
-    "avg_tps": null,
-    "iwslt2017-en-ar_sacrebleu": null,
-    "mlqa_ar_ar_f1": null,
-    "xquad_ar_f1": null,
-    "timestamp": "2025-11-13T08:15:11.662244+00:00"
   }
 ]

 [
   {
     "model_name": "OpenVINO/Mistral-7B-Instruct-v0.3-int4-cw-ov",
+    "status": "Completed",
+    "avg_tps": 15.048454525898997,
+    "iwslt2017-en-ar_sacrebleu": 2.572647590716642,
+    "mlqa_ar_ar_f1": 36.82539682539683,
+    "xquad_ar_f1": 16.5158371040724,
+    "timestamp": "2025-11-13T08:21:26.093702+00:00"
   }
 ]

src/app.py CHANGED Viewed

@@ -1,69 +1,84 @@
-"""Streamlit app to display the NPU Arabic leaderboard."""
-from __future__ import annotations
-import json
-from datetime import datetime
-from pathlib import Path
-from typing import List, Sequence
-import streamlit as st
-_DATA_PATH = Path("leaderboard.json")
-_COLUMNS: Sequence[str] = (
-    "model_name",
-    "status",
-    "avg_tps",
-    "iwslt2017-en-ar_sacrebleu",
-    "mlqa_ar_ar_f1",
-    "xquad_ar_f1",
-    "timestamp",
-)
-def _load_rows() -> List[dict]:
-    if not _DATA_PATH.exists():
-        return []
-    try:
-        raw = json.loads(_DATA_PATH.read_text(encoding="utf-8"))
-    except json.JSONDecodeError:
-        return []
-    if isinstance(raw, dict):
-        data = [raw]
-    elif isinstance(raw, list):
-        data = [item for item in raw if isinstance(item, dict)]
-    else:
-        data = []
-    # Filter to desired columns and sort newest-first.
-    filtered: List[dict] = []
-    for row in data:
-        compact = {key: row.get(key) for key in _COLUMNS}
-        filtered.append(compact)
-    def _sort_key(item: dict) -> tuple:
-        stamp = item.get("timestamp")
-        try:
-            return (datetime.fromisoformat(str(stamp)),)
-        except Exception:
-            return (datetime.min,)
-    filtered.sort(key=_sort_key, reverse=True)
-    return filtered
-st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide")
-st.title("Intel® NPU Arabic Leaderboard")
-rows = _load_rows()
-if not rows:
-    st.info("No evaluations uploaded yet. Trigger a run to populate the leaderboard.")
-else:
-    st.write(
-        "Latest evaluation per model. Add new results by emailing the evaluation endpoint "
-        "or running the CLI with the Hugging Face publishing flags."
-    )
-    st.dataframe(rows, column_config={col: st.column_config.Column(col) for col in _COLUMNS})
-st.caption("Data auto-synced from leaderboard.json produced by the evaluation pipeline.")

+"""Streamlit app to display the NPU Arabic leaderboard."""
+from __future__ import annotations
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import List, Sequence
+import streamlit as st
+_DATA_PATH = Path("leaderboard.json")
+_COLUMNS: Sequence[str] = (
+    "model_name",
+    "status",
+    "avg_tps",
+    "iwslt2017-en-ar_sacrebleu",
+    "mlqa_ar_ar_f1",
+    "xquad_ar_f1",
+    "timestamp",
+)
+_METRIC_COLUMNS: Sequence[str] = tuple(
+    col for col in _COLUMNS if col not in {"model_name", "status", "timestamp"}
+)
+def _load_rows() -> List[dict]:
+    if not _DATA_PATH.exists():
+        return []
+    try:
+        raw = json.loads(_DATA_PATH.read_text(encoding="utf-8"))
+    except json.JSONDecodeError:
+        return []
+    if isinstance(raw, dict):
+        data = [raw]
+    elif isinstance(raw, list):
+        data = [item for item in raw if isinstance(item, dict)]
+    else:
+        data = []
+    # Filter to desired columns and sort newest-first.
+    filtered: List[dict] = []
+    for row in data:
+        compact = {key: row.get(key) for key in _COLUMNS}
+        status = compact.get("status")
+        if status is None:
+            status = "Completed"
+            compact["status"] = status
+        if status != "Completed":
+            for metric_col in _METRIC_COLUMNS:
+                compact[metric_col] = float("nan")
+        filtered.append(compact)
+    def _sort_key(item: dict) -> tuple:
+        stamp = item.get("timestamp")
+        try:
+            parsed = datetime.fromisoformat(str(stamp))
+            if parsed.tzinfo is None:
+                parsed = parsed.replace(tzinfo=timezone.utc)
+            else:
+                parsed = parsed.astimezone(timezone.utc)
+        except Exception:
+            parsed = datetime.min.replace(tzinfo=timezone.utc)
+        return (parsed,)
+    filtered.sort(key=_sort_key, reverse=True)
+    return filtered
+st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide")
+st.title("Intel® NPU Arabic Leaderboard")
+rows = _load_rows()
+if not rows:
+    st.info("No evaluations uploaded yet. Trigger a run to populate the leaderboard.")
+else:
+    st.write(
+        "Latest evaluation per model. Add new results by emailing the evaluation endpoint "
+        "or running the CLI with the Hugging Face publishing flags."
+    )
+    st.dataframe(rows, column_config={col: st.column_config.Column(col) for col in _COLUMNS})
+st.caption("Data auto-synced from leaderboard.json produced by the evaluation pipeline.")