"""Streamlit app to display the NPU Arabic leaderboard.""" from __future__ import annotations import json from datetime import datetime, timezone from pathlib import Path from typing import List, Sequence import streamlit as st # Use the aggregated space JSON which includes score and quality_overall # On HuggingFace, this is uploaded as leaderboard.json (aggregated version) _DATA_PATH = Path("leaderboard.json") # Column order for display - score and quality_overall are prominent _COLUMNS: Sequence[str] = ( "model_name", "status", "score", "quality_overall", "avg_tps", "mlqa_ar_ar_f1", "xquad_ar_f1", "iwslt2017-en-ar_sacrebleu", "xlsum_title_ar_rougeL", "xlsum_summary_ar_rougeLsum", "arabic_mmlu_acc", "timestamp", ) _METRIC_COLUMNS: Sequence[str] = tuple( col for col in _COLUMNS if col not in {"model_name", "status", "timestamp"} ) def _load_rows() -> List[dict]: if not _DATA_PATH.exists(): return [] try: raw = json.loads(_DATA_PATH.read_text(encoding="utf-8")) except json.JSONDecodeError: return [] if isinstance(raw, dict): data = [raw] elif isinstance(raw, list): data = [item for item in raw if isinstance(item, dict)] else: data = [] # Filter to desired columns filtered: List[dict] = [] for row in data: compact = {key: row.get(key) for key in _COLUMNS} status = compact.get("status") if status is None: status = "Completed" compact["status"] = status if status != "Completed": for metric_col in _METRIC_COLUMNS: compact[metric_col] = float("nan") filtered.append(compact) # Sort by score (highest first), then by timestamp for ties def _sort_key(item: dict) -> tuple: score = item.get("score") score_val = float(score) if score is not None else -1.0 stamp = item.get("timestamp") try: parsed = datetime.fromisoformat(str(stamp)) if parsed.tzinfo is None: parsed = parsed.replace(tzinfo=timezone.utc) else: parsed = parsed.astimezone(timezone.utc) except Exception: parsed = datetime.min.replace(tzinfo=timezone.utc) return (score_val, parsed) filtered.sort(key=_sort_key, reverse=True) return filtered # Column display names for better readability _COLUMN_LABELS = { "model_name": "Model", "status": "Status", "score": "Score", "quality_overall": "Quality", "avg_tps": "Tokens/sec", "mlqa_ar_ar_f1": "MLQA F1", "xquad_ar_f1": "XQuAD F1", "iwslt2017-en-ar_sacrebleu": "IWSLT BLEU", "xlsum_title_ar_rougeL": "XLSum Title", "xlsum_summary_ar_rougeLsum": "XLSum Summary", "arabic_mmlu_acc": "MMLU Acc", "timestamp": "Last Updated", } st.set_page_config(page_title="Intel NPU Arabic Leaderboard", layout="wide") st.title("Intel NPU Arabic Leaderboard") rows = _load_rows() if not rows: st.info("No evaluations uploaded yet.") else: st.dataframe( rows, column_config={ col: st.column_config.NumberColumn(_COLUMN_LABELS.get(col, col), format="%.2f") if col in _METRIC_COLUMNS else st.column_config.TextColumn(_COLUMN_LABELS.get(col, col)) for col in _COLUMNS }, hide_index=True, ) st.caption("Submit your model for evaluation by emailing: **model:your-hf/model-id**")