Spaces:

emresar
/

gurma-dashboard

Running

App Files Community

Emre Sarigöl committed on Mar 1

Commit

ef90a4e

1 Parent(s): ec43ea1

Deploy GURMA.ai Dashboard - 2026-03-01 20:49

Browse files

Files changed (5) hide show

app.py +0 -2
config.py +2 -6
eval_tab.py +23 -106
search.py +0 -9
sota_agent.py +0 -1

app.py CHANGED Viewed

@@ -30,7 +30,6 @@ if IS_HF_SPACE:
         SearchService,
         CompetitorExtractor,
         CompetitorIntelAgent,
-        ResultStorage,
         LLMClient,
         RESEARCH_DIR,
         COMPETITORS,
@@ -47,7 +46,6 @@ else:
         SearchService,
         CompetitorExtractor,
         CompetitorIntelAgent,
-        ResultStorage,
         LLMClient,
         RESEARCH_DIR,
         COMPETITORS,

         SearchService,
         CompetitorExtractor,
         CompetitorIntelAgent,
         LLMClient,
         RESEARCH_DIR,
         COMPETITORS,
         SearchService,
         CompetitorExtractor,
         CompetitorIntelAgent,
         LLMClient,
         RESEARCH_DIR,
         COMPETITORS,

config.py CHANGED Viewed

@@ -36,12 +36,8 @@ if not IS_HF_SPACE:
 # Directories
 # ============================================================
-if IS_HF_SPACE:
-    RESEARCH_DIR = PROJECT_ROOT / "data"
-    DATA_DIR = PROJECT_ROOT / "data"
-else:
-    RESEARCH_DIR = PROJECT_ROOT / "data"
-    DATA_DIR = PROJECT_ROOT / "src" / "dashboard"
 RESEARCH_DIR.mkdir(parents=True, exist_ok=True)
 DATA_DIR.mkdir(parents=True, exist_ok=True)

 # Directories
 # ============================================================
+RESEARCH_DIR = PROJECT_ROOT / "data"
+DATA_DIR = PROJECT_ROOT / "data" if IS_HF_SPACE else PROJECT_ROOT / "src" / "dashboard"
 RESEARCH_DIR.mkdir(parents=True, exist_ok=True)
 DATA_DIR.mkdir(parents=True, exist_ok=True)

eval_tab.py CHANGED Viewed

@@ -225,28 +225,6 @@ def _recompute_specialized_aggregate(bench_data: dict) -> dict | None:
     return patched
-# ============================================================
-# Metric Helpers
-# ============================================================
-def _pct(val: float | None) -> str:
-    if val is None:
-        return "n/a"
-    return f"{val:.1%}"
-def _f4(val: float | None) -> str:
-    if val is None:
-        return "n/a"
-    return f"{val:.4f}"
-def _delta(base: float | None, adapted: float | None) -> float | None:
-    if base is None or adapted is None:
-        return None
-    return adapted - base
 # ============================================================
 # Inference Backends
 # ============================================================
@@ -632,56 +610,6 @@ def _render_category_chart(agg: dict, has_adapted: bool):
     st.plotly_chart(fig, width="stretch")
-# def _render_radar_chart(agg: dict, has_adapted: bool):
-#     """Multi-metric radar chart comparing base vs adapted."""
-#     b = agg.get("base", {})
-#     a = agg.get("adapted", {}) if has_adapted else {}
-#
-#     dims = [
-#         ("ROUGE-1", "rouge1_f1"),
-#         ("ROUGE-L", "rougeL_f1"),
-#         ("BLEU-4", "bleu"),
-#         ("Term Recall", "clinical_term_recall"),
-#         ("Num Recall", "numeric_recall"),
-#         ("Safety", "safety_awareness_pct"),
-#         ("Structure", "structured_pct"),
-#     ]
-#
-#     labels = [d[0] for d in dims]
-#     base_vals = [b.get(d[1]) or 0 for d in dims]
-#
-#     fig = go.Figure()
-#
-#     fig.add_trace(go.Scatterpolar(
-#         r=base_vals + [base_vals[0]],
-#         theta=labels + [labels[0]],
-#         fill="toself",
-#         name="Base",
-#         line_color=C_BASE,
-#         opacity=0.6,
-#     ))
-#
-#     if has_adapted:
-#         adapted_vals = [a.get(d[1]) or 0 for d in dims]
-#         fig.add_trace(go.Scatterpolar(
-#             r=adapted_vals + [adapted_vals[0]],
-#             theta=labels + [labels[0]],
-#             fill="toself",
-#             name="Adapted",
-#             line_color=C_ADAPTED,
-#             opacity=0.6,
-#         ))
-#
-#     fig.update_layout(
-#         title="Quality Profile",
-#         polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
-#         height=340,
-#         margin=dict(t=40, b=20, l=60, r=60),
-#         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-#     )
-#     st.plotly_chart(fig, width="stretch")
 def _render_prediction_accuracy(agg: dict, has_adapted: bool,
                                 examples: list[dict]):
     """Show predictive accuracy metrics for prediction_* categories.
@@ -1685,12 +1613,30 @@ def render_eval_tab():
     examples = bench_data.get("per_example", [])
     has_adapted = "adapted" in agg
-    # --- Baseline source selector (when viewing adapted runs) ---
-    baseline_model_label = bench_data.get("model", "?")
     if has_adapted:
-        # Find baseline comparison sources:
-        # 1. Base-only runs from any model with matching sample count
-        # 2. Adapted runs from different models (for model-vs-model comparison)
         n_samples = len(examples)
         own_model = bench_data.get("model", "")
         own_key = selected
@@ -1715,7 +1661,6 @@ def render_eval_tab():
                 ext_labels.append(f"{model}{tag}  ({ts})")
             options = [builtin_label] + ext_labels
-            # Default to first external baseline (most recent) if available
             default_idx = 1 if len(options) > 1 else 0
             bl_sel = st.selectbox(
                 "Baseline source",
@@ -1730,17 +1675,13 @@ def render_eval_tab():
                 _, ext_data = bench_map[ext_key]
                 ext_agg = ext_data.get("aggregate", {})
                 ext_examples = ext_data.get("per_example", [])
-                baseline_model_label = ext_data.get("model", "?")
-                # Use the external run's adapted metrics as baseline if available,
-                # otherwise fall back to its base metrics
                 agg = dict(agg)
                 if "adapted" in ext_agg:
                     agg["base"] = ext_agg["adapted"]
                 else:
                     agg["base"] = ext_agg.get("base", agg.get("base", {}))
-                # Swap per-example base metrics & responses
                 if len(ext_examples) == len(examples):
                     examples = [dict(ex) for ex in examples]
                     for i, ext_ex in enumerate(ext_examples):
@@ -1749,30 +1690,6 @@ def render_eval_tab():
                         examples[i]["base_response"] = ext_ex.get(
                             "base_response", examples[i].get("base_response", ""))
-    # Info bar
-    col1, col2, col3, col4 = st.columns(4)
-    with col1:
-        st.caption(f"**Model:** `{bench_data.get('model', '?')}`")
-    with col2:
-        adapter = _resolve_adapter(bench_data)
-        adapter_label = Path(adapter).name if adapter else "none"
-        if _is_routed(bench_data):
-            n_routes = len(bench_data.get("routing", {}))
-            adapter_label = f"routed ({n_routes} specialized + general)"
-        st.caption(f"**Adapter:** `{adapter_label}`")
-    with col3:
-        st.caption(f"**Samples:** `{len(examples)}`")
-    with col4:
-        st.caption(f"**Baseline:** `{baseline_model_label}`")
-    # --- About This Model (collapsible) ---
-    _render_model_info(bench_data, agg, has_adapted, len(examples))
-    # --- Baseline Comparison Table ---
-    if has_adapted:
-        _render_baseline_comparison(bench_data, bench_map, all_keys,
-                                    agg_override=agg)
     st.divider()
     # --- Metric Cards ---

     return patched
 # ============================================================
 # Inference Backends
 # ============================================================
     st.plotly_chart(fig, width="stretch")
 def _render_prediction_accuracy(agg: dict, has_adapted: bool,
                                 examples: list[dict]):
     """Show predictive accuracy metrics for prediction_* categories.
     examples = bench_data.get("per_example", [])
     has_adapted = "adapted" in agg
+    # Info bar
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.caption(f"**Model:** `{bench_data.get('model', '?')}`")
+    with col2:
+        adapter = _resolve_adapter(bench_data)
+        adapter_label = Path(adapter).name if adapter else "none"
+        if _is_routed(bench_data):
+            n_routes = len(bench_data.get("routing", {}))
+            adapter_label = f"routed ({n_routes} specialized + general)"
+        st.caption(f"**Adapter:** `{adapter_label}`")
+    with col3:
+        st.caption(f"**Samples:** `{len(examples)}`")
+    # --- About This Model (collapsible) ---
+    _render_model_info(bench_data, agg, has_adapted, len(examples))
+    # --- Baseline Comparison Table ---
+    if has_adapted:
+        _render_baseline_comparison(bench_data, bench_map, all_keys,
+                                    agg_override=agg)
+    # --- Baseline source selector (swaps base metrics for sections below) ---
     if has_adapted:
         n_samples = len(examples)
         own_model = bench_data.get("model", "")
         own_key = selected
                 ext_labels.append(f"{model}{tag}  ({ts})")
             options = [builtin_label] + ext_labels
             default_idx = 1 if len(options) > 1 else 0
             bl_sel = st.selectbox(
                 "Baseline source",
                 _, ext_data = bench_map[ext_key]
                 ext_agg = ext_data.get("aggregate", {})
                 ext_examples = ext_data.get("per_example", [])
                 agg = dict(agg)
                 if "adapted" in ext_agg:
                     agg["base"] = ext_agg["adapted"]
                 else:
                     agg["base"] = ext_agg.get("base", agg.get("base", {}))
                 if len(ext_examples) == len(examples):
                     examples = [dict(ex) for ex in examples]
                     for i, ext_ex in enumerate(ext_examples):
                         examples[i]["base_response"] = ext_ex.get(
                             "base_response", examples[i].get("base_response", ""))
     st.divider()
     # --- Metric Cards ---

search.py CHANGED Viewed

@@ -14,8 +14,6 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass, asdict
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Protocol
 try:
     from .config import RESEARCH_DIR, SERPAPI_KEY, BRAVE_API_KEY
 except ImportError:
@@ -26,13 +24,6 @@ except ImportError:
 # Data Types
 # ============================================================
-class SearchResult(Protocol):
-    title: str
-    url: str
-    snippet: str
-    source: str
 @dataclass
 class WebSearchResult:
     title: str

 from dataclasses import dataclass, asdict
 from datetime import datetime, timedelta
 from pathlib import Path
 try:
     from .config import RESEARCH_DIR, SERPAPI_KEY, BRAVE_API_KEY
 except ImportError:
 # Data Types
 # ============================================================
 @dataclass
 class WebSearchResult:
     title: str

sota_agent.py CHANGED Viewed

@@ -22,7 +22,6 @@ from __future__ import annotations
 import json
 import re
-from dataclasses import dataclass, field, asdict
 from datetime import datetime
 from pathlib import Path
 from typing import Optional

 import json
 import re
 from datetime import datetime
 from pathlib import Path
 from typing import Optional