Update app.py
app.py CHANGED
@@ -1,10 +1,11 @@
 """
 MediaBiasGroup — Model Comparator (Gradio Space)
-- Discovers …
+- Discovers org models by pipeline_tag
 - Lets users pick a task, select multiple models, and compare outputs on the same input
-- …
+- Uses a full local snapshot for robustness (avoids NoneType path issues)
 - Falls back to base_model's tokenizer if a fine-tuned repo lacks tokenizer files
-- Canonicalizes label names across models (…
+- Canonicalizes label names across models (LABEL_0 -> neutral, etc.)
+- "Select all" button to quickly select all models for the chosen task
 
 Requirements (see requirements.txt):
     gradio>=4.31.4
@@ -40,7 +41,7 @@ HF_TOKEN = (
 
 api = HfApi(token=HF_TOKEN)
 
-# Canonical label mapping (…
+# Canonical label mapping (extend if needed)
 CANON = {
     "LABEL_0": "neutral",
     "LABEL_1": "lexical_bias",
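For readers skimming the diff: CANON is what lets differently named labels from separate checkpoints share columns later in predict(). The body of _canonicalize is not shown in this commit (only its signature appears in a hunk header below), so the following is a minimal sketch of how such a mapping is typically applied; the implementation details are an assumption, not app.py's actual code:

    def _canonicalize(scores: Dict[str, float]) -> Dict[str, float]:
        # Route raw labels (e.g. "LABEL_0") through CANON; if two raw labels
        # collapse to the same canonical name, keep the higher score.
        out: Dict[str, float] = {}
        for label, score in scores.items():
            canon = CANON.get(label, label)
            out[canon] = max(float(score), out.get(canon, 0.0))
        return out

    # e.g. {"LABEL_0": 0.91, "LABEL_1": 0.09} -> {"neutral": 0.91, "lexical_bias": 0.09}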
@@ -54,9 +55,8 @@ CANON = {
     "lexical_bias": "lexical_bias",
 }
 
-
 # =========================
-# Discovery & …
+# Discovery & card helpers
 # =========================
 @lru_cache(maxsize=1)
 def list_org_models() -> List[Any]:
@@ -68,15 +68,12 @@ def discover_tasks_and_models() -> Tuple[List[str], Dict[str, List[str]]]:
     infos = list_org_models()
     task2models: Dict[str, List[str]] = {}
     for info in infos:
-        # Prefer the explicit pipeline_tag
         task = getattr(info, "pipeline_tag", None)
-
-        # Heuristic fallback via tags if pipeline_tag is missing
         if not task:
+            # Heuristic fallback via tags if pipeline_tag is missing
             tags = set(getattr(info, "tags", []) or [])
             if "text-classification" in tags:
                 task = "text-classification"
-
         if task:
             task2models.setdefault(task, []).append(info.modelId)
 
@@ -87,45 +84,16 @@ def discover_tasks_and_models() -> Tuple[List[str], Dict[str, List[str]]]:
 
 
 @lru_cache(maxsize=256)
-def get_card_data(repo_id: str) -> …
+def get_card_data(repo_id: str) -> Dict[str, Any]:
     try:
         info = api.model_info(repo_id, token=HF_TOKEN)
         data = getattr(info, "cardData", None)
-        …
+        if hasattr(data, "data"):  # ModelCardData -> dict
+            return dict(data.data)
+        return data or {}
     except Exception:
         return {}
 
-def extract_model_index_metrics(repo_id: str) -> pd.DataFrame:
-    data = get_card_data(repo_id)
-    rows: List[Dict[str, Any]] = []
-    if not data:
-        return pd.DataFrame(columns=["model", "dataset", "task", "metric", "value", "repo_id"])
-
-    mi = data.get("model-index") or data.get("model_index") or []
-    for entry in mi:
-        name = entry.get("name", repo_id)
-        for res in entry.get("results", []):
-            task = res.get("task", {})
-            task_type = task.get("type", task.get("name", ""))
-            dset = res.get("dataset", {})
-            dname = dset.get("name", dset.get("type", ""))
-            for m in res.get("metrics", []):
-                rows.append(
-                    {
-                        "model": name,
-                        "dataset": dname,
-                        "task": task_type,
-                        "metric": m.get("name", ""),
-                        "value": m.get("value", None),
-                        "repo_id": repo_id,
-                    }
-                )
-
-    if not rows:
-        return pd.DataFrame(columns=["model", "dataset", "task", "metric", "value", "repo_id"])
-    return pd.DataFrame(rows)
-
-
 # =========================
 # Tokenizer fallback logic
 # =========================
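The new hasattr(data, "data") branch exists because, depending on the huggingface_hub version, model_info(...).cardData may be either a plain dict or a ModelCardData object whose underlying mapping lives in its .data attribute (a hedged reading of the inline comment in the hunk). Illustrated standalone, with a placeholder repo id:

    info = api.model_info("MediaBiasGroup/example-model")  # placeholder repo id
    data = getattr(info, "cardData", None)
    # Normalize both cases to a plain dict, as the updated get_card_data does:
    card = dict(data.data) if hasattr(data, "data") else (data or {})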
@@ -134,11 +102,14 @@ def _has_tokenizer_files(repo_id: str) -> bool:
         files = set(list_repo_files(repo_id, repo_type="model", token=HF_TOKEN))
     except Exception:
         return False
-    …
-    …
-    …
-    …
-    …
+
+    if "tokenizer.json" in files:
+        return True
+    if {"vocab.json", "merges.txt"}.issubset(files):
+        return True
+    if "spiece.model" in files:
+        return True
+    return False
 
 
 def _base_model_from_card(repo_id: str) -> str | None:
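The three checks added here cover the common tokenizer families: tokenizer.json for fast tokenizers, vocab.json plus merges.txt for byte-level BPE models (GPT-2/RoBERTa style), and spiece.model for SentencePiece models (T5/ALBERT style). A quick smoke test of the fallback chain, using a placeholder repo id:

    rid = "MediaBiasGroup/example-finetune"  # placeholder repo id
    print(_has_tokenizer_files(rid))  # False when the repo ships no tokenizer files
    print(_tokenizer_source(rid))     # then resolves to the card's base_model, else rid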
@@ -150,13 +121,12 @@ def _base_model_from_card(repo_id: str) -> str | None:
 
 
 def _tokenizer_source(repo_id: str) -> str:
-    # …
+    # Prefer repo tokenizer; else fall back to base_model; else repo_id
     if _has_tokenizer_files(repo_id):
         return repo_id
     base = _base_model_from_card(repo_id)
     return base or repo_id
 
-
 # =========================
 # Pipelines & prediction
 # =========================
@@ -170,27 +140,20 @@ def get_pipeline(repo_id: str, task: str):
 
     tok_src = _tokenizer_source(repo_id)
 
-    # …
+    # Robust path: download a full local snapshot (no restrictive allow_patterns)
     try:
         local_dir = snapshot_download(
-            repo_id,
-            …
-            …
-            …
-                "*.bin",
-                "tokenizer.json",
-                "vocab.json",
-                "merges.txt",
-                "tokenizer_config.json",
-                "special_tokens_map.json",
-            ],
-            token=HF_TOKEN,  # fine if None for public repos
+            repo_id=repo_id,
+            repo_type="model",
+            token=HF_TOKEN,  # works for public and gated/private (if token has access)
+            local_files_only=False,
         )
-
-        …
-        …
+        if not isinstance(local_dir, str) or not local_dir:
+            # extremely defensive: fall back to remote id
+            local_dir = repo_id
+    except Exception:
+        local_dir = repo_id  # fall back to remote if snapshot fails
 
-    # 2) Build pipeline; still pass tokenizer source (repo or base_model)
     if task == "text-classification":
         pipe = pipeline(
             task,
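The design change in this hunk: the old allow_patterns list pinned *.bin weights, so a repo publishing weights in another format could apparently end up with an unusable snapshot (the new docstring bullet cites "NoneType path issues"); the new code downloads the full snapshot and degrades to loading straight from the Hub. The same pattern isolated as a standalone helper; the name _local_or_remote is not from app.py, this is a sketch:

    from huggingface_hub import snapshot_download

    def _local_or_remote(repo_id: str, token: str | None = None) -> str:
        # Prefer a local snapshot; hand back the repo id itself when the
        # download fails or returns something unusable, so pipeline() can
        # still stream the model from the Hub.
        try:
            path = snapshot_download(repo_id=repo_id, repo_type="model", token=token)
            return path if isinstance(path, str) and path else repo_id
        except Exception:
            return repo_id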
@@ -201,6 +164,7 @@ def get_pipeline(repo_id: str, task: str):
             token=HF_TOKEN,
         )
     else:
+        # Add more tasks if you release them later
         pipe = pipeline(task, model=local_dir, tokenizer=tok_src, token=HF_TOKEN)
 
     PIPE_CACHE[key] = pipe
@@ -215,11 +179,11 @@ def _canonicalize(scores: Dict[str, float]) -> Dict[str, float]:
     return out
 
 
-def predict(models: List[str], task: str, text: str) -> Tuple[str, pd.DataFrame…
+def predict(models: List[str], task: str, text: str) -> Tuple[str, pd.DataFrame]:
     if not text.strip():
-        return "Please enter some text.", pd.DataFrame()…
+        return "Please enter some text.", pd.DataFrame()
     if not models:
-        return f"Please select 1–{MAX_MODELS} models.", pd.DataFrame()…
+        return f"Please select 1–{MAX_MODELS} models.", pd.DataFrame()
     if len(models) > MAX_MODELS:
         models = models[:MAX_MODELS]
 
@@ -233,13 +197,11 @@ def predict(models: List[str], task: str, text: str) -> Tuple[str, pd.DataFrame,
             pipe = get_pipeline(rid, task)
             out = pipe(text)
 
-            # text-classification pipeline: …
-            # …
-            scores: Dict[str, float]
+            # text-classification pipeline typical shapes:
+            # [[{label, score}, ...]] or [{label, score}, ...]
             if isinstance(out, list) and out and isinstance(out[0], list):
                 scores = {d["label"]: float(d["score"]) for d in out[0]}
             elif isinstance(out, list) and out and isinstance(out[0], dict) and "label" in out[0]:
-                # some classifiers return flat list
                 scores = {d["label"]: float(d["score"]) for d in out}
             else:
                 scores = {}
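The two shapes handled here match how transformers' text-classification pipelines report scores: a flat [{label, score}, ...] list for a single input, and a nested [[{...}, ...]] when scores come grouped per input (for example with the deprecated return_all_scores=True). The same normalization as a self-contained helper; the name is hypothetical, app.py inlines this logic in predict:

    from typing import Any, Dict

    def _scores_from_output(out: Any) -> Dict[str, float]:
        # Normalize both pipeline output shapes into one {label: score} dict.
        if isinstance(out, list) and out and isinstance(out[0], list):
            return {d["label"]: float(d["score"]) for d in out[0]}  # [[{...}, ...]]
        if isinstance(out, list) and out and isinstance(out[0], dict) and "label" in out[0]:
            return {d["label"]: float(d["score"]) for d in out}     # [{...}, ...]
        return {}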
@@ -269,22 +231,11 @@ def predict(models: List[str], task: str, text: str) -> Tuple[str, pd.DataFrame,
 
     pred_df = pd.DataFrame(table_rows, columns=["model"] + label_cols + ["predicted_label"])
 
-    # Collect reported metrics if present
-    metrics_frames = []
-    for rid in models:
-        df = extract_model_index_metrics(rid)
-        if not df.empty:
-            df = df.copy()
-            df.insert(0, "repo_id", rid)
-            metrics_frames.append(df)
-    metrics_df = pd.concat(metrics_frames, ignore_index=True) if metrics_frames else pd.DataFrame()
-
     msg = f"✓ Done. Compared {len(models)} model(s) on task: `{task}`"
     if errors:
         msg += "\n\n**Errors**:\n" + "\n".join(f"- {k}: {v}" for k, v in errors.items())
 
-    return msg, pred_df…
-
+    return msg, pred_df
 
 # =========================
 # UI wiring
@@ -300,12 +251,16 @@ def on_task_change(selected_task: str) -> List[str]:
     return task2models.get(selected_task, [])
 
 
+def select_all_models(selected_task: str) -> List[str]:
+    _, task2models = discover_tasks_and_models()
+    return task2models.get(selected_task, [])
+
+
 def build_ui() -> gr.Blocks:
     with gr.Blocks(fill_height=True, title="MediaBiasGroup — Model Comparator") as demo:
         gr.Markdown(
             "# MediaBiasGroup — Model Comparator\n"
-            "Select a **task**, choose multiple models, enter text, and compare outputs side-by-side.…
-            "If models provide a `model-index` in their cards, reported metrics appear below."
+            "Select a **task**, choose multiple models, enter text, and compare outputs side-by-side."
         )
 
         with gr.Row():
@@ -324,12 +279,12 @@ def build_ui() -> gr.Blocks:
                     multiselect=True,
                     label="Models",
                 )
-
+                select_all_btn = gr.Button("Select all")
                 gr.Markdown(f"**Organization:** `{ORG}` \n**Max models per run:** {MAX_MODELS}")
 
             with gr.Column(scale=2):
                 text_in = gr.Textbox(lines=4, placeholder="Paste a sentence…", label="Input text")
-                …
+                gr.Examples(
                     examples=[
                         ["The bill passed the House on Tuesday in a 220–210 vote."],  # unbiased/factual
                         ["Lawmakers shamelessly rammed the bill through the House on Tuesday."],  # biased/loaded
@@ -342,23 +297,18 @@ def build_ui() -> gr.Blocks:
         run_btn = gr.Button("Compare")
         status = gr.Markdown("")
 
-        …
-        …
-        …
-        pred_df = gr.Dataframe(interactive=False)
-        with gr.Column():
-            gr.Markdown("### Reported metrics (from model cards)")
-            metrics_df = gr.Dataframe(interactive=False)
+        # Single wide results table
+        gr.Markdown("### Predictions")
+        pred_df = gr.Dataframe(interactive=False)
 
         # Events
         task_dd.change(fn=on_task_change, inputs=[task_dd], outputs=[model_ms])
-
-        run_btn.click(fn=predict, inputs=[model_ms, task_dd, text_in], outputs=[status, pred_df…
+        select_all_btn.click(fn=select_all_models, inputs=[task_dd], outputs=[model_ms])
+        run_btn.click(fn=predict, inputs=[model_ms, task_dd, text_in], outputs=[status, pred_df])
 
     return demo
 
 
 if __name__ == "__main__":
     demo = build_ui()
-    # queue() allows concurrent requests; adjust concurrency per Space hardware
     demo.queue(max_size=16).launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))