| """ |
| Populate the GuardBench leaderboard from HuggingFace datasets. |
| """ |
|
|
| import json |
| import os |
| import pandas as pd |
| import tempfile |
| from typing import Dict, List, Optional |
| from datetime import datetime |
| import numpy as np |
|
|
| from huggingface_hub import hf_hub_download, HfApi |
| from datasets import load_dataset |
|
|
| from src.display.utils import GUARDBENCH_COLUMN, DISPLAY_COLS, CATEGORIES |
| from src.envs import RESULTS_DATASET_ID, TOKEN, CACHE_PATH |
| from src.leaderboard.processor import leaderboard_to_dataframe |
|
|
|
|
def get_latest_leaderboard(version="v0") -> Optional[Dict]:
    """
    Get the latest leaderboard data from HuggingFace dataset.

    Downloads ``leaderboards/leaderboard_{version}.json`` from the dataset
    repo identified by ``RESULTS_DATASET_ID`` and parses it as JSON.

    Args:
        version: Version tag embedded in the leaderboard file name.

    Returns:
        The parsed JSON content, or ``None`` when the download or the parsing
        fails (the error is printed rather than raised).
    """
    try:
        leaderboard_path = hf_hub_download(
            repo_id=RESULTS_DATASET_ID,
            filename=f"leaderboards/leaderboard_{version}.json",
            repo_type="dataset",
            token=TOKEN,
        )

        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale encoding, which differs between hosts.
        with open(leaderboard_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort loader: callers treat None as "no leaderboard available".
        print(f"Error downloading leaderboard: {e}")
        return None
|
|
|
|
def get_model_entry(model_name: str, mode: str, version="v0") -> Optional[Dict]:
    """
    Get a specific model's entry from the entries folder, uniquely identified by model_name, mode, and version.

    The entry is stored as ``entries/entry_{model}_{mode}_{version}.json``,
    where ``/`` and spaces in the model name and mode are replaced by ``_``
    and the mode is lower-cased.

    Args:
        model_name: Model identifier (may contain ``/`` or spaces).
        mode: Evaluation mode; converted with ``str()`` before sanitising.
        version: Version tag embedded in the entry file name.

    Returns:
        The parsed JSON content, or ``None`` when the download or the parsing
        fails (the error is printed rather than raised).
    """
    try:
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()
        entry_path = hf_hub_download(
            repo_id=RESULTS_DATASET_ID,
            filename=f"entries/entry_{model_name_safe}_{mode_safe}_{version}.json",
            repo_type="dataset",
            token=TOKEN,
        )
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale encoding, which differs between hosts.
        with open(entry_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort loader: callers treat None as "entry not available".
        print(f"Error downloading model entry: {e}")
        return None
|
|
|
|
def get_all_entries(version="v0", mode: Optional[str] = None) -> List[Dict]:
    """
    Get all model entries from the entries folder. If mode is provided, only return entries matching that mode.

    Entry files follow the ``entries/entry_{model}_{mode}_{version}.json``
    naming used by ``get_model_entry``, so the mode slug always sits directly
    before the version suffix.

    Args:
        version: Version tag the entry file names must end with.
        mode: Optional evaluation mode; sanitised the same way as in
            ``get_model_entry`` (``/`` and spaces -> ``_``, lower-cased).

    Returns:
        The parsed JSON content of every entry that could be loaded. Entries
        that fail to download or parse are skipped (error printed). An empty
        list is returned when the repo listing itself fails.
    """
    try:
        api = HfApi(token=TOKEN)
        files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")

        if mode is not None:
            mode_safe = str(mode).replace("/", "_").replace(" ", "_").lower()
            # Anchor the mode to its position right before the version.
            # A plain substring test would also pick up files whose *model
            # name* happens to contain "_{mode}_" but belong to another mode.
            required_suffix = f"_{mode_safe}_{version}.json"
        else:
            required_suffix = f"_{version}.json"

        entry_files = [
            f for f in files
            if f.startswith("entries/") and f.endswith(required_suffix)
        ]

        entries = []
        for entry_file in entry_files:
            # One broken entry must not hide the others, hence the inner try.
            try:
                entry_path = hf_hub_download(
                    repo_id=RESULTS_DATASET_ID,
                    filename=entry_file,
                    repo_type="dataset",
                    token=TOKEN,
                )
                # JSON is UTF-8 by specification; avoid the locale default.
                with open(entry_path, "r", encoding="utf-8") as f:
                    entries.append(json.load(f))
            except Exception as e:
                print(f"Error loading entry {entry_file}: {e}")
        return entries
    except Exception as e:
        print(f"Error listing entries: {e}")
        return []
|
|
|
|
def get_leaderboard_df(version="v0") -> pd.DataFrame:
    """
    Get the leaderboard data as a DataFrame.

    The published leaderboard file is used when it is available. Otherwise
    a leaderboard is assembled from the individual entry files. If neither
    source yields data, an empty frame with the ``DISPLAY_COLS`` columns is
    returned.
    """
    board = get_latest_leaderboard(version)
    if board:
        return leaderboard_to_dataframe(board)

    # No published leaderboard: fall back to the individual entry files.
    recovered = get_all_entries(version)
    if not recovered:
        return pd.DataFrame(columns=DISPLAY_COLS)

    rebuilt = {
        "entries": recovered,
        "last_updated": datetime.now().isoformat(),
        "version": version,
    }
    return leaderboard_to_dataframe(rebuilt)
|
|
|
|
def _category_entry_view(entry: Dict, category: str, version: str) -> Dict:
    """Build the flat, category-specific record for one leaderboard entry."""
    base_defaults = (
        ("model_name", "Unknown Model"),
        ("model_type", "Unknown"),
        ("guard_model_type", "Unknown"),
        ("mode", "Strict"),
        ("submission_date", ""),
        ("version", version),
        ("base_model", ""),
        ("revision", ""),
        ("precision", ""),
        ("weight_type", ""),
    )
    row = {field: entry.get(field, fallback) for field, fallback in base_defaults}

    # NOTE(review): entries without data for this category are kept with the
    # base fields only — confirm this matches the intended nesting.
    if "per_category_metrics" not in entry or category not in entry["per_category_metrics"]:
        return row

    per_test = entry["per_category_metrics"][category]

    # Flatten every "<test type>/<metric>" pair into a "<test type>_<metric>" key.
    for test_name, scores in per_test.items():
        if not isinstance(scores, dict):
            continue
        for score_name, score in scores.items():
            row[f"{test_name}_{score_name}"] = score
            # Binary F1 is additionally exposed under the shorter "_f1" key.
            if score_name == "f1_binary":
                row[f"{test_name}_f1"] = score

    # Gather the non-missing values of each tracked metric over the four
    # known test types, keeping the test-type order.
    tracked = ("f1_binary", "recall_binary", "precision_binary", "accuracy", "sample_count")
    seen = {key: [] for key in tracked}
    for test_name in ("default_prompts", "jailbreaked_prompts", "default_answers", "jailbreaked_answers"):
        if test_name not in per_test or not isinstance(per_test[test_name], dict):
            continue
        scores = per_test[test_name]
        for key in tracked:
            if key in scores and pd.notna(scores[key]):
                seen[key].append(scores[key])

    def mean(values):
        return sum(values) / len(values)

    # The plain averages are only written when at least one value exists.
    for label, key in (
        ("average_f1", "f1_binary"),
        ("average_recall", "recall_binary"),
        ("average_precision", "precision_binary"),
    ):
        if seen[key]:
            row[label] = mean(seen[key])

    # The macro metrics and the sample total are always written, NaN when absent.
    row["macro_accuracy"] = mean(seen["accuracy"]) if seen["accuracy"] else np.nan
    row["macro_recall"] = mean(seen["recall_binary"]) if seen["recall_binary"] else np.nan

    sample_total = sum(seen["sample_count"])
    row["total_evals_count"] = sample_total if sample_total > 0 else np.nan

    return row


def get_category_leaderboard_df(category: str, version="v0") -> pd.DataFrame:
    """
    Get the leaderboard data filtered by a specific category.

    Each entry is reduced to its identifying fields plus the metrics stored
    under ``per_category_metrics[category]``, along with averages computed
    over the four prompt/answer test types. The source data is the published
    leaderboard file, or the individual entry files when that file is
    unavailable. An empty frame with the ``DISPLAY_COLS`` columns is returned
    when there is no data at all.
    """
    board = get_latest_leaderboard(version)

    if not board:
        # No published leaderboard: fall back to the individual entry files.
        recovered = get_all_entries(version)
        if not recovered:
            return pd.DataFrame(columns=DISPLAY_COLS)
        board = {
            "entries": recovered,
            "last_updated": datetime.now().isoformat(),
            "version": version,
        }

    rows = [
        _category_entry_view(item, category, version)
        for item in board.get("entries", [])
    ]

    category_board = {
        "entries": rows,
        "last_updated": board.get("last_updated", datetime.now().isoformat()),
        "version": version,
    }
    return leaderboard_to_dataframe(category_board)
|
|
|
|
def get_detailed_model_data(model_name: str, mode: str, version="v0") -> Dict:
    """
    Get detailed data for a specific model and mode.

    The dedicated entry file is preferred. If it is unavailable, the
    published leaderboard is searched for the first entry with the same
    model name and the same mode (compared case-insensitively). An empty
    dict is returned when nothing matches.
    """
    dedicated = get_model_entry(model_name, mode, version)
    if dedicated:
        return dedicated

    board = get_latest_leaderboard(version)
    if not board:
        return {}

    candidates = (
        item
        for item in board.get("entries", [])
        if item.get("model_name") == model_name
        and str(item.get("mode")).lower() == str(mode).lower()
    )
    return next(candidates, {})
|
|