| """Dataset utilities for saving and loading test results.""" |
|
|
| from datetime import datetime |
|
|
| from datasets import Dataset, load_dataset |
| from huggingface_hub import HfApi |
|
|
| from utils.model_interface import extract_model_id, get_model_info |
|
|
|
|
| def get_username_from_token(token: str | None) -> str: |
| """ |
| Get username from Hugging Face token using whoami. |
| |
| Args: |
| token: HF token string or None |
| |
| Returns: |
| Username string, or "yjernite" as fallback if token is None or whoami fails |
| """ |
| if token is None: |
| return "yjernite" |
| |
| try: |
| api = HfApi() |
| user_info = api.whoami(token=token) |
| return user_info.get("name", "yjernite") |
| except Exception: |
| return "yjernite" |
|
|
|
|
def get_dataset_repo_id(token: str | None) -> str:
    """
    Build the Hub repository ID of the results dataset for the token's owner.

    Args:
        token: HF token string or None

    Returns:
        Repo ID of the form "{username}/moderation-test-results", where the
        username is whatever get_username_from_token returns for the token
    """
    dataset_name = "moderation-test-results"
    return f"{get_username_from_token(token)}/{dataset_name}"
|
|
|
|
def load_dataset_from_hub(token: str | None) -> tuple[list[dict], Exception | None]:
    """
    Fetch every stored example from the user's results dataset on the Hub.

    Args:
        token: HF token string or None

    Returns:
        Tuple of (examples, error). A FileNotFoundError raised while loading
        is treated as an empty dataset and reported as ([], None); any other
        exception is returned as ([], exception) instead of being raised.
    """
    repo_id = get_dataset_repo_id(token)

    try:
        splits = load_dataset(repo_id, token=token)

        # Only the first split of the loaded dataset is read.
        split_names = list(splits.keys())
        first_split = splits[split_names[0]]

        rows = first_split.to_list()
    except FileNotFoundError:
        return [], None
    except Exception as load_failure:
        # Hand the failure back to the caller rather than raising it.
        return [], load_failure
    return rows, None
|
|
|
|
def format_categories_and_reasoning(parsed: dict) -> str:
    """
    Render the categories of a parsed model response as markdown.

    Args:
        parsed: Parsed JSON dict; its 'categories' entry is iterated and each
            item is read for the optional keys 'category', 'reasoning' and
            'policy_source'

    Returns:
        Markdown with a heading and one bullet per category, or a fixed
        explanatory message when 'categories' is missing or empty
    """
    categories = parsed.get("categories", [])

    if not categories:
        return "*No categories found in response*\n\nThis output expects a valid JSON response, as specified for example in the default prompt.\n\nThe raw response can be seen in the Model Response section below."

    chunks = ["### Categories:\n\n"]
    for entry in categories:
        name = entry.get('category', 'Unknown')
        explanation = entry.get('reasoning', 'No reasoning provided')
        source = entry.get('policy_source', '')

        chunks.append(f"- **Category:** {name}\n")
        chunks.append(f" - **Explanation:** {explanation}\n")
        # The policy source line is only emitted when a source is present.
        if source:
            chunks.append(f" - **Policy Source:** {source}\n")
        chunks.append("\n\n")
    return "".join(chunks)
|
|
|
|
def save_to_dataset(token: str | None, data: dict) -> tuple[bool, str]:
    """
    Append one test result to the user's Hugging Face dataset.

    The existing examples are loaded, `data` is appended, and the complete
    list is pushed back as a private dataset. When the dataset does not
    exist yet, load_dataset_from_hub reports an empty list with no error, so
    the same push creates it with `data` as its only row.

    Args:
        token: HF token string or None
        data: Dict with all test result fields

    Returns:
        Tuple of (success: bool, message: str)
    """
    try:
        repo_id = get_dataset_repo_id(token)

        examples, load_error = load_dataset_from_hub(token)
        if load_error is not None:
            # Do not push when the existing rows could not be read: the
            # pushed list would be missing everything already stored.
            return False, f"Failed to save: {load_error}"

        examples.append(data)
        Dataset.from_list(examples).push_to_hub(repo_id, token=token, private=True)
    except Exception as e:
        # Deliberately no retry that pushes `[data]` alone: a missing dataset
        # is already handled above via the empty-list result, and retrying
        # after any other failure could drop the rows that were just loaded.
        return False, f"Failed to save: {e}"
    return True, f"Saved to {repo_id}"
|
|
|
|
def load_dataset_examples(token: str | None) -> tuple[list[dict], list[str]]:
    """
    Load saved examples and build one dropdown label per example.

    Each label has the form
    "{input preview} - {emoji} - {model name} - #{index}", where the preview
    is the first 40 characters of the input (followed by "..." when longer)
    and the emoji reflects the stored 'policy_violation' value.

    Args:
        token: HF token string or None

    Returns:
        Tuple of (list of example dicts, list of formatted dropdown labels);
        both lists are empty when loading fails or the dataset has no rows
    """
    examples, load_error = load_dataset_from_hub(token)

    # A failed load and an empty dataset both leave nothing to display.
    if load_error is not None or not examples:
        return [], []

    labels = []
    for idx, example in enumerate(examples):
        # `or ""` instead of a .get default: a key stored with a null value
        # comes back as None, which would break len()/slicing below.
        input_text = example.get("input") or ""
        model_selection = example.get("model_selection") or ""
        policy_violation = example.get("policy_violation", -1)

        # 1 -> violation, 0 -> no violation, anything else -> undetermined.
        if policy_violation == 1:
            label_emoji = "❌"
        elif policy_violation == 0:
            label_emoji = "✅"
        else:
            label_emoji = "⚠️"

        # Prefer the model's display name, then its raw ID, then "Unknown".
        model_id = extract_model_id(model_selection)
        model_info = get_model_info(model_id) if model_id else None
        if model_info:
            model_name = model_info.get("name", model_id)
        else:
            model_name = model_id or "Unknown"

        if len(input_text) > 40:
            input_preview = input_text[:40] + "..."
        else:
            input_preview = input_text
        labels.append(f"{input_preview} - {label_emoji} - {model_name} - #{idx}")

    return examples, labels
|
|
|
|