import pandas as pd import os # PII Detection Categories PII_CATEGORIES = { "Overall": ["Overall Accuracy"], "Entity Types": ["Names", "Addresses", "Phone Numbers", "SSN", "Medical IDs", "Financial Info"], "Document Types": ["Healthcare", "Financial", "Government", "Legal", "Personal"], "Performance": ["Precision", "Recall", "F1 Score"], "Efficiency": ["Processing Time", "Cost per Document"] } # Model type definitions MODEL_TYPES = { "Proprietary": "🔒", "Open Source": "🔓" } def load_data(): """Load and preprocess the PII detection evaluation data from CSV file.""" # Load from CSV file csv_path = "results/pii_detection_results.csv" if not os.path.exists(csv_path): raise FileNotFoundError(f"Results file not found: {csv_path}. Please ensure the CSV file exists in the results folder.") df = pd.read_csv(csv_path) # Clean and prepare data df = df.fillna('') # Round numeric columns for better display numeric_cols = [ 'Overall Accuracy', 'Precision', 'Recall', 'F1 Score', 'Over-redaction Rate', 'Processing Time (s)', 'Cost per Document ($)', 'Healthcare Accuracy', 'Financial Accuracy', 'Government Accuracy', 'Legal Accuracy', 'Personal Accuracy' ] for col in numeric_cols: if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce').round(3) return df # Nutrient brand color palette COLORS = { # Light mode colors "white": "#FFFFFF", "black": "#000000", "disc_pink": "#DE9DCC", "code_coral": "#F25E45", "data_green": "#6EB579", "digital_pollen": "#F0C968", # Primary yellow accent "warm_black": "#1A1414", "off_white": "#EFEBE7", "pixel_mist": "#E2DBD9", "soft_grey": "#C2B8AE", "warm_grey": "#67594B", # Dark mode colors "disc_pink_dm": "#4F2B45", "code_coral_dm": "#672D23", "data_green_dm": "#2B412F", "digital_pollen_dm": "#5B481A", } # Header content with PII detection focus HEADER_CONTENT = f"""
Comprehensive benchmark for language models' performance in detecting and redacting personally identifiable information (PII) across various document types and scenarios. "How well do LLMs protect sensitive information?"
Our evaluation methodology assesses language models' capabilities in detecting and handling personally identifiable information (PII) across realistic document scenarios. Each model is tested on synthetic documents containing embedded PII entities across 5 document categories.
Powered by Nutrient