Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Configuration Models for Perturbation Testing | |
| Provides Pydantic models for configuring: | |
| - Jailbreak testing | |
| - Counterfactual bias testing | |
| - Execution settings | |
| - Overall perturbation test configuration | |
| """ | |
| from typing import List, Optional, Literal, Dict, Any | |
| from pydantic import BaseModel, Field | |
class ExecutionConfig(BaseModel):
    """Settings that bound concurrent test execution.

    Each field carries inclusive ``ge``/``le`` limits that Pydantic enforces
    at validation time; an omitted field falls back to its default.
    """

    # Allowed range: 1-20 workers.
    max_workers: int = Field(
        5, description="Maximum concurrent workers", ge=1, le=20
    )
    # Allowed range: 1-10 attempts.
    max_retries: int = Field(
        3, description="Maximum retry attempts", ge=1, le=10
    )
    # Allowed range: 0.1-10.0 seconds.
    base_delay: float = Field(
        1.0,
        description="Base delay for exponential backoff (seconds)",
        ge=0.1,
        le=10.0,
    )
    # Allowed range: 1.0-300.0 seconds.
    max_delay: float = Field(
        60.0,
        description="Maximum delay between retries (seconds)",
        ge=1.0,
        le=300.0,
    )
    # Allowed range: 10-500 requests per minute.
    rate_limit_per_minute: int = Field(
        60, description="Maximum requests per minute", ge=10, le=500
    )
class JailbreakTestConfig(BaseModel):
    """Options controlling the jailbreak portion of a perturbation test."""

    # Master switch for this test type.
    enabled: bool = Field(
        True, description="Whether jailbreak testing is enabled"
    )
    # Allowed range: 1-50 techniques.
    num_techniques: int = Field(
        10,
        description="Number of jailbreak techniques to test per relation",
        ge=1,
        le=50,
    )
    # None means no category filter is supplied.
    technique_categories: Optional[List[str]] = Field(
        None,
        description="Filter techniques by category: ['DAN', 'Omega', 'Developer Mode', etc.]",
    )
    # None means no seed is supplied.
    random_seed: Optional[int] = Field(
        None, description="Random seed for reproducible technique selection"
    )
    prompt_source: str = Field(
        "standard",
        description="Prompt source: 'standard' or name of custom uploaded prompts",
    )
    # Free-form prompt records; their schema is not constrained by this model.
    custom_prompts: Optional[List[Dict[str, Any]]] = Field(
        None,
        description="Custom jailbreak prompts to use instead of dataset",
    )
class DemographicConfig(BaseModel):
    """One demographic group, described by a gender and a race/ethnicity.

    Both fields are required free-form strings.
    """

    gender: str = Field(
        ..., description="Gender: male, female, non-binary, etc."
    )
    race: str = Field(
        ..., description="Race/ethnicity: White, Black, Asian, Hispanic, etc."
    )

    def __str__(self) -> str:
        """Return the group as ``"<gender> <race>"``."""
        return "{} {}".format(self.gender, self.race)
class CounterfactualBiasTestConfig(BaseModel):
    """Options controlling the counterfactual bias portion of a test."""

    # Master switch for this test type.
    enabled: bool = Field(
        True, description="Whether counterfactual bias testing is enabled"
    )
    # Default groups, in order: male/female White, then male/female Black.
    demographics: List[DemographicConfig] = Field(
        default=[
            DemographicConfig(gender=gender, race=race)
            for race in ("White", "Black")
            for gender in ("male", "female")
        ],
        description="Demographics to test",
    )
    include_baseline: bool = Field(
        True, description="Include baseline (no demographic) for comparison"
    )
    # Restricted to the three literal values in the annotation.
    comparison_mode: Literal["all_pairs", "vs_baseline", "both"] = Field(
        "both",
        description="Comparison mode: all_pairs, vs_baseline, or both",
    )
    # None means no extra dimensions are requested.
    extended_dimensions: Optional[List[str]] = Field(
        None,
        description="Additional dimensions: ['age', 'disability', 'socioeconomic']",
    )
class PerturbationTestConfig(BaseModel):
    """Top-level configuration bundling general and per-test settings.

    The nested ``execution``, ``jailbreak`` and ``counterfactual_bias``
    sections are each built from their model's defaults when omitted.
    """

    # --- General settings ---
    model: str = Field("gpt-4o-mini", description="LLM model for testing")
    judge_model: str = Field(
        "gpt-4o-mini", description="LLM model for evaluation/judging"
    )
    max_relations: Optional[int] = Field(
        None, description="Maximum relations to test (None = all)"
    )

    # --- Execution configuration ---
    execution: ExecutionConfig = Field(
        description="Concurrent execution settings",
        default_factory=ExecutionConfig,
    )

    # --- Test-specific configurations ---
    jailbreak: JailbreakTestConfig = Field(
        description="Jailbreak testing configuration",
        default_factory=JailbreakTestConfig,
    )
    counterfactual_bias: CounterfactualBiasTestConfig = Field(
        description="Counterfactual bias testing configuration",
        default_factory=CounterfactualBiasTestConfig,
    )
# Preset configurations, keyed by preset name.
#   quick         - 3 relations, 3 workers, 3 techniques, two groups vs baseline
#   standard      - 10 relations, 5 workers, 10 techniques, default groups
#   comprehensive - all relations, 10 workers / 5 retries, 20 techniques,
#                   nine groups plus the "age" extended dimension
PRESET_CONFIGS = {
    "quick": PerturbationTestConfig(
        max_relations=3,
        execution=ExecutionConfig(max_workers=3),
        jailbreak=JailbreakTestConfig(num_techniques=3),
        counterfactual_bias=CounterfactualBiasTestConfig(
            demographics=[
                DemographicConfig(gender="male", race="White"),
                DemographicConfig(gender="female", race="Black"),
            ],
            comparison_mode="vs_baseline",
        ),
    ),
    "standard": PerturbationTestConfig(
        max_relations=10,
        execution=ExecutionConfig(max_workers=5),
        jailbreak=JailbreakTestConfig(num_techniques=10),
        counterfactual_bias=CounterfactualBiasTestConfig(comparison_mode="both"),
    ),
    "comprehensive": PerturbationTestConfig(
        max_relations=None,
        execution=ExecutionConfig(max_workers=10, max_retries=5),
        jailbreak=JailbreakTestConfig(num_techniques=20),
        counterfactual_bias=CounterfactualBiasTestConfig(
            # male/female for each race in order, then one non-binary group.
            demographics=[
                DemographicConfig(gender=gender, race=race)
                for race in ("White", "Black", "Asian", "Hispanic")
                for gender in ("male", "female")
            ]
            + [DemographicConfig(gender="non-binary", race="White")],
            extended_dimensions=["age"],
            comparison_mode="both",
        ),
    ),
}
# Extended demographics for optional use: maps a dimension name to the list
# of descriptive phrases available for that dimension.
EXTENDED_DEMOGRAPHICS: Dict[str, List[str]] = {
    "age": ["young (20s)", "middle-aged (40s)", "elderly (70s)"],
    "disability": [
        "with no disability",
        "with a physical disability",
        "with a visual impairment",
    ],
    "socioeconomic": [
        "from a wealthy background",
        "from a middle-class background",
        "from a low-income background",
    ],
}
def get_preset_config(preset_name: str) -> PerturbationTestConfig:
    """
    Look up a named preset and hand back an independent copy of it.

    Args:
        preset_name: Key into PRESET_CONFIGS
            ('quick', 'standard' or 'comprehensive')

    Returns:
        A deep copy of the stored PerturbationTestConfig, so changes made
        by the caller do not alter the shared preset

    Raises:
        ValueError: If preset_name is not a key of PRESET_CONFIGS
    """
    preset = PRESET_CONFIGS.get(preset_name)
    if preset is None:
        available = list(PRESET_CONFIGS)
        raise ValueError(
            f"Unknown preset: {preset_name}. "
            f"Available: {available}"
        )
    return preset.model_copy(deep=True)
def create_config_from_dict(config_dict: Dict[str, Any]) -> PerturbationTestConfig:
    """
    Build a PerturbationTestConfig from a plain dictionary.

    The dictionary's items are passed to the model constructor as keyword
    arguments, so Pydantic validates them and fills in defaults for any
    field that is absent.

    Args:
        config_dict: Mapping of field names to values

    Returns:
        The validated PerturbationTestConfig instance
    """
    config = PerturbationTestConfig(**config_dict)
    return config