""" Base Attack Strategy Abstract base class for all attack strategies. """ from abc import ABC, abstractmethod from typing import Any, Dict class BaseAttackStrategy(ABC): """ Abstract base class for attack strategies. All attack strategies must implement the `apply` method. This provides a plugin architecture for extensible attacks. """ def __init__(self, name: str = "base"): """ Initialize the attack strategy. Args: name: Name of the attack strategy """ self.name = name @abstractmethod def apply(self, prompt: str, **kwargs: Any) -> str: """ Apply the attack strategy to a prompt. Args: prompt: The original prompt to mutate **kwargs: Additional strategy-specific parameters Returns: The mutated adversarial prompt """ raise NotImplementedError def get_metadata(self) -> Dict[str, Any]: """ Get metadata about this attack strategy. Returns: Dictionary containing strategy metadata """ return { "strategy_name": self.name, "strategy_type": self.__class__.__name__, } def validate_prompt(self, prompt: str) -> bool: """ Validate that the prompt is suitable for this attack. Args: prompt: The prompt to validate Returns: True if valid, False otherwise """ return isinstance(prompt, str) and len(prompt.strip()) > 0 class PromptInjectionStrategy(BaseAttackStrategy): """Base class for prompt injection attacks.""" def __init__(self): super().__init__(name="prompt_injection") def get_injection_markers(self) -> list[str]: """ Get common injection markers for detection. Returns: List of injection marker strings """ return [ "ignore previous instructions", "ignore all previous instructions", "you are now operating under new rules", "disregard your previous instructions", "forget your previous instructions", "new system instructions", "override system", ] class JailbreakStrategy(BaseAttackStrategy): """Base class for jailbreak attacks.""" def __init__(self): super().__init__(name="jailbreak") class BiasTriggerStrategy(BaseAttackStrategy): """Base class for bias trigger attacks.""" def __init__(self, protected_attributes: list[str] | None = None): super().__init__(name="bias_trigger") self.protected_attributes = protected_attributes or [ "race", "gender", "religion", "nationality", "age", "disability", "sexual orientation", ] class ContextPoisoningStrategy(BaseAttackStrategy): """Base class for context poisoning attacks.""" def __init__(self): super().__init__(name="context_poisoning") class RoleConfusionStrategy(BaseAttackStrategy): """Base class for role confusion attacks.""" def __init__(self): super().__init__(name="role_confusion") class ChainingStrategy(BaseAttackStrategy): """Base class for attack chaining.""" def __init__(self, chain_depth: int = 3): super().__init__(name="chaining") self.chain_depth = chain_depth def get_chain_steps(self) -> list[str]: """ Get the chain steps for multi-turn attacks. Returns: List of chain step descriptions """ return [ "soften_context", "introduce_hypothetical", "escalate_topic", ]