| """
|
| Attack Breakdown Component
|
|
|
| Gradio component for displaying per-attack metric breakdown and vulnerability analysis.
|
| """
|
|
|
| import logging
|
| from typing import Any, List, Optional
|
|
|
| import gradio as gr
|
|
|
| from dashboard.schemas import AttackBreakdown, AttackBreakdownList
|
| from dashboard.utils import log_dashboard_event
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Column titles for the per-attack breakdown table. There are eight of them,
# matching the eight cells of the placeholder row used when no data exists.
ATTACK_BREAKDOWN_HEADERS = [
    "Attack Type", "Sample Count",
    "Hallucination", "Toxicity", "Bias", "Confidence",
    "Robustness", "Vulnerability Index",
]
|
|
|
|
|
def create_attack_breakdown_table() -> gr.Dataframe:
    """
    Build the table component that displays per-attack metrics.

    The table is created non-interactive and takes its column titles from
    ``ATTACK_BREAKDOWN_HEADERS``.

    Returns:
        The configured ``gr.Dataframe`` component
    """
    return gr.Dataframe(
        label="Per-Attack Metric Breakdown",
        headers=ATTACK_BREAKDOWN_HEADERS,
        interactive=False,
    )
|
|
|
|
|
def create_attack_selector() -> gr.Dropdown:
    """
    Build the dropdown used to pick an attack type.

    The dropdown is created interactive and with an empty choice list.

    Returns:
        The configured ``gr.Dropdown`` component
    """
    return gr.Dropdown(
        choices=[],
        label="Select Attack Type",
        interactive=True,
    )
|
|
|
|
|
def update_attack_breakdown_table(
    breakdown_list: Optional[AttackBreakdownList],
) -> List[List[Any]]:
    """
    Convert a breakdown list into rows for the breakdown table.

    Args:
        breakdown_list: Attack breakdown list, or None when nothing is loaded

    Returns:
        One row per breakdown, as produced by its ``to_table_row()``. When
        the list is None or has no breakdowns, a single "N/A" placeholder
        row is returned instead.
    """
    if breakdown_list is not None and breakdown_list.breakdowns:
        return [entry.to_table_row() for entry in breakdown_list.breakdowns]

    # Nothing to show: hand back one placeholder row with eight cells.
    return [["N/A", "0", "0.000", "0.000", "0.000", "0.000", "0.000", "0.000"]]
|
|
|
|
|
def update_attack_selector(
    breakdown_list: Optional[AttackBreakdownList],
) -> List[str]:
    """
    Collect the attack types to offer in the selector dropdown.

    Args:
        breakdown_list: Attack breakdown list, or None when nothing is loaded

    Returns:
        The ``attack_type`` of every breakdown, in list order; an empty list
        when the breakdown list is None or has no breakdowns
    """
    if breakdown_list is not None and breakdown_list.breakdowns:
        return [entry.attack_type for entry in breakdown_list.breakdowns]
    return []
|
|
|
|
|
def get_attack_breakdown_details(
    breakdown_list: Optional[AttackBreakdownList],
    attack_type: str,
) -> Optional[AttackBreakdown]:
    """
    Look up the breakdown that belongs to one attack type.

    Args:
        breakdown_list: Attack breakdown list, or None when nothing is loaded
        attack_type: The attack type to search for

    Returns:
        The first breakdown whose ``attack_type`` equals the requested one,
        or None when the list is None, has no breakdowns, or has no match
    """
    if breakdown_list is None or not breakdown_list.breakdowns:
        return None

    matches = (
        candidate
        for candidate in breakdown_list.breakdowns
        if candidate.attack_type == attack_type
    )
    # next() with a default yields the first match, or None if there is none.
    return next(matches, None)
|
|
|
|
|
def format_breakdown_tooltip(breakdown: AttackBreakdown) -> str:
    """
    Render one breakdown as multi-line tooltip text.

    Every metric is printed with three decimal places. Hallucination,
    toxicity, bias and confidence collapse are followed by a short
    interpretation hint.

    Args:
        breakdown: Attack breakdown to describe

    Returns:
        Newline-separated tooltip string (no trailing newline)
    """
    hallucination_hint = "High value indicates increased factual instability under this attack."
    toxicity_hint = "High value indicates increased toxic content generation under this attack."
    bias_hint = "High value indicates increased biased output under this attack."
    collapse_hint = "High value indicates model uncertainty increase."

    lines = [
        f"Attack: {breakdown.attack_type}",
        f"Samples: {breakdown.sample_count}",
        f"Hallucination: {breakdown.mean_hallucination:.3f} - {hallucination_hint}",
        f"Toxicity: {breakdown.mean_toxicity:.3f} - {toxicity_hint}",
        f"Bias: {breakdown.mean_bias:.3f} - {bias_hint}",
        f"Confidence: {breakdown.mean_confidence:.3f}",
        f"Confidence Collapse: {breakdown.confidence_collapse:.3f} - {collapse_hint}",
        f"Robustness (R_a): {breakdown.robustness:.3f}",
        f"Vulnerability Index (VI_a): {breakdown.vulnerability_index:.3f}",
    ]
    return "\n".join(lines)
|
|
|
|
|
def get_small_sample_warning(sample_count: int, min_samples: int = 3) -> str:
    """
    Get warning message for small sample sizes.

    Args:
        sample_count: Number of samples
        min_samples: Smallest sample count that is not flagged. Defaults to
            3, the threshold that was previously hard-coded, so existing
            callers see no change.

    Returns:
        Warning message when ``sample_count`` is below ``min_samples``,
        otherwise an empty string
    """
    if sample_count < min_samples:
        return f"⚠️ Warning: Small sample size ({sample_count}). Results may not be statistically significant."
    return ""
|
|
|
|
|
class AttackBreakdownComponent:
    """
    Attack breakdown component with state management.

    Holds the breakdown list currently in use (None until ``set_data`` is
    called) and exposes small read helpers on top of it.
    """

    def __init__(self):
        """Initialize attack breakdown component with no data loaded."""
        self._current_breakdown_list: Optional[AttackBreakdownList] = None

    def set_data(self, data: AttackBreakdownList) -> None:
        """
        Set breakdown data, replacing whatever was stored before.

        Args:
            data: Attack breakdown list
        """
        self._current_breakdown_list = data

    def get_data(self) -> Optional[AttackBreakdownList]:
        """
        Get current breakdown data.

        Returns:
            Current breakdown list, or None if no data has been set
        """
        return self._current_breakdown_list

    def get_attack_types(self) -> List[str]:
        """
        Get available attack types.

        Returns:
            The ``attack_type`` of every stored breakdown, in list order;
            an empty list when no data is set or it has no breakdowns
        """
        current = self._current_breakdown_list
        # Same guard as the module-level update_attack_selector: a missing
        # list and a missing/empty ``breakdowns`` both mean "no choices",
        # so a falsy ``breakdowns`` yields [] instead of raising.
        if current is None or not current.breakdowns:
            return []
        return [b.attack_type for b in current.breakdowns]

    @staticmethod
    def create_empty_table() -> List[List[Any]]:
        """
        Create empty breakdown table.

        Returns:
            A new list holding one "N/A" placeholder row with eight cells
        """
        return [["N/A", "0", "0.000", "0.000", "0.000", "0.000", "0.000", "0.000"]]
|
|
|
|
|
def log_attack_breakdown_view(run_id: str, attack_type: str) -> None:
    """
    Record that the attack breakdown view was opened.

    Forwards to ``log_dashboard_event`` with the event name
    "DASHBOARD_VIEW_ATTACK_BREAKDOWN" and both arguments as keywords.

    Args:
        run_id: Evaluation run ID
        attack_type: Selected attack type
    """
    event_name = "DASHBOARD_VIEW_ATTACK_BREAKDOWN"
    log_dashboard_event(event_name, run_id=run_id, attack_type=attack_type)
|
|
|
|
|
# Markdown guide returned verbatim by get_breakdown_tooltip_explanation().
_METRIC_INTERPRETATION_GUIDE = """
## Metric Interpretation Guide

### Hallucination
High value indicates increased factual instability under this attack.
- Red (1.0): Model produces significant hallucinations
- Green (0.0): Model maintains factual accuracy

### Toxicity
High value indicates increased toxic content generation under this attack.
- Red (1.0): Model generates highly toxic content
- Green (0.0): Model generates non-toxic content

### Bias
High value indicates increased biased output under this attack.
- Red (1.0): Model shows significant bias
- Green (0.0): Model is unbiased

### Confidence Collapse
High value indicates model uncertainty increase.
- Red (1.0): Complete confidence collapse
- Green (0.0): Full model confidence

### Robustness (R_a)
Composite score for this attack: R = w₁(1-H) + w₂(1-T) + w₃(1-B) + w₄*C
- Higher is better

### Vulnerability Index (VI_a)
Relative vulnerability compared to baseline:
- VI = (R_base - R_adv) / R_base
- Higher means more vulnerable to this attack
"""


def get_breakdown_tooltip_explanation() -> str:
    """
    Return the Markdown guide explaining how to read each breakdown metric.

    The guide has one section each for hallucination, toxicity, bias,
    confidence collapse, robustness (R_a) and vulnerability index (VI_a).

    Returns:
        Explanation string
    """
    return _METRIC_INTERPRETATION_GUIDE
|
|
|