/*
 * AgentGraph/frontend/src/components/features/traces/PerturbationTestConfig.tsx
 * Contributor: wu981526092
 * Commit 795b72e: Add comprehensive perturbation testing system with E2E tests
 */
import React, { useState } from "react";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Badge } from "@/components/ui/badge";
import { Checkbox } from "@/components/ui/checkbox";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import {
Accordion,
AccordionContent,
AccordionItem,
AccordionTrigger,
} from "@/components/ui/accordion";
import { Shield, Users, Settings, Play, X, Plus, Trash2 } from "lucide-react";
import { PerturbationConfig, Demographic } from "@/types";
/** Props accepted by the PerturbationTestConfig dialog component. */
interface PerturbationTestConfigProps {
/** Visibility flag; forwarded unchanged to the underlying Dialog's `open` prop. */
open: boolean;
/**
 * Forwarded to the Dialog's `onOpenChange`; the component also calls it with
 * `false` when Cancel is clicked and right after Run Tests.
 */
onOpenChange: (open: boolean) => void;
/** Invoked with the configuration currently held in the form when Run Tests is clicked. */
onRun: (config: PerturbationConfig) => void;
}
/**
 * Baseline set of gender/race combinations. Used as-is by the "standard"
 * preset and as the leading entries of the "comprehensive" preset.
 */
const DEFAULT_DEMOGRAPHICS: Demographic[] = [
{ gender: "male", race: "White" },
{ gender: "female", race: "White" },
{ gender: "male", race: "Black" },
{ gender: "female", race: "Black" },
];
/** Values offered in the gender dropdown of each demographic row. */
const GENDER_OPTIONS = ["male", "female", "non-binary"];
/** Values offered in the race dropdown of each demographic row. */
const RACE_OPTIONS = ["White", "Black", "Asian", "Hispanic", "Middle Eastern", "Indigenous"];
/**
 * Built-in configuration presets, keyed by the preset id stored in the
 * component's `selectedPreset` state. Each entry carries the button label
 * (`name`), the helper text shown under the preset buttons (`description`),
 * and the full `config` object that is loaded into the form when the preset
 * is applied.
 */
const PRESET_CONFIGS = {
// Smallest preset: 3 relations, 3 jailbreak techniques, 2 demographics.
quick: {
name: "Quick",
description: "Fast testing with minimal coverage",
config: {
model: "gpt-4o-mini",
judge_model: "gpt-4o-mini",
max_relations: 3,
jailbreak: { enabled: true, num_techniques: 3, prompt_source: "standard" },
counterfactual_bias: {
enabled: true,
demographics: [
{ gender: "male", race: "White" },
{ gender: "female", race: "Black" },
],
include_baseline: true,
comparison_mode: "vs_baseline" as const,
},
},
},
// Initial state of the dialog: 10 relations, 10 techniques, DEFAULT_DEMOGRAPHICS.
standard: {
name: "Standard",
description: "Balanced testing with good coverage",
config: {
model: "gpt-4o-mini",
judge_model: "gpt-4o-mini",
max_relations: 10,
jailbreak: { enabled: true, num_techniques: 10, prompt_source: "standard" },
counterfactual_bias: {
enabled: true,
demographics: DEFAULT_DEMOGRAPHICS,
include_baseline: true,
comparison_mode: "both" as const,
},
},
},
// Largest preset: 20 techniques and DEFAULT_DEMOGRAPHICS plus Asian/Hispanic pairs.
comprehensive: {
name: "Comprehensive",
description: "Thorough testing with full coverage",
config: {
model: "gpt-4o-mini",
judge_model: "gpt-4o-mini",
// null renders as an empty "Max Relations" field whose placeholder reads "All";
// presumably the backend treats it as "no limit" — TODO confirm.
max_relations: null,
jailbreak: { enabled: true, num_techniques: 20, prompt_source: "standard" },
counterfactual_bias: {
enabled: true,
demographics: [
...DEFAULT_DEMOGRAPHICS,
{ gender: "male", race: "Asian" },
{ gender: "female", race: "Asian" },
{ gender: "male", race: "Hispanic" },
{ gender: "female", race: "Hispanic" },
],
include_baseline: true,
comparison_mode: "both" as const,
},
},
},
};
/** Inclusive bounds for "Techniques per Relation"; the form advertises 1-50. */
const MIN_TECHNIQUES = 1;
const MAX_TECHNIQUES = 50;
/** Techniques value used when the field is empty or not a number. */
const FALLBACK_TECHNIQUES = 10;

/**
 * Converts the raw text of a numeric input into a whole number.
 *
 * @returns the truncated number, or `null` when the text is blank or does not
 *   represent a finite number (so `NaN` can never reach component state).
 */
function toWholeNumber(raw: string): number | null {
  if (raw.trim() === "") {
    return null;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? Math.trunc(parsed) : null;
}

/** Maps the "Max Relations" field text to a positive count, or `null` when left blank. */
function parseMaxRelations(raw: string): number | null {
  const parsed = toWholeNumber(raw);
  return parsed === null ? null : Math.max(1, parsed);
}

/** Maps the "Techniques per Relation" field text to a count clamped into the allowed range. */
function parseTechniqueCount(raw: string): number {
  const parsed = toWholeNumber(raw);
  if (parsed === null) {
    return FALLBACK_TECHNIQUES;
  }
  return Math.min(MAX_TECHNIQUES, Math.max(MIN_TECHNIQUES, parsed));
}

/**
 * Modal dialog for configuring a perturbation test run.
 *
 * The form starts from the "standard" preset. Choosing a preset replaces the
 * whole configuration; editing any individual field clears the preset
 * highlight. "Run Tests" hands the current configuration to `onRun` and then
 * requests the dialog to close; "Cancel" only requests the close.
 *
 * @param open - Whether the dialog is shown.
 * @param onOpenChange - Receives open/close requests from the dialog.
 * @param onRun - Receives the configuration when the user starts a run.
 */
export function PerturbationTestConfig({
  open,
  onOpenChange,
  onRun,
}: PerturbationTestConfigProps) {
  const [config, setConfig] = useState<PerturbationConfig>(
    PRESET_CONFIGS.standard.config
  );
  // Key of the preset currently reflected by the form; "" once the user customizes anything.
  const [selectedPreset, setSelectedPreset] = useState<string>("standard");

  const jailbreak = config.jailbreak;
  const bias = config.counterfactual_bias;
  const demographics = bias?.demographics ?? [];

  /** Loads a preset's configuration into the form; unknown keys are ignored. */
  const applyPreset = (presetKey: string) => {
    const preset = PRESET_CONFIGS[presetKey as keyof typeof PRESET_CONFIGS];
    if (preset) {
      setConfig(preset.config);
      setSelectedPreset(presetKey);
    }
  };

  /** Merges changes into the jailbreak section and drops the preset highlight. */
  const updateJailbreak = (
    updates: Partial<NonNullable<PerturbationConfig["jailbreak"]>>
  ) => {
    setConfig((prev) => ({
      ...prev,
      jailbreak: { ...prev.jailbreak!, ...updates },
    }));
    setSelectedPreset("");
  };

  /** Merges changes into the counterfactual-bias section and drops the preset highlight. */
  const updateBias = (
    updates: Partial<NonNullable<PerturbationConfig["counterfactual_bias"]>>
  ) => {
    setConfig((prev) => ({
      ...prev,
      counterfactual_bias: { ...prev.counterfactual_bias!, ...updates },
    }));
    setSelectedPreset("");
  };

  /**
   * Replaces the demographics list with the result of `transform`.
   * Uses a functional state update so the new list is always derived from the
   * latest state rather than from the `config` captured at render time.
   */
  const updateDemographics = (
    transform: (current: Demographic[]) => Demographic[]
  ) => {
    setConfig((prev) => ({
      ...prev,
      counterfactual_bias: {
        ...prev.counterfactual_bias!,
        demographics: transform(prev.counterfactual_bias?.demographics ?? []),
      },
    }));
    setSelectedPreset("");
  };

  const addDemographic = () => {
    const newDemo: Demographic = { gender: "male", race: "White" };
    updateDemographics((current) => [...current, newDemo]);
  };

  const removeDemographic = (index: number) => {
    updateDemographics((current) => current.filter((_, i) => i !== index));
  };

  const updateDemographic = (
    index: number,
    field: keyof Demographic,
    value: string
  ) => {
    updateDemographics((current) => {
      const next = [...current];
      const existing = next[index];
      next[index] = {
        gender: existing?.gender || "",
        race: existing?.race || "",
        [field]: value,
      };
      return next;
    });
  };

  const handleRun = () => {
    onRun(config);
    onOpenChange(false);
  };

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="max-w-2xl max-h-[85vh] overflow-y-auto">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-2">
            <Settings className="h-5 w-5" />
            Perturbation Test Configuration
          </DialogTitle>
          <DialogDescription>
            Configure which tests to run and their parameters. Choose a preset or customize settings.
          </DialogDescription>
        </DialogHeader>
        <div className="space-y-6 py-4">
          {/* Preset Selection */}
          <div className="space-y-3">
            <Label className="text-sm font-medium">Quick Presets</Label>
            <div className="flex gap-2">
              {Object.entries(PRESET_CONFIGS).map(([key, preset]) => (
                <Button
                  key={key}
                  variant={selectedPreset === key ? "default" : "outline"}
                  size="sm"
                  onClick={() => applyPreset(key)}
                  className="flex-1"
                >
                  {preset.name}
                </Button>
              ))}
            </div>
            {selectedPreset && (
              <p className="text-xs text-muted-foreground">
                {PRESET_CONFIGS[selectedPreset as keyof typeof PRESET_CONFIGS].description}
              </p>
            )}
          </div>
          {/* General Settings */}
          <div className="space-y-3">
            <Label className="text-sm font-medium">General Settings</Label>
            <div className="grid grid-cols-2 gap-4">
              <div className="space-y-2">
                <Label htmlFor="model" className="text-xs">Test Model</Label>
                <Select
                  value={config.model}
                  onValueChange={(v) => {
                    setConfig((prev) => ({ ...prev, model: v }));
                    setSelectedPreset("");
                  }}
                >
                  <SelectTrigger id="model">
                    <SelectValue />
                  </SelectTrigger>
                  <SelectContent>
                    <SelectItem value="gpt-4o-mini">GPT-4o Mini</SelectItem>
                    <SelectItem value="gpt-4o">GPT-4o</SelectItem>
                    <SelectItem value="gpt-4">GPT-4</SelectItem>
                  </SelectContent>
                </Select>
              </div>
              <div className="space-y-2">
                <Label htmlFor="maxRelations" className="text-xs">Max Relations</Label>
                <Input
                  id="maxRelations"
                  type="number"
                  min={1}
                  placeholder="All"
                  value={config.max_relations ?? ""}
                  onChange={(event) => {
                    // Blank means "All" (null); anything else is coerced to a whole number >= 1.
                    const maxRelations = parseMaxRelations(event.target.value);
                    setConfig((prev) => ({ ...prev, max_relations: maxRelations }));
                    setSelectedPreset("");
                  }}
                />
              </div>
            </div>
          </div>
          <Accordion type="multiple" defaultValue={["jailbreak", "bias"]} className="w-full">
            {/* Jailbreak Settings */}
            <AccordionItem value="jailbreak">
              <AccordionTrigger className="hover:no-underline">
                <div className="flex items-center gap-2">
                  <Shield className="h-4 w-4" />
                  <span>Jailbreak Testing</span>
                  <Badge
                    variant={jailbreak?.enabled ? "default" : "secondary"}
                    className="ml-2"
                  >
                    {jailbreak?.enabled ? "Enabled" : "Disabled"}
                  </Badge>
                </div>
              </AccordionTrigger>
              <AccordionContent className="space-y-4 pt-4">
                <div className="flex items-center justify-between">
                  <Label htmlFor="jailbreak-enabled">Enable Jailbreak Testing</Label>
                  <Checkbox
                    id="jailbreak-enabled"
                    checked={jailbreak?.enabled ?? false}
                    onCheckedChange={(checked: boolean | "indeterminate") =>
                      // Only an explicit `true` enables; "indeterminate" counts as off.
                      updateJailbreak({ enabled: checked === true })
                    }
                  />
                </div>
                {jailbreak?.enabled && (
                  <>
                    <div className="space-y-2">
                      <Label htmlFor="num-techniques" className="text-xs">
                        Techniques per Relation
                      </Label>
                      <Input
                        id="num-techniques"
                        type="number"
                        min={MIN_TECHNIQUES}
                        max={MAX_TECHNIQUES}
                        value={jailbreak?.num_techniques}
                        onChange={(event) =>
                          // min/max attributes do not stop typed input, so clamp here.
                          updateJailbreak({
                            num_techniques: parseTechniqueCount(event.target.value),
                          })
                        }
                      />
                      <p className="text-xs text-muted-foreground">
                        Number of jailbreak techniques to test per relation (1-50)
                      </p>
                    </div>
                    <div className="space-y-2">
                      <Label htmlFor="prompt-source" className="text-xs">
                        Prompt Source
                      </Label>
                      <Select
                        value={jailbreak?.prompt_source}
                        onValueChange={(v) => updateJailbreak({ prompt_source: v })}
                      >
                        <SelectTrigger id="prompt-source">
                          <SelectValue />
                        </SelectTrigger>
                        <SelectContent>
                          <SelectItem value="standard">Standard Dataset (814 prompts)</SelectItem>
                        </SelectContent>
                      </Select>
                    </div>
                  </>
                )}
              </AccordionContent>
            </AccordionItem>
            {/* Counterfactual Bias Settings */}
            <AccordionItem value="bias">
              <AccordionTrigger className="hover:no-underline">
                <div className="flex items-center gap-2">
                  <Users className="h-4 w-4" />
                  <span>Counterfactual Bias Testing</span>
                  <Badge
                    variant={bias?.enabled ? "default" : "secondary"}
                    className="ml-2"
                  >
                    {bias?.enabled ? "Enabled" : "Disabled"}
                  </Badge>
                </div>
              </AccordionTrigger>
              <AccordionContent className="space-y-4 pt-4">
                <div className="flex items-center justify-between">
                  <Label htmlFor="bias-enabled">Enable Bias Testing</Label>
                  <Checkbox
                    id="bias-enabled"
                    checked={bias?.enabled ?? false}
                    onCheckedChange={(checked: boolean | "indeterminate") =>
                      updateBias({ enabled: checked === true })
                    }
                  />
                </div>
                {bias?.enabled && (
                  <>
                    <div className="space-y-2">
                      <Label htmlFor="comparison-mode" className="text-xs">Comparison Mode</Label>
                      <Select
                        value={bias?.comparison_mode}
                        onValueChange={(v) =>
                          updateBias({ comparison_mode: v as "all_pairs" | "vs_baseline" | "both" })
                        }
                      >
                        <SelectTrigger id="comparison-mode">
                          <SelectValue />
                        </SelectTrigger>
                        <SelectContent>
                          <SelectItem value="both">Both (Baseline + Pairs)</SelectItem>
                          <SelectItem value="vs_baseline">Baseline Only</SelectItem>
                          <SelectItem value="all_pairs">All Pairs</SelectItem>
                        </SelectContent>
                      </Select>
                    </div>
                    <div className="flex items-center justify-between">
                      <Label htmlFor="include-baseline">Include Baseline</Label>
                      <Checkbox
                        id="include-baseline"
                        checked={bias?.include_baseline ?? false}
                        onCheckedChange={(checked: boolean | "indeterminate") =>
                          updateBias({ include_baseline: checked === true })
                        }
                      />
                    </div>
                    <div className="space-y-2">
                      <div className="flex items-center justify-between">
                        <Label className="text-xs">Demographics to Test</Label>
                        <Button size="sm" variant="outline" onClick={addDemographic}>
                          <Plus className="h-3 w-3 mr-1" />
                          Add
                        </Button>
                      </div>
                      <div className="space-y-2 max-h-48 overflow-y-auto">
                        {demographics.map((demo, index) => (
                          <div key={index} className="flex items-center gap-2">
                            <Select
                              value={demo.gender}
                              onValueChange={(v) => updateDemographic(index, "gender", v)}
                            >
                              <SelectTrigger className="w-32">
                                <SelectValue />
                              </SelectTrigger>
                              <SelectContent>
                                {GENDER_OPTIONS.map((g) => (
                                  <SelectItem key={g} value={g}>
                                    {g}
                                  </SelectItem>
                                ))}
                              </SelectContent>
                            </Select>
                            <Select
                              value={demo.race}
                              onValueChange={(v) => updateDemographic(index, "race", v)}
                            >
                              <SelectTrigger className="flex-1">
                                <SelectValue />
                              </SelectTrigger>
                              <SelectContent>
                                {RACE_OPTIONS.map((r) => (
                                  <SelectItem key={r} value={r}>
                                    {r}
                                  </SelectItem>
                                ))}
                              </SelectContent>
                            </Select>
                            {/* The last remaining row cannot be removed. */}
                            <Button
                              size="icon"
                              variant="ghost"
                              onClick={() => removeDemographic(index)}
                              disabled={demographics.length <= 1}
                            >
                              <Trash2 className="h-4 w-4 text-muted-foreground" />
                            </Button>
                          </div>
                        ))}
                      </div>
                    </div>
                  </>
                )}
              </AccordionContent>
            </AccordionItem>
          </Accordion>
        </div>
        <DialogFooter>
          <Button variant="outline" onClick={() => onOpenChange(false)}>
            <X className="h-4 w-4 mr-2" />
            Cancel
          </Button>
          <Button onClick={handleRun}>
            <Play className="h-4 w-4 mr-2" />
            Run Tests
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}