wu981526092's picture
Add comprehensive perturbation testing system with E2E tests
795b72e
#!/usr/bin/env python3
"""
Testing API Router
Provides endpoints for:
- Custom jailbreak prompts upload
- Listing available prompt sources
- Test configuration management
"""
import os
import io
import json
from typing import List, Optional
from fastapi import APIRouter, UploadFile, File, HTTPException, Query
from pydantic import BaseModel, Field
# Router that groups every endpoint of this module under the /api/testing
# prefix and tags them "testing".
router = APIRouter(prefix="/api/testing", tags=["testing"])
# Directory holding custom prompt sets. It is resolved relative to the
# directory two levels above the one containing this file, under
# datasets/custom_jailbreak_prompts.
CUSTOM_PROMPTS_DIR = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
"datasets", "custom_jailbreak_prompts"
)
class PromptSource(BaseModel):
    """Summary record describing one jailbreak prompt collection."""

    # Identifier of the prompt set.
    name: str
    # Short human-readable label for the set.
    description: str
    # Number of prompts contained in the set.
    count: int
    # Origin of the set: 'builtin' or 'custom'.
    source_type: str
    # Topic labels of the set, when they are known.
    categories: Optional[List[str]] = Field(default=None)
class UploadResponse(BaseModel):
    """Payload returned after a prompt file has been uploaded."""

    # Outcome marker of the upload.
    status: str
    # Name under which the prompt set was stored.
    name: str
    # Number of prompts found in the uploaded file.
    prompt_count: int
    # Location of the stored prompt set.
    path: str
    # Topic labels found in the upload, when there are any.
    categories: Optional[List[str]] = Field(default=None)
class TestPreset(BaseModel):
    """Named bundle of test configuration values.

    NOTE(review): the exact meaning of each numeric field is defined by the
    code that consumes a preset, which is not visible in this module.
    """

    # Preset identifier.
    name: str
    # Human-readable summary of the preset.
    description: str
    # Optional bound on relations; presumably None means "no limit" — confirm.
    max_relations: Optional[int] = Field(default=None)
    jailbreak_techniques: int = Field(default=10)
    demographics_count: int = Field(default=4)
    comparison_mode: str = Field(default="both")
# Built-in presets, indexed by their own ``name`` attribute. Insertion order
# is quick -> standard -> comprehensive.
PRESETS = {
    preset.name: preset
    for preset in (
        TestPreset(
            name="quick",
            description="Fast testing with minimal coverage",
            max_relations=3,
            jailbreak_techniques=3,
            demographics_count=2,
            comparison_mode="vs_baseline",
        ),
        TestPreset(
            name="standard",
            description="Balanced testing with good coverage",
            max_relations=10,
            jailbreak_techniques=10,
            demographics_count=4,
            comparison_mode="both",
        ),
        TestPreset(
            name="comprehensive",
            description="Thorough testing with full coverage",
            max_relations=None,
            jailbreak_techniques=20,
            demographics_count=8,
            comparison_mode="both",
        ),
    )
}
def get_builtin_prompts_info() -> PromptSource:
    """Describe the bundled jailbreak dataset.

    Returns:
        A ``PromptSource`` named "standard". ``count`` is the number of rows
        in the CSV file, and ``categories`` holds up to 20 distinct values of
        its ``topic`` column when that column exists. If the file is missing
        or cannot be processed, ``count`` is 0 and the description says why.
    """
    try:
        project_root = os.path.dirname(
            os.path.dirname(os.path.dirname(__file__))
        )
        csv_path = os.path.join(
            project_root, "datasets", "redTeaming_jailbreaking_standard.csv"
        )

        # Guard clause: nothing to inspect when the dataset is absent.
        if not os.path.exists(csv_path):
            return PromptSource(
                name="standard",
                description="Built-in jailbreak dataset (file not found)",
                count=0,
                source_type="builtin"
            )

        # Imported lazily so pandas is only required when the file exists.
        import pandas as pd

        frame = pd.read_csv(csv_path)
        row_count = len(frame)

        topics = None
        if 'topic' in frame.columns:
            distinct_topics = frame['topic'].dropna().unique().tolist()
            topics = distinct_topics[:20]

        return PromptSource(
            name="standard",
            description="Built-in jailbreak dataset",
            count=row_count,
            source_type="builtin",
            categories=topics
        )
    except Exception as e:
        # Best effort: any failure is reported through the description
        # instead of propagating to the caller.
        return PromptSource(
            name="standard",
            description=f"Built-in jailbreak dataset (error: {str(e)})",
            count=0,
            source_type="builtin"
        )
def get_custom_prompts_info() -> List[PromptSource]:
    """Describe every custom prompt set stored in ``CUSTOM_PROMPTS_DIR``.

    Each ``*.json`` file in the directory becomes one ``PromptSource`` whose
    name is the file name without its trailing ``.json`` suffix. Files that
    cannot be read or parsed are skipped with a logged warning.

    Returns:
        One entry per readable JSON file, ordered by file name. The list is
        empty when the directory does not exist. ``count`` is the length of
        the JSON document when it is a list, otherwise 0.
    """
    # Local import: this module defines no file-level logger.
    import logging

    suffix = '.json'
    sources: List[PromptSource] = []
    if not os.path.isdir(CUSTOM_PROMPTS_DIR):
        return sources

    # Sorted so the listing is stable across calls and platforms.
    for filename in sorted(os.listdir(CUSTOM_PROMPTS_DIR)):
        if not filename.endswith(suffix):
            continue

        filepath = os.path.join(CUSTOM_PROMPTS_DIR, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                prompts = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both malformed JSON and undecodable bytes.
            logging.getLogger(__name__).warning(
                "Skipping unreadable prompt set %s: %s", filepath, exc
            )
            continue

        sources.append(PromptSource(
            # Strip only the trailing suffix; str.replace would also remove
            # '.json' occurring in the middle of the name.
            name=filename[:-len(suffix)],
            description="Custom uploaded prompts",
            count=len(prompts) if isinstance(prompts, list) else 0,
            source_type="custom"
        ))
    return sources
def _is_safe_prompt_set_name(name: str) -> bool:
    """Return True when ``name`` maps to a file directly inside CUSTOM_PROMPTS_DIR."""
    if not name or not name.strip() or "\x00" in name:
        return False
    base_dir = os.path.abspath(CUSTOM_PROMPTS_DIR)
    filename = f"{name}.json"
    candidate = os.path.abspath(os.path.join(base_dir, filename))
    # After normalisation the file must still sit directly in the base
    # directory under the exact requested file name; this rejects '..',
    # path separators, absolute paths and drive prefixes.
    return (
        os.path.dirname(candidate) == base_dir
        and os.path.basename(candidate) == filename
    )


def _parse_uploaded_prompts(content: bytes, is_csv: bool) -> list:
    """Decode an uploaded CSV or JSON payload into a list of prompt records.

    Raises:
        HTTPException: 400 when the payload cannot be parsed, the CSV has no
            'prompt' column, or the JSON document is not an array.
    """
    if is_csv:
        import pandas as pd

        try:
            df = pd.read_csv(io.BytesIO(content))
        except ValueError as e:
            # pandas parser/empty-data errors and UnicodeDecodeError are all
            # ValueError subclasses.
            raise HTTPException(400, f"Invalid CSV file: {str(e)}") from e
        if 'prompt' not in df.columns:
            raise HTTPException(400, "CSV must have a 'prompt' column")
        # Empty cells are read as NaN, which json.dump would emit as the
        # non-standard token NaN; store them as null instead.
        df = df.astype(object).where(df.notna(), None)
        return df.to_dict('records')

    try:
        prompts = json.loads(content)
    except ValueError as e:
        raise HTTPException(400, f"Invalid JSON file: {str(e)}") from e
    if not isinstance(prompts, list):
        raise HTTPException(400, "JSON must be an array of prompt objects")
    return prompts


@router.post("/jailbreak-prompts/upload", response_model=UploadResponse)
async def upload_custom_jailbreak_prompts(
    file: UploadFile = File(...),
    name: str = Query(default="custom", description="Name for this prompt set")
):
    """
    Upload custom jailbreak prompts.

    Supports CSV or JSON formats:
    - CSV: Must have 'prompt' column. Optional: 'name', 'description', 'topic'
    - JSON: Array of objects with at least 'prompt' field

    The prompts are stored as ``<name>.json`` inside ``CUSTOM_PROMPTS_DIR``.

    Raises:
        HTTPException: 400 for a missing or unsupported file name, an invalid
            or reserved prompt set name, unparsable content, or a record
            without a 'prompt' field; 500 for any other processing failure.
    """
    # Validate filename
    if not file.filename:
        raise HTTPException(400, "No filename provided")
    filename_lower = file.filename.lower()
    if not filename_lower.endswith(('.csv', '.json')):
        raise HTTPException(400, "Only CSV or JSON files are supported")

    # 'standard' names the built-in dataset, which the delete endpoint
    # refuses to remove; a custom set with that name could never be deleted.
    if name == "standard":
        raise HTTPException(400, "'standard' is reserved for the built-in dataset")
    # The name becomes part of a file path, so it must not escape the
    # custom prompts directory.
    if not _is_safe_prompt_set_name(name):
        raise HTTPException(400, "Invalid prompt set name")

    try:
        content = await file.read()
        prompts = _parse_uploaded_prompts(
            content, is_csv=filename_lower.endswith('.csv')
        )

        # Every record must be an object carrying a 'prompt' field. The
        # isinstance check matters: `'prompt' in "text"` would be a substring
        # test and `'prompt' in 3` a TypeError.
        for i, p in enumerate(prompts):
            if not isinstance(p, dict) or 'prompt' not in p:
                raise HTTPException(400, f"Prompt at index {i} missing 'prompt' field")

        # Create the directory only once the input is known to be valid.
        os.makedirs(CUSTOM_PROMPTS_DIR, exist_ok=True)
        output_path = os.path.join(CUSTOM_PROMPTS_DIR, f"{name}.json")
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(prompts, f, indent=2)

        # Collect up to 20 distinct topics. They are stringified so that
        # numeric or unhashable values cannot break the response model, and
        # sorted so the result is deterministic.
        categories = None
        if any('topic' in p for p in prompts):
            topics = {str(p['topic']) for p in prompts if p.get('topic')}
            categories = sorted(topics)[:20]

        return UploadResponse(
            status="success",
            name=name,
            prompt_count=len(prompts),
            path=output_path,
            categories=categories
        )
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(500, f"Error processing file: {str(e)}") from e
@router.get("/jailbreak-prompts/list")
async def list_jailbreak_prompt_sources() -> dict:
    """
    Enumerate every jailbreak prompt source known to the service.

    The built-in dataset is listed first, followed by the custom uploaded
    sets; each entry is the dumped form of a ``PromptSource``.
    """
    # Built-in first, then custom, in the order the helpers return them.
    all_sources = [get_builtin_prompts_info(), *get_custom_prompts_info()]
    return {"sources": [entry.model_dump() for entry in all_sources]}
@router.delete("/jailbreak-prompts/{name}")
async def delete_custom_prompts(name: str):
    """
    Delete a custom prompt set.

    Cannot delete the built-in 'standard' dataset.

    Raises:
        HTTPException: 400 when ``name`` is 'standard' or does not resolve to
            a file directly inside ``CUSTOM_PROMPTS_DIR``; 404 when the set
            does not exist; 500 when the file cannot be removed.
    """
    if name == "standard":
        raise HTTPException(400, "Cannot delete built-in dataset")

    # The name becomes part of a file path: after normalisation the target
    # must still sit directly in the custom prompts directory under the
    # exact requested file name (rejects '..', separators, absolute paths).
    base_dir = os.path.abspath(CUSTOM_PROMPTS_DIR)
    filename = f"{name}.json"
    filepath = os.path.abspath(os.path.join(base_dir, filename))
    if (
        "\x00" in name
        or os.path.dirname(filepath) != base_dir
        or os.path.basename(filepath) != filename
    ):
        raise HTTPException(400, "Invalid prompt set name")

    # Remove directly rather than checking existence first, so a concurrent
    # delete cannot turn a "not found" into a 500.
    try:
        os.remove(filepath)
    except FileNotFoundError:
        raise HTTPException(404, f"Prompt set '{name}' not found") from None
    except OSError as e:
        raise HTTPException(500, f"Error deleting file: {str(e)}") from e
    return {"status": "success", "message": f"Deleted prompt set '{name}'"}
@router.get("/presets")
async def list_test_presets() -> dict:
    """
    Return every entry of ``PRESETS`` in dumped (dict) form.
    """
    dumped_presets = []
    for preset in PRESETS.values():
        dumped_presets.append(preset.model_dump())
    return {"presets": dumped_presets}
@router.get("/presets/{preset_name}")
async def get_test_preset(preset_name: str) -> dict:
    """
    Look up one preset by name and return it in dumped (dict) form.

    Raises:
        HTTPException: 404 when ``preset_name`` is not a key of ``PRESETS``.
    """
    # PRESETS never stores None, so a None result means the key is absent.
    selected = PRESETS.get(preset_name)
    if selected is None:
        raise HTTPException(404, f"Preset '{preset_name}' not found")
    return selected.model_dump()
@router.get("/demographics")
async def list_available_demographics() -> dict:
    """
    Return the static catalogue of demographic options for counterfactual
    bias testing: gender and race values, ready-made profile presets,
    extended dimensions and the supported comparison modes.
    """
    binary_genders = ("male", "female")

    def profiles_for(races):
        # One profile per (race, gender) pair: races vary slowest, and each
        # race is paired with "male" first, then "female".
        return [
            {"gender": gender, "race": race}
            for race in races
            for gender in binary_genders
        ]

    profile_presets = {
        "minimal": [
            {"gender": "male", "race": "White"},
            {"gender": "female", "race": "Black"},
        ],
        "standard": profiles_for(("White", "Black")),
        "comprehensive": profiles_for(("White", "Black", "Asian", "Hispanic"))
        + [{"gender": "non-binary", "race": "White"}],
    }

    demographics = {
        "gender": ["male", "female", "non-binary"],
        "race": ["White", "Black", "Asian", "Hispanic", "Middle Eastern", "Indigenous"],
        "presets": profile_presets,
    }

    extended_dimensions = {
        "age": ["young (20s)", "middle-aged (40s)", "elderly (70s)"],
        "disability": ["with no disability", "with a physical disability", "with a visual impairment"],
        "socioeconomic": ["from a wealthy background", "from a middle-class background", "from a low-income background"],
    }

    return {
        "demographics": demographics,
        "extended_dimensions": extended_dimensions,
        "comparison_modes": ["vs_baseline", "all_pairs", "both"],
    }