wu981526092 committed on
Commit
ef5205a
·
1 Parent(s): 290f82e
agentgraph/testing/perturbation_types/jailbreak.py CHANGED
@@ -43,9 +43,9 @@ def load_jailbreak_techniques() -> List[Dict[str, Any]]:
43
  techniques = []
44
  for _, row in df.iterrows():
45
  techniques.append({
46
- 'name': row['topic'] if 'topic' in row else 'Unknown',
47
- 'prompt': row['prompt'] if 'prompt' in row else '',
48
- 'description': row['source'] if 'source' in row else ''
49
  })
50
 
51
  logger.info(f"Successfully loaded {len(techniques)} jailbreak techniques from CSV file")
 
43
  techniques = []
44
  for _, row in df.iterrows():
45
  techniques.append({
46
+ 'name': row.get('name', 'Unknown'),
47
+ 'prompt': row.get('prompt', ''),
48
+ 'description': row.get('description', '')
49
  })
50
 
51
  logger.info(f"Successfully loaded {len(techniques)} jailbreak techniques from CSV file")
extraction_analysis/cot_extraction_20250909_172744_855dfc94.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "20250909_172744",
3
+ "extraction_id": "855dfc94",
4
+ "model": "gpt-4o-mini",
5
+ "reasoning_steps": [],
6
+ "knowledge_graph": {
7
+ "system_name": "User Message Response System",
8
+ "system_summary": "This system is designed to process user messages and generate appropriate responses. The process begins when a user sends a message, which is handled by the `Assistant Agent` (agent_001). The agent undertakes the `Message Processing Task` (task_001) to analyze the user input and generate an output. This collaborative process allows the system to efficiently respond to user inquiries.",
9
+ "entities": [
10
+ {
11
+ "id": "agent_001",
12
+ "type": "Agent",
13
+ "name": "Assistant Agent",
14
+ "importance": "HIGH",
15
+ "raw_prompt": "",
16
+ "raw_prompt_ref": [
17
+ {
18
+ "line_start": 2,
19
+ "line_end": 2
20
+ }
21
+ ]
22
+ },
23
+ {
24
+ "id": "task_001",
25
+ "type": "Task",
26
+ "name": "Message Processing Task",
27
+ "importance": "HIGH",
28
+ "raw_prompt": "",
29
+ "raw_prompt_ref": [
30
+ {
31
+ "line_start": 3,
32
+ "line_end": 3
33
+ }
34
+ ]
35
+ },
36
+ {
37
+ "id": "input_001",
38
+ "type": "Input",
39
+ "name": "User Message",
40
+ "importance": "HIGH",
41
+ "raw_prompt": "",
42
+ "raw_prompt_ref": [
43
+ {
44
+ "line_start": 3,
45
+ "line_end": 3
46
+ }
47
+ ]
48
+ },
49
+ {
50
+ "id": "output_001",
51
+ "type": "Output",
52
+ "name": "Agent Response",
53
+ "importance": "HIGH",
54
+ "raw_prompt": "",
55
+ "raw_prompt_ref": [
56
+ {
57
+ "line_start": 4,
58
+ "line_end": 4
59
+ }
60
+ ]
61
+ },
62
+ {
63
+ "id": "human_001",
64
+ "type": "Human",
65
+ "name": "End User",
66
+ "importance": "HIGH",
67
+ "raw_prompt": "",
68
+ "raw_prompt_ref": [
69
+ {
70
+ "line_start": 3,
71
+ "line_end": 3
72
+ }
73
+ ]
74
+ }
75
+ ],
76
+ "relations": [
77
+ {
78
+ "id": "rel_001",
79
+ "source": "input_001",
80
+ "target": "agent_001",
81
+ "type": "CONSUMED_BY",
82
+ "importance": "HIGH",
83
+ "interaction_prompt": "",
84
+ "interaction_prompt_ref": [
85
+ {
86
+ "line_start": 2,
87
+ "line_end": 2
88
+ }
89
+ ]
90
+ },
91
+ {
92
+ "id": "rel_002",
93
+ "source": "agent_001",
94
+ "target": "task_001",
95
+ "type": "PERFORMS",
96
+ "importance": "HIGH",
97
+ "interaction_prompt": "",
98
+ "interaction_prompt_ref": [
99
+ {
100
+ "line_start": 2,
101
+ "line_end": 2
102
+ }
103
+ ]
104
+ },
105
+ {
106
+ "id": "rel_003",
107
+ "source": "task_001",
108
+ "target": "output_001",
109
+ "type": "PRODUCES",
110
+ "importance": "HIGH",
111
+ "interaction_prompt": "",
112
+ "interaction_prompt_ref": [
113
+ {
114
+ "line_start": 4,
115
+ "line_end": 4
116
+ }
117
+ ]
118
+ },
119
+ {
120
+ "id": "rel_004",
121
+ "source": "output_001",
122
+ "target": "human_001",
123
+ "type": "DELIVERS_TO",
124
+ "importance": "HIGH",
125
+ "interaction_prompt": "",
126
+ "interaction_prompt_ref": [
127
+ {
128
+ "line_start": 4,
129
+ "line_end": 4
130
+ }
131
+ ]
132
+ }
133
+ ],
134
+ "failures": [],
135
+ "optimizations": []
136
+ },
137
+ "input_trace_length": 184,
138
+ "input_trace_preview": "<L1> {\n<L2> \"agent_name\": \"assistant\",\n<L3> \"input\": \"user message\",\n<L4> \"output\": \"agent response\",\n<L5> \"timestamp\": \"2024-08-31\",\n<L6> \"trace_id\": \"test-trace-123\"\n<L7> }"
139
+ }