ibibek committed on
Commit
0d6c495
·
1 Parent(s): da6425a

Upload 2 files

Browse files
aeo_ex_generator/__init__.py ADDED
File without changes
aeo_ex_generator/aeo_example_generator.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import json
4
+ import rdflib
5
+
6
+
7
class ExampleGenerator:
    """Builds JSON-LD examples for the Adversary Engagement Ontology by
    feeding ontology text and a modeling-rule prompt to an OpenAI chat model.
    """

    def __init__(self):
        # ontology filename -> raw file contents
        self.ontologies = {}
        # filenames loaded so far, in insertion order
        self.ontology_files = []
        # ontology filename -> generated rule prompt text
        self.rules = {}
+ def add_ontology(self, onto):
13
+ if onto in self.ontology_files:
14
+ raise ValueError("Ontology file already exists.")
15
+ else:
16
+ onto_data = self.get_ontology_file(onto)
17
+ if onto_data:
18
+ self.ontology_files.append(onto)
19
+ self.ontologies[onto] = self.get_ontology_file(onto)
20
+ self.rules[onto] = self.generate_rule(onto)
21
+ else:
22
+ raise ValueError("Ontology file error.")
23
+ def get_ontology_file(self,filename):
24
+ text = ""
25
+ if os.path.isfile(filename):
26
+ with open(filename,'r') as f:
27
+ text = f.read()
28
+ f.close()
29
+ return text
30
+ else:
31
+ raise ValueError("Invalid filename.")
32
+ def ChatGPTTextSplitter(self,text):
33
+ """Splits text in smaller subblocks to feed to the LLM"""
34
+ prompt = f"""The total length of content that I want to send you is too large to send in only one piece.
35
+
36
+ For sending you that content, I will follow this rule:
37
+
38
+ [START PART 1/10]
39
+ this is the content of the part 1 out of 10 in total
40
+ [END PART 1/10]
41
+
42
+ Then you just answer: "Instructions Sent."
43
+
44
+ And when I tell you "ALL PARTS SENT", then you can continue processing the data and answering my requests.
45
+ """
46
+ if type(text) == str:
47
+ textsize = 12000
48
+ blocksize = int(len(text) / textsize)
49
+ if blocksize > 0:
50
+ yield prompt
51
+
52
+ for b in range(1,blocksize+1):
53
+ if b < blocksize+1:
54
+ prompt = f"""Do not answer yet. This is just another part of the text I want to send you. Just receive and acknowledge as "Part {b}/{blocksize} received" and wait for the next part.
55
+ [START PART {b}/{blocksize}]
56
+ {text[(b-1)*textsize:b*textsize]}
57
+ [END PART {b}/{blocksize}]
58
+ Remember not answering yet. Just acknowledge you received this part with the message "Part {b}/{blocksize} received" and wait for the next part.
59
+ """
60
+ yield prompt
61
+ else:
62
+ prompt = f"""
63
+ [START PART {b}/{blocksize}]
64
+ {text[(b-1)*textsize:b*textsize]}
65
+ [END PART {b}/{blocksize}]
66
+ ALL PARTS SENT. Now you can continue processing the request.
67
+ """
68
+ yield prompt
69
+ else:
70
+ yield text
71
+ elif type(text) == list:
72
+ yield prompt
73
+
74
+ for n,block in enumerate(text):
75
+ if n+1 < len(text):
76
+ prompt = f"""Do not answer yet. This is just another part of the text I want to send you. Just receive and acknowledge as "Part {n+1}/{len(text)} received" and wait for the next part.
77
+ [START PART {n+1}/{len(text)}]
78
+ {text[n]}
79
+ [END PART {n+1}/{len(text)}]
80
+ Remember not answering yet. Just acknowledge you received this part with the message "Part {n+1}/{len(text)} received" and wait for the next part.
81
+ """
82
+ yield prompt
83
+ else:
84
+ prompt = f"""
85
+ [START PART {n+1}/{len(text)}]
86
+ {text[n]}
87
+ [END PART {n+1}/{len(text)}]
88
+ ALL PARTS SENT. Now you can continue processing the request.
89
+ """
90
+ yield prompt
91
+
92
+ def send_ontology(self):
93
+ ontology = ""
94
+ if len(self.ontologies) > 0:
95
+ for k,v in self.ontologies.items():
96
+ ontology+=v+"\n"
97
+ print("Sending Ontology in Parts")
98
+ for i in self.ChatGPTTextSplitter(ontology):
99
+ print(self.llm_api(i))
100
+ else:
101
+ raise ValueError("No loaded ontology to send.")
102
+ def llm_api(self,prompt,model="gpt-3.5-turbo"):
103
+ messages = [{
104
+ "role":"user",
105
+ "content":prompt
106
+ }]
107
+ res = openai.ChatCompletion.create(model=model,messages=messages,temperature=0)
108
+ return res.choices[0].message['content']
109
+
110
    def generate_rule(self, onto=None):
        """Return the raw modeling-rule prompt for the Adversary Engagement
        Ontology (AEO) / Unified Cyber Ontology (UCO).

        The returned text instructs an LLM how JSON-LD examples must be
        structured: which classes connect via which properties, the allowed
        action/objective vocabularies, and which namespace prefix maps to
        which IRI.

        NOTE(review): `onto` is accepted but never used in this body — the
        same rule text is returned for every ontology; confirm whether
        per-ontology rules were intended.
        """
        v = """Remember make a json-ld format example that only uses classes and properties terms from Adversary Engagement Ontology, Unified Cyber Ontology.

Each engagement:Narrative has property:
engagement:hasStoryline connects to an engagement:Storyline
Each engagement:Storyline has property:
engagement:hasEvent connects to a uco-types:Thread
Each uco-types:Thread has properties:
co:element contains all engagement:PlannedEvents
co:item contains all uco-types:ThreadItem one each for each engagement:PlannedEvent.
co:size
uco-types:threadOriginItem is the uco-types:ThreadItem for the first engagement:PlannedEvent
uco-types:threadTerminalItem is the uco-types:ThreadItem for the last engagement:PlannedEvent
Each co:size has properties:
@type as xsd:nonNegativeInteger
@value which is the number of uco-types:ThreadItem
Each uco-types:ThreadItem has property:
co:itemContent is the engagement:PlannedEvent
optional uco-types:threadNextItem is the next uco-types:ThreadItem for the next engagement:PlannedEvent if there is one,
optional uco-types:threadPreviousItem is the previous uco-types:ThreadItem for the previous engagement:PlannedEvent if there is one
Each engagement:PlannedEvent has property:
engagement:eventContext connects to one engagement action has property @type one of the following:
engagement:Access
engagement:Alert
engagement:Beacon
engagement:Deploy
engagement:Obfuscate
engagement:Respond
Each engagement action has properties:
@type is the action
uco-core:performer
uco-core:object connects to one of the following engagement deception object denoted as "EDO" objects:
engagement:Honeypot
engagement:Honeytoken
engagement:Breadcrumb
engagement:BreadcrumbTrail
engagement:LureObject
engagement:HoneyObject
engagement:Decoy
engagement:DataSource
Each "EDO" object has properties:
engagement:hasCharacterization connects to a uco-core:UcoObject
objective:hasObjective with @type objective:Objective and @id with one of the following instances:
objective:CommandAndControl
objective:CredentialAccess
objective:DevelopResource
objective:Discover
objective:EscalatePrivilege
objective:Evade
objective:Execute
objective:Exfilitrate
objective:GainInitialAccess
objective:Impact
objective:MoveLaterally
objective:Persist
objective:Reconnaissance
objective:Affect
objective:Collect
objective:Detect
objective:Direct
objective:Disrupt
objective:Elicit
objective:Expose
objective:Motivate
objective:Plan
objective:Prepare
objective:Prevent
objective:Reassure
objective:Analyze
objective:Deny
objective:ElicitBehavior
objective:Lure
objective:TimeSink
objective:Track
objective:Trap
uco-core:name is the objective
All people have property:
@type is uco-identity:Person
uco-core:hasFacet that connects to one of the following:
uco-identity:SimpleNameFacet which has the property:
uco-identity:familyName
uco-identity:givenName
Each uco-core:Role has properties:
@id is the role
uco-core:name is the role
Each uco-core:Role there is a uco-core:Relationship with properties:
uco-core:kindofRelationship is "has_Role"
uco-core:source connects to the person who has the role
uco-core:target connects to uco-core:Role
Each engagement:BreadcrumbTrail has property:
engagement:hasBreadcrumb connects to uco-types:Thread
This uco-types:Thread has property:
co:element contains all engagement:Breadcrumb that belong to this engagement:BreadcrumbTrail
co:item contains all uco-types:ThreadItem one each for each engagement:Breadcrumb
co:size
uco-types:threadOriginItem is the uco-types:ThreadItem for the first engagement:Breadcrumb belonging to this engagement:BreadcrumbTrail
uco-types:threadTerminalItem is the uco-types:ThreadItem for the last engagement:Breadcrumb belonging to this engagement:BreadcrumbTrail
Each engagement:Breadcrumb has the properties:
engagement:hasCharacterization which connects to a uco-core:UcoObject with the property:
uco-core:description which describes the object characterizing the breadcrumb
All classes must include property:
@type is the class
@id is a unique identifier

If namespace "engagement" prefix is used then https://ontology.adversaryengagement.org/ae/engagement#
If namespace "objective" prefix is used then https://ontology.adversaryengagement.org/ae/objective#
If namespace "role" prefix is used then https://ontology.adversaryengagement.org/ae/role#
If namespace "identity" prefix is used then https://ontology.adversaryengagement.org/ae/identity#
If namespace "uco-core" prefix is used then https://ontology.unifiedcyberontology.org/uco/core#
If namespace "uco-types" prefix is used then https://ontology.unifiedcyberontology.org/uco/types#
If namespace "uco-role" prefix is used then https://ontology.unifiedcyberontology.org/uco/role#
"""
        return v
+
225
+ def generate_continue(self):
226
+ v = """
227
+ continue
228
+ """
229
+ return v
230
+
231
+ def raw_prompt(self,description):
232
+
233
+ def run(val):
234
+ prompt = f"""Give me a full json-ld format example for the following scenario:
235
+ {description}
236
+
237
+ {"".join(val)}
238
+ """
239
+ for i in self.ChatGPTTextSplitter(prompt):
240
+ res = self.llm_api(i)
241
+ return res
242
+ # return json.loads(res)
243
+ res_val = run(self.generate_rule())
244
+ #res_val = run(self.generate_rules())
245
+ try:
246
+ val = json.loads(res_val)
247
+ return val
248
+ except:
249
+ #the response was cut off, prompt for the continuation.
250
+ data = []
251
+ data.append(res_val)
252
+ while True:
253
+ res = self.llm_api(self.generate_continue())
254
+ data.append(res)
255
+ try:
256
+ full = "".join(data)
257
+ return json.loads(full)
258
+ except:
259
+ pass
260
+
261
+ return None
262
+
263
+ def get_ns(self,string):
264
+ return string.split(":")[0]
265
+
266
+
267
+ def prompt(self,description):
268
+ res = self.raw_prompt(description)
269
+
270
+ #include only relevent namespaces
271
+ prefixes = []
272
+
273
+ def is_nested(LIST):
274
+ if type(LIST) == list:
275
+ for JSON in LIST:
276
+ for key in JSON.keys():
277
+ if type(JSON[key]) == dict:
278
+ is_nested(JSON[key])
279
+ if '@type' in JSON.keys():
280
+ prefixes.append(self.get_ns(JSON['@type']))
281
+ else:
282
+ JSON = LIST
283
+ for key in JSON.keys():
284
+ if type(JSON[key]) == dict:
285
+ is_nested(JSON[key])
286
+ if '@type' in JSON.keys():
287
+ prefixes.append(self.get_ns(JSON['@type']))
288
+
289
+
290
+ is_nested(res['@graph'])
291
+ prefixes = set(prefixes)
292
+
293
+ new_prefixes = {}
294
+ for prefix in prefixes:
295
+ if prefix in res['@context']:
296
+ new_prefixes[prefix] = res['@context'][prefix]
297
+
298
+ res['@context'] = new_prefixes
299
+
300
+ return res