AYI-NEDJIMI committed on
Commit
9acd8ac
·
verified ·
1 Parent(s): 3977f96

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +500 -0
app.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MITRE ATT&CK Explorer - Interactive Gradio Application
3
+ Explore MITRE ATT&CK Framework data in English and French
4
+ """
5
+
6
+ import gradio as gr
7
+ import pandas as pd
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ from datasets import load_dataset
11
+ import json
12
+ from typing import Dict, List, Tuple
13
+
14
+ # Global data cache
15
+ data_cache = {}
16
+
17
def load_data():
    """Load the English and French MITRE ATT&CK datasets into ``data_cache``.

    Each JSON file of a repository is loaded on its own (as its ``train``
    split) and converted to a pandas DataFrame.  Loading the files one by one
    avoids forcing five differently-shaped tables into a single multi-split
    dataset, and avoids indexing a ``Dataset`` with ``["train"]`` (which would
    look up a *column* named "train").

    Returns:
        The module-level ``data_cache`` dict.  ``data_cache[lang]`` is a dict
        of DataFrames keyed by "tactics", "techniques", "mitigations",
        "groups" and "qa", or ``None`` when loading that language failed.
    """
    repositories = {
        "en": "AYI-NEDJIMI/mitre-attack-en",
        "fr": "AYI-NEDJIMI/mitre-attack-fr",
    }
    table_files = {
        "tactics": "tactics.json",
        "techniques": "techniques.json",
        "mitigations": "mitigations.json",
        "groups": "groups.json",
        "qa": "qa_dataset.json",
    }

    for lang, repo in repositories.items():
        try:
            print(f"Loading {lang.upper()} dataset...")
            frames = {}
            for table, filename in table_files.items():
                records = load_dataset(repo, data_files=filename, split="train")
                # Materialise as a list of dicts so list-valued fields stay
                # plain Python lists (the rest of the app checks isinstance list).
                frames[table] = pd.DataFrame(list(records))
            data_cache[lang] = frames

            print(f"Loaded {lang.upper()}: {len(frames['tactics'])} tactics, "
                  f"{len(frames['techniques'])} techniques")

        except Exception as e:
            # Deliberate best-effort: a failed download must not prevent the
            # UI from starting; accessors treat a None entry as "no data".
            print(f"Error loading {lang.upper()} data: {e}")
            data_cache[lang] = None

    return data_cache
57
+
58
def convert_list_to_string(val):
    """Flatten a list into a ", "-separated string; pass other values through.

    Falsy list items (empty strings, None, 0, ...) are left out of the result.
    """
    if not isinstance(val, list):
        return val
    kept = [str(item) for item in val if item]
    return ", ".join(kept)
63
+
64
def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame:
    """Return a display-ready copy of ``df``.

    The copy has the columns named in ``exclude_cols`` removed (names that are
    not present are ignored) and every list-valued cell turned into a
    comma-separated string.  ``None`` or an empty frame yields a fresh empty
    DataFrame.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    result = df.copy()

    if exclude_cols:
        present = [name for name in exclude_cols if name in result.columns]
        result = result.drop(columns=present)

    # Lists cannot be rendered nicely in the table widget, so flatten them.
    for column in result.columns:
        result[column] = result[column].apply(convert_list_to_string)

    return result
78
+
79
def get_tactics_df(lang: str) -> pd.DataFrame:
    """Return the display-ready tactics table for ``lang`` (empty if not loaded)."""
    lang_data = data_cache.get(lang)
    if lang_data is None:
        return pd.DataFrame()
    return prepare_dataframe(lang_data["tactics"], exclude_cols=["source_url"])
85
+
86
def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame:
    """Return the techniques table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache`` ("en" or "fr").
        search: Text matched case-insensitively, as a literal substring,
            against the ``name``, ``description`` and ``id`` columns.  Blank
            text disables the search.
        tactic_filter: Tactic name to keep; ``""`` or ``"All"`` disables the
            filter.

    Returns:
        A display-ready DataFrame without the ``source_url`` and
        ``sub_techniques`` columns, or an empty DataFrame when the language
        data is not loaded.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["techniques"].copy()

    # Apply search filter.  regex=False: the text comes straight from a user
    # textbox, so characters such as "(" or "+" must be matched literally
    # instead of being compiled as a regular expression.
    if search and search.strip():
        search_lower = search.lower()
        df = df[
            df["name"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["description"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["id"].str.lower().str.contains(search_lower, na=False, regex=False)
        ]

    # Apply tactic filter.  Tactic cells may be lists; flatten them first,
    # otherwise the .str accessor yields NaN for them and every row is dropped.
    if tactic_filter and tactic_filter != "All":
        tactic_text = df["tactic"].apply(convert_list_to_string)
        df = df[tactic_text.str.contains(tactic_filter, case=False, na=False, regex=False)]

    return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"])
107
+
108
def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the mitigations table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache`` ("en" or "fr").
        search: Text matched case-insensitively, as a literal substring,
            against the ``name``, ``description`` and ``id`` columns.  Blank
            text disables the search.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when the language data is not loaded.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["mitigations"].copy()

    if search and search.strip():
        search_lower = search.lower()
        # regex=False: user text must be matched literally, not compiled as a
        # regular expression (which fails on input such as "(" or "c++").
        df = df[
            df["name"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["description"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["id"].str.lower().str.contains(search_lower, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])
124
+
125
def get_groups_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the APT groups table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache`` ("en" or "fr").
        search: Text matched case-insensitively, as a literal substring,
            against the ``name``, ``description``, ``id`` and ``aliases``
            columns.  Blank text disables the search.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when the language data is not loaded.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["groups"].copy()

    if search and search.strip():
        search_lower = search.lower()
        # regex=False: user text must be matched literally, not compiled as a
        # regular expression (which fails on input such as "(" or "c++").
        # aliases is stringified first so the .str accessor can be used on it.
        df = df[
            df["name"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["description"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["id"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["aliases"].astype(str).str.lower().str.contains(search_lower, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])
142
+
143
def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame:
    """Return the Q&A table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache`` ("en" or "fr").
        search: Text matched case-insensitively, as a literal substring,
            against the ``question``, ``answer`` and ``keywords`` columns.
            Blank text disables the search.
        category_filter: Category to keep (compared case-insensitively for
            equality); ``""`` or ``"All"`` disables the filter.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when the language data is not loaded.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["qa"].copy()

    if search and search.strip():
        search_lower = search.lower()
        # regex=False: user text must be matched literally, not compiled as a
        # regular expression (which fails on input such as "(" or "c++").
        # keywords is stringified first so the .str accessor can be used on it.
        df = df[
            df["question"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["answer"].str.lower().str.contains(search_lower, na=False, regex=False) |
            df["keywords"].astype(str).str.lower().str.contains(search_lower, na=False, regex=False)
        ]

    if category_filter and category_filter != "All":
        df = df[df["category"].str.lower() == category_filter.lower()]

    return prepare_dataframe(df, exclude_cols=["source_url"])
162
+
163
def create_tactic_chart(lang: str):
    """Build a bar chart of how many techniques reference each tactic.

    Returns an empty figure when the language is not loaded, the techniques
    table is empty, or no tactic could be counted.
    """
    lang_data = data_cache.get(lang)
    if lang_data is None:
        return go.Figure()

    techniques = lang_data["techniques"]
    if techniques.empty:
        return go.Figure()

    # A tactic cell is either a list of names or a comma-separated string;
    # cells of any other type are skipped.
    counts = {}
    for cell in techniques["tactic"]:
        if isinstance(cell, list):
            names = cell
        elif isinstance(cell, str):
            names = [part.strip() for part in cell.split(",")]
        else:
            continue
        for name in names:
            counts[name] = counts.get(name, 0) + 1

    if not counts:
        return go.Figure()

    chart_df = pd.DataFrame(list(counts.items()), columns=["Tactic", "Count"])
    chart_df = chart_df.sort_values("Count", ascending=False)

    fig = px.bar(
        chart_df,
        x="Tactic",
        y="Count",
        title="Techniques per Tactic",
        labels={"Count": "Number of Techniques"},
        color="Count",
        color_continuous_scale="Reds",
    )
    fig.update_layout(height=400, xaxis_tickangle=-45)
    return fig
202
+
203
def create_groups_chart(lang: str):
    """Build a horizontal bar chart of the ten groups using the most techniques.

    A group's score is the length of its ``techniques_used`` list; a missing
    or non-list value counts as zero.  Returns an empty figure when the
    language is not loaded or there are no groups.
    """
    lang_data = data_cache.get(lang)
    if lang_data is None:
        return go.Figure()

    groups = lang_data["groups"]
    if groups.empty:
        return go.Figure()

    tallies = []
    for _, group in groups.iterrows():
        used = group.get("techniques_used", [])
        tallies.append({
            "name": group["name"],
            "count": len(used) if isinstance(used, list) else 0,
        })

    if not tallies:
        return go.Figure()

    ranked = pd.DataFrame(tallies).sort_values("count", ascending=False)
    top_ten = ranked.head(10)

    fig = px.bar(
        top_ten,
        y="name",
        x="count",
        title="Top 10 APT Groups by Techniques Used",
        labels={"count": "Techniques", "name": "APT Group"},
        color="count",
        color_continuous_scale="Oranges",
        orientation="h",
    )
    fig.update_layout(height=400)
    return fig
244
+
245
def update_all_filters(lang: str):
    """Compute dropdown updates for the tactic and Q&A category filters.

    Args:
        lang: Key into ``data_cache`` ("en" or "fr").

    Returns:
        A 3-tuple of Gradio updates: (tactic dropdown, category dropdown,
        no-op).  Both dropdowns are reset to "All" because the previously
        selected value may not exist among the new language's choices.  The
        third element is an empty ``gr.update()`` so that a caller wiring it
        to another component (the language selector in this app) leaves that
        component untouched instead of clearing it or replacing its choices.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return (
            gr.update(choices=["All"], value="All"),
            gr.update(choices=["All"], value="All"),
            gr.update(),
        )

    techniques_df = data_cache[lang]["techniques"]
    qa_df = data_cache[lang]["qa"]

    # Collect unique tactics; a cell is either a list of names or a
    # comma-separated string.
    tactics = set()
    for cell in techniques_df["tactic"]:
        if isinstance(cell, list):
            tactics.update(cell)
        elif isinstance(cell, str):
            tactics.update(part.strip() for part in cell.split(","))

    # Drop empty/None entries: they are meaningless as a choice and None
    # would make sorted() fail on mixed types.
    tactic_choices = ["All"] + sorted(name for name in tactics if name)

    # Unique Q&A categories; nulls removed for the same sorting reason.
    categories = ["All"] + sorted(qa_df["category"].dropna().unique().tolist())

    return (
        gr.update(choices=tactic_choices, value="All"),
        gr.update(choices=categories, value="All"),
        gr.update(),
    )
275
+
276
# Load data at startup
print("Initializing MITRE ATT&CK Explorer...")
load_data()

# Create Gradio interface: a language selector, one tab per table (each with
# its own search box / filters), a statistics tab, and the event wiring.
with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# MITRE ATT&CK Explorer")
    gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups")

    # Language selector
    with gr.Row():
        language = gr.Radio(
            choices=["English", "Français"],
            value="English",
            label="Language / Langue",
            interactive=True
        )

    # Tabs
    with gr.Tabs():
        # Tactics Tab
        with gr.TabItem("Tactics"):
            with gr.Row():
                tactics_search = gr.Textbox(
                    placeholder="Search tactics...",
                    label="Search",
                    scale=1
                )
            tactics_df = gr.Dataframe(
                value=get_tactics_df("en"),
                interactive=False,
                label="Tactics"
            )

        # Techniques Tab
        with gr.TabItem("Techniques"):
            with gr.Row():
                techniques_search = gr.Textbox(
                    placeholder="Search techniques by name, ID, or description...",
                    label="Search",
                    scale=2
                )
                tactic_filter = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Tactic",
                    scale=1
                )
            techniques_df = gr.Dataframe(
                value=get_techniques_df("en"),
                interactive=False,
                label="Techniques"
            )

        # Mitigations Tab
        with gr.TabItem("Mitigations"):
            with gr.Row():
                mitigations_search = gr.Textbox(
                    placeholder="Search mitigations...",
                    label="Search",
                    scale=1
                )
            mitigations_df = gr.Dataframe(
                value=get_mitigations_df("en"),
                interactive=False,
                label="Mitigations"
            )

        # APT Groups Tab
        with gr.TabItem("APT Groups"):
            with gr.Row():
                groups_search = gr.Textbox(
                    placeholder="Search groups by name, aliases, or description...",
                    label="Search",
                    scale=1
                )
            groups_df = gr.Dataframe(
                value=get_groups_df("en"),
                interactive=False,
                label="APT Groups"
            )

        # Q&A Tab
        with gr.TabItem("Q&A"):
            with gr.Row():
                qa_search = gr.Textbox(
                    placeholder="Search Q&A...",
                    label="Search",
                    scale=2
                )
                qa_category = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Category",
                    scale=1
                )
            qa_df = gr.Dataframe(
                value=get_qa_df("en"),
                interactive=False,
                label="Q&A Dataset"
            )

        # Statistics Tab
        with gr.TabItem("Statistics"):
            with gr.Row():
                tactics_chart = gr.Plot(label="Techniques per Tactic")
            with gr.Row():
                groups_chart = gr.Plot(label="Top APT Groups")

    # Footer
    gr.HTML("""
    <div style='text-align:center; padding:20px; color:#666;'>
    <p>Created by <a href='https://www.ayinedjimi-consultants.fr' target='_blank'>Ayi NEDJIMI</a> - Senior Offensive Cybersecurity & AI Consultant</p>
    <p><a href='https://www.linkedin.com/in/ayi-nedjimi' target='_blank'>LinkedIn</a> | <a href='https://github.com/ayinedjimi' target='_blank'>GitHub</a> | <a href='https://x.com/AyiNEDJIMI' target='_blank'>Twitter/X</a></p>
    </div>
    """)

    def _lang_code(lang_choice):
        """Map the radio label to the ``data_cache`` key ("en" or "fr")."""
        return "en" if lang_choice == "English" else "fr"

    # Language change handler
    def on_language_change(lang_choice):
        """Rebuild every table, both charts and both filter dropdowns."""
        lang = _lang_code(lang_choice)
        # Only the two dropdown updates are used; the language radio is the
        # input of this handler and must not be overwritten by its result.
        tactic_update, category_update = update_all_filters(lang)[:2]
        return (
            get_tactics_df(lang),
            get_techniques_df(lang),
            get_mitigations_df(lang),
            get_groups_df(lang),
            get_qa_df(lang),
            create_tactic_chart(lang),
            create_groups_chart(lang),
            tactic_update,
            category_update,
        )

    # Search and filter handlers
    def on_tactics_search(lang_choice, search_text):
        """Filter the tactics table by a literal, case-insensitive substring."""
        # A language whose download failed is stored as None in the cache.
        lang_data = data_cache.get(_lang_code(lang_choice))
        if lang_data is None:
            return pd.DataFrame()
        df = lang_data["tactics"]
        if df.empty:
            return pd.DataFrame()
        df = df.copy()
        if search_text and search_text.strip():
            search_lower = search_text.lower()
            # regex=False: user text is matched literally, never compiled as
            # a regular expression.
            df = df[
                df["name"].str.lower().str.contains(search_lower, na=False, regex=False) |
                df["description"].str.lower().str.contains(search_lower, na=False, regex=False) |
                df["id"].str.lower().str.contains(search_lower, na=False, regex=False)
            ]
        return prepare_dataframe(df, exclude_cols=["source_url"])

    def on_techniques_search(lang_choice, search_text, tactic):
        return get_techniques_df(_lang_code(lang_choice), search_text, tactic)

    def on_mitigations_search(lang_choice, search_text):
        return get_mitigations_df(_lang_code(lang_choice), search_text)

    def on_groups_search(lang_choice, search_text):
        return get_groups_df(_lang_code(lang_choice), search_text)

    def on_qa_search(lang_choice, search_text, category):
        return get_qa_df(_lang_code(lang_choice), search_text, category)

    # Components refreshed whenever the language changes (order matches the
    # tuple returned by on_language_change).
    refresh_outputs = [
        tactics_df,
        techniques_df,
        mitigations_df,
        groups_df,
        qa_df,
        tactics_chart,
        groups_chart,
        tactic_filter,
        qa_category,
    ]

    # Register event handlers
    language.change(
        fn=on_language_change,
        inputs=language,
        outputs=refresh_outputs
    )

    # Populate the filter dropdowns and the charts on first page load; they
    # are created above with placeholder/empty values.
    app.load(
        fn=on_language_change,
        inputs=language,
        outputs=refresh_outputs
    )

    tactics_search.change(
        fn=on_tactics_search,
        inputs=[language, tactics_search],
        outputs=tactics_df
    )

    techniques_search.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    tactic_filter.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    mitigations_search.change(
        fn=on_mitigations_search,
        inputs=[language, mitigations_search],
        outputs=mitigations_df
    )

    groups_search.change(
        fn=on_groups_search,
        inputs=[language, groups_search],
        outputs=groups_df
    )

    qa_search.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )

    qa_category.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )

if __name__ == "__main__":
    app.launch()