# AYI-NEDJIMI's picture
# Upload app.py with huggingface_hub
# 9acd8ac verified
"""
MITRE ATT&CK Explorer - Interactive Gradio Application
Explore MITRE ATT&CK Framework data in English and French
"""
import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datasets import load_dataset
import json
from typing import Dict, List, Tuple
# Global data cache, filled by load_data().
# Maps a language code ("en" / "fr") to a dict of table name -> DataFrame,
# or to None when loading that language failed.
data_cache = {}
def load_data():
    """Load the MITRE ATT&CK tables from HuggingFace for both languages.

    Every JSON file of a repository is fetched with its own ``load_dataset``
    call. Passing all files at once as a ``{name: file}`` mapping turns each
    name into a split of a single dataset, so the result has no "train"
    split to index and the files would have to share one schema.

    Returns:
        The module-level ``data_cache``. Each language code maps to a dict of
        table name -> DataFrame, or to ``None`` when loading that language
        failed (the error is printed, not raised).
    """
    languages = {
        "en": "AYI-NEDJIMI/mitre-attack-en",
        "fr": "AYI-NEDJIMI/mitre-attack-fr",
    }
    table_files = {
        "tactics": "tactics.json",
        "techniques": "techniques.json",
        "mitigations": "mitigations.json",
        "groups": "groups.json",
        "qa": "qa_dataset.json",
    }

    for lang, repo in languages.items():
        try:
            print(f"Loading {lang.upper()} dataset...")
            tables = {}
            for table_name, filename in table_files.items():
                # A single file given as data_files is exposed as the
                # default "train" split; split="train" returns it directly.
                records = load_dataset(repo, data_files=filename, split="train")
                # Build the frame from plain row dicts so list-valued fields
                # stay Python lists (the rest of the module checks for list).
                tables[table_name] = pd.DataFrame(list(records))
            data_cache[lang] = tables
            print(f"Loaded {lang.upper()}: {len(tables['tactics'])} tactics, "
                  f"{len(tables['techniques'])} techniques")
        except Exception as e:
            # Startup boundary: keep the app usable for the other language
            # and mark this one as unavailable.
            print(f"Error loading {lang.upper()} data: {e}")
            data_cache[lang] = None
    return data_cache
def convert_list_to_string(val):
    """Render a list as comma-separated text; return any other value as is.

    Falsy list items (empty strings, None, 0, ...) are left out of the
    joined text.
    """
    if not isinstance(val, list):
        return val
    kept_items = [str(item) for item in val if item]
    return ", ".join(kept_items)
def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame:
    """Build a display-ready copy of ``df``.

    Args:
        df: Source table; ``None`` or a table without rows yields a new
            empty DataFrame.
        exclude_cols: Column names to leave out; names that are not present
            in ``df`` are ignored.

    Returns:
        A copy of ``df`` without the excluded columns, in which every cell
        has been passed through ``convert_list_to_string``.
    """
    nothing_to_show = df is None or df.empty
    if nothing_to_show:
        return pd.DataFrame()

    display = df.copy()
    if exclude_cols:
        present = [name for name in exclude_cols if name in display.columns]
        display = display.drop(columns=present)

    # Flatten list cells so the table widget shows readable text
    for name in display.columns:
        display[name] = display[name].apply(convert_list_to_string)
    return display
def get_tactics_df(lang: str) -> pd.DataFrame:
    """Return the tactics table for ``lang``, ready for display.

    An empty DataFrame is returned when the language is unknown or its data
    is unavailable. The ``source_url`` column is left out.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()
    return prepare_dataframe(tables["tactics"], exclude_cols=["source_url"])
def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame:
    """Return the techniques table for ``lang`` with optional filters.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Case-insensitive text looked up literally (not as a regular
            expression) in the ``name``, ``description`` and ``id`` columns.
            Blank or ``None`` disables the search; surrounding whitespace is
            ignored.
        tactic_filter: Tactic name to keep; empty or ``"All"`` disables the
            filter. Matching is a case-insensitive substring test.

    Returns:
        A display-ready DataFrame without ``source_url`` and
        ``sub_techniques``; empty when the language is unavailable or
        nothing matches.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()
    df = data_cache[lang]["techniques"]
    if df.empty:
        return pd.DataFrame()

    # Apply search filter
    needle = (search or "").strip().lower()
    if needle:
        # regex=False: user text such as "(" or "c++" must be matched
        # literally instead of being compiled as a pattern.
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
        ]

    # Apply tactic filter
    if tactic_filter and tactic_filter != "All":
        # The tactic column may hold lists as well as strings (the chart and
        # filter builders handle both), so compare against its text form;
        # the .str accessor alone would drop every list-valued row.
        df = df[
            df["tactic"].astype(str).str.contains(
                tactic_filter, case=False, na=False, regex=False
            )
        ]

    return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"])
def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the mitigations table for ``lang``, optionally searched.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Case-insensitive text looked up literally (not as a regular
            expression) in the ``name``, ``description`` and ``id`` columns.
            Blank or ``None`` disables the search; surrounding whitespace is
            ignored.

    Returns:
        A display-ready DataFrame without ``source_url``; empty when the
        language is unavailable or nothing matches.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()
    df = data_cache[lang]["mitigations"]
    if df.empty:
        return pd.DataFrame()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: characters like "(" or "+" in the search box must not
        # be interpreted as a pattern (they would raise or mis-match).
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])
def get_groups_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the APT groups table for ``lang``, optionally searched.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Case-insensitive text looked up literally (not as a regular
            expression) in the ``name``, ``description``, ``id`` and
            ``aliases`` columns. Blank or ``None`` disables the search;
            surrounding whitespace is ignored.

    Returns:
        A display-ready DataFrame without ``source_url``; empty when the
        language is unavailable or nothing matches.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()
    df = data_cache[lang]["groups"]
    if df.empty:
        return pd.DataFrame()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: treat the user's text literally so special characters
        # cannot raise a pattern error or change the meaning of the search.
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
            # aliases is stringified first so non-string values (e.g. lists)
            # can be searched as text
            | df["aliases"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])
def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame:
    """Return the Q&A table for ``lang`` with optional filters.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Case-insensitive text looked up literally (not as a regular
            expression) in the ``question``, ``answer`` and ``keywords``
            columns. Blank or ``None`` disables the search; surrounding
            whitespace is ignored.
        category_filter: Category to keep (case-insensitive exact match);
            empty or ``"All"`` disables the filter.

    Returns:
        A display-ready DataFrame without ``source_url``; empty when the
        language is unavailable or nothing matches.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()
    df = data_cache[lang]["qa"]
    if df.empty:
        return pd.DataFrame()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: user text must be matched literally; as a pattern,
        # input such as "(" would raise and "." would match anything.
        df = df[
            df["question"].str.lower().str.contains(needle, na=False, regex=False)
            | df["answer"].str.lower().str.contains(needle, na=False, regex=False)
            # keywords is stringified first so non-string values (e.g. lists)
            # can be searched as text
            | df["keywords"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        ]

    if category_filter and category_filter != "All":
        df = df[df["category"].str.lower() == category_filter.lower()]

    return prepare_dataframe(df, exclude_cols=["source_url"])
def create_tactic_chart(lang: str):
    """Build the "Techniques per Tactic" bar chart for ``lang``.

    Returns an empty figure when the language is unavailable, the techniques
    table has no rows, or no tactic name could be collected.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    techniques = tables["techniques"]
    if techniques.empty:
        return go.Figure()

    def tactic_names(value):
        # A cell is either a list of names or one comma-separated string;
        # any other type contributes nothing.
        if isinstance(value, list):
            return value
        if isinstance(value, str):
            return [piece.strip() for piece in value.split(",")]
        return []

    totals = {}
    for cell in techniques["tactic"]:
        for name in tactic_names(cell):
            totals[name] = totals.get(name, 0) + 1

    if not totals:
        return go.Figure()

    per_tactic = pd.DataFrame(list(totals.items()), columns=["Tactic", "Count"])
    per_tactic = per_tactic.sort_values("Count", ascending=False)

    figure = px.bar(
        per_tactic,
        x="Tactic",
        y="Count",
        title="Techniques per Tactic",
        labels={"Count": "Number of Techniques"},
        color="Count",
        color_continuous_scale="Reds",
    )
    figure.update_layout(height=400, xaxis_tickangle=-45)
    return figure
def create_groups_chart(lang: str):
    """Build the horizontal "Top 10 APT Groups by Techniques Used" chart.

    Returns an empty figure when the language is unavailable or the groups
    table has no rows.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    groups = tables["groups"]
    if groups.empty:
        return go.Figure()

    def technique_total(record):
        # Only list values are counted; a missing or non-list field is 0.
        used = record.get("techniques_used", [])
        return len(used) if isinstance(used, list) else 0

    tallies = [
        {"name": record["name"], "count": technique_total(record)}
        for _, record in groups.iterrows()
    ]
    if not tallies:
        return go.Figure()

    ranked = pd.DataFrame(tallies).sort_values("count", ascending=False)
    top_ten = ranked.head(10)

    figure = px.bar(
        top_ten,
        y="name",
        x="count",
        title="Top 10 APT Groups by Techniques Used",
        labels={"count": "Techniques", "name": "APT Group"},
        color="count",
        color_continuous_scale="Oranges",
        orientation="h",
    )
    figure.update_layout(height=400)
    return figure
def update_all_filters(lang: str):
    """Build the dropdown updates that go with a language switch.

    Returns:
        A 3-tuple of Gradio updates:
        1. tactic dropdown: choices ``"All"`` plus every tactic name found
           in the techniques table, value reset to ``"All"``;
        2. Q&A category dropdown: choices ``"All"`` plus every category,
           value reset to ``"All"``;
        3. a no-op update. The tuple keeps three elements for existing
           wiring, but the third slot must not carry a value or choices:
           whatever component it is bound to would be overwritten.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return (
            gr.update(choices=["All"], value="All"),
            gr.update(choices=["All"], value="All"),
            gr.update(),
        )

    techniques_df = data_cache[lang]["techniques"]
    qa_df = data_cache[lang]["qa"]

    # Get unique tactics; a cell is either a list of names or one
    # comma-separated string
    tactics = set()
    for cell in techniques_df["tactic"]:
        if isinstance(cell, list):
            names = cell
        elif isinstance(cell, str):
            names = cell.split(",")
        else:
            continue
        for name in names:
            # Skip blanks (e.g. from "" or a trailing comma) and non-text
            # items so the dropdown never offers an empty or unsortable entry
            if isinstance(name, str) and name.strip():
                tactics.add(name.strip())
    tactic_choices = ["All"] + sorted(tactics)

    # Get unique categories from QA; missing values are dropped so sorting
    # never has to compare NaN/None with strings
    categories = ["All"] + sorted(
        str(category) for category in qa_df["category"].dropna().unique()
    )

    # Reset both values: the previous selection may not exist among the
    # choices of the newly selected language
    return (
        gr.update(choices=tactic_choices, value="All"),
        gr.update(choices=categories, value="All"),
        gr.update(),
    )
# Load data at startup: fill data_cache before the interface is built,
# because the table components below read their initial values from it.
print("Initializing MITRE ATT&CK Explorer...")
load_data()
# Create Gradio interface
with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# MITRE ATT&CK Explorer")
    gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups")

    # Language selector
    with gr.Row():
        language = gr.Radio(
            choices=["English", "Français"],
            value="English",
            label="Language / Langue",
            interactive=True
        )

    # Tabs
    with gr.Tabs():
        # Tactics Tab
        with gr.TabItem("Tactics"):
            with gr.Row():
                tactics_search = gr.Textbox(
                    placeholder="Search tactics...",
                    label="Search",
                    scale=1
                )
            tactics_df = gr.Dataframe(
                value=get_tactics_df("en"),
                interactive=False,
                label="Tactics"
            )

        # Techniques Tab
        with gr.TabItem("Techniques"):
            with gr.Row():
                techniques_search = gr.Textbox(
                    placeholder="Search techniques by name, ID, or description...",
                    label="Search",
                    scale=2
                )
                tactic_filter = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Tactic",
                    scale=1
                )
            techniques_df = gr.Dataframe(
                value=get_techniques_df("en"),
                interactive=False,
                label="Techniques"
            )

        # Mitigations Tab
        with gr.TabItem("Mitigations"):
            with gr.Row():
                mitigations_search = gr.Textbox(
                    placeholder="Search mitigations...",
                    label="Search",
                    scale=1
                )
            mitigations_df = gr.Dataframe(
                value=get_mitigations_df("en"),
                interactive=False,
                label="Mitigations"
            )

        # APT Groups Tab
        with gr.TabItem("APT Groups"):
            with gr.Row():
                groups_search = gr.Textbox(
                    placeholder="Search groups by name, aliases, or description...",
                    label="Search",
                    scale=1
                )
            groups_df = gr.Dataframe(
                value=get_groups_df("en"),
                interactive=False,
                label="APT Groups"
            )

        # Q&A Tab
        with gr.TabItem("Q&A"):
            with gr.Row():
                qa_search = gr.Textbox(
                    placeholder="Search Q&A...",
                    label="Search",
                    scale=2
                )
                qa_category = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Category",
                    scale=1
                )
            qa_df = gr.Dataframe(
                value=get_qa_df("en"),
                interactive=False,
                label="Q&A Dataset"
            )

        # Statistics Tab
        with gr.TabItem("Statistics"):
            with gr.Row():
                tactics_chart = gr.Plot(label="Techniques per Tactic")
            with gr.Row():
                groups_chart = gr.Plot(label="Top APT Groups")

    # Footer
    gr.HTML("""
    <div style='text-align:center; padding:20px; color:#666;'>
    <p>Created by <a href='https://www.ayinedjimi-consultants.fr' target='_blank'>Ayi NEDJIMI</a> - Senior Offensive Cybersecurity & AI Consultant</p>
    <p><a href='https://www.linkedin.com/in/ayi-nedjimi' target='_blank'>LinkedIn</a> | <a href='https://github.com/ayinedjimi' target='_blank'>GitHub</a> | <a href='https://x.com/AyiNEDJIMI' target='_blank'>Twitter/X</a></p>
    </div>
    """)

    def _language_code(lang_choice):
        """Map the radio label to the key used in data_cache."""
        return "en" if lang_choice == "English" else "fr"

    # Language change handler
    def on_language_change(lang_choice):
        """Rebuild every table, both charts and both filter dropdowns."""
        lang = _language_code(lang_choice)
        # Only the two dropdown updates are used. The language selector is
        # deliberately not an output: writing to it from its own change
        # handler would overwrite the user's selection.
        tactic_update, category_update = update_all_filters(lang)[:2]
        return (
            get_tactics_df(lang),
            get_techniques_df(lang),
            get_mitigations_df(lang),
            get_groups_df(lang),
            get_qa_df(lang),
            create_tactic_chart(lang),
            create_groups_chart(lang),
            tactic_update,
            category_update,
        )

    # Search and filter handlers
    def on_tactics_search(lang_choice, search_text):
        """Filter the tactics table by name, description or id."""
        lang = _language_code(lang_choice)
        # data_cache[lang] is None when loading that language failed
        tables = data_cache.get(lang)
        if not tables:
            return pd.DataFrame()
        df = tables["tactics"]
        if df.empty:
            return pd.DataFrame()
        needle = (search_text or "").strip().lower()
        if needle:
            # regex=False: match the typed text literally; as a pattern,
            # input such as "(" would raise an error
            df = df[
                df["name"].str.lower().str.contains(needle, na=False, regex=False)
                | df["description"].str.lower().str.contains(needle, na=False, regex=False)
                | df["id"].str.lower().str.contains(needle, na=False, regex=False)
            ]
        return prepare_dataframe(df, exclude_cols=["source_url"])

    def on_techniques_search(lang_choice, search_text, tactic):
        """Filter the techniques table by search text and tactic."""
        return get_techniques_df(_language_code(lang_choice), search_text, tactic)

    def on_mitigations_search(lang_choice, search_text):
        """Filter the mitigations table by search text."""
        return get_mitigations_df(_language_code(lang_choice), search_text)

    def on_groups_search(lang_choice, search_text):
        """Filter the APT groups table by search text."""
        return get_groups_df(_language_code(lang_choice), search_text)

    def on_qa_search(lang_choice, search_text, category):
        """Filter the Q&A table by search text and category."""
        return get_qa_df(_language_code(lang_choice), search_text, category)

    # Register event handlers
    language_outputs = [
        tactics_df,
        techniques_df,
        mitigations_df,
        groups_df,
        qa_df,
        tactics_chart,
        groups_chart,
        tactic_filter,
        qa_category,
    ]
    language.change(
        fn=on_language_change,
        inputs=language,
        outputs=language_outputs
    )
    # Run the same refresh once on page load so the charts and the dropdown
    # choices are populated before the user ever touches the language radio.
    app.load(
        fn=on_language_change,
        inputs=language,
        outputs=language_outputs
    )
    tactics_search.change(
        fn=on_tactics_search,
        inputs=[language, tactics_search],
        outputs=tactics_df
    )
    techniques_search.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )
    tactic_filter.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )
    mitigations_search.change(
        fn=on_mitigations_search,
        inputs=[language, mitigations_search],
        outputs=mitigations_df
    )
    groups_search.change(
        fn=on_groups_search,
        inputs=[language, groups_search],
        outputs=groups_df
    )
    qa_search.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )
    qa_category.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )
# Start the Gradio server only when this file is executed as a script
if __name__ == "__main__":
    app.launch()