Spaces:

AYI-NEDJIMI
/

mitre-attack-explorer

Paused

App Files Files Community

mitre-attack-explorer / app.py

AYI-NEDJIMI

Upload app.py with huggingface_hub

9acd8ac verified 17 days ago

raw

history blame contribute delete

16.3 kB

	"""
	MITRE ATT&CK Explorer - Interactive Gradio Application
	Explore MITRE ATT&CK Framework data in English and French
	"""

	import gradio as gr
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from datasets import load_dataset
	import json
	from typing import Dict, List, Tuple

	# Global data cache, filled by load_data(): maps a language code ("en", "fr")
	# to a dict of DataFrames keyed by table name ("tactics", "techniques",
	# "mitigations", "groups", "qa"), or to None when loading that language failed.
	data_cache = {}

	def load_data():
	    """Load the MITRE ATT&CK tables for every supported language into ``data_cache``.

	    Each JSON file of a language repo is fetched with its own ``load_dataset``
	    call and converted to a DataFrame. A language is cached only when all of
	    its tables loaded; on any failure the error is printed and the language
	    is cached as ``None`` so callers can fall back to empty output.

	    Returns:
	        The module-level ``data_cache`` dict, keyed by language code.
	    """
	    languages = {
	        "en": "AYI-NEDJIMI/mitre-attack-en",
	        "fr": "AYI-NEDJIMI/mitre-attack-fr",
	    }
	    table_files = {
	        "tactics": "tactics.json",
	        "techniques": "techniques.json",
	        "mitigations": "mitigations.json",
	        "groups": "groups.json",
	        "qa": "qa_dataset.json",
	    }

	    for lang, repo in languages.items():
	        print(f"Loading {lang.upper()} dataset...")
	        try:
	            frames = {}
	            for table, filename in table_files.items():
	                # Passing a {name: file} mapping makes the names themselves the
	                # splits, so there is no nested "train" level to index into.
	                # Loading one file at a time with split="train" returns the
	                # records of that file directly.
	                records = load_dataset(repo, data_files=filename, split="train")
	                frames[table] = pd.DataFrame(records)
	        except Exception as e:
	            # Best effort: a language that cannot be loaded must not stop the
	            # app from starting with the other one.
	            print(f"Error loading {lang.upper()} data: {e}")
	            data_cache[lang] = None
	        else:
	            data_cache[lang] = frames
	            print(f"Loaded {lang.upper()}: {len(frames['tactics'])} tactics, "
	                  f"{len(frames['techniques'])} techniques")

	    return data_cache

	def convert_list_to_string(val):
	    """Render a list as one comma-separated string; pass anything else through.

	    Falsy items (``None``, ``""``, ``0``) are left out of the joined text and
	    the remaining items are converted with ``str``.
	    """
	    if not isinstance(val, list):
	        return val
	    kept_items = filter(None, val)  # filter(None, ...) keeps truthy items only
	    return ", ".join(map(str, kept_items))

	def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame:
	    """Build a display-ready copy of ``df``.

	    Args:
	        df: Source table; ``None`` or an empty frame yields a new empty frame.
	        exclude_cols: Column names to remove; names not present are ignored.

	    Returns:
	        A copy of ``df`` without the excluded columns, in which every cell has
	        been passed through ``convert_list_to_string``.
	    """
	    if df is None or df.empty:
	        return pd.DataFrame()

	    display_df = df.copy()
	    if exclude_cols:
	        # errors="ignore" skips names that are not columns of this frame.
	        display_df = display_df.drop(columns=list(exclude_cols), errors="ignore")

	    # Flatten list-valued cells column by column.
	    for column_name in display_df.columns:
	        display_df[column_name] = display_df[column_name].apply(convert_list_to_string)

	    return display_df

	def get_tactics_df(lang: str) -> pd.DataFrame:
	    """Return the display-ready tactics table for ``lang``.

	    An empty DataFrame is returned when ``lang`` has no cached data; otherwise
	    the table is passed through ``prepare_dataframe`` with ``source_url``
	    removed.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return pd.DataFrame()
	    return prepare_dataframe(lang_data["tactics"], exclude_cols=["source_url"])

	def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame:
	    """Return the techniques table for ``lang``, narrowed by search text and tactic.

	    Args:
	        lang: Language key into ``data_cache`` (e.g. "en" or "fr").
	        search: Free text matched case-insensitively, as a literal substring,
	            against the ``name``, ``description`` and ``id`` columns. Blank or
	            ``None`` disables the text filter.
	        tactic_filter: Tactic name to keep (case-insensitive substring of the
	            row's tactics); empty or "All" disables the filter.

	    Returns:
	        A display-ready copy (via ``prepare_dataframe``) without the
	        ``source_url`` and ``sub_techniques`` columns, or an empty DataFrame
	        when no data is cached for ``lang``.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return pd.DataFrame()

	    df = lang_data["techniques"].copy()

	    # Apply search filter
	    query = (search or "").strip().lower()
	    if query:
	        mask = pd.Series(False, index=df.index)
	        for column in ("name", "description", "id"):
	            # regex=False: the query is user input, so characters such as
	            # "(" or "+" must be matched literally instead of being compiled.
	            mask = mask | df[column].str.lower().str.contains(query, regex=False, na=False)
	        df = df[mask]

	    # Apply tactic filter
	    if tactic_filter and tactic_filter != "All":
	        # The tactic cell may be a list; the .str accessor yields NaN for lists,
	        # so flatten them to text first or every such row would be dropped.
	        tactic_text = df["tactic"].map(
	            lambda value: ", ".join(str(item) for item in value)
	            if isinstance(value, list) else value
	        )
	        df = df[tactic_text.str.contains(tactic_filter, case=False, regex=False, na=False)]

	    return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"])

	def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame:
	    """Return the mitigations table for ``lang``, optionally narrowed by search text.

	    Args:
	        lang: Language key into ``data_cache`` (e.g. "en" or "fr").
	        search: Free text matched case-insensitively, as a literal substring,
	            against the ``name``, ``description`` and ``id`` columns. Blank or
	            ``None`` disables the filter.

	    Returns:
	        A display-ready copy (via ``prepare_dataframe``) without the
	        ``source_url`` column, or an empty DataFrame when no data is cached
	        for ``lang``.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return pd.DataFrame()

	    df = lang_data["mitigations"].copy()

	    query = (search or "").strip().lower()
	    if query:
	        mask = pd.Series(False, index=df.index)
	        for column in ("name", "description", "id"):
	            # regex=False: user input is matched literally, so "(" or "+"
	            # cannot raise a pattern error or change the meaning of the search.
	            mask = mask | df[column].str.lower().str.contains(query, regex=False, na=False)
	        df = df[mask]

	    return prepare_dataframe(df, exclude_cols=["source_url"])

	def get_groups_df(lang: str, search: str = "") -> pd.DataFrame:
	    """Return the APT groups table for ``lang``, optionally narrowed by search text.

	    Args:
	        lang: Language key into ``data_cache`` (e.g. "en" or "fr").
	        search: Free text matched case-insensitively, as a literal substring,
	            against ``name``, ``description``, ``id`` and the text form of
	            ``aliases``. Blank or ``None`` disables the filter.

	    Returns:
	        A display-ready copy (via ``prepare_dataframe``) without the
	        ``source_url`` column, or an empty DataFrame when no data is cached
	        for ``lang``.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return pd.DataFrame()

	    df = lang_data["groups"].copy()

	    query = (search or "").strip().lower()
	    if query:
	        mask = pd.Series(False, index=df.index)
	        for column in ("name", "description", "id"):
	            # regex=False: user input is matched literally, never compiled.
	            mask = mask | df[column].str.lower().str.contains(query, regex=False, na=False)
	        # aliases is converted with astype(str) so non-string cells (e.g. lists)
	        # can still be searched through their text representation.
	        alias_text = df["aliases"].astype(str).str.lower()
	        mask = mask | alias_text.str.contains(query, regex=False, na=False)
	        df = df[mask]

	    return prepare_dataframe(df, exclude_cols=["source_url"])

	def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame:
	    """Return the Q&A table for ``lang``, narrowed by search text and category.

	    Args:
	        lang: Language key into ``data_cache`` (e.g. "en" or "fr").
	        search: Free text matched case-insensitively, as a literal substring,
	            against ``question``, ``answer`` and the text form of
	            ``keywords``. Blank or ``None`` disables the text filter.
	        category_filter: Category to keep (case-insensitive exact match);
	            empty or "All" disables the filter.

	    Returns:
	        A display-ready copy (via ``prepare_dataframe``) without the
	        ``source_url`` column, or an empty DataFrame when no data is cached
	        for ``lang``.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return pd.DataFrame()

	    df = lang_data["qa"].copy()

	    query = (search or "").strip().lower()
	    if query:
	        mask = pd.Series(False, index=df.index)
	        for column in ("question", "answer"):
	            # regex=False: user input is matched literally, never compiled.
	            mask = mask | df[column].str.lower().str.contains(query, regex=False, na=False)
	        # keywords is converted with astype(str) so non-string cells (e.g. lists)
	        # can still be searched through their text representation.
	        keyword_text = df["keywords"].astype(str).str.lower()
	        mask = mask | keyword_text.str.contains(query, regex=False, na=False)
	        df = df[mask]

	    if category_filter and category_filter != "All":
	        df = df[df["category"].str.lower() == str(category_filter).lower()]

	    return prepare_dataframe(df, exclude_cols=["source_url"])

	def create_tactic_chart(lang: str):
	    """Build the "Techniques per Tactic" bar chart for ``lang``.

	    Every technique contributes one count to each tactic in its ``tactic``
	    cell, which may be a list or a comma-separated string; other cell types
	    are skipped. An empty ``go.Figure`` is returned when there is no cached
	    data, no techniques, or nothing to count.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return go.Figure()

	    techniques_df = lang_data["techniques"]
	    if techniques_df.empty:
	        return go.Figure()

	    # Tally tactic names in first-seen order.
	    tally = {}
	    for cell in techniques_df["tactic"]:
	        if isinstance(cell, list):
	            names = cell
	        elif isinstance(cell, str):
	            names = [part.strip() for part in cell.split(",")]
	        else:
	            continue
	        for tactic_name in names:
	            tally[tactic_name] = tally.get(tactic_name, 0) + 1

	    if not tally:
	        return go.Figure()

	    tactic_df = pd.DataFrame(
	        {"Tactic": list(tally.keys()), "Count": list(tally.values())}
	    )
	    tactic_df = tactic_df.sort_values("Count", ascending=False)

	    fig = px.bar(
	        tactic_df,
	        x="Tactic",
	        y="Count",
	        title="Techniques per Tactic",
	        labels={"Count": "Number of Techniques"},
	        color="Count",
	        color_continuous_scale="Reds",
	    )
	    fig.update_layout(height=400, xaxis_tickangle=-45)
	    return fig

	def create_groups_chart(lang: str):
	    """Build the horizontal "Top 10 APT Groups by Techniques Used" bar chart.

	    A group's score is the length of its ``techniques_used`` cell when that
	    cell is a list, and 0 otherwise (including when the column is absent).
	    An empty ``go.Figure`` is returned when ``lang`` has no cached data or no
	    groups.
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return go.Figure()

	    groups_df = lang_data["groups"]
	    if groups_df.empty:
	        return go.Figure()

	    # Count techniques per group, column-wise instead of row by row.
	    if "techniques_used" in groups_df.columns:
	        technique_counts = [
	            len(cell) if isinstance(cell, list) else 0
	            for cell in groups_df["techniques_used"]
	        ]
	    else:
	        technique_counts = [0] * len(groups_df)

	    ranking = pd.DataFrame(
	        {"name": list(groups_df["name"]), "count": technique_counts}
	    )
	    top_ten = ranking.sort_values("count", ascending=False).head(10)

	    fig = px.bar(
	        top_ten,
	        y="name",
	        x="count",
	        title="Top 10 APT Groups by Techniques Used",
	        labels={"count": "Techniques", "name": "APT Group"},
	        color="count",
	        color_continuous_scale="Oranges",
	        orientation="h",
	    )
	    fig.update_layout(height=400)
	    return fig

	def update_all_filters(lang: str):
	    """Compute the dropdown updates that go with a language switch.

	    Args:
	        lang: Language key into ``data_cache`` (e.g. "en" or "fr").

	    Returns:
	        A 3-tuple of Gradio updates:
	        1. tactic dropdown: "All" plus the sorted tactic names found in the
	           techniques table, with the selection reset to "All";
	        2. Q&A category dropdown: "All" plus the sorted categories, with the
	           selection reset to "All";
	        3. a no-op ``gr.update()``. The slot is kept so the tuple length is
	           unchanged, but it must not carry a value: it is wired to the
	           language selector, and ``None`` or a ``choices`` update there
	           would clear or overwrite that selector.
	        When ``lang`` has no cached data both dropdowns offer only "All".
	    """
	    lang_data = data_cache.get(lang)
	    if lang_data is None:
	        return (
	            gr.update(choices=["All"], value="All"),
	            gr.update(choices=["All"], value="All"),
	            gr.update(),
	        )

	    # Get unique tactics (cells may be lists or comma-separated strings)
	    tactics = set()
	    for cell in lang_data["techniques"]["tactic"]:
	        if isinstance(cell, list):
	            names = cell
	        elif isinstance(cell, str):
	            names = cell.split(",")
	        else:
	            continue
	        for name in names:
	            cleaned = str(name).strip()
	            if cleaned:  # skip blanks left by empty cells or trailing commas
	                tactics.add(cleaned)

	    tactic_choices = ["All"] + sorted(tactics)

	    # Get unique categories from QA; missing values are dropped and the rest
	    # compared as text so sorted() cannot fail on mixed or null entries.
	    category_values = lang_data["qa"]["category"].dropna().astype(str).unique().tolist()
	    categories = ["All"] + sorted(category_values)

	    # Reset the selection as well: a value picked in the previous language may
	    # not exist among the new choices.
	    return (
	        gr.update(choices=tactic_choices, value="All"),
	        gr.update(choices=categories, value="All"),
	        gr.update(),
	    )

	# Load data at startup: this runs at import time, before the interface below
	# is built, because the interface reads data_cache for its initial tables.
	print("Initializing MITRE ATT&CK Explorer...")
	load_data()

	# Create Gradio interface: one language selector, five searchable tables, a
	# statistics tab with two charts, and the event wiring that keeps them in sync.
	with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app:
	    gr.Markdown("# MITRE ATT&CK Explorer")
	    gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups")

	    # Language selector
	    with gr.Row():
	        language = gr.Radio(
	            choices=["English", "Français"],
	            value="English",
	            label="Language / Langue",
	            interactive=True
	        )

	    # Tabs
	    with gr.Tabs():
	        # Tactics Tab
	        with gr.TabItem("Tactics"):
	            with gr.Row():
	                tactics_search = gr.Textbox(
	                    placeholder="Search tactics...",
	                    label="Search",
	                    scale=1
	                )
	            tactics_df = gr.Dataframe(
	                value=get_tactics_df("en"),
	                interactive=False,
	                label="Tactics"
	            )

	        # Techniques Tab
	        with gr.TabItem("Techniques"):
	            with gr.Row():
	                techniques_search = gr.Textbox(
	                    placeholder="Search techniques by name, ID, or description...",
	                    label="Search",
	                    scale=2
	                )
	                tactic_filter = gr.Dropdown(
	                    choices=["All"],
	                    value="All",
	                    label="Filter by Tactic",
	                    scale=1
	                )
	            techniques_df = gr.Dataframe(
	                value=get_techniques_df("en"),
	                interactive=False,
	                label="Techniques"
	            )

	        # Mitigations Tab
	        with gr.TabItem("Mitigations"):
	            with gr.Row():
	                mitigations_search = gr.Textbox(
	                    placeholder="Search mitigations...",
	                    label="Search",
	                    scale=1
	                )
	            mitigations_df = gr.Dataframe(
	                value=get_mitigations_df("en"),
	                interactive=False,
	                label="Mitigations"
	            )

	        # APT Groups Tab
	        with gr.TabItem("APT Groups"):
	            with gr.Row():
	                groups_search = gr.Textbox(
	                    placeholder="Search groups by name, aliases, or description...",
	                    label="Search",
	                    scale=1
	                )
	            groups_df = gr.Dataframe(
	                value=get_groups_df("en"),
	                interactive=False,
	                label="APT Groups"
	            )

	        # Q&A Tab
	        with gr.TabItem("Q&A"):
	            with gr.Row():
	                qa_search = gr.Textbox(
	                    placeholder="Search Q&A...",
	                    label="Search",
	                    scale=2
	                )
	                qa_category = gr.Dropdown(
	                    choices=["All"],
	                    value="All",
	                    label="Filter by Category",
	                    scale=1
	                )
	            qa_df = gr.Dataframe(
	                value=get_qa_df("en"),
	                interactive=False,
	                label="Q&A Dataset"
	            )

	        # Statistics Tab
	        with gr.TabItem("Statistics"):
	            with gr.Row():
	                tactics_chart = gr.Plot(label="Techniques per Tactic")
	            with gr.Row():
	                groups_chart = gr.Plot(label="Top APT Groups")

	    # Footer
	    gr.HTML("""
	<div style='text-align:center; padding:20px; color:#666;'>
	<p>Created by <a href='https://www.ayinedjimi-consultants.fr' target='_blank'>Ayi NEDJIMI</a> - Senior Offensive Cybersecurity & AI Consultant</p>
	<p><a href='https://www.linkedin.com/in/ayi-nedjimi' target='_blank'>LinkedIn</a> | <a href='https://github.com/ayinedjimi' target='_blank'>GitHub</a> | <a href='https://x.com/AyiNEDJIMI' target='_blank'>Twitter/X</a></p>
	</div>
	""")

	    def _lang_code(lang_choice):
	        """Map the radio label to a ``data_cache`` key: "English" -> "en", anything else -> "fr"."""
	        return "en" if lang_choice == "English" else "fr"

	    # Language change handler
	    def on_language_change(lang_choice):
	        """Rebuild every table, both charts and both filter dropdowns for the chosen language."""
	        lang = _lang_code(lang_choice)
	        # Only the two dropdown updates are used. The third slot returned by
	        # update_all_filters must not be routed back to the language selector:
	        # writing to the component that fired the event resets the user's
	        # choice and re-triggers this handler.
	        tactic_update, category_update = update_all_filters(lang)[:2]
	        return (
	            get_tactics_df(lang),
	            get_techniques_df(lang),
	            get_mitigations_df(lang),
	            get_groups_df(lang),
	            get_qa_df(lang),
	            create_tactic_chart(lang),
	            create_groups_chart(lang),
	            tactic_update,
	            category_update,
	        )

	    # Search and filter handlers
	    def on_tactics_search(lang_choice, search_text):
	        """Filter the tactics table by a literal, case-insensitive substring."""
	        lang_data = data_cache.get(_lang_code(lang_choice))
	        # A language whose load failed is cached as None, not as a dict.
	        if lang_data is None:
	            return pd.DataFrame()
	        df = lang_data["tactics"]
	        if df.empty:
	            return pd.DataFrame()
	        query = (search_text or "").strip().lower()
	        if query:
	            mask = pd.Series(False, index=df.index)
	            for column in ("name", "description", "id"):
	                # regex=False: user input is matched literally, never compiled.
	                mask = mask | df[column].str.lower().str.contains(query, regex=False, na=False)
	            df = df[mask]
	        return prepare_dataframe(df, exclude_cols=["source_url"])

	    def on_techniques_search(lang_choice, search_text, tactic):
	        """Filter the techniques table by search text and tactic."""
	        return get_techniques_df(_lang_code(lang_choice), search_text, tactic)

	    def on_mitigations_search(lang_choice, search_text):
	        """Filter the mitigations table by search text."""
	        return get_mitigations_df(_lang_code(lang_choice), search_text)

	    def on_groups_search(lang_choice, search_text):
	        """Filter the APT groups table by search text."""
	        return get_groups_df(_lang_code(lang_choice), search_text)

	    def on_qa_search(lang_choice, search_text, category):
	        """Filter the Q&A table by search text and category."""
	        return get_qa_df(_lang_code(lang_choice), search_text, category)

	    # Register event handlers
	    language_outputs = [
	        tactics_df,
	        techniques_df,
	        mitigations_df,
	        groups_df,
	        qa_df,
	        tactics_chart,
	        groups_chart,
	        tactic_filter,
	        qa_category,
	    ]

	    language.change(
	        fn=on_language_change,
	        inputs=language,
	        outputs=language_outputs
	    )

	    # Run the same refresh once when the page loads; otherwise the charts stay
	    # blank and the dropdowns offer only "All" until the language is toggled.
	    app.load(
	        fn=on_language_change,
	        inputs=language,
	        outputs=language_outputs
	    )

	    tactics_search.change(
	        fn=on_tactics_search,
	        inputs=[language, tactics_search],
	        outputs=tactics_df
	    )

	    techniques_search.change(
	        fn=on_techniques_search,
	        inputs=[language, techniques_search, tactic_filter],
	        outputs=techniques_df
	    )

	    tactic_filter.change(
	        fn=on_techniques_search,
	        inputs=[language, techniques_search, tactic_filter],
	        outputs=techniques_df
	    )

	    mitigations_search.change(
	        fn=on_mitigations_search,
	        inputs=[language, mitigations_search],
	        outputs=mitigations_df
	    )

	    groups_search.change(
	        fn=on_groups_search,
	        inputs=[language, groups_search],
	        outputs=groups_df
	    )

	    qa_search.change(
	        fn=on_qa_search,
	        inputs=[language, qa_search, qa_category],
	        outputs=qa_df
	    )

	    qa_category.change(
	        fn=on_qa_search,
	        inputs=[language, qa_search, qa_category],
	        outputs=qa_df
	    )

	if __name__ == "__main__":
	    app.launch()