| | """ |
| | MITRE ATT&CK Explorer - Interactive Gradio Application |
| | Explore MITRE ATT&CK Framework data in English and French |
| | """ |
| |
|
| | import gradio as gr |
| | import pandas as pd |
| | import plotly.express as px |
| | import plotly.graph_objects as go |
| | from datasets import load_dataset |
| | import json |
| | from typing import Dict, List, Tuple |
| |
|
| | |
# Module-level cache filled by load_data(): maps a language code ("en"/"fr")
# to a dict of per-table DataFrames, or to None when that language failed to load.
data_cache = {}
| |
|
def load_data():
    """Load the MITRE ATT&CK datasets from the HuggingFace Hub for both languages.

    Each JSON file is loaded with its own ``load_dataset`` call and
    ``split="train"``. When ``data_files`` is a mapping, its keys become the
    split names, so a combined call has no ``"train"`` entry per table and
    forces every file into one shared schema. Loading the files one by one
    avoids both problems.

    Returns:
        The module-level ``data_cache``. For each language code it holds a dict
        with the keys ``tactics``, ``techniques``, ``mitigations``, ``groups``
        and ``qa`` (one DataFrame each), or ``None`` if loading that language
        failed.
    """
    languages = {
        "en": "AYI-NEDJIMI/mitre-attack-en",
        "fr": "AYI-NEDJIMI/mitre-attack-fr",
    }
    table_files = {
        "tactics": "tactics.json",
        "techniques": "techniques.json",
        "mitigations": "mitigations.json",
        "groups": "groups.json",
        "qa": "qa_dataset.json",
    }

    for lang, repo in languages.items():
        try:
            print(f"Loading {lang.upper()} dataset...")
            frames = {}
            for table, filename in table_files.items():
                records = load_dataset(repo, data_files=filename, split="train")
                # Materialise the rows as plain dicts so list-valued fields stay
                # Python lists; the rest of the file checks isinstance(..., list).
                frames[table] = pd.DataFrame(list(records))
            data_cache[lang] = frames

            print(f"Loaded {lang.upper()}: {len(frames['tactics'])} tactics, "
                  f"{len(frames['techniques'])} techniques")

        except Exception as e:
            # Best effort: a language that cannot be loaded is marked None so
            # the getters fall back to empty frames instead of crashing the app.
            print(f"Error loading {lang.upper()} data: {e}")
            data_cache[lang] = None

    return data_cache
| |
|
def convert_list_to_string(val):
    """Render a list as a comma-separated string; return anything else untouched.

    Falsy list items (empty strings, None, 0, ...) are skipped.
    """
    if not isinstance(val, list):
        return val
    kept_items = filter(None, val)
    return ", ".join(map(str, kept_items))
| |
|
def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame:
    """Return a display-ready copy of ``df``.

    Columns named in ``exclude_cols`` are dropped when present, and every
    remaining cell is passed through ``convert_list_to_string``. ``None`` or an
    empty frame yields a fresh empty DataFrame. The input frame is not modified.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    display = df.copy()

    # Only drop the columns that actually exist in this frame.
    hidden = [name for name in (exclude_cols or []) if name in display.columns]
    if hidden:
        display = display.drop(columns=hidden)

    # Flatten list-valued cells so they show as plain text.
    for name in display.columns:
        display[name] = display[name].apply(convert_list_to_string)

    return display
| |
|
def get_tactics_df(lang: str) -> pd.DataFrame:
    """Return the display-ready tactics table for ``lang``.

    An empty DataFrame is returned when the language is unknown or failed to
    load. The ``source_url`` column is hidden.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()
    return prepare_dataframe(tables["tactics"], exclude_cols=["source_url"])
| |
|
def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame:
    """Return the display-ready techniques table, optionally filtered.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Free text matched case-insensitively as a literal substring
            against the ``name``, ``description`` and ``id`` columns.
            Blank or ``None`` disables the search.
        tactic_filter: Tactic name; rows are kept when their ``tactic`` value
            contains it (case-insensitive). Empty or ``"All"`` disables it.

    Returns:
        The filtered frame without ``source_url`` and ``sub_techniques``, or an
        empty DataFrame when the language is unavailable.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()

    df = tables["techniques"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: the text comes straight from the UI, so characters such
        # as "(" or "." must be matched literally instead of being compiled as
        # a regular expression (which can raise or over-match).
        matches = (
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
        )
        df = df[matches]

    if tactic_filter and tactic_filter != "All":
        wanted = tactic_filter.lower()

        def _mentions_tactic(value) -> bool:
            # The tactic cell may be a list of names or a single string; the
            # .str accessor only understands the latter.
            if isinstance(value, (list, tuple)):
                return any(wanted in str(item).lower() for item in value)
            if isinstance(value, str):
                return wanted in value.lower()
            return False

        # astype(bool) keeps the mask boolean even when no rows are left.
        df = df[df["tactic"].apply(_mentions_tactic).astype(bool)]

    return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"])
| |
|
def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the display-ready mitigations table, optionally filtered.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Free text matched case-insensitively as a literal substring
            against the ``name``, ``description`` and ``id`` columns.
            Blank or ``None`` disables the search.

    Returns:
        The filtered frame without ``source_url``, or an empty DataFrame when
        the language is unavailable.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()

    df = tables["mitigations"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: UI text must be matched literally, otherwise "(" raises
        # and "." matches any character.
        matches = (
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
        )
        df = df[matches]

    return prepare_dataframe(df, exclude_cols=["source_url"])
| |
|
def get_groups_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the display-ready APT groups table, optionally filtered.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Free text matched case-insensitively as a literal substring
            against ``name``, ``description``, ``id`` and the string form of
            ``aliases``. Blank or ``None`` disables the search.

    Returns:
        The filtered frame without ``source_url``, or an empty DataFrame when
        the language is unavailable.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()

    df = tables["groups"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: UI text must be matched literally, otherwise "(" raises
        # and "." matches any character.
        matches = (
            df["name"].str.lower().str.contains(needle, na=False, regex=False)
            | df["description"].str.lower().str.contains(needle, na=False, regex=False)
            | df["id"].str.lower().str.contains(needle, na=False, regex=False)
            # aliases is stringified first, so list-valued cells are searchable.
            | df["aliases"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        )
        df = df[matches]

    return prepare_dataframe(df, exclude_cols=["source_url"])
| |
|
def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame:
    """Return the display-ready Q&A table, optionally filtered.

    Args:
        lang: Language code used as key into ``data_cache``.
        search: Free text matched case-insensitively as a literal substring
            against ``question``, ``answer`` and the string form of
            ``keywords``. Blank or ``None`` disables the search.
        category_filter: Category that must equal the row's ``category``
            (case-insensitive). Empty or ``"All"`` disables it.

    Returns:
        The filtered frame without ``source_url``, or an empty DataFrame when
        the language is unavailable.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()

    df = tables["qa"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False: UI text must be matched literally, otherwise "(" raises
        # and "." matches any character.
        matches = (
            df["question"].str.lower().str.contains(needle, na=False, regex=False)
            | df["answer"].str.lower().str.contains(needle, na=False, regex=False)
            | df["keywords"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        )
        df = df[matches]

    if category_filter and category_filter != "All":
        df = df[df["category"].str.lower() == category_filter.lower()]

    return prepare_dataframe(df, exclude_cols=["source_url"])
| |
|
def create_tactic_chart(lang: str):
    """Build the "Techniques per Tactic" bar chart for ``lang``.

    Each technique contributes one count to every tactic found in its
    ``tactic`` cell, which may be a list of names or a comma-separated string.
    An empty figure is returned when there is nothing to plot.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    techniques = tables["techniques"]
    if techniques.empty:
        return go.Figure()

    def names_in(cell):
        # List cells are used as they are; string cells are split on commas
        # and trimmed; any other value contributes nothing.
        if isinstance(cell, list):
            return cell
        if isinstance(cell, str):
            return [part.strip() for part in cell.split(",")]
        return []

    totals = {}
    for cell in techniques["tactic"]:
        for name in names_in(cell):
            totals[name] = totals.get(name, 0) + 1

    if not totals:
        return go.Figure()

    ranked = pd.DataFrame(
        {"Tactic": list(totals.keys()), "Count": list(totals.values())}
    )
    ranked = ranked.sort_values("Count", ascending=False)

    fig = px.bar(
        ranked,
        x="Tactic",
        y="Count",
        title="Techniques per Tactic",
        labels={"Count": "Number of Techniques"},
        color="Count",
        color_continuous_scale="Reds",
    )
    fig.update_layout(height=400, xaxis_tickangle=-45)
    return fig
| |
|
def create_groups_chart(lang: str):
    """Build the horizontal "Top 10 APT Groups by Techniques Used" bar chart.

    A group's score is the length of its ``techniques_used`` list; a missing
    or non-list value scores 0. An empty figure is returned when there is
    nothing to plot.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    groups = tables["groups"]
    if groups.empty:
        return go.Figure()

    def technique_total(record):
        used = record.get("techniques_used", [])
        return len(used) if isinstance(used, list) else 0

    rows = [
        {"name": record["name"], "count": technique_total(record)}
        for _, record in groups.iterrows()
    ]
    if not rows:
        return go.Figure()

    ranked = pd.DataFrame(rows).sort_values("count", ascending=False)
    top_ten = ranked.head(10)

    fig = px.bar(
        top_ten,
        y="name",
        x="count",
        title="Top 10 APT Groups by Techniques Used",
        labels={"count": "Techniques", "name": "APT Group"},
        color="count",
        color_continuous_scale="Oranges",
        orientation="h",
    )
    fig.update_layout(height=400)
    return fig
| |
|
def update_all_filters(lang: str):
    """Compute the dropdown updates that go with a language switch.

    Returns:
        A 3-tuple of Gradio updates:
        1. tactic dropdown: choices are ``"All"`` plus every tactic name found
           in the techniques table, and the selection is reset to ``"All"``;
        2. Q&A category dropdown: choices are ``"All"`` plus every category,
           and the selection is reset to ``"All"``;
        3. a no-op update. The language-change wiring in this file sends the
           third value to the language selector itself, so it must not alter
           that component.

        When the language is unavailable, both dropdowns only offer ``"All"``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return (
            gr.update(choices=["All"], value="All"),
            gr.update(choices=["All"], value="All"),
            gr.update(),
        )

    techniques_df = data_cache[lang]["techniques"]
    qa_df = data_cache[lang]["qa"]

    # A tactic cell may be a list of names or a comma-separated string.
    tactics = set()
    for cell in techniques_df["tactic"]:
        if isinstance(cell, list):
            names = cell
        elif isinstance(cell, str):
            names = cell.split(",")
        else:
            continue
        tactics.update(str(name).strip() for name in names if name)
    tactics.discard("")  # blank fragments are not selectable tactics

    tactic_choices = ["All"] + sorted(tactics)

    # Drop missing values and compare as strings so sorting cannot fail on
    # mixed types.
    categories = ["All"] + sorted({str(c) for c in qa_df["category"].dropna()})

    # Resetting the value avoids keeping a selection from the previous
    # language that is not among the new choices.
    return (
        gr.update(choices=tactic_choices, value="All"),
        gr.update(choices=categories, value="All"),
        gr.update(),
    )
| |
|
| | |
# Fill data_cache at import time: the gr.Dataframe components created below
# take their initial values from it.
print("Initializing MITRE ATT&CK Explorer...")
load_data()
| |
|
| | |
# Gradio UI: a language selector, one tab per table (each with its own search
# and filters), a statistics tab with two charts, and the event wiring.
with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# MITRE ATT&CK Explorer")
    gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups")

    # Language selector
    with gr.Row():
        language = gr.Radio(
            choices=["English", "Français"],
            value="English",
            label="Language / Langue",
            interactive=True
        )

    with gr.Tabs():
        # Tactics tab
        with gr.TabItem("Tactics"):
            with gr.Row():
                tactics_search = gr.Textbox(
                    placeholder="Search tactics...",
                    label="Search",
                    scale=1
                )
            tactics_df = gr.Dataframe(
                value=get_tactics_df("en"),
                interactive=False,
                label="Tactics"
            )

        # Techniques tab
        with gr.TabItem("Techniques"):
            with gr.Row():
                techniques_search = gr.Textbox(
                    placeholder="Search techniques by name, ID, or description...",
                    label="Search",
                    scale=2
                )
                tactic_filter = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Tactic",
                    scale=1
                )
            techniques_df = gr.Dataframe(
                value=get_techniques_df("en"),
                interactive=False,
                label="Techniques"
            )

        # Mitigations tab
        with gr.TabItem("Mitigations"):
            with gr.Row():
                mitigations_search = gr.Textbox(
                    placeholder="Search mitigations...",
                    label="Search",
                    scale=1
                )
            mitigations_df = gr.Dataframe(
                value=get_mitigations_df("en"),
                interactive=False,
                label="Mitigations"
            )

        # APT groups tab
        with gr.TabItem("APT Groups"):
            with gr.Row():
                groups_search = gr.Textbox(
                    placeholder="Search groups by name, aliases, or description...",
                    label="Search",
                    scale=1
                )
            groups_df = gr.Dataframe(
                value=get_groups_df("en"),
                interactive=False,
                label="APT Groups"
            )

        # Q&A tab
        with gr.TabItem("Q&A"):
            with gr.Row():
                qa_search = gr.Textbox(
                    placeholder="Search Q&A...",
                    label="Search",
                    scale=2
                )
                qa_category = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Category",
                    scale=1
                )
            qa_df = gr.Dataframe(
                value=get_qa_df("en"),
                interactive=False,
                label="Q&A Dataset"
            )

        # Statistics tab
        with gr.TabItem("Statistics"):
            with gr.Row():
                tactics_chart = gr.Plot(label="Techniques per Tactic")
            with gr.Row():
                groups_chart = gr.Plot(label="Top APT Groups")

    # Footer
    gr.HTML("""
    <div style='text-align:center; padding:20px; color:#666;'>
        <p>Created by <a href='https://www.ayinedjimi-consultants.fr' target='_blank'>Ayi NEDJIMI</a> - Senior Offensive Cybersecurity & AI Consultant</p>
        <p><a href='https://www.linkedin.com/in/ayi-nedjimi' target='_blank'>LinkedIn</a> | <a href='https://github.com/ayinedjimi' target='_blank'>GitHub</a> | <a href='https://x.com/AyiNEDJIMI' target='_blank'>Twitter/X</a></p>
    </div>
    """)

    def _lang_code(lang_choice):
        """Map the radio label to the language key used in data_cache."""
        return "en" if lang_choice == "English" else "fr"

    def on_language_change(lang_choice):
        """Rebuild every table, both charts and both dropdowns for the chosen language."""
        lang = _lang_code(lang_choice)
        # Only the two dropdown updates are used. The language selector is not
        # an output of its own change event, so this handler cannot clear or
        # rewrite it.
        tactic_update, category_update = update_all_filters(lang)[:2]
        return (
            get_tactics_df(lang),
            get_techniques_df(lang),
            get_mitigations_df(lang),
            get_groups_df(lang),
            get_qa_df(lang),
            create_tactic_chart(lang),
            create_groups_chart(lang),
            tactic_update,
            category_update,
        )

    def on_tactics_search(lang_choice, search_text):
        """Filter the tactics table by a case-insensitive literal substring."""
        lang = _lang_code(lang_choice)
        tables = data_cache.get(lang)
        if tables is None:
            # The language is unknown or failed to load.
            return pd.DataFrame()
        df = tables["tactics"]
        if df.empty:
            return pd.DataFrame()
        needle = (search_text or "").strip().lower()
        if needle:
            # regex=False: the text comes straight from the UI and must be
            # matched literally ("(" would otherwise raise).
            matches = (
                df["name"].str.lower().str.contains(needle, na=False, regex=False)
                | df["description"].str.lower().str.contains(needle, na=False, regex=False)
                | df["id"].str.lower().str.contains(needle, na=False, regex=False)
            )
            df = df[matches]
        return prepare_dataframe(df, exclude_cols=["source_url"])

    def on_techniques_search(lang_choice, search_text, tactic):
        """Refresh the techniques table for the current search text and tactic filter."""
        return get_techniques_df(_lang_code(lang_choice), search_text, tactic)

    def on_mitigations_search(lang_choice, search_text):
        """Refresh the mitigations table for the current search text."""
        return get_mitigations_df(_lang_code(lang_choice), search_text)

    def on_groups_search(lang_choice, search_text):
        """Refresh the APT groups table for the current search text."""
        return get_groups_df(_lang_code(lang_choice), search_text)

    def on_qa_search(lang_choice, search_text, category):
        """Refresh the Q&A table for the current search text and category filter."""
        return get_qa_df(_lang_code(lang_choice), search_text, category)

    # Components refreshed when the language changes, in the order returned by
    # on_language_change.
    _language_outputs = [
        tactics_df,
        techniques_df,
        mitigations_df,
        groups_df,
        qa_df,
        tactics_chart,
        groups_chart,
        tactic_filter,
        qa_category,
    ]

    language.change(
        fn=on_language_change,
        inputs=language,
        outputs=_language_outputs
    )

    # Run the same refresh once on page load so the charts and the dropdown
    # choices are populated before the user first switches language.
    app.load(
        fn=on_language_change,
        inputs=language,
        outputs=_language_outputs
    )

    tactics_search.change(
        fn=on_tactics_search,
        inputs=[language, tactics_search],
        outputs=tactics_df
    )

    techniques_search.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    tactic_filter.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    mitigations_search.change(
        fn=on_mitigations_search,
        inputs=[language, mitigations_search],
        outputs=mitigations_df
    )

    groups_search.change(
        fn=on_groups_search,
        inputs=[language, groups_search],
        outputs=groups_df
    )

    qa_search.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )

    qa_category.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )
| |
|
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()
| |
|