Spaces:

AYI-NEDJIMI
/

mitre-attack-explorer

Paused

File size: 16,280 Bytes

9acd8ac

"""
MITRE ATT&CK Explorer - Interactive Gradio Application
Explore MITRE ATT&CK Framework data in English and French
"""

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datasets import load_dataset
import json
from typing import Dict, List, Tuple

# Global data cache, filled by load_data().
# Layout: data_cache[lang][table] -> pandas DataFrame, where lang is "en" or
# "fr" and table is one of "tactics", "techniques", "mitigations", "groups",
# "qa". data_cache[lang] is None when loading that language failed.
data_cache = {}

def load_data():
    """Load the English and French datasets from HuggingFace into ``data_cache``.

    Every JSON file of a language repository is fetched on its own and stored
    as a pandas DataFrame under ``data_cache[lang][table]``. If anything fails
    for a language, ``data_cache[lang]`` is set to ``None`` so the accessor
    functions can fall back to empty results.

    Returns:
        The module-level ``data_cache`` dictionary.
    """
    global data_cache

    languages = {
        "en": "AYI-NEDJIMI/mitre-attack-en",
        "fr": "AYI-NEDJIMI/mitre-attack-fr"
    }

    # Cache key -> file name inside each language repository.
    table_files = {
        "tactics": "tactics.json",
        "techniques": "techniques.json",
        "mitigations": "mitigations.json",
        "groups": "groups.json",
        "qa": "qa_dataset.json"
    }

    for lang, repo in languages.items():
        try:
            print(f"Loading {lang.upper()} dataset...")

            tables = {}
            for table, filename in table_files.items():
                # The files do not share one schema (e.g. groups have
                # "aliases", qa has "question"/"answer"), so each file is
                # loaded as its own dataset. A single file given through
                # data_files is exposed as the "train" split.
                rows = load_dataset(repo, data_files=filename, split="train")
                # Building the frame from row dicts keeps list-valued fields
                # as plain Python lists, which the isinstance(..., list)
                # checks elsewhere in this module rely on.
                tables[table] = pd.DataFrame(list(rows))

            # Publish the language only once every table loaded successfully.
            data_cache[lang] = tables

            print(f"Loaded {lang.upper()}: {len(data_cache[lang]['tactics'])} tactics, "
                  f"{len(data_cache[lang]['techniques'])} techniques")

        except Exception as e:
            # Startup boundary: report the problem and keep the app usable
            # (with empty tables) rather than aborting.
            print(f"Error loading {lang.upper()} data: {e}")
            data_cache[lang] = None

    return data_cache

def convert_list_to_string(val):
    """Render a list as "a, b, c"; return any non-list value untouched.

    Falsy items (``None``, ``""``, ``0`` ...) are left out of the joined text.
    """
    if not isinstance(val, list):
        return val
    kept = [str(item) for item in val if item]
    return ", ".join(kept)

def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame:
    """Build a display-ready copy of ``df``.

    Args:
        df: Source table; ``None`` or an empty frame yields a new empty frame.
        exclude_cols: Column names to leave out; names not present in ``df``
            are ignored.

    Returns:
        A copy of ``df`` without the excluded columns, in which every list
        cell has been turned into a comma-separated string. The input frame
        is not modified.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    display = df.copy()

    if exclude_cols:
        unwanted = [name for name in exclude_cols if name in display.columns]
        display = display.drop(columns=unwanted)

    # Flatten list cells so the table widget shows readable text.
    for name in display.columns:
        display[name] = display[name].apply(convert_list_to_string)

    return display

def get_tactics_df(lang: str) -> pd.DataFrame:
    """Return the display-ready tactics table for ``lang``.

    An empty DataFrame is returned when nothing is cached for ``lang``
    (unknown language, or its data failed to load). The ``source_url``
    column is left out of the result.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return pd.DataFrame()
    return prepare_dataframe(tables["tactics"], exclude_cols=["source_url"])

def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame:
    """Return the techniques table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache``.
        search: Text looked up literally and case-insensitively in the
            ``name``, ``description`` and ``id`` columns. Blank or ``None``
            disables the search.
        tactic_filter: Tactic name that must occur (case-insensitively) in
            the ``tactic`` column. Empty, ``None`` or ``"All"`` disables the
            filter.

    Returns:
        A display-ready DataFrame without the ``source_url`` and
        ``sub_techniques`` columns, or an empty DataFrame when nothing is
        cached for ``lang``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["techniques"].copy()

    # Apply search filter
    needle = (search or "").strip().lower()
    if needle:
        # regex=False: the text comes straight from a search box, so
        # characters such as "(" or "+" must be matched literally instead of
        # being parsed as a regular expression.
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False) |
            df["description"].str.lower().str.contains(needle, na=False, regex=False) |
            df["id"].str.lower().str.contains(needle, na=False, regex=False)
        ]

    # Apply tactic filter
    if tactic_filter and tactic_filter != "All":
        # Tactic cells may be lists; the .str accessor yields NaN for list
        # cells, which would silently drop every row, so flatten them to
        # text before matching.
        tactic_text = df["tactic"].apply(convert_list_to_string).fillna("").astype(str)
        df = df[tactic_text.str.contains(tactic_filter, case=False, na=False, regex=False)]

    return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"])

def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the mitigations table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache``.
        search: Text looked up literally and case-insensitively in the
            ``name``, ``description`` and ``id`` columns. Blank or ``None``
            disables the search.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when nothing is cached for ``lang``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["mitigations"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False keeps user-typed characters like "(" or "*" literal
        # instead of letting them be parsed as a regular expression.
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False) |
            df["description"].str.lower().str.contains(needle, na=False, regex=False) |
            df["id"].str.lower().str.contains(needle, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])

def get_groups_df(lang: str, search: str = "") -> pd.DataFrame:
    """Return the APT groups table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache``.
        search: Text looked up literally and case-insensitively in the
            ``name``, ``description``, ``id`` and ``aliases`` columns. Blank
            or ``None`` disables the search.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when nothing is cached for ``lang``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["groups"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False keeps user-typed characters like "(" or "*" literal
        # instead of letting them be parsed as a regular expression.
        df = df[
            df["name"].str.lower().str.contains(needle, na=False, regex=False) |
            df["description"].str.lower().str.contains(needle, na=False, regex=False) |
            df["id"].str.lower().str.contains(needle, na=False, regex=False) |
            # astype(str) makes non-string cells (e.g. lists) searchable
            # through their textual representation.
            df["aliases"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        ]

    return prepare_dataframe(df, exclude_cols=["source_url"])

def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame:
    """Return the Q&A table for ``lang``, optionally filtered.

    Args:
        lang: Key into ``data_cache``.
        search: Text looked up literally and case-insensitively in the
            ``question``, ``answer`` and ``keywords`` columns. Blank or
            ``None`` disables the search.
        category_filter: Category that ``category`` must equal, ignoring
            case. Empty, ``None`` or ``"All"`` disables the filter.

    Returns:
        A display-ready DataFrame without the ``source_url`` column, or an
        empty DataFrame when nothing is cached for ``lang``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return pd.DataFrame()

    df = data_cache[lang]["qa"].copy()

    needle = (search or "").strip().lower()
    if needle:
        # regex=False keeps user-typed characters like "?" or "(" literal
        # instead of letting them be parsed as a regular expression.
        df = df[
            df["question"].str.lower().str.contains(needle, na=False, regex=False) |
            df["answer"].str.lower().str.contains(needle, na=False, regex=False) |
            # astype(str) makes non-string cells (e.g. lists) searchable
            # through their textual representation.
            df["keywords"].astype(str).str.lower().str.contains(needle, na=False, regex=False)
        ]

    if category_filter and category_filter != "All":
        df = df[df["category"].str.lower() == category_filter.lower()]

    return prepare_dataframe(df, exclude_cols=["source_url"])

def create_tactic_chart(lang: str):
    """Build the "Techniques per Tactic" bar chart for ``lang``.

    Each technique contributes one count to every tactic named in its
    ``tactic`` cell. An empty figure is returned when no data is cached for
    ``lang``, the techniques table is empty, or no tactic could be counted.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    techniques = tables["techniques"]
    if techniques.empty:
        return go.Figure()

    def _tactic_names(cell):
        # A cell is either a list of names or one comma-separated string;
        # any other value contributes nothing.
        if isinstance(cell, list):
            return cell
        if isinstance(cell, str):
            return [part.strip() for part in cell.split(",")]
        return []

    totals = {}
    for cell in techniques["tactic"]:
        for name in _tactic_names(cell):
            totals[name] = totals.get(name, 0) + 1

    if not totals:
        return go.Figure()

    counts_frame = pd.DataFrame(
        {"Tactic": list(totals.keys()), "Count": list(totals.values())}
    )
    counts_frame = counts_frame.sort_values("Count", ascending=False)

    figure = px.bar(
        counts_frame,
        x="Tactic",
        y="Count",
        title="Techniques per Tactic",
        labels={"Count": "Number of Techniques"},
        color="Count",
        color_continuous_scale="Reds"
    )
    figure.update_layout(height=400, xaxis_tickangle=-45)
    return figure

def create_groups_chart(lang: str):
    """Build the horizontal "Top 10 APT Groups by Techniques Used" chart.

    A group's score is the length of its ``techniques_used`` list; a missing
    or non-list value scores 0. An empty figure is returned when no data is
    cached for ``lang`` or the groups table is empty.
    """
    tables = data_cache.get(lang)
    if tables is None:
        return go.Figure()

    groups = tables["groups"]
    if groups.empty:
        return go.Figure()

    def _technique_total(record):
        used = record.get("techniques_used", [])
        return len(used) if isinstance(used, list) else 0

    # One {"name", "count"} entry per group row.
    per_group = [
        {"name": record["name"], "count": _technique_total(record)}
        for _, record in groups.iterrows()
    ]

    if not per_group:
        return go.Figure()

    ranked = pd.DataFrame(per_group).sort_values("count", ascending=False)
    top_ten = ranked.head(10)

    figure = px.bar(
        top_ten,
        y="name",
        x="count",
        title="Top 10 APT Groups by Techniques Used",
        labels={"count": "Techniques", "name": "APT Group"},
        color="count",
        color_continuous_scale="Oranges",
        orientation="h"
    )
    figure.update_layout(height=400)
    return figure

def update_all_filters(lang: str):
    """Build the dropdown updates that match the data of language ``lang``.

    Args:
        lang: Key into ``data_cache``.

    Returns:
        A 3-tuple of ``gr.update`` payloads:

        1. tactic dropdown: choices ``"All"`` plus every tactic name found in
           the techniques table, selection reset to ``"All"``;
        2. Q&A category dropdown: choices ``"All"`` plus every category of
           the Q&A table, selection reset to ``"All"``;
        3. a no-op update, kept so the tuple length stays at three for
           existing callers; whatever component receives it is left as is.

        When nothing is cached for ``lang`` both dropdowns only offer
        ``"All"``.
    """
    if lang not in data_cache or data_cache[lang] is None:
        return (
            gr.update(choices=["All"], value="All"),
            gr.update(choices=["All"], value="All"),
            gr.update()
        )

    techniques_df = data_cache[lang]["techniques"]
    qa_df = data_cache[lang]["qa"]

    # Get unique tactics; a cell is either a list of names or one
    # comma-separated string. Blank and non-string entries are skipped so
    # they neither show up as choices nor break sorting.
    tactics = set()
    for tactic_list in techniques_df["tactic"]:
        if isinstance(tactic_list, list):
            tactics.update(
                t for t in tactic_list if isinstance(t, str) and t.strip()
            )
        elif isinstance(tactic_list, str):
            tactics.update(
                t.strip() for t in tactic_list.split(",") if t.strip()
            )

    tactic_choices = ["All"] + sorted(tactics)

    # Get unique categories from QA; missing values are dropped because they
    # cannot be ordered against strings.
    categories = ["All"] + sorted(qa_df["category"].dropna().unique().tolist())

    # The selection is reset together with the choices: a value picked in one
    # language is generally not among the choices of the other.
    return (
        gr.update(choices=tactic_choices, value="All"),
        gr.update(choices=categories, value="All"),
        gr.update()
    )

# Load data at startup. This runs at import time and before the interface
# below is built, because the initial table values are read from data_cache.
print("Initializing MITRE ATT&CK Explorer...")
load_data()

# Create Gradio interface
with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# MITRE ATT&CK Explorer")
    gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups")

    # Language selector
    with gr.Row():
        language = gr.Radio(
            choices=["English", "Français"],
            value="English",
            label="Language / Langue",
            interactive=True
        )

    # Tabs
    with gr.Tabs():
        # Tactics Tab
        with gr.TabItem("Tactics"):
            with gr.Row():
                tactics_search = gr.Textbox(
                    placeholder="Search tactics...",
                    label="Search",
                    scale=1
                )
            tactics_df = gr.Dataframe(
                value=get_tactics_df("en"),
                interactive=False,
                label="Tactics"
            )

        # Techniques Tab
        with gr.TabItem("Techniques"):
            with gr.Row():
                techniques_search = gr.Textbox(
                    placeholder="Search techniques by name, ID, or description...",
                    label="Search",
                    scale=2
                )
                tactic_filter = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Tactic",
                    scale=1
                )
            techniques_df = gr.Dataframe(
                value=get_techniques_df("en"),
                interactive=False,
                label="Techniques"
            )

        # Mitigations Tab
        with gr.TabItem("Mitigations"):
            with gr.Row():
                mitigations_search = gr.Textbox(
                    placeholder="Search mitigations...",
                    label="Search",
                    scale=1
                )
            mitigations_df = gr.Dataframe(
                value=get_mitigations_df("en"),
                interactive=False,
                label="Mitigations"
            )

        # APT Groups Tab
        with gr.TabItem("APT Groups"):
            with gr.Row():
                groups_search = gr.Textbox(
                    placeholder="Search groups by name, aliases, or description...",
                    label="Search",
                    scale=1
                )
            groups_df = gr.Dataframe(
                value=get_groups_df("en"),
                interactive=False,
                label="APT Groups"
            )

        # Q&A Tab
        with gr.TabItem("Q&A"):
            with gr.Row():
                qa_search = gr.Textbox(
                    placeholder="Search Q&A...",
                    label="Search",
                    scale=2
                )
                qa_category = gr.Dropdown(
                    choices=["All"],
                    value="All",
                    label="Filter by Category",
                    scale=1
                )
            qa_df = gr.Dataframe(
                value=get_qa_df("en"),
                interactive=False,
                label="Q&A Dataset"
            )

        # Statistics Tab
        with gr.TabItem("Statistics"):
            with gr.Row():
                tactics_chart = gr.Plot(label="Techniques per Tactic")
            with gr.Row():
                groups_chart = gr.Plot(label="Top APT Groups")

    # Footer
    gr.HTML("""
    <div style='text-align:center; padding:20px; color:#666;'>
        <p>Created by <a href='https://www.ayinedjimi-consultants.fr' target='_blank'>Ayi NEDJIMI</a> - Senior Offensive Cybersecurity & AI Consultant</p>
        <p><a href='https://www.linkedin.com/in/ayi-nedjimi' target='_blank'>LinkedIn</a> | <a href='https://github.com/ayinedjimi' target='_blank'>GitHub</a> | <a href='https://x.com/AyiNEDJIMI' target='_blank'>Twitter/X</a></p>
    </div>
    """)

    def _lang_code(lang_choice):
        """Map the radio label to a data_cache key ("English" -> "en", anything else -> "fr")."""
        return "en" if lang_choice == "English" else "fr"

    # Language change handler
    def on_language_change(lang_choice):
        """Rebuild every table, both charts and both filter dropdowns for the chosen language."""
        lang = _lang_code(lang_choice)
        # Only the first two updates are forwarded (tactic and category
        # dropdowns). The language radio is deliberately not an output of its
        # own change event: writing to it from here would overwrite the
        # user's selection.
        tactic_update, category_update = update_all_filters(lang)[:2]
        return (
            get_tactics_df(lang),
            get_techniques_df(lang),
            get_mitigations_df(lang),
            get_groups_df(lang),
            get_qa_df(lang),
            create_tactic_chart(lang),
            create_groups_chart(lang),
            tactic_update,
            category_update
        )

    # Search and filter handlers
    def on_tactics_search(lang_choice, search_text):
        """Filter the tactics table by a literal, case-insensitive text search."""
        lang = _lang_code(lang_choice)
        # A language whose load failed is cached as None, a missing one is
        # absent; both mean "no data".
        tables = data_cache.get(lang)
        if tables is None:
            return pd.DataFrame()
        df = tables["tactics"]
        if df.empty:
            return pd.DataFrame()
        df = df.copy()
        needle = (search_text or "").strip().lower()
        if needle:
            # regex=False keeps user-typed characters like "(" literal
            # instead of letting them be parsed as a regular expression.
            df = df[
                df["name"].str.lower().str.contains(needle, na=False, regex=False) |
                df["description"].str.lower().str.contains(needle, na=False, regex=False) |
                df["id"].str.lower().str.contains(needle, na=False, regex=False)
            ]
        return prepare_dataframe(df, exclude_cols=["source_url"])

    def on_techniques_search(lang_choice, search_text, tactic):
        """Filter the techniques table by search text and tactic."""
        return get_techniques_df(_lang_code(lang_choice), search_text, tactic)

    def on_mitigations_search(lang_choice, search_text):
        """Filter the mitigations table by search text."""
        return get_mitigations_df(_lang_code(lang_choice), search_text)

    def on_groups_search(lang_choice, search_text):
        """Filter the APT groups table by search text."""
        return get_groups_df(_lang_code(lang_choice), search_text)

    def on_qa_search(lang_choice, search_text, category):
        """Filter the Q&A table by search text and category."""
        return get_qa_df(_lang_code(lang_choice), search_text, category)

    # Components refreshed by on_language_change, in the order of its return
    # tuple.
    language_outputs = [
        tactics_df,
        techniques_df,
        mitigations_df,
        groups_df,
        qa_df,
        tactics_chart,
        groups_chart,
        tactic_filter,
        qa_category
    ]

    # Register event handlers
    language.change(
        fn=on_language_change,
        inputs=language,
        outputs=language_outputs
    )

    # Run the same refresh once when the page loads; otherwise the charts
    # stay blank and the filter dropdowns only offer "All" until the user
    # switches language.
    app.load(
        fn=on_language_change,
        inputs=language,
        outputs=language_outputs
    )

    tactics_search.change(
        fn=on_tactics_search,
        inputs=[language, tactics_search],
        outputs=tactics_df
    )

    techniques_search.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    tactic_filter.change(
        fn=on_techniques_search,
        inputs=[language, techniques_search, tactic_filter],
        outputs=techniques_df
    )

    mitigations_search.change(
        fn=on_mitigations_search,
        inputs=[language, mitigations_search],
        outputs=mitigations_df
    )

    groups_search.change(
        fn=on_groups_search,
        inputs=[language, groups_search],
        outputs=groups_df
    )

    qa_search.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )

    qa_category.change(
        fn=on_qa_search,
        inputs=[language, qa_search, qa_category],
        outputs=qa_df
    )

# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    app.launch()