""" MITRE ATT&CK Explorer - Interactive Gradio Application Explore MITRE ATT&CK Framework data in English and French """ import gradio as gr import pandas as pd import plotly.express as px import plotly.graph_objects as go from datasets import load_dataset import json from typing import Dict, List, Tuple # Global data cache data_cache = {} def load_data(): """Load datasets from HuggingFace for both languages""" global data_cache languages = { "en": "AYI-NEDJIMI/mitre-attack-en", "fr": "AYI-NEDJIMI/mitre-attack-fr" } for lang, repo in languages.items(): try: print(f"Loading {lang.upper()} dataset...") dataset = load_dataset( repo, data_files={ "tactics": "tactics.json", "techniques": "techniques.json", "mitigations": "mitigations.json", "groups": "groups.json", "qa": "qa_dataset.json" } ) # Convert to DataFrames data_cache[lang] = { "tactics": pd.DataFrame(dataset["tactics"]["train"]), "techniques": pd.DataFrame(dataset["techniques"]["train"]), "mitigations": pd.DataFrame(dataset["mitigations"]["train"]), "groups": pd.DataFrame(dataset["groups"]["train"]), "qa": pd.DataFrame(dataset["qa"]["train"]) } print(f"Loaded {lang.upper()}: {len(data_cache[lang]['tactics'])} tactics, " f"{len(data_cache[lang]['techniques'])} techniques") except Exception as e: print(f"Error loading {lang.upper()} data: {e}") data_cache[lang] = None return data_cache def convert_list_to_string(val): """Convert list values to comma-separated strings""" if isinstance(val, list): return ", ".join(str(x) for x in val if x) return val def prepare_dataframe(df: pd.DataFrame, exclude_cols: List[str] = None) -> pd.DataFrame: """Prepare dataframe for display""" if df is None or df.empty: return pd.DataFrame() df = df.copy() if exclude_cols: df = df.drop(columns=[col for col in exclude_cols if col in df.columns]) # Convert list fields to strings for col in df.columns: df[col] = df[col].apply(convert_list_to_string) return df def get_tactics_df(lang: str) -> pd.DataFrame: """Get tactics dataframe""" if lang not in data_cache or data_cache[lang] is None: return pd.DataFrame() df = data_cache[lang]["tactics"] return prepare_dataframe(df, exclude_cols=["source_url"]) def get_techniques_df(lang: str, search: str = "", tactic_filter: str = "") -> pd.DataFrame: """Get techniques dataframe with filters""" if lang not in data_cache or data_cache[lang] is None: return pd.DataFrame() df = data_cache[lang]["techniques"].copy() # Apply search filter if search.strip(): search_lower = search.lower() df = df[ df["name"].str.lower().str.contains(search_lower, na=False) | df["description"].str.lower().str.contains(search_lower, na=False) | df["id"].str.lower().str.contains(search_lower, na=False) ] # Apply tactic filter if tactic_filter and tactic_filter != "All": df = df[df["tactic"].str.contains(tactic_filter, case=False, na=False)] return prepare_dataframe(df, exclude_cols=["source_url", "sub_techniques"]) def get_mitigations_df(lang: str, search: str = "") -> pd.DataFrame: """Get mitigations dataframe with search""" if lang not in data_cache or data_cache[lang] is None: return pd.DataFrame() df = data_cache[lang]["mitigations"].copy() if search.strip(): search_lower = search.lower() df = df[ df["name"].str.lower().str.contains(search_lower, na=False) | df["description"].str.lower().str.contains(search_lower, na=False) | df["id"].str.lower().str.contains(search_lower, na=False) ] return prepare_dataframe(df, exclude_cols=["source_url"]) def get_groups_df(lang: str, search: str = "") -> pd.DataFrame: """Get APT groups dataframe with search""" if lang not in data_cache or data_cache[lang] is None: return pd.DataFrame() df = data_cache[lang]["groups"].copy() if search.strip(): search_lower = search.lower() df = df[ df["name"].str.lower().str.contains(search_lower, na=False) | df["description"].str.lower().str.contains(search_lower, na=False) | df["id"].str.lower().str.contains(search_lower, na=False) | df["aliases"].astype(str).str.lower().str.contains(search_lower, na=False) ] return prepare_dataframe(df, exclude_cols=["source_url"]) def get_qa_df(lang: str, search: str = "", category_filter: str = "") -> pd.DataFrame: """Get QA dataset with filters""" if lang not in data_cache or data_cache[lang] is None: return pd.DataFrame() df = data_cache[lang]["qa"].copy() if search.strip(): search_lower = search.lower() df = df[ df["question"].str.lower().str.contains(search_lower, na=False) | df["answer"].str.lower().str.contains(search_lower, na=False) | df["keywords"].astype(str).str.lower().str.contains(search_lower, na=False) ] if category_filter and category_filter != "All": df = df[df["category"].str.lower() == category_filter.lower()] return prepare_dataframe(df, exclude_cols=["source_url"]) def create_tactic_chart(lang: str): """Create techniques per tactic bar chart""" if lang not in data_cache or data_cache[lang] is None: return go.Figure() techniques_df = data_cache[lang]["techniques"] if techniques_df.empty: return go.Figure() # Expand tactics (they may be lists) tactic_counts = {} for tactics in techniques_df["tactic"]: if isinstance(tactics, list): for tactic in tactics: tactic_counts[tactic] = tactic_counts.get(tactic, 0) + 1 elif isinstance(tactics, str): for tactic in tactics.split(","): t = tactic.strip() tactic_counts[t] = tactic_counts.get(t, 0) + 1 if not tactic_counts: return go.Figure() tactic_df = pd.DataFrame( list(tactic_counts.items()), columns=["Tactic", "Count"] ).sort_values("Count", ascending=False) fig = px.bar( tactic_df, x="Tactic", y="Count", title="Techniques per Tactic", labels={"Count": "Number of Techniques"}, color="Count", color_continuous_scale="Reds" ) fig.update_layout(height=400, xaxis_tickangle=-45) return fig def create_groups_chart(lang: str): """Create top 10 APT groups by techniques chart""" if lang not in data_cache or data_cache[lang] is None: return go.Figure() groups_df = data_cache[lang]["groups"] if groups_df.empty: return go.Figure() # Count techniques per group group_technique_counts = [] for _, row in groups_df.iterrows(): techniques = row.get("techniques_used", []) if isinstance(techniques, list): count = len(techniques) else: count = 0 group_technique_counts.append({ "name": row["name"], "count": count }) if not group_technique_counts: return go.Figure() groups_chart_df = pd.DataFrame(group_technique_counts).sort_values( "count", ascending=False ).head(10) fig = px.bar( groups_chart_df, y="name", x="count", title="Top 10 APT Groups by Techniques Used", labels={"count": "Techniques", "name": "APT Group"}, color="count", color_continuous_scale="Oranges", orientation="h" ) fig.update_layout(height=400) return fig def update_all_filters(lang: str): """Update all filter options based on language""" if lang not in data_cache or data_cache[lang] is None: return ( gr.update(choices=["All"]), gr.update(choices=["All"]), gr.update(choices=["All"]) ) techniques_df = data_cache[lang]["techniques"] qa_df = data_cache[lang]["qa"] # Get unique tactics tactics = set() for tactic_list in techniques_df["tactic"]: if isinstance(tactic_list, list): tactics.update(tactic_list) elif isinstance(tactic_list, str): tactics.update([t.strip() for t in tactic_list.split(",")]) tactic_choices = ["All"] + sorted(list(tactics)) # Get unique categories from QA categories = ["All"] + sorted(qa_df["category"].unique().tolist()) return ( gr.update(choices=tactic_choices), gr.update(choices=categories), None ) # Load data at startup print("Initializing MITRE ATT&CK Explorer...") load_data() # Create Gradio interface with gr.Blocks(title="MITRE ATT&CK Explorer", theme=gr.themes.Soft()) as app: gr.Markdown("# MITRE ATT&CK Explorer") gr.Markdown("Explore the MITRE ATT&CK Framework - Tactics, Techniques, Mitigations, and APT Groups") # Language selector with gr.Row(): language = gr.Radio( choices=["English", "Français"], value="English", label="Language / Langue", interactive=True ) # Tabs with gr.Tabs(): # Tactics Tab with gr.TabItem("Tactics"): with gr.Row(): tactics_search = gr.Textbox( placeholder="Search tactics...", label="Search", scale=1 ) tactics_df = gr.Dataframe( value=get_tactics_df("en"), interactive=False, label="Tactics" ) # Techniques Tab with gr.TabItem("Techniques"): with gr.Row(): techniques_search = gr.Textbox( placeholder="Search techniques by name, ID, or description...", label="Search", scale=2 ) tactic_filter = gr.Dropdown( choices=["All"], value="All", label="Filter by Tactic", scale=1 ) techniques_df = gr.Dataframe( value=get_techniques_df("en"), interactive=False, label="Techniques" ) # Mitigations Tab with gr.TabItem("Mitigations"): with gr.Row(): mitigations_search = gr.Textbox( placeholder="Search mitigations...", label="Search", scale=1 ) mitigations_df = gr.Dataframe( value=get_mitigations_df("en"), interactive=False, label="Mitigations" ) # APT Groups Tab with gr.TabItem("APT Groups"): with gr.Row(): groups_search = gr.Textbox( placeholder="Search groups by name, aliases, or description...", label="Search", scale=1 ) groups_df = gr.Dataframe( value=get_groups_df("en"), interactive=False, label="APT Groups" ) # Q&A Tab with gr.TabItem("Q&A"): with gr.Row(): qa_search = gr.Textbox( placeholder="Search Q&A...", label="Search", scale=2 ) qa_category = gr.Dropdown( choices=["All"], value="All", label="Filter by Category", scale=1 ) qa_df = gr.Dataframe( value=get_qa_df("en"), interactive=False, label="Q&A Dataset" ) # Statistics Tab with gr.TabItem("Statistics"): with gr.Row(): tactics_chart = gr.Plot(label="Techniques per Tactic") with gr.Row(): groups_chart = gr.Plot(label="Top APT Groups") # Footer gr.HTML("""

Created by Ayi NEDJIMI - Senior Offensive Cybersecurity & AI Consultant

LinkedIn | GitHub | Twitter/X

""") # Language change handler def on_language_change(lang_choice): lang = "en" if lang_choice == "English" else "fr" return ( get_tactics_df(lang), get_techniques_df(lang), get_mitigations_df(lang), get_groups_df(lang), get_qa_df(lang), create_tactic_chart(lang), create_groups_chart(lang), *update_all_filters(lang) ) # Search and filter handlers def on_tactics_search(lang_choice, search_text): lang = "en" if lang_choice == "English" else "fr" df = data_cache[lang]["tactics"] if lang in data_cache else pd.DataFrame() if df.empty: return pd.DataFrame() df = df.copy() if search_text.strip(): search_lower = search_text.lower() df = df[ df["name"].str.lower().str.contains(search_lower, na=False) | df["description"].str.lower().str.contains(search_lower, na=False) | df["id"].str.lower().str.contains(search_lower, na=False) ] return prepare_dataframe(df, exclude_cols=["source_url"]) def on_techniques_search(lang_choice, search_text, tactic): lang = "en" if lang_choice == "English" else "fr" return get_techniques_df(lang, search_text, tactic) def on_mitigations_search(lang_choice, search_text): lang = "en" if lang_choice == "English" else "fr" return get_mitigations_df(lang, search_text) def on_groups_search(lang_choice, search_text): lang = "en" if lang_choice == "English" else "fr" return get_groups_df(lang, search_text) def on_qa_search(lang_choice, search_text, category): lang = "en" if lang_choice == "English" else "fr" return get_qa_df(lang, search_text, category) # Register event handlers language.change( fn=on_language_change, inputs=language, outputs=[ tactics_df, techniques_df, mitigations_df, groups_df, qa_df, tactics_chart, groups_chart, tactic_filter, qa_category, language ] ) tactics_search.change( fn=on_tactics_search, inputs=[language, tactics_search], outputs=tactics_df ) techniques_search.change( fn=on_techniques_search, inputs=[language, techniques_search, tactic_filter], outputs=techniques_df ) tactic_filter.change( fn=on_techniques_search, inputs=[language, techniques_search, tactic_filter], outputs=techniques_df ) mitigations_search.change( fn=on_mitigations_search, inputs=[language, mitigations_search], outputs=mitigations_df ) groups_search.change( fn=on_groups_search, inputs=[language, groups_search], outputs=groups_df ) qa_search.change( fn=on_qa_search, inputs=[language, qa_search, qa_category], outputs=qa_df ) qa_category.change( fn=on_qa_search, inputs=[language, qa_search, qa_category], outputs=qa_df ) if __name__ == "__main__": app.launch()