"""Gradio demo that builds a mock security knowledge graph from threat-intel text.

Entities are found with regular expressions and relationships are inferred
from keyword proximity; both are rendered as HTML fragments for the UI.
"""

import html
import json
import re
import time
import zlib
from typing import Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd

# Mock security entity patterns (in production, use spaCy/transformers)
ENTITY_PATTERNS = {
    'ThreatActor': [r'APT\d+', r'Cozy Bear', r'Lazarus', r'FIN\d+', r'Carbanak'],
    'Vulnerability': [r'CVE-\d{4}-\d{4,7}', r'MS\d{2}-\d{3}'],
    'Software': [r'Microsoft \w+', r'Apache \w+', r'Windows \d+', r'Linux', r'Chrome'],
    'Tool': [r'Cobalt Strike', r'Metasploit', r'PowerShell', r'Mimikatz', r'PsExec'],
    'IOC': [r'\b(?:\d{1,3}\.){3}\d{1,3}\b', r'\b[a-fA-F0-9]{32,64}\b', r'https?://[^\s]+'],
    'DetectionRule': [r'SIG-\d{4}-\d{3}', r'YARA-\d+', r'Sigma-\w+'],
}

# Mock MITRE ATT&CK techniques
MITRE_TECHNIQUES = {
    'T1059.001': 'PowerShell',
    'T1566.001': 'Spearphishing Attachment',
    'T1055': 'Process Injection',
    'T1003': 'OS Credential Dumping',
}

# Per-entity-type badge colours. Each value completes a
# "background-color: ..." declaration and then adds border and text colour,
# so the background value must be terminated with ';' before "border".
_ENTITY_COLORS = {
    'ThreatActor': '#fee2e2; border: 1px solid #fca5a5; color: #991b1b',
    'Vulnerability': '#fed7aa; border: 1px solid #fdba74; color: #9a3412',
    'Software': '#dbeafe; border: 1px solid #93c5fd; color: #1e40af',
    'Tool': '#e9d5ff; border: 1px solid #c4b5fd; color: #6b21a8',
    'IOC': '#dcfce7; border: 1px solid #86efac; color: #166534',
    'DetectionRule': '#e0e7ff; border: 1px solid #a5b4fc; color: #3730a3',
}
_DEFAULT_ENTITY_COLOR = '#f3f4f6'

_EXAMPLE_QUERIES = (
    "MATCH (ta:ThreatActor)-[:USES]->(tool:Tool) RETURN ta.name, tool.name",
    "MATCH (v:Vulnerability)<-[:EXPLOITS]-(ta:ThreatActor) RETURN v.name, ta.name",
    "MATCH path=(ta:ThreatActor)-[*2..4]->(s:Software) RETURN path",
)


def _stable_bucket(value: str, buckets: int) -> int:
    """Map *value* to an integer in ``range(buckets)``, stable across runs.

    Built-in ``hash()`` of a ``str`` is salted per interpreter process, so it
    would make the mock confidence scores change on every restart; CRC32 of
    the UTF-8 bytes does not.
    """
    return zlib.crc32(value.encode("utf-8")) % buckets


class SecurityKnowledgeGraph:
    """Regex/keyword based mock extractor for security entities and relationships."""

    # Keywords whose presence in the text around an entity pair suggests a
    # relationship type. Matching is by substring on lowercased text.
    # NOTE(review): 'mitigates' has no entry in _VALID_RELATIONSHIPS, so it can
    # never be emitted as written.
    _RELATIONSHIP_KEYWORDS = {
        'exploits': ['exploit', 'exploits', 'exploiting', 'leverages'],
        'uses': ['uses', 'utilizing', 'deploys', 'employs'],
        'targets': ['targets', 'targeting', 'affects'],
        'detects': ['detects', 'identifies', 'monitors'],
        'mitigates': ['mitigates', 'prevents', 'blocks'],
    }

    # Allowed (source_type, target_type) pairs per relationship type.
    # NOTE(review): 'affects' has no entry in _RELATIONSHIP_KEYWORDS (the
    # keyword "affects" maps to 'targets'), so this row is never looked up by
    # extract_relationships.
    _VALID_RELATIONSHIPS = {
        'exploits': [('ThreatActor', 'Vulnerability'), ('Tool', 'Vulnerability')],
        'uses': [('ThreatActor', 'Tool'), ('ThreatActor', 'Software')],
        'targets': [('ThreatActor', 'Software'), ('Tool', 'Software'), ('Vulnerability', 'Software')],
        'detects': [('DetectionRule', 'Tool'), ('DetectionRule', 'ThreatActor')],
        'affects': [('Vulnerability', 'Software')],
    }

    _MAX_ENTITY_DISTANCE = 200  # max gap (in characters) between entity starts
    _CONTEXT_PADDING = 50  # characters of context inspected on each side of a pair

    def __init__(self):
        self.entities = []
        self.relationships = []

    def extract_entities(self, text: str) -> List[Dict]:
        """Mock entity extraction using regex patterns.

        Every pattern in ``ENTITY_PATTERNS`` is searched case-insensitively.

        Args:
            text: Raw threat-intelligence text.

        Returns:
            One dict per distinct matched string (compared case-insensitively;
            the first match in pattern order wins), sorted by start offset.
            Keys: ``id``, ``text``, ``type``, ``start``, ``end``, ``confidence``.
            ``id`` is the match's index before de-duplication, so ids may
            have gaps.
        """
        found = []
        for entity_type, patterns in ENTITY_PATTERNS.items():
            for pattern in patterns:
                for match in re.finditer(pattern, text, re.IGNORECASE):
                    matched = match.group()
                    found.append({
                        'id': len(found),
                        'text': matched,
                        'type': entity_type,
                        'start': match.start(),
                        'end': match.end(),
                        # Mock score in [0.85, 0.99], deterministic per matched string.
                        'confidence': round(0.85 + _stable_bucket(matched, 15) / 100, 2),
                    })

        # Remove duplicates (same text, ignoring case), keeping the first seen.
        seen = set()
        unique_entities = []
        for entity in found:
            key = entity['text'].lower()
            if key not in seen:
                seen.add(key)
                unique_entities.append(entity)

        return sorted(unique_entities, key=lambda entity: entity['start'])

    def extract_relationships(self, entities: List[Dict], text: str) -> List[Dict]:
        """Mock relationship extraction based on proximity and keywords.

        Each unordered pair of entities whose start offsets are at most
        ``_MAX_ENTITY_DISTANCE`` characters apart is examined. The text
        spanning both entities (plus ``_CONTEXT_PADDING`` on each side) is
        searched for relationship keywords; the first relationship type that
        has a keyword hit and is valid for the pair's entity types, in either
        direction, is recorded.

        Args:
            entities: Entity dicts as returned by ``extract_entities``.
            text: The text the entity offsets refer to.

        Returns:
            A list of dicts with keys ``source``, ``target``, ``relationship``,
            ``confidence``, ``source_type`` and ``target_type``; at most one
            per entity pair.
        """
        relationships = []

        for index, first in enumerate(entities):
            for second in entities[index + 1:]:
                # Skip pairs that are too far apart in the text.
                if abs(first['start'] - second['start']) > self._MAX_ENTITY_DISTANCE:
                    continue

                context_start = max(0, min(first['start'], second['start']) - self._CONTEXT_PADDING)
                context_end = max(first['end'], second['end']) + self._CONTEXT_PADDING
                # Slice first, then lowercase: lowercasing the whole text can
                # change its length and shift the entity offsets.
                context = text[context_start:context_end].lower()

                for rel_type, keywords in self._RELATIONSHIP_KEYWORDS.items():
                    if not any(keyword in context for keyword in keywords):
                        continue

                    oriented = self._orient_pair(first, second, rel_type)
                    if oriented is None:
                        continue

                    source, target = oriented
                    relationships.append({
                        'source': source['text'],
                        'target': target['text'],
                        'relationship': rel_type,
                        # Mock score in [0.75, 0.94], deterministic per pair.
                        'confidence': round(
                            0.75 + _stable_bucket(source['text'] + target['text'], 20) / 100, 2
                        ),
                        'source_type': source['type'],
                        'target_type': target['type'],
                    })
                    break  # one relationship per entity pair

        return relationships

    def _orient_pair(self, first: Dict, second: Dict, rel_type: str) -> Optional[Tuple[Dict, Dict]]:
        """Return ``(source, target)`` for *rel_type*, or ``None`` if neither order is valid.

        Text order is tried first; the reverse order covers passive phrasing
        where the target is mentioned before the source.
        """
        if self._is_valid_relationship(first['type'], second['type'], rel_type):
            return first, second
        if self._is_valid_relationship(second['type'], first['type'], rel_type):
            return second, first
        return None

    def _is_valid_relationship(self, source_type: str, target_type: str, rel_type: str) -> bool:
        """Check if relationship makes sense given entity types."""
        return (source_type, target_type) in self._VALID_RELATIONSHIPS.get(rel_type, [])


# Initialize the knowledge graph
kg = SecurityKnowledgeGraph()


def process_threat_intel(text: str) -> Tuple[str, str, str]:
    """Process threat intelligence text and return formatted results.

    Args:
        text: Threat-intelligence text entered by the user.

    Returns:
        Three HTML fragments: entity badges, relationship rows, and example
        graph queries. For empty or whitespace-only input the first element
        is a prompt message and the other two are empty strings.
    """
    if not text or not text.strip():
        return "Please provide threat intelligence text to analyze.", "", ""

    entities = kg.extract_entities(text)
    relationships = kg.extract_relationships(entities, text)

    # Entity and relationship texts are substrings of user input, so they are
    # HTML-escaped before being placed into markup.
    badges = []
    for entity in entities:
        colors = _ENTITY_COLORS.get(entity['type'], _DEFAULT_ENTITY_COLOR)
        style = (
            f"background-color: {colors}; padding: 4px 8px; border-radius: 12px; "
            "font-size: 12px; font-weight: 500;"
        )
        label = f"{html.escape(entity['text'])} ({entity['type']} - {entity['confidence']:.0%})"
        badges.append(f'<span style="{style}">{label}</span>')
    entities_html = (
        '<div style="display: flex; flex-wrap: wrap; gap: 8px;">' + "".join(badges) + "</div>"
    )

    rows = [
        '<div style="padding: 4px 0;">'
        f"<strong>{html.escape(rel['source'])}</strong> "
        f"<em>{rel['relationship']}</em> "
        f"<strong>{html.escape(rel['target'])}</strong> "
        f"<span>{rel['confidence']:.0%} confidence</span>"
        "</div>"
        for rel in relationships
    ]
    relationships_html = "<div>" + "".join(rows) + "</div>"

    # The Cypher examples contain '<' and '>', so they are escaped as well.
    queries_html = (
        "<div><h4>Example Graph Queries:</h4><pre>"
        + "\n".join(html.escape(query) for query in _EXAMPLE_QUERIES)
        + "</pre></div>"
    )

    return entities_html, relationships_html, queries_html


def create_sample_data():
    """Generate sample threat intelligence data."""
    return (
        "APT29 (Cozy Bear) has been observed exploiting CVE-2023-23397 to target "
        "Microsoft Outlook vulnerabilities in financial institutions. The threat actor "
        "deploys Cobalt Strike beacons on compromised Windows 10 systems and uses "
        "PowerShell for lateral movement and credential dumping. The attack chain "
        "typically begins with spearphishing emails containing malicious attachments. "
        "Once initial access is gained, APT29 utilizes Mimikatz for credential "
        "harvesting and PsExec for remote execution across the network. Security teams "
        "can detect this activity using Sigma rule SIG-2023-001 which monitors for "
        "suspicious PowerShell execution patterns and YARA-2023-APT29 for Cobalt Strike "
        "beacon detection. The IOCs include IP addresses 192.168.1.100 and 10.0.0.50, "
        "along with hash values "
        "7d865e959b2466918c9863afca942d0fb89d7c9ac0c99bafc3749504ded97730."
    )


# Create Gradio interface
with gr.Blocks(
    theme=gr.themes.Base(),
    css="""
    .gradio-container {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important}
    .gr-button {background: linear-gradient(90deg, #667eea, #764ba2) !important; border: none !important}
    .gr-button:hover {transform: translateY(-1px) !important; box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important}
    """,
    title="🔒 Security Knowledge Graph Builder",
) as demo:

    gr.HTML("""
    <div style="text-align: center;">
        <h1>🔒 Security Knowledge Graph Builder</h1>
        <p>Secure AI-powered threat intelligence without vector database vulnerabilities</p>
        <p>Build explainable, auditable security relationships instead of relying on risky RAG embeddings</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h3>📊 Threat Intelligence Input</h3>")

            input_text = gr.Textbox(
                placeholder="Paste your threat intelligence report here...",
                lines=8,
                label="Threat Intelligence Text",
                value="",
            )

            with gr.Row():
                analyze_btn = gr.Button("🔍 Analyze Threat Intelligence", variant="primary")
                sample_btn = gr.Button("📝 Load Sample Data", variant="secondary")

            gr.HTML("""
            <div>
                <h4>🛡️ Why Knowledge Graphs Beat RAG for Security:</h4>
            </div>
            """)

        with gr.Column(scale=1):
            gr.HTML("<h3>🎯 Extracted Security Entities</h3>")
            entities_output = gr.HTML()

            gr.HTML("<h3>🔗 Security Relationships</h3>")
            relationships_output = gr.HTML()

            gr.HTML("<h3>🔍 Graph Query Examples</h3>")
            queries_output = gr.HTML()

    # Event handlers
    sample_btn.click(
        fn=create_sample_data,
        outputs=input_text,
    )

    analyze_btn.click(
        fn=process_threat_intel,
        inputs=input_text,
        outputs=[entities_output, relationships_output, queries_output],
    )

    # Footer
    gr.HTML("""
    <div style="text-align: center;">
        <p>🔒 Secure-by-Design Threat Intelligence - No risky vector embeddings, just explainable relationships</p>
        <p>Built for blue teams who need trustworthy, auditable AI in cybersecurity operations</p>
    </div>
    """)

if __name__ == "__main__":
    demo.launch()