Spaces:

omasteam
/

Security-Knowledge-Graph-Builder

Running

App Files Files Community

Security-Knowledge-Graph-Builder / app.py

omasteam

Create app.py

bf68f07 verified 4 months ago

raw

history blame contribute delete

13 kB

	import html
	import json
	import re
	import time
	import zlib
	from typing import List, Dict, Tuple

	import gradio as gr
	import pandas as pd

	# Regex patterns per entity category (mock stand-in; in production, use
	# spaCy/transformers). Insertion order matters: the extractor walks the
	# categories and patterns in this order when numbering and de-duplicating
	# matches.
	ENTITY_PATTERNS = {
	    "ThreatActor": [
	        r"APT\d+",
	        r"Cozy Bear",
	        r"Lazarus",
	        r"FIN\d+",
	        r"Carbanak",
	    ],
	    "Vulnerability": [
	        r"CVE-\d{4}-\d{4,7}",
	        r"MS\d{2}-\d{3}",
	    ],
	    "Software": [
	        r"Microsoft \w+",
	        r"Apache \w+",
	        r"Windows \d+",
	        r"Linux",
	        r"Chrome",
	    ],
	    "Tool": [
	        r"Cobalt Strike",
	        r"Metasploit",
	        r"PowerShell",
	        r"Mimikatz",
	        r"PsExec",
	    ],
	    "IOC": [
	        r"\b(?:\d{1,3}\.){3}\d{1,3}\b",  # dotted-quad shaped token
	        r"\b[a-fA-F0-9]{32,64}\b",  # 32-64 hex characters
	        r"https?://[^\s]+",  # URL up to the next whitespace
	    ],
	    "DetectionRule": [
	        r"SIG-\d{4}-\d{3}",
	        r"YARA-\d+",
	        r"Sigma-\w+",
	    ],
	}

	# Mock MITRE ATT&CK techniques: technique ID -> technique name.
	MITRE_TECHNIQUES = {
	    "T1059.001": "PowerShell",
	    "T1566.001": "Spearphishing Attachment",
	    "T1055": "Process Injection",
	    "T1003": "OS Credential Dumping",
	}

	class SecurityKnowledgeGraph:
	    """Mock extractor of security entities and the relationships between them.

	    Entities are found with the regexes in ``ENTITY_PATTERNS``; relationships
	    are guessed from textual proximity plus trigger keywords. Confidence
	    scores are pseudo-values derived from a checksum of the matched text,
	    not the output of a model.
	    """

	    def __init__(self):
	        # Initialised empty; the extract_* methods below return their results
	        # and do not write to these attributes.
	        self.entities = []
	        self.relationships = []

	    @staticmethod
	    def _stable_confidence(key: str, base: float, spread: int) -> float:
	        """Return a reproducible pseudo-confidence in ``[base, base + spread/100)``.

	        The builtin ``hash()`` of a ``str`` is salted per interpreter process,
	        so using it made the same report score differently after every
	        restart. CRC32 of the UTF-8 bytes gives the same spread of values
	        while staying identical across runs.
	        """
	        return round(base + (zlib.crc32(key.encode('utf-8')) % spread) / 100, 2)

	    def extract_entities(self, text: str) -> List[Dict]:
	        """Mock entity extraction using regex patterns.

	        Args:
	            text: Free-form threat-intelligence text.

	        Returns:
	            One dict per distinct matched string (compared case-insensitively)
	            with keys ``id``, ``text``, ``type``, ``start``, ``end`` and
	            ``confidence``, ordered by ``start``. Only the first match found
	            for a given string is kept, so ``id`` values may have gaps.
	        """
	        entities = []
	        entity_id = 0

	        for entity_type, patterns in ENTITY_PATTERNS.items():
	            for pattern in patterns:
	                for match in re.finditer(pattern, text, re.IGNORECASE):
	                    matched = match.group()
	                    entities.append({
	                        'id': entity_id,
	                        'text': matched,
	                        'type': entity_type,
	                        'start': match.start(),
	                        'end': match.end(),
	                        'confidence': self._stable_confidence(matched, 0.85, 15),
	                    })
	                    entity_id += 1

	        # Remove duplicates: keep the first hit for each case-folded string.
	        seen = set()
	        unique_entities = []
	        for entity in entities:
	            key = entity['text'].lower()
	            if key not in seen:
	                seen.add(key)
	                unique_entities.append(entity)

	        return sorted(unique_entities, key=lambda x: x['start'])

	    def extract_relationships(self, entities: List[Dict], text: str) -> List[Dict]:
	        """Mock relationship extraction based on proximity and keywords.

	        Every unordered pair of entities whose start offsets are at most 200
	        characters apart is examined. The text spanning both entities, padded
	        by 50 characters on each side, is searched for trigger keywords; the
	        first relationship type (in table order) that has a keyword present
	        and is valid for the pair's entity types is emitted.

	        The pair is tried in both orientations, so the emitted direction is
	        decided by the entity types and not by which entity happens to appear
	        first in the text (e.g. "CVE-... exploited by APT29" still yields
	        ``APT29 exploits CVE-...``).

	        Args:
	            entities: Entity dicts as returned by ``extract_entities``.
	            text: The text the entities were extracted from.

	        Returns:
	            A list of dicts with keys ``source``, ``target``, ``relationship``,
	            ``confidence``, ``source_type`` and ``target_type``.
	        """
	        relationships = []

	        # Relationship type -> substrings that signal it in the context.
	        # NOTE: 'mitigates' has no entry in _is_valid_relationship's table, so
	        # it is currently never emitted.
	        rel_patterns = {
	            'exploits': ['exploit', 'exploits', 'exploiting', 'leverages'],
	            'uses': ['uses', 'utilizing', 'deploys', 'employs'],
	            'targets': ['targets', 'targeting', 'affects'],
	            'detects': ['detects', 'identifies', 'monitors'],
	            'mitigates': ['mitigates', 'prevents', 'blocks']
	        }

	        text_lower = text.lower()

	        for i, first in enumerate(entities):
	            # Slicing from i + 1 visits each unordered pair exactly once and
	            # never pairs an entity with itself.
	            for second in entities[i + 1:]:
	                # Skip pairs that are too far apart in the text.
	                if abs(first['start'] - second['start']) > 200:
	                    continue

	                # Context window: both entities plus 50 characters either side.
	                context_start = min(first['start'], second['start']) - 50
	                context_end = max(first['end'], second['end']) + 50
	                context = text_lower[max(0, context_start):context_end]

	                for rel_type, keywords in rel_patterns.items():
	                    if not any(keyword in context for keyword in keywords):
	                        continue

	                    # Determine relationship direction based on entity types.
	                    if self._is_valid_relationship(first['type'], second['type'], rel_type):
	                        source, target = first, second
	                    elif self._is_valid_relationship(second['type'], first['type'], rel_type):
	                        source, target = second, first
	                    else:
	                        continue

	                    relationships.append({
	                        'source': source['text'],
	                        'target': target['text'],
	                        'relationship': rel_type,
	                        'confidence': self._stable_confidence(
	                            source['text'] + target['text'], 0.75, 20
	                        ),
	                        'source_type': source['type'],
	                        'target_type': target['type']
	                    })
	                    # At most one relationship per entity pair.
	                    break

	        return relationships

	    def _is_valid_relationship(self, source_type: str, target_type: str, rel_type: str) -> bool:
	        """Check if relationship makes sense given entity types.

	        Returns True only when ``(source_type, target_type)`` is listed for
	        ``rel_type``; unknown relationship types are always invalid.
	        """
	        # NOTE: extract_relationships files the keyword 'affects' under the
	        # 'targets' type, so it never passes rel_type='affects'; that entry is
	        # only reachable by other callers.
	        valid_relationships = {
	            'exploits': [('ThreatActor', 'Vulnerability'), ('Tool', 'Vulnerability')],
	            'uses': [('ThreatActor', 'Tool'), ('ThreatActor', 'Software')],
	            'targets': [('ThreatActor', 'Software'), ('Tool', 'Software'), ('Vulnerability', 'Software')],
	            'detects': [('DetectionRule', 'Tool'), ('DetectionRule', 'ThreatActor')],
	            'affects': [('Vulnerability', 'Software')]
	        }

	        return (source_type, target_type) in valid_relationships.get(rel_type, [])

	# Initialize the knowledge graph (module-level instance shared by the handlers)
	kg = SecurityKnowledgeGraph()

	def process_threat_intel(text: str) -> Tuple[str, str, str]:
	    """Process threat intelligence text and return formatted results.

	    Args:
	        text: Raw report text from the input box. ``None``, empty or
	            whitespace-only input is treated as "nothing to analyze".

	    Returns:
	        A 3-tuple of HTML fragments: entity chips, relationship cards and a
	        static panel of example graph queries. For blank input the first
	        element is a plain prompt message and the other two are empty.
	    """
	    if not text or not text.strip():
	        return "Please provide threat intelligence text to analyze.", "", ""

	    # Extract entities, then relationships between them
	    entities = kg.extract_entities(text)
	    relationships = kg.extract_relationships(entities, text)

	    # Per-type chip colours. Each value is spliced directly after
	    # "background-color: ", so the colour must be terminated with ';' before
	    # the border/text-colour declarations (otherwise the declaration is
	    # malformed).
	    color_map = {
	        'ThreatActor': '#fee2e2; border: 1px solid #fca5a5; color: #991b1b',
	        'Vulnerability': '#fed7aa; border: 1px solid #fdba74; color: #9a3412',
	        'Software': '#dbeafe; border: 1px solid #93c5fd; color: #1e40af',
	        'Tool': '#e9d5ff; border: 1px solid #c4b5fd; color: #6b21a8',
	        'IOC': '#dcfce7; border: 1px solid #86efac; color: #166534',
	        'DetectionRule': '#e0e7ff; border: 1px solid #a5b4fc; color: #3730a3'
	    }

	    # Format entities output. Matched text comes from user input (the URL
	    # pattern in particular can capture markup), so escape it before
	    # embedding it in HTML.
	    entity_chips = []
	    for entity in entities:
	        style = (
	            f"background-color: {color_map.get(entity['type'], '#f3f4f6')}; "
	            "padding: 4px 8px; border-radius: 12px; font-size: 12px; font-weight: 500;"
	        )
	        # round() rather than int(): avoids truncating float artifacts.
	        percent = round(entity['confidence'] * 100)
	        entity_chips.append(
	            f"<span style='{style}'>{html.escape(entity['text'])} "
	            f"<small>({html.escape(entity['type'])} - {percent}%)</small></span>"
	        )
	    entities_html = (
	        "<div style='display: flex; flex-wrap: wrap; gap: 8px; margin: 10px 0;'>"
	        + "".join(entity_chips)
	        + "</div>"
	    )

	    # Format relationships output
	    relationship_cards = []
	    for rel in relationships:
	        percent = round(rel['confidence'] * 100)
	        relationship_cards.append(
	            "<div style='margin: 8px 0; padding: 10px; background-color: #f8fafc; "
	            "border-left: 4px solid #3b82f6; border-radius: 4px;'>"
	            f"<strong style='color: #1e40af;'>{html.escape(rel['source'])}</strong> "
	            f"<span style='color: #7c3aed; font-weight: 600;'>{html.escape(rel['relationship'])}</span> "
	            f"<strong style='color: #059669;'>{html.escape(rel['target'])}</strong>"
	            f"<small style='float: right; color: #6b7280;'>{percent}% confidence</small>"
	            "</div>"
	        )
	    relationships_html = (
	        "<div style='margin: 10px 0;'>" + "".join(relationship_cards) + "</div>"
	    )

	    # Create graph query examples. The arrows contain '<' and '>', so the
	    # query text is escaped before being placed inside the markup.
	    example_queries = (
	        "MATCH (ta:ThreatActor)-[:USES]->(tool:Tool) RETURN ta.name, tool.name",
	        "MATCH (v:Vulnerability)<-[:EXPLOITS]-(ta:ThreatActor) RETURN v.name, ta.name",
	        "MATCH path=(ta:ThreatActor)-[*2..4]->(s:Software) RETURN path",
	    )
	    query_style = (
	        "background-color: #1f2937; color: #e5e7eb; padding: 10px; "
	        "border-radius: 6px; font-family: monospace; margin: 5px 0;"
	    )
	    query_boxes = "".join(
	        f"<div style='{query_style}'>{html.escape(query, quote=False)}</div>"
	        for query in example_queries
	    )
	    queries_html = (
	        "<div style='margin: 10px 0;'>"
	        "<h4>Example Graph Queries:</h4>"
	        + query_boxes
	        + "</div>"
	    )

	    return entities_html, relationships_html, queries_html

	def create_sample_data() -> str:
	    """Return a fixed, three-paragraph sample threat-intelligence report.

	    Takes no arguments and always returns the same text. It is wired to the
	    "Load Sample Data" button, which writes the result into the input box.
	    """
	    return """APT29 (Cozy Bear) has been observed exploiting CVE-2023-23397 to target Microsoft Outlook vulnerabilities in financial institutions. The threat actor deploys Cobalt Strike beacons on compromised Windows 10 systems and uses PowerShell for lateral movement and credential dumping.

	The attack chain typically begins with spearphishing emails containing malicious attachments. Once initial access is gained, APT29 utilizes Mimikatz for credential harvesting and PsExec for remote execution across the network.

	Security teams can detect this activity using Sigma rule SIG-2023-001 which monitors for suspicious PowerShell execution patterns and YARA-2023-APT29 for Cobalt Strike beacon detection. The IOCs include IP addresses 192.168.1.100 and 10.0.0.50, along with hash values 7d865e959b2466918c9863afca942d0fb89d7c9ac0c99bafc3749504ded97730."""

	# Create Gradio interface: a Blocks app bound to `demo`, with a custom
	# theme, CSS overrides and page title. Everything inside the `with` body
	# is added to the page in the order it is created.
	with gr.Blocks(
	    theme=gr.themes.Base(),
	    css="""
	.gradio-container {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important}
	.gr-button {background: linear-gradient(90deg, #667eea, #764ba2) !important; border: none !important}
	.gr-button:hover {transform: translateY(-1px) !important; box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important}
	""",
	    title="🔒 Security Knowledge Graph Builder"
	) as demo:

	    # Page header banner (static HTML).
	    gr.HTML("""
	<div style='text-align: center; padding: 20px; background: rgba(255,255,255,0.1); border-radius: 10px; margin-bottom: 20px;'>
	<h1 style='color: white; margin-bottom: 10px;'>🔒 Security Knowledge Graph Builder</h1>
	<p style='color: rgba(255,255,255,0.8); font-size: 16px;'>
	Secure AI-powered threat intelligence without vector database vulnerabilities
	</p>
	<p style='color: rgba(255,255,255,0.6); font-size: 14px;'>
	Build explainable, auditable security relationships instead of relying on risky RAG embeddings
	</p>
	</div>
	""")

	    # Main area: one row holding two columns, both created with scale=1.
	    with gr.Row():
	        # First column: report input, action buttons and an explanatory panel.
	        with gr.Column(scale=1):
	            gr.HTML("<h3 style='color: white;'>📊 Threat Intelligence Input</h3>")

	            # Text box whose content is passed to process_threat_intel.
	            input_text = gr.Textbox(
	                placeholder="Paste your threat intelligence report here...",
	                lines=8,
	                label="Threat Intelligence Text",
	                value=""
	            )

	            with gr.Row():
	                analyze_btn = gr.Button("🔍 Analyze Threat Intelligence", variant="primary")
	                sample_btn = gr.Button("📝 Load Sample Data", variant="secondary")

	            # Static explanatory panel; not connected to any handler.
	            gr.HTML("""
	<div style='margin-top: 20px; padding: 15px; background: rgba(34, 197, 94, 0.1); border-left: 4px solid #22c55e; border-radius: 6px;'>
	<h4 style='color: #22c55e; margin-top: 0;'>🛡️ Why Knowledge Graphs Beat RAG for Security:</h4>
	<ul style='color: rgba(255,255,255,0.8); font-size: 14px;'>
	<li><strong>No Vector Poisoning:</strong> Explicit relationships prevent embedding manipulation</li>
	<li><strong>Full Auditability:</strong> Every connection can be traced and verified</li>
	<li><strong>Access Control:</strong> Fine-grained permissions on nodes and edges</li>
	<li><strong>Precise Queries:</strong> No ambiguous similarity matching</li>
	</ul>
	</div>
	""")

	        # Second column: three HTML panels that receive the three strings
	        # returned by process_threat_intel, in the same order.
	        with gr.Column(scale=1):
	            gr.HTML("<h3 style='color: white;'>🎯 Extracted Security Entities</h3>")
	            entities_output = gr.HTML()

	            gr.HTML("<h3 style='color: white;'>🔗 Security Relationships</h3>")
	            relationships_output = gr.HTML()

	            gr.HTML("<h3 style='color: white;'>🔍 Graph Query Examples</h3>")
	            queries_output = gr.HTML()

	    # Event handlers
	    # "Load Sample Data" takes no inputs and writes the canned report into
	    # the input box.
	    sample_btn.click(
	        fn=create_sample_data,
	        outputs=input_text
	    )

	    # "Analyze" sends the input box text to process_threat_intel and routes
	    # its 3-tuple result to the three output panels.
	    analyze_btn.click(
	        fn=process_threat_intel,
	        inputs=input_text,
	        outputs=[entities_output, relationships_output, queries_output]
	    )

	    # Footer (static HTML).
	    gr.HTML("""
	<div style='text-align: center; margin-top: 30px; padding: 20px; background: rgba(0,0,0,0.2); border-radius: 10px;'>
	<p style='color: rgba(255,255,255,0.8); margin-bottom: 10px;'>
	🔒 <strong>Secure-by-Design Threat Intelligence</strong> - No risky vector embeddings, just explainable relationships
	</p>
	<p style='color: rgba(255,255,255,0.6); font-size: 14px;'>
	Built for blue teams who need trustworthy, auditable AI in cybersecurity operations
	</p>
	</div>
	""")

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()