import html
import json
import re
import time
import zlib
from typing import List, Dict, Tuple

import gradio as gr
import pandas as pd

# Regular expressions for the mock entity extractor, grouped by entity type.
# (A production system would use spaCy/transformers instead of regexes.)
ENTITY_PATTERNS = dict(
    ThreatActor=[
        r'APT\d+',
        r'Cozy Bear',
        r'Lazarus',
        r'FIN\d+',
        r'Carbanak',
    ],
    Vulnerability=[
        r'CVE-\d{4}-\d{4,7}',
        r'MS\d{2}-\d{3}',
    ],
    Software=[
        r'Microsoft \w+',
        r'Apache \w+',
        r'Windows \d+',
        r'Linux',
        r'Chrome',
    ],
    Tool=[
        r'Cobalt Strike',
        r'Metasploit',
        r'PowerShell',
        r'Mimikatz',
        r'PsExec',
    ],
    IOC=[
        r'\b(?:\d{1,3}\.){3}\d{1,3}\b',  # dotted-quad IPv4-like address
        r'\b[a-fA-F0-9]{32,64}\b',       # hex digest, 32 to 64 characters
        r'https?://[^\s]+',              # http(s) URL up to the next whitespace
    ],
    DetectionRule=[
        r'SIG-\d{4}-\d{3}',
        r'YARA-\d+',
        r'Sigma-\w+',
    ],
)

# Small mock lookup table: MITRE ATT&CK technique ID -> technique name.
MITRE_TECHNIQUES = dict([
    ('T1059.001', 'PowerShell'),
    ('T1566.001', 'Spearphishing Attachment'),
    ('T1055', 'Process Injection'),
    ('T1003', 'OS Credential Dumping'),
])

class SecurityKnowledgeGraph:
    """Mock security knowledge graph built from regex and keyword heuristics.

    Entities are found by matching the module-level ``ENTITY_PATTERNS``
    against the input text. Relationships are inferred between entities that
    sit close together when a relationship keyword appears in the text
    around them.
    """

    # Two entities whose start offsets are further apart than this (in
    # characters) are never linked.
    _MAX_ENTITY_DISTANCE = 200
    # Characters of surrounding text inspected on each side of an entity pair.
    _CONTEXT_PADDING = 50

    # Relationship type -> keywords that signal it. Matching is a plain
    # substring test on lower-cased text, so 'exploit' also hits 'exploited'.
    _REL_KEYWORDS = {
        'exploits': ['exploit', 'exploits', 'exploiting', 'leverages'],
        'uses': ['uses', 'utilizing', 'deploys', 'employs'],
        'targets': ['targets', 'targeting', 'affects'],
        'detects': ['detects', 'identifies', 'monitors'],
        'mitigates': ['mitigates', 'prevents', 'blocks'],
    }

    # Relationship type -> allowed (source type, target type) pairs.
    # NOTE: 'mitigates' has no entry here, so it is never emitted, and
    # 'affects' is never looked up because no keyword maps to that type.
    _VALID_RELATIONSHIPS = {
        'exploits': [('ThreatActor', 'Vulnerability'), ('Tool', 'Vulnerability')],
        'uses': [('ThreatActor', 'Tool'), ('ThreatActor', 'Software')],
        'targets': [('ThreatActor', 'Software'), ('Tool', 'Software'), ('Vulnerability', 'Software')],
        'detects': [('DetectionRule', 'Tool'), ('DetectionRule', 'ThreatActor')],
        'affects': [('Vulnerability', 'Software')],
    }

    def __init__(self):
        self.entities = []
        self.relationships = []

    @staticmethod
    def _stable_bucket(text: str, modulus: int) -> int:
        """Map *text* to a reproducible integer in ``range(modulus)``.

        Built-in ``hash()`` on ``str`` is salted per interpreter process, so
        it would give different mock confidences on every run; CRC32 of the
        UTF-8 bytes is stable.
        """
        return zlib.crc32(text.encode('utf-8')) % modulus

    def extract_entities(self, text: str) -> List[Dict]:
        """Extract entities from *text* using the ``ENTITY_PATTERNS`` regexes.

        Matching is case-insensitive. Only the first match of each distinct
        (case-insensitive) text is kept, where "first" follows pattern
        iteration order.

        Args:
            text: Free-form threat intelligence text. ``None`` or an empty
                string yields an empty list.

        Returns:
            Entity dicts with keys ``id``, ``text``, ``type``, ``start``,
            ``end`` and ``confidence`` (0.85 to 0.99), sorted by ``start``.
        """
        if not text:
            return []

        entities = []
        seen = set()
        entity_id = 0

        for entity_type, patterns in ENTITY_PATTERNS.items():
            for pattern in patterns:
                for match in re.finditer(pattern, text, re.IGNORECASE):
                    matched = match.group()
                    key = matched.lower()
                    if key not in seen:
                        seen.add(key)
                        entities.append({
                            'id': entity_id,
                            'text': matched,
                            'type': entity_type,
                            'start': match.start(),
                            'end': match.end(),
                            'confidence': round(0.85 + self._stable_bucket(matched, 15) / 100, 2),
                        })
                    # The counter advances for every match, duplicates
                    # included, so ids identify the raw match ordinal.
                    entity_id += 1

        return sorted(entities, key=lambda entity: entity['start'])

    def extract_relationships(self, entities: List[Dict], text: str) -> List[Dict]:
        """Infer relationships between nearby entities from context keywords.

        Each unordered pair of entities is examined once. A pair is linked
        when the entities start within ``_MAX_ENTITY_DISTANCE`` characters of
        each other, a relationship keyword occurs in the surrounding context,
        and the entity types form an allowed pair for that relationship. At
        most one relationship (the first matching type) is emitted per pair.

        Args:
            entities: Entity dicts as returned by :meth:`extract_entities`.
            text: The text the entities were extracted from.

        Returns:
            Relationship dicts with keys ``source``, ``target``,
            ``relationship``, ``confidence`` (0.75 to 0.94), ``source_type``
            and ``target_type``.
        """
        if not entities or not text:
            return []

        relationships = []
        text_lower = text.lower()

        for index, first in enumerate(entities):
            for second in entities[index + 1:]:
                if abs(first['start'] - second['start']) > self._MAX_ENTITY_DISTANCE:
                    continue

                # Context window: the span covering both entities plus padding.
                context_start = max(0, min(first['start'], second['start']) - self._CONTEXT_PADDING)
                context_end = max(first['end'], second['end']) + self._CONTEXT_PADDING
                context = text_lower[context_start:context_end]

                for rel_type, keywords in self._REL_KEYWORDS.items():
                    if not any(keyword in context for keyword in keywords):
                        continue

                    oriented = self._orient_pair(first, second, rel_type)
                    if oriented is None:
                        # Keyword present but types do not fit; try the next type.
                        continue

                    source, target = oriented
                    relationships.append({
                        'source': source['text'],
                        'target': target['text'],
                        'relationship': rel_type,
                        'confidence': round(
                            0.75 + self._stable_bucket(source['text'] + target['text'], 20) / 100, 2
                        ),
                        'source_type': source['type'],
                        'target_type': target['type'],
                    })
                    break

        return relationships

    def _orient_pair(self, first: Dict, second: Dict, rel_type: str):
        """Return ``(source, target)`` for the pair, or ``None`` if no direction fits.

        The direction is decided by entity types rather than text order, so
        passive phrasing ("CVE-... exploited by APT29") is still recognised.
        Text order is tried first.
        """
        for source, target in ((first, second), (second, first)):
            if self._is_valid_relationship(source['type'], target['type'], rel_type):
                return source, target
        return None

    def _is_valid_relationship(self, source_type: str, target_type: str, rel_type: str) -> bool:
        """Return True if ``source_type -[rel_type]-> target_type`` is an allowed edge."""
        return (source_type, target_type) in self._VALID_RELATIONSHIPS.get(rel_type, [])

# Module-level SecurityKnowledgeGraph instance; process_threat_intel() calls
# its extract_entities() / extract_relationships() methods on every request.
kg = SecurityKnowledgeGraph()

# Inline CSS fragment per entity type: background colour, then border and
# text colour. The ';' after the background colour is required: without it
# the whole background-color declaration is invalid and browsers drop it.
_ENTITY_CHIP_COLORS = {
    'ThreatActor': '#fee2e2; border: 1px solid #fca5a5; color: #991b1b',
    'Vulnerability': '#fed7aa; border: 1px solid #fdba74; color: #9a3412',
    'Software': '#dbeafe; border: 1px solid #93c5fd; color: #1e40af',
    'Tool': '#e9d5ff; border: 1px solid #c4b5fd; color: #6b21a8',
    'IOC': '#dcfce7; border: 1px solid #86efac; color: #166534',
    'DetectionRule': '#e0e7ff; border: 1px solid #a5b4fc; color: #3730a3',
}
_DEFAULT_CHIP_COLOR = '#f3f4f6'


def _render_entities_html(entities: List[Dict]) -> str:
    """Render extracted entities as a flex row of coloured chips."""
    chips = []
    for entity in entities:
        colors = _ENTITY_CHIP_COLORS.get(entity['type'], _DEFAULT_CHIP_COLOR)
        style = f"background-color: {colors}; padding: 4px 8px; border-radius: 12px; font-size: 12px; font-weight: 500;"
        # Entity text is copied from user input (URL matches can contain
        # markup), so it must be escaped before being placed into HTML.
        label = html.escape(str(entity['text']))
        kind = html.escape(str(entity['type']))
        # round() rather than int(): int() truncates values such as 0.57 * 100.
        percent = round(entity['confidence'] * 100)
        chips.append(f"<span style='{style}'>{label} <small>({kind} - {percent}%)</small></span>")

    return (
        "<div style='display: flex; flex-wrap: wrap; gap: 8px; margin: 10px 0;'>"
        + "".join(chips)
        + "</div>"
    )


def _render_relationships_html(relationships: List[Dict]) -> str:
    """Render relationships as one 'source relationship target' card each."""
    cards = []
    for rel in relationships:
        source = html.escape(str(rel['source']))
        relation = html.escape(str(rel['relationship']))
        target = html.escape(str(rel['target']))
        percent = round(rel['confidence'] * 100)
        cards.append(f"""
        <div style='margin: 8px 0; padding: 10px; background-color: #f8fafc; border-left: 4px solid #3b82f6; border-radius: 4px;'>
            <strong style='color: #1e40af;'>{source}</strong> 
            <span style='color: #7c3aed; font-weight: 600;'>{relation}</span> 
            <strong style='color: #059669;'>{target}</strong>
            <small style='float: right; color: #6b7280;'>{percent}% confidence</small>
        </div>
        """)

    return "<div style='margin: 10px 0;'>" + "".join(cards) + "</div>"


def process_threat_intel(text: str) -> Tuple[str, str, str]:
    """Analyze threat intelligence text and return three HTML fragments.

    Args:
        text: Raw report text. ``None``, empty or whitespace-only input
            short-circuits with a prompt message.

    Returns:
        A tuple ``(entities_html, relationships_html, queries_html)``. For
        blank input the first element is a plain prompt message and the
        other two are empty strings.
    """
    if not text or not text.strip():
        return "Please provide threat intelligence text to analyze.", "", ""

    entities = kg.extract_entities(text)
    relationships = kg.extract_relationships(entities, text)

    # Static example queries; they do not depend on the analysed text.
    queries_html = """
    <div style='margin: 10px 0;'>
        <h4>Example Graph Queries:</h4>
        <div style='background-color: #1f2937; color: #e5e7eb; padding: 10px; border-radius: 6px; font-family: monospace; margin: 5px 0;'>
            MATCH (ta:ThreatActor)-[:USES]->(tool:Tool) RETURN ta.name, tool.name
        </div>
        <div style='background-color: #1f2937; color: #e5e7eb; padding: 10px; border-radius: 6px; font-family: monospace; margin: 5px 0;'>
            MATCH (v:Vulnerability)&lt;-[:EXPLOITS]-(ta:ThreatActor) RETURN v.name, ta.name
        </div>
        <div style='background-color: #1f2937; color: #e5e7eb; padding: 10px; border-radius: 6px; font-family: monospace; margin: 5px 0;'>
            MATCH path=(ta:ThreatActor)-[*2..4]->(s:Software) RETURN path
        </div>
    </div>
    """

    return (
        _render_entities_html(entities),
        _render_relationships_html(relationships),
        queries_html,
    )

def create_sample_data():
    """Return a canned three-paragraph threat report for the demo."""
    paragraphs = (
        # The trailing space after "dumping." is part of the original text.
        "APT29 (Cozy Bear) has been observed exploiting CVE-2023-23397 to target Microsoft Outlook vulnerabilities in financial institutions. The threat actor deploys Cobalt Strike beacons on compromised Windows 10 systems and uses PowerShell for lateral movement and credential dumping. ",
        "The attack chain typically begins with spearphishing emails containing malicious attachments. Once initial access is gained, APT29 utilizes Mimikatz for credential harvesting and PsExec for remote execution across the network.",
        "Security teams can detect this activity using Sigma rule SIG-2023-001 which monitors for suspicious PowerShell execution patterns and YARA-2023-APT29 for Cobalt Strike beacon detection. The IOCs include IP addresses 192.168.1.100 and 10.0.0.50, along with hash values 7d865e959b2466918c9863afca942d0fb89d7c9ac0c99bafc3749504ded97730.",
    )
    # Paragraphs are separated by one blank line.
    return "\n\n".join(paragraphs)

# Build the Gradio UI. Components appear on the page in the order they are
# created inside the `with` blocks below.
with gr.Blocks(
    theme=gr.themes.Base(),
    css="""
    .gradio-container {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important}
    .gr-button {background: linear-gradient(90deg, #667eea, #764ba2) !important; border: none !important}
    .gr-button:hover {transform: translateY(-1px) !important; box-shadow: 0 4px 12px rgba(0,0,0,0.15) !important}
    """,
    title="🔒 Security Knowledge Graph Builder"
) as demo:
    
    # Page header banner (static HTML).
    gr.HTML("""
    <div style='text-align: center; padding: 20px; background: rgba(255,255,255,0.1); border-radius: 10px; margin-bottom: 20px;'>
        <h1 style='color: white; margin-bottom: 10px;'>🔒 Security Knowledge Graph Builder</h1>
        <p style='color: rgba(255,255,255,0.8); font-size: 16px;'>
            Secure AI-powered threat intelligence without vector database vulnerabilities
        </p>
        <p style='color: rgba(255,255,255,0.6); font-size: 14px;'>
            Build explainable, auditable security relationships instead of relying on risky RAG embeddings
        </p>
    </div>
    """)
    
    # Two equally weighted columns: input on the left, results on the right.
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h3 style='color: white;'>📊 Threat Intelligence Input</h3>")
            
            # Free-text report to analyze; starts empty.
            input_text = gr.Textbox(
                placeholder="Paste your threat intelligence report here...",
                lines=8,
                label="Threat Intelligence Text",
                value=""
            )
            
            # Action buttons; their click handlers are wired up further down.
            with gr.Row():
                analyze_btn = gr.Button("🔍 Analyze Threat Intelligence", variant="primary")
                sample_btn = gr.Button("📝 Load Sample Data", variant="secondary")
            
            # Static explanatory panel.
            gr.HTML("""
            <div style='margin-top: 20px; padding: 15px; background: rgba(34, 197, 94, 0.1); border-left: 4px solid #22c55e; border-radius: 6px;'>
                <h4 style='color: #22c55e; margin-top: 0;'>🛡️ Why Knowledge Graphs Beat RAG for Security:</h4>
                <ul style='color: rgba(255,255,255,0.8); font-size: 14px;'>
                    <li><strong>No Vector Poisoning:</strong> Explicit relationships prevent embedding manipulation</li>
                    <li><strong>Full Auditability:</strong> Every connection can be traced and verified</li>
                    <li><strong>Access Control:</strong> Fine-grained permissions on nodes and edges</li>
                    <li><strong>Precise Queries:</strong> No ambiguous similarity matching</li>
                </ul>
            </div>
            """)
    
        with gr.Column(scale=1):
            # Three HTML panes, filled by process_threat_intel() in this order.
            gr.HTML("<h3 style='color: white;'>🎯 Extracted Security Entities</h3>")
            entities_output = gr.HTML()
            
            gr.HTML("<h3 style='color: white;'>🔗 Security Relationships</h3>")
            relationships_output = gr.HTML()
            
            gr.HTML("<h3 style='color: white;'>🔍 Graph Query Examples</h3>")
            queries_output = gr.HTML()
    
    # Event handlers
    # "Load Sample Data": put create_sample_data()'s return value in the textbox.
    sample_btn.click(
        fn=create_sample_data,
        outputs=input_text
    )
    
    # "Analyze": pass the textbox content to process_threat_intel() and route
    # its three returned strings to the three output panes.
    analyze_btn.click(
        fn=process_threat_intel,
        inputs=input_text,
        outputs=[entities_output, relationships_output, queries_output]
    )
    
    # Footer
    gr.HTML("""
    <div style='text-align: center; margin-top: 30px; padding: 20px; background: rgba(0,0,0,0.2); border-radius: 10px;'>
        <p style='color: rgba(255,255,255,0.8); margin-bottom: 10px;'>
            🔒 <strong>Secure-by-Design Threat Intelligence</strong> - No risky vector embeddings, just explainable relationships
        </p>
        <p style='color: rgba(255,255,255,0.6); font-size: 14px;'>
            Built for blue teams who need trustworthy, auditable AI in cybersecurity operations
        </p>
    </div>
    """)

# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()