#!/usr/bin/env python3
"""SIMPLE GraphRAG - No more complexity hell!

Just semantic search + graph expansion. That's it.

NOTE(review): this module was reconstructed from a whitespace-mangled source.
HTML markup inside a few string literals (the error pages and the tooltip
separator in ``generate_visualization_html``) had been stripped by extraction;
it has been restored with minimal plausible markup — confirm against the
original rendering if exact HTML matters.
"""

from sentence_transformers import SentenceTransformer
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import os
from typing import List, Dict, Tuple, Optional
import numpy as np


class GraphRAG:
    """Dead simple GraphRAG - find stuff, expand from there."""

    def __init__(self, graph_path: str = "/tmp/topic_graph.gpickle"):
        self.graph_path = graph_path
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
        self.graph: Optional[nx.DiGraph] = None
        self.node_embeddings: Dict[str, np.ndarray] = {}

    def load_graph_with_embeddings(self) -> bool:
        """Load the pickled graph and compute semantic embeddings.

        Returns:
            True on success, False when the file is missing or unreadable.
        """
        if not os.path.exists(self.graph_path):
            return False
        try:
            # SECURITY: pickle.load executes arbitrary code when the file is
            # attacker-controlled — only point graph_path at trusted files.
            with open(self.graph_path, "rb") as f:
                self.graph = pickle.load(f)
            self._compute_embeddings()
            return True
        except Exception:
            # Best-effort loader: any failure is reported as "no graph".
            return False

    def _compute_embeddings(self):
        """Compute AI embeddings for all nodes.

        Each node is embedded from the text "<label>: <name>" so that both
        the node type and its display name contribute to similarity.
        """
        for node, attrs in self.graph.nodes(data=True):
            label = attrs.get("label", "")
            name = attrs.get("name", str(node))
            text = f"{label}: {name}"
            embedding = self.embedder.encode(text)
            self.node_embeddings[node] = embedding

    def semantic_search(self, query: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Find nodes most similar to query.

        Returns up to ``top_k`` (node, similarity) pairs, best first, keeping
        only matches at or above a 0.2 cosine-similarity floor.
        """
        if not self.node_embeddings:
            return []
        query_embedding = self.embedder.encode(query)
        similarities = []
        for node, embedding in self.node_embeddings.items():
            sim = cosine_similarity([query_embedding], [embedding])[0][0]
            if sim >= 0.2:  # Reasonable threshold
                similarities.append((node, sim))
        return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_k]

    def expand_from_nodes(self, start_nodes: List[str], max_nodes: int = 10,
                          direct_only: bool = False) -> set:
        """Expand from starting nodes following only relevant connections.

        Args:
            start_nodes: seed node ids (always included in the result).
            max_nodes: soft cap on the size of the returned set.
            direct_only: when True, add only immediate neighbors/predecessors
                of the seeds — no transitive expansion.

        Returns:
            Set of node ids reachable through "meaningful" edges (see the
            label/edge whitelist below).
        """
        connected = set(start_nodes)

        if direct_only:
            # Only add direct neighbors - no expansion of expansions
            for node in start_nodes:
                if node not in self.graph:
                    continue
                # Add direct neighbors (outgoing)
                for neighbor in self.graph.neighbors(node):
                    connected.add(neighbor)
                # Add direct predecessors (incoming)
                for predecessor in self.graph.predecessors(node):
                    connected.add(predecessor)
            return connected

        # Topic-centered expansion: only expand through meaningful relationships
        to_expand = list(start_nodes)
        while to_expand and len(connected) < max_nodes:
            current_node = to_expand.pop(0)  # BFS order
            if current_node not in self.graph:
                continue

            current_attrs = self.graph.nodes.get(current_node, {})
            current_label = current_attrs.get('label', '')

            # Add directly connected nodes based on meaningful relationships
            for neighbor in self.graph.neighbors(current_node):
                if len(connected) >= max_nodes:
                    break
                if neighbor not in connected:
                    neighbor_attrs = self.graph.nodes.get(neighbor, {})
                    neighbor_label = neighbor_attrs.get('label', '')

                    # Get the edge relationship
                    edge_data = self.graph.get_edge_data(current_node, neighbor, {})
                    edge_label = edge_data.get('label', '')

                    # Only include nodes with strong semantic relationships
                    should_include = False

                    if current_label == 'Topic':
                        # From Topic: include tasks directly labeled with this topic
                        if neighbor_label == 'Task' and edge_label == 'HAS_TASK':
                            should_include = True
                    elif current_label == 'Task':
                        # From Task: include assignees, dates, summaries - NOT other tasks
                        if (neighbor_label in ['Person', 'Date', 'Summary', 'Email Index']
                                and edge_label in ['RESPONSIBLE_TO', 'COLLABORATED_BY',
                                                   'DUE_ON', 'START_ON', 'BASED_ON',
                                                   'LINKED_TO']):
                            should_include = True
                    elif current_label == 'Person':
                        # From Person: include their role/department/organization hierarchy
                        if (neighbor_label in ['Role', 'Department', 'Organization']
                                and edge_label in ['HAS_ROLE', 'BELONGS_TO', 'IS_IN']):
                            should_include = True
                    elif current_label == 'Role':
                        # From Role: include department
                        if neighbor_label == 'Department' and edge_label == 'BELONGS_TO':
                            should_include = True
                    elif current_label == 'Department':
                        # From Department: include organization
                        if neighbor_label == 'Organization' and edge_label == 'IS_IN':
                            should_include = True

                    if should_include:
                        connected.add(neighbor)
                        # Queue for expansion to get full hierarchies
                        if neighbor_label in ['Task', 'Person', 'Role', 'Department']:
                            to_expand.append(neighbor)

            # Also check predecessors for reverse relationships
            for predecessor in self.graph.predecessors(current_node):
                if len(connected) >= max_nodes:
                    break
                if predecessor not in connected:
                    pred_attrs = self.graph.nodes.get(predecessor, {})
                    pred_label = pred_attrs.get('label', '')

                    # Get the edge relationship
                    edge_data = self.graph.get_edge_data(predecessor, current_node, {})
                    edge_label = edge_data.get('label', '')

                    # Include meaningful reverse relationships
                    should_include = False
                    if current_label == 'Task' and pred_label == 'Topic' and edge_label == 'HAS_TASK':
                        should_include = True
                    elif current_label in ['Date', 'Summary', 'Email Index'] and pred_label == 'Task':
                        should_include = True
                    elif current_label in ['Role', 'Department', 'Organization'] and pred_label == 'Person':
                        should_include = True

                    if should_include:
                        connected.add(predecessor)

        return connected

    def query(self, query: str, direct_only: bool = False, max_nodes: int = 25) -> Dict:
        """Topic-centered query for maximum accuracy.

        Reloads the graph (and embeddings) on every call, matches the query
        against Topic nodes by name, then expands from the matched topics.

        Returns:
            On success: dict with 'relevant_nodes', 'all_nodes',
            'confidence_score', 'explanation', 'method'.
            On failure: dict with 'error', 'nodes', and (for no-match) 'method'.
        """
        if not self.load_graph_with_embeddings():
            return {
                'query': query,
                'error': 'No graph found. Process emails first.',
                'nodes': []
            }

        # Step 1: Try topic name matching first (highest accuracy)
        topic_matches = self.search_topics_by_name(query, semantic_threshold=0.5)

        if topic_matches:
            # Found topic name matches - use ALL good matches for inclusive approach
            good_topics = [topic for topic, score in topic_matches if score >= 0.5]
            start_nodes = good_topics  # Include all related topics

            all_nodes = self.expand_from_nodes(
                start_nodes, max_nodes=max_nodes, direct_only=direct_only
            )

            confidence = topic_matches[0][1]  # Use best match confidence
            explanation = f"Found {len(all_nodes)} nodes from {len(good_topics)} related topic(s)"
            if direct_only:
                explanation += " (direct neighbors only)"

            return {
                'query': query,
                'relevant_nodes': [(topic, score) for topic, score in topic_matches if score >= 0.5],
                'all_nodes': list(all_nodes),
                'confidence_score': round(confidence, 3),
                'explanation': explanation,
                'method': 'topic_name_search'
            }

        # No topic matches found - show actual topics
        available_topics = []
        for node, attrs in self.graph.nodes(data=True):
            if attrs.get('label') == 'Topic':
                topic_name = attrs.get('name', str(node))
                available_topics.append(topic_name)

        if available_topics:
            topic_list = ", ".join(available_topics)
            error_msg = f'No topic found matching "{query}". Available: {topic_list}.'
        else:
            error_msg = f'No topic found matching "{query}". No topics in graph.'

        return {
            'query': query,
            'error': error_msg,
            'nodes': [],
            'method': 'no_match'
        }

    def generate_visualization_html(self, query: str, result: Dict) -> str:
        """Generate visualization HTML content directly without saving to file.

        Builds a pyvis network from the query result's 'all_nodes' subgraph.
        Returns an HTML document string, or a small HTML error fragment when
        pyvis is missing, no graph is loaded, or rendering fails.
        """
        try:
            from pyvis.network import Network
        except ImportError:
            # NOTE(review): error-fragment markup reconstructed — confirm.
            return "<p>pyvis not installed</p>"

        if not self.graph:
            return "<p>No graph loaded</p>"

        try:
            net = Network(height="600px", width="100%")

            # Show only connected nodes
            nodes_to_show = set(result.get('all_nodes', []))
            if not nodes_to_show:
                return "<p>No nodes found in query result</p>"

            subgraph = self.graph.subgraph(nodes_to_show)

            # Colors for topic-centered hierarchy
            colors = {
                'Topic': '#FF6B9D',        # Pink - most important
                'Task': '#90EE90',         # Light green
                'Person': '#87CEEB',       # Sky blue
                'Role': '#FFA500',         # Orange
                'Department': '#DDA0DD',   # Plum
                'Organization': '#F0E68C', # Khaki
                'Date': '#D3D3D3',         # Light gray
                'Summary': '#FFE4B5',      # Moccasin
                'Email Index': '#E6E6FA'   # Lavender
            }

            # Add nodes with topic-centered sizing
            for node, attrs in subgraph.nodes(data=True):
                label = attrs.get('label', '')
                name = attrs.get('name', str(node))
                color = colors.get(label, '#BDC3C7')

                # Smaller node sizing for better readability
                if label == 'Topic':
                    node_size = 25   # Reduced from 50
                elif label == 'Task':
                    node_size = 20   # Reduced from 35
                elif label == 'Person':
                    node_size = 15   # Reduced from 25
                else:
                    node_size = 12   # Reduced from 20

                # Shorter display names for better visibility
                if label == 'Task':
                    display_name = name[:25] + "..." if len(name) > 25 else name
                elif label == 'Summary':
                    display_name = name[:30] + "..." if len(name) > 30 else name
                else:
                    display_name = name[:20] + "..." if len(name) > 20 else name

                # Create detailed tooltip with all attributes
                tooltip_parts = [f"{label}: {name}"]
                for key, value in attrs.items():
                    if key not in ['label', 'name'] and value:
                        tooltip_parts.append(f"{key}: {value}")

                # For Person nodes, add FULL role/dept/org info to tooltip
                if label == 'Person':
                    person_details = _get_person_details(self.graph, node)
                    if person_details:
                        details_clean = person_details.strip('() ')
                        tooltip_parts.append(f"Full Details: {details_clean}")

                # NOTE(review): separator reconstructed as "<br>" (HTML line
                # break in pyvis tooltips) — original markup was stripped.
                tooltip = "<br>".join(tooltip_parts)

                net.add_node(
                    node,
                    label=display_name,
                    title=tooltip,
                    color=color,
                    size=node_size,
                    font={'size': 10, 'color': 'black'}  # Reduced from 14
                )

            # Add edges
            for u, v, edge_attrs in subgraph.edges(data=True):
                edge_label = edge_attrs.get('label', '')
                net.add_edge(u, v, label=edge_label)

            # Set heading and generate HTML
            net.heading = f"Query: {query}"

            # Generate HTML content directly
            html_content = net.generate_html()
            return html_content

        except Exception as e:
            return f"<p>Error generating visualization: {str(e)}</p>"

    def search_topics_by_name(self, query: str, semantic_threshold: float = 0.5) -> List[Tuple[str, float]]:
        """Search for topics using semantic similarity with flexible matching.

        Combines embedding cosine similarity with substring/word matching;
        exact substring hits are boosted to a minimum score of 0.9.

        Returns:
            (topic_node, score) pairs sorted best-first.
        """
        if not self.graph or not self.node_embeddings:
            return []

        # Encode the query
        query_embedding = self.embedder.encode(query)

        topic_matches = []
        for node, attrs in self.graph.nodes(data=True):
            if attrs.get('label') == 'Topic':
                # Get the embedding for this topic node
                if node in self.node_embeddings:
                    topic_embedding = self.node_embeddings[node]

                    # Calculate semantic similarity
                    similarity = cosine_similarity([query_embedding], [topic_embedding])[0][0]

                    # Also check for substring matches to catch variations
                    topic_name = attrs.get('name', str(node)).lower()
                    query_lower = query.lower()

                    # Boost similarity for substring matches or close variations
                    if (query_lower in topic_name or
                            any(word in topic_name for word in query_lower.split()) or
                            similarity >= semantic_threshold):
                        # Give higher score to exact or close matches
                        if query_lower in topic_name:
                            similarity = max(similarity, 0.9)
                        topic_matches.append((node, similarity))

        return sorted(topic_matches, key=lambda x: x[1], reverse=True)

    # Compatibility methods
    def query_with_semantic_reasoning(self, query: str) -> Dict:
        return self.query(query)


def format_response(result: Dict) -> str:
    """Format response like the old system with structured task details.

    Reloads the pickled graph from /tmp and renders every Task node found in
    ``result['all_nodes']`` as a markdown-style section with topic, dates,
    summary, email index, and person details, plus a confidence footer.
    """
    if 'error' in result:
        return result['error']

    if not result.get('all_nodes'):
        return "No information found."

    try:
        # SECURITY: pickle.load on a world-writable /tmp path — trusted
        # environments only. (Redundant local `import pickle` removed; the
        # module-level import is used.)
        with open("/tmp/topic_graph.gpickle", "rb") as f:
            graph = pickle.load(f)

        # Find all tasks in the result
        tasks = []
        for node in result.get('all_nodes', []):
            if node in graph:
                attrs = graph.nodes[node]
                if attrs.get('label') == 'Task':
                    tasks.append(node)

        if not tasks:
            return "No tasks found in the results."

        # Format each task in the structured format
        response_parts = []
        for task_node in tasks:
            task_attrs = graph.nodes[task_node]
            task_name = task_attrs.get('name', str(task_node))

            task_info = [f"**Task:** {task_name}"]

            # Find the topic for this task
            for neighbor in graph.neighbors(task_node):
                edge_data = graph.get_edge_data(task_node, neighbor, {})
                edge_label = edge_data.get('label', '')
                neighbor_attrs = graph.nodes[neighbor]
                if neighbor_attrs.get('label') == 'Topic':
                    topic_name = neighbor_attrs.get('name', neighbor)
                    task_info.append(f"**Topic:** {topic_name}")
                    break

            # Get all the direct neighbors with their relationships
            for neighbor in graph.neighbors(task_node):
                edge_data = graph.get_edge_data(task_node, neighbor, {})
                edge_label = edge_data.get('label', '')
                neighbor_attrs = graph.nodes[neighbor]
                neighbor_name = neighbor_attrs.get('name', neighbor)
                neighbor_label = neighbor_attrs.get('label', '')

                if edge_label == 'START_ON':
                    task_info.append(f"  • **Start Date:** {neighbor_name}")
                elif edge_label == 'DUE_ON':
                    task_info.append(f"  • **Due Date:** {neighbor_name}")
                elif edge_label == 'BASED_ON' or neighbor_label == 'Summary':
                    task_info.append(f"  • **Summary:** {neighbor_name}")
                elif edge_label == 'LINKED_TO' or neighbor_label == 'Email Index':
                    task_info.append(f"  • **Email Index:** {neighbor_name}")
                elif edge_label == 'RESPONSIBLE_TO':
                    # Get role/dept/org info for the person
                    person_details = _get_person_details(graph, neighbor)
                    task_info.append(f"  • **Responsible To:** {neighbor_name}{person_details}")
                elif edge_label == 'COLLABORATED_BY':
                    person_details = _get_person_details(graph, neighbor)
                    task_info.append(f"  • **Collaborated By:** {neighbor_name}{person_details}")

            response_parts.append("\n".join(task_info))

        # Add confidence at the end
        confidence = result.get('confidence_score', 0.0)
        conf_text = "🟢 High" if confidence > 0.7 else "🟡 Medium" if confidence > 0.4 else "🔴 Low"
        response_parts.append(f"\n**Confidence:** {conf_text} ({confidence})")

        return "\n\n".join(response_parts)

    except Exception as e:
        return f"📊 Error formatting response: {str(e)}"


def _get_person_details(graph, person_node):
    """Get role, department, organization details for a person.

    Walks Person -> Role -> Department -> Organization (first match at each
    level only, hence the `break`s) and returns a string like
    " (Role: X, Department: Y, Organization: Z)", or "" when nothing found.
    """
    details = []

    for neighbor in graph.neighbors(person_node):
        edge_data = graph.get_edge_data(person_node, neighbor, {})
        edge_label = edge_data.get('label', '')
        neighbor_attrs = graph.nodes[neighbor]

        if edge_label == 'HAS_ROLE' or neighbor_attrs.get('label') == 'Role':
            role_name = neighbor_attrs.get('name', neighbor)
            details.append(f"Role: {role_name}")

            # Get department for this role
            for dept_neighbor in graph.neighbors(neighbor):
                dept_edge = graph.get_edge_data(neighbor, dept_neighbor, {})
                dept_edge_label = dept_edge.get('label', '')
                dept_attrs = graph.nodes[dept_neighbor]

                if dept_edge_label == 'BELONGS_TO' or dept_attrs.get('label') == 'Department':
                    dept_name = dept_attrs.get('name', dept_neighbor)
                    details.append(f"Department: {dept_name}")

                    # Get organization for this department
                    for org_neighbor in graph.neighbors(dept_neighbor):
                        org_edge = graph.get_edge_data(dept_neighbor, org_neighbor, {})
                        org_edge_label = org_edge.get('label', '')
                        org_attrs = graph.nodes[org_neighbor]

                        if org_edge_label == 'IS_IN' or org_attrs.get('label') == 'Organization':
                            org_name = org_attrs.get('name', org_neighbor)
                            details.append(f"Organization: {org_name}")
                            break
                    break
            break

    if details:
        return f" ({', '.join(details)})"
    return ""


def format_graphrag_response(result: Dict) -> str:
    """Compatibility function."""
    return format_response(result)