"""RAG Nexus — document analysis, axiom extraction and RAG response generation.

Gradio front-end backed by a SQLite store.  Documents are chunked into
fixed-size character windows, indexed with a TF-IDF vectorizer, and
retrieved by cosine similarity at query time.
"""

import gradio as gr
import os
import sqlite3
import json
import hashlib
import threading
from datetime import datetime
from typing import List, Dict, Any, Tuple, Optional

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from utils import (
    process_document, extract_axioms, generate_response, get_embedding,
    compute_similarity, Document, Axiom, ActivityLog
)

# Size (in characters) of each document chunk indexed for retrieval.
CHUNK_SIZE = 500

# Initialize database
DB_PATH = "rag_nexus.db"
conn = sqlite3.connect(DB_PATH, check_same_thread=False)
cursor = conn.cursor()

# Create tables
cursor.execute("""
    CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        name TEXT,
        content TEXT,
        size INTEGER,
        uploaded_at TEXT,
        chunk_count INTEGER
    )
""")
cursor.execute("""
    CREATE TABLE IF NOT EXISTS axioms (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        doc_id TEXT,
        source TEXT,
        axiom TEXT,
        confidence REAL,
        FOREIGN KEY (doc_id) REFERENCES documents (id)
    )
""")
cursor.execute("""
    CREATE TABLE IF NOT EXISTS activity (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        action TEXT,
        details TEXT,
        timestamp TEXT
    )
""")
conn.commit()

# Thread-local storage for database connections: Gradio handlers may run on
# worker threads, and a sqlite3 connection must not be shared across threads.
thread_local = threading.local()


def get_db() -> sqlite3.Connection:
    """Return this thread's SQLite connection, creating it on first use."""
    if not hasattr(thread_local, 'conn'):
        thread_local.conn = sqlite3.connect(DB_PATH)
    return thread_local.conn


def _chunk_text(text: str) -> List[str]:
    """Split *text* into consecutive CHUNK_SIZE-character windows."""
    return [text[i:i + CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE)]


class RAGState:
    """In-memory retrieval index shared by all request handlers."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
        self.document_chunks: List[str] = []
        self.chunk_metadata: List[Dict[str, Any]] = []
        self.is_initialized = False

    def initialize_models(self):
        """Build the TF-IDF index from documents already in the database.

        Idempotent: a no-op after the first successful call.
        """
        if self.is_initialized:
            return
        conn = get_db()
        cursor = conn.cursor()
        cursor.execute("SELECT id, content FROM documents")
        docs = cursor.fetchall()
        if docs:
            chunks: List[str] = []
            metadata: List[Dict[str, Any]] = []
            for doc_id, content in docs:
                doc_chunks = _chunk_text(content)
                chunks.extend(doc_chunks)
                metadata.extend(
                    {"doc_id": doc_id, "chunk_idx": i}
                    for i in range(len(doc_chunks))
                )
            if chunks:
                self.vectorizer.fit(chunks)
                self.document_chunks = chunks
                self.chunk_metadata = metadata
        self.is_initialized = True


def get_state() -> RAGState:
    """Return the process-wide RAGState singleton (lazily created)."""
    if not hasattr(get_state, 'state'):
        get_state.state = RAGState()
    return get_state.state


def log_activity(action: str, details: Dict[str, Any]):
    """Append one activity row (action + JSON details + ISO timestamp)."""
    conn = get_db()
    cursor = conn.cursor()
    cursor.execute(
        "INSERT INTO activity (action, details, timestamp) VALUES (?, ?, ?)",
        (action, json.dumps(details), datetime.now().isoformat())
    )
    conn.commit()


def get_stats() -> Dict[str, Any]:
    """Return document count, axiom count, and total storage in MB."""
    conn = get_db()
    cursor = conn.cursor()
    cursor.execute("SELECT COUNT(*) FROM documents")
    doc_count = cursor.fetchone()[0]
    cursor.execute("SELECT COUNT(*) FROM axioms")
    axiom_count = cursor.fetchone()[0]
    cursor.execute("SELECT SUM(size) FROM documents")
    storage = cursor.fetchone()[0] or 0  # SUM is NULL on an empty table
    return {
        "doc_count": doc_count,
        "axiom_count": axiom_count,
        "storage_mb": round(storage / 1024 / 1024, 2)
    }


def get_stats_display() -> Tuple[str, str, str]:
    """Format get_stats() for the three analytics Label components.

    Bug fix: get_stats() returns a dict, which cannot be mapped onto the
    three separate Label outputs in the event wiring; a 3-tuple can.
    """
    stats = get_stats()
    return (
        str(stats["doc_count"]),
        str(stats["axiom_count"]),
        f"{stats['storage_mb']}MB",
    )


def load_documents(search: str = ""):
    """Return rows for the documents table, optionally filtered by name.

    The optional *search* parameter is new (default preserves the old
    behavior) and makes the Documents tab search box actually filter.
    """
    conn = get_db()
    cursor = conn.cursor()
    if search:
        cursor.execute(
            "SELECT id, name, size, uploaded_at FROM documents "
            "WHERE name LIKE ? ORDER BY uploaded_at DESC",
            (f"%{search}%",)
        )
    else:
        cursor.execute(
            "SELECT id, name, size, uploaded_at FROM documents ORDER BY uploaded_at DESC"
        )
    docs = cursor.fetchall()
    if not docs:
        return [["No documents found", "", "", ""]]
    return [[doc[1], f"{doc[2]} bytes", doc[3], doc[0]] for doc in docs]


def load_axioms(source_filter: str = ""):
    """Return rows for the axioms table, optionally filtered by document name."""
    conn = get_db()
    cursor = conn.cursor()
    if source_filter:
        cursor.execute("""
            SELECT a.id, a.source, a.axiom, a.confidence, d.name
            FROM axioms a JOIN documents d ON a.doc_id = d.id
            WHERE d.name LIKE ?
            ORDER BY a.confidence DESC
        """, (f"%{source_filter}%",))
    else:
        cursor.execute("""
            SELECT a.id, a.source, a.axiom, a.confidence, d.name
            FROM axioms a JOIN documents d ON a.doc_id = d.id
            ORDER BY a.confidence DESC
        """)
    axioms = cursor.fetchall()
    if not axioms:
        return [["No axioms found", "", "", "", ""]]
    rows = []
    for ax in axioms:
        # Bug fix: only append an ellipsis when the text is actually truncated.
        text = ax[2] if len(ax[2]) <= 100 else ax[2][:100] + "..."
        rows.append([ax[4], ax[1], text, f"{ax[3]:.2f}", str(ax[0])])
    return rows


def _summarize_details(details_json: str) -> str:
    """Render a logged-details JSON blob as a compact 'k=v, ...' string."""
    try:
        details = json.loads(details_json)
    except (TypeError, ValueError):
        return ""
    if not isinstance(details, dict):
        return str(details)
    return ", ".join(f"{k}={v}" for k, v in details.items())


def load_activity():
    """Return the 20 most recent activity rows for the analytics table.

    Bug fix: the old code displayed details.get('description'), a key no
    logging call ever writes, so the Details column was always empty.
    """
    conn = get_db()
    cursor = conn.cursor()
    cursor.execute(
        "SELECT action, details, timestamp FROM activity ORDER BY timestamp DESC LIMIT 20"
    )
    activities = cursor.fetchall()
    if not activities:
        return [["No activity yet", "", ""]]
    return [[act[0], _summarize_details(act[1]), act[2]] for act in activities]


def process_uploaded_files(files: List[str]) -> Tuple[str, str]:
    """Ingest uploaded files: parse, store, extract axioms, index chunks.

    Returns (human-readable status, status icon).  Temporary upload files
    are deleted when processing finishes, whether or not it succeeded.
    """
    if not files:
        return "No files uploaded", "⚠️"

    state = get_state()
    success_count = 0
    total_count = len(files)

    for file_path in files:
        try:
            # Parse the document via the project pipeline.
            doc = process_document(file_path)

            conn = get_db()
            cursor = conn.cursor()
            cursor.execute(
                "INSERT INTO documents (id, name, content, size, uploaded_at, chunk_count) VALUES (?, ?, ?, ?, ?, ?)",
                (doc.id, doc.name, doc.content, doc.size, doc.uploaded_at, doc.chunk_count)
            )

            # Extract and persist axioms for this document.
            axioms = extract_axioms(doc.content, doc.id)
            for axiom in axioms:
                cursor.execute(
                    "INSERT INTO axioms (doc_id, source, axiom, confidence) VALUES (?, ?, ?, ?)",
                    (doc.id, axiom.source, axiom.text, axiom.confidence)
                )
            conn.commit()

            # Add the document's chunks to the in-memory index.
            chunks = _chunk_text(doc.content)
            state.document_chunks.extend(chunks)
            state.chunk_metadata.extend(
                {"doc_id": doc.id, "chunk_idx": i} for i in range(len(chunks))
            )

            log_activity("document_uploaded", {
                "name": doc.name,
                "size": doc.size,
                "chunks": doc.chunk_count
            })
            success_count += 1
        except Exception as e:
            # Best-effort: record the failure and continue with the next file.
            log_activity("upload_failed", {
                "file": os.path.basename(file_path),
                "error": str(e)
            })

    # Refit once after all files (the old code refit inside the loop,
    # re-vectorizing the whole corpus per uploaded file).
    if success_count and state.document_chunks:
        state.vectorizer.fit(state.document_chunks)

    # Clean up temporary files.
    for file_path in files:
        try:
            os.unlink(file_path)
        except OSError:
            pass  # already gone or not removable — nothing useful to do

    icon = "✅" if success_count == total_count else "⚠️"
    return f"Processed {success_count}/{total_count} files", icon


def generate_rag_response(query: str, use_axioms: bool, use_context: bool) -> Tuple[str, str]:
    """Answer *query*, optionally grounded in retrieved chunks and axioms.

    Returns (response text, newline-joined retrieval summary) for the
    Generate tab's two output components.
    """
    if not query.strip():
        return "Please enter a query", ""

    state = get_state()
    state.initialize_models()

    # Retrieve context via TF-IDF cosine similarity.
    context = ""
    retrieved_docs: List[str] = []
    if use_context and state.document_chunks:
        try:
            query_vec = state.vectorizer.transform([query])
            doc_vecs = state.vectorizer.transform(state.document_chunks)
            similarities = cosine_similarity(query_vec, doc_vecs).flatten()

            # Take the top 3 chunks, best first.
            top_indices = np.argsort(similarities)[-3:][::-1]
            for idx in top_indices:
                if similarities[idx] > 0.1:  # similarity floor to drop noise
                    chunk = state.document_chunks[idx]
                    doc_id = state.chunk_metadata[idx]["doc_id"]
                    conn = get_db()
                    cursor = conn.cursor()
                    cursor.execute("SELECT name FROM documents WHERE id = ?", (doc_id,))
                    row = cursor.fetchone()
                    # Document row may have been deleted since indexing.
                    doc_name = row[0] if row else doc_id
                    context += f"\n\n--- From {doc_name} ---\n{chunk}"
                    retrieved_docs.append(f"{doc_name} (similarity: {similarities[idx]:.2f})")
        except Exception:
            # Retrieval is best-effort: answer without context rather than fail.
            context = ""
            retrieved_docs = ["No relevant context found"]

    # Pick a random sample of stored axioms.
    axioms: List[str] = []
    if use_axioms:
        conn = get_db()
        cursor = conn.cursor()
        cursor.execute("SELECT axiom FROM axioms ORDER BY RANDOM() LIMIT 5")
        axioms = [row[0] for row in cursor.fetchall()]

    # Generate the final response via the project pipeline.
    response = generate_response(query, context, axioms)

    log_activity("response_generated", {
        "query": query[:100],
        "used_axioms": use_axioms,
        "used_context": use_context
    })

    context_info = "\n".join(retrieved_docs) if retrieved_docs else "No context retrieved"
    return response, context_info


def clear_all_data() -> Tuple[str, str]:
    """Delete every document, axiom and activity row and reset the index."""
    conn = get_db()
    cursor = conn.cursor()
    cursor.execute("DELETE FROM documents")
    cursor.execute("DELETE FROM axioms")
    cursor.execute("DELETE FROM activity")
    conn.commit()

    # Reset the in-memory retrieval state to match the now-empty store.
    state = get_state()
    state.document_chunks = []
    state.chunk_metadata = []

    log_activity("data_cleared", {"all": True})
    return "All data cleared successfully", "✅"


def export_axioms() -> Tuple[str, str]:
    """Dump every stored axiom to a timestamped JSON file in the CWD."""
    conn = get_db()
    cursor = conn.cursor()
    cursor.execute("""
        SELECT d.name as document, a.source, a.axiom, a.confidence
        FROM axioms a JOIN documents d ON a.doc_id = d.id
    """)
    axioms = [
        {"document": row[0], "source": row[1], "axiom": row[2], "confidence": row[3]}
        for row in cursor.fetchall()
    ]

    if not axioms:
        return "No axioms to export", "⚠️"

    filename = f"axioms_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(filename, 'w') as f:
        json.dump(axioms, f, indent=2)

    log_activity("axioms_exported", {"count": len(axioms), "file": filename})
    # Bug fix: report the actual export filename (was a garbled placeholder).
    return f"Exported {len(axioms)} axioms to {filename}", "✅"


def initialize_app() -> str:
    """Warm up the retrieval index; returns a status string for the UI."""
    state = get_state()
    state.initialize_models()
    return "✅ Models initialized"


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🔮 RAG Nexus
        ### Intelligent Document Analysis & Axiom Extraction System
        **Built with anycoder** | [View on Hugging Face](https://huggingface.co/spaces/akhaliq/anycoder)
        """
    )

    # Status bar
    with gr.Row():
        status_text = gr.Textbox("Initializing...", label="System Status", scale=4)
        init_btn = gr.Button("🔄 Reinitialize", scale=1)

    # Tabs
    with gr.Tabs() as tabs:
        # Upload Tab
        with gr.TabItem("📤 Upload", id="upload"):
            gr.Markdown("### Upload Documents for Analysis")
            file_output = gr.File(
                label="Drop files here or click to browse",
                file_count="multiple",
                file_types=[".txt", ".md", ".pdf", ".doc", ".docx"]
            )
            upload_btn = gr.Button("🚀 Process Files", variant="primary")
            upload_status = gr.Textbox(label="Upload Status", interactive=False)
            with gr.Accordion("📋 Upload Queue", open=False):
                upload_queue = gr.Dataframe(
                    headers=["File", "Status", "Size (bytes)"],
                    datatype=["str", "str", "number"],
                    label="Processed Files"
                )

        # Documents Tab
        with gr.TabItem("📚 Documents", id="documents"):
            gr.Markdown("### Indexed Documents")
            with gr.Row():
                doc_search = gr.Textbox(
                    placeholder="Search documents...",
                    label="Search",
                    scale=3
                )
                clear_docs_btn = gr.Button("🗑️ Clear All", variant="stop", scale=1)
            documents_table = gr.Dataframe(
                headers=["Name", "Size", "Uploaded", "ID"],
                datatype=["str", "str", "str", "str"],
                label="Documents",
                wrap=True
            )
            # Bug fix: pass the search term through so filtering actually works
            # (the old lambda discarded it).
            doc_search.change(
                fn=load_documents,
                inputs=doc_search,
                outputs=documents_table,
                api_visibility="private"
            )

        # Axioms Tab
        with gr.TabItem("⚡ Axioms", id="axioms"):
            gr.Markdown("### Extracted Axioms")
            with gr.Row():
                axiom_search = gr.Textbox(
                    placeholder="Search axioms...",
                    label="Search",
                    scale=2
                )
                axiom_filter = gr.Dropdown(
                    choices=[],
                    label="Filter by Document",
                    scale=1
                )
                export_axioms_btn = gr.Button("💾 Export JSON", scale=1)
            axioms_table = gr.Dataframe(
                headers=["Document", "Source", "Axiom", "Confidence", "ID"],
                datatype=["str", "str", "str", "number", "str"],
                label="Axioms",
                wrap=True
            )
            export_status = gr.Textbox(label="Export Status", interactive=False)

        # Generate Tab
        with gr.TabItem("🤖 Generate", id="generate"):
            gr.Markdown("### Intelligent Response Generation")
            query_input = gr.Textbox(
                label="Enter your query",
                placeholder="Ask anything about your documents... (e.g., 'What are the fundamental principles based on the uploaded documents?')",
                lines=4,
                max_lines=8
            )
            with gr.Row():
                use_axioms = gr.Checkbox(label="Use Axioms", value=True)
                use_context = gr.Checkbox(label="Use Context (RAG)", value=True)
            generate_btn = gr.Button("🚀 Generate Response", variant="primary")
            with gr.Group():
                response_output = gr.Markdown(
                    label="Generated Response",
                    show_copy_button=True
                )
            with gr.Accordion("📚 Retrieved Context & Axioms", open=False):
                context_output = gr.Textbox(
                    label="Retrieved Documents",
                    lines=5,
                    interactive=False
                )
            query_stats = gr.Textbox(
                label="Query Statistics",
                interactive=False,
                visible=False
            )

        # Analytics Tab
        with gr.TabItem("📊 Analytics", id="analytics"):
            gr.Markdown("### System Analytics")
            with gr.Row():
                with gr.Column():
                    doc_count_label = gr.Label(value="0", label="📄 Documents", show_label=True)
                with gr.Column():
                    axiom_count_label = gr.Label(value="0", label="⚡ Axioms", show_label=True)
                with gr.Column():
                    storage_label = gr.Label(value="0MB", label="💾 Storage Used", show_label=True)
            with gr.Accordion("📈 Recent Activity", open=True):
                activity_log = gr.Dataframe(
                    headers=["Action", "Details", "Timestamp"],
                    datatype=["str", "str", "str"],
                    label="Activity Log",
                    wrap=True,
                    max_height=300
                )

    # Event handlers
    init_btn.click(
        fn=initialize_app,
        outputs=status_text,
        api_visibility="private"
    )

    # Upload events
    def process_and_update(files):
        """Process uploads and build the queue table for the Upload tab.

        Bug fix: file names/sizes are captured BEFORE processing, because
        process_uploaded_files() deletes the temporary files when done —
        the old code always showed 0 bytes.
        """
        if not files:
            return "No files selected", []
        file_info = [
            (os.path.basename(f), os.path.getsize(f) if os.path.exists(f) else 0)
            for f in files
        ]
        status, icon = process_uploaded_files(files)
        queue_data = [[name, "✅ Processed", size] for name, size in file_info]
        return f"{icon} {status}", queue_data

    upload_btn.click(
        fn=process_and_update,
        inputs=file_output,
        outputs=[upload_status, upload_queue],
        api_visibility="private"
    ).then(
        fn=load_documents,
        outputs=documents_table
    ).then(
        fn=lambda: load_axioms(),
        outputs=axioms_table
    ).then(
        fn=get_stats_display,
        outputs=[doc_count_label, axiom_count_label, storage_label]
    ).then(
        fn=load_activity,
        outputs=activity_log
    )

    # Documents tab events
    def refresh_documents():
        """Reload the documents table when the active tab changes."""
        return load_documents()

    tabs.change(
        fn=refresh_documents,
        outputs=documents_table,
        api_visibility="private"
    )

    clear_docs_btn.click(
        fn=clear_all_data,
        outputs=[status_text],
        api_visibility="private"
    ).then(
        fn=load_documents,
        outputs=documents_table
    ).then(
        fn=lambda: load_axioms(),
        outputs=axioms_table
    ).then(
        fn=get_stats_display,
        outputs=[doc_count_label, axiom_count_label, storage_label]
    )

    # Axioms tab events
    def update_axiom_filter():
        """Refresh the document-name choices in the axiom filter dropdown."""
        conn = get_db()
        cursor = conn.cursor()
        cursor.execute("SELECT DISTINCT name FROM documents")
        docs = [row[0] for row in cursor.fetchall()]
        return gr.Dropdown(choices=[""] + docs)

    tabs.change(
        fn=update_axiom_filter,
        outputs=axiom_filter,
        api_visibility="private"
    )

    axiom_filter.change(
        fn=lambda filter_val: load_axioms(filter_val or ""),
        inputs=axiom_filter,
        outputs=axioms_table,
        api_visibility="private"
    )

    export_axioms_btn.click(
        fn=export_axioms,
        outputs=[export_status],
        api_visibility="private"
    )

    # Generate tab events
    generate_btn.click(
        fn=generate_rag_response,
        inputs=[query_input, use_axioms, use_context],
        outputs=[response_output, context_output],
        api_visibility="private"
    ).then(
        fn=load_activity,
        outputs=activity_log
    )

    # Load initial data
    demo.load(
        fn=initialize_app,
        outputs=status_text,
        api_visibility="private"
    ).then(
        fn=load_documents,
        outputs=documents_table
    ).then(
        fn=lambda: load_axioms(),
        outputs=axioms_table
    ).then(
        fn=get_stats_display,
        outputs=[doc_count_label, axiom_count_label, storage_label]
    ).then(
        fn=load_activity,
        outputs=activity_log
    ).then(
        fn=update_axiom_filter,
        outputs=axiom_filter
    )

# Launch with Gradio 6 theme
# NOTE(review): in mainline Gradio, `theme=` belongs to gr.Blocks() rather
# than launch(), and `footer_links`/`api_visibility` are not standard launch
# kwargs — confirm against the Gradio version pinned for this deployment.
demo.launch(
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="violet",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md"
    ).set(
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_700",
        block_title_text_weight="600",
        block_background_fill="*neutral_50"
    ),
    footer_links=[{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}],
    show_error=True,
    max_threads=40
)