import json
import logging
import sqlite3
from datetime import datetime
from typing import Optional
import config

logger = logging.getLogger(__name__)

DB_PATH = config.SQLITE_DB_PATH


def init_db():
    """Initialize SQLite database."""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    cursor.execute("""
        CREATE TABLE IF NOT EXISTS documents (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT UNIQUE NOT NULL,
            filename TEXT,
            file_extension TEXT,
            document_type TEXT,
            language TEXT,
            is_arabic INTEGER DEFAULT 0,
            total_pages INTEGER DEFAULT 0,
            total_words INTEGER DEFAULT 0,
            extraction_source TEXT,
            confidence REAL DEFAULT 0.0,
            full_text TEXT,
            structured_json TEXT,
            processed_at TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)

    cursor.execute("""
        CREATE TABLE IF NOT EXISTS entities (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT,
            entity_type TEXT,
            entity_value TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (document_id) REFERENCES documents(document_id)
        )
    """)

    cursor.execute("""
        CREATE TABLE IF NOT EXISTS benchmark_results (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            test_name TEXT,
            language TEXT,
            extraction_source TEXT,
            word_count INTEGER,
            char_count INTEGER,
            confidence REAL,
            processing_time_ms REAL,
            arabic_char_ratio REAL,
            run_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """)

    cursor.execute(
        "CREATE INDEX IF NOT EXISTS idx_doc_id ON documents(document_id)"
    )
    cursor.execute(
        "CREATE INDEX IF NOT EXISTS idx_language ON documents(language)"
    )
    cursor.execute(
        "CREATE INDEX IF NOT EXISTS idx_doc_type ON documents(document_type)"
    )

    conn.commit()
    conn.close()
    logger.info(f"Database initialized at {DB_PATH}")


def save_document(structured_doc: dict) -> bool:
    """Save a structured document to the database."""
    try:
        init_db()
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        meta = structured_doc.get("metadata", {})
        analysis = structured_doc.get("document_analysis", {})
        content = structured_doc.get("content", {})

        cursor.execute(
            """
            INSERT OR REPLACE INTO documents (
                document_id, filename, file_extension, document_type,
                language, is_arabic, total_pages, total_words,
                extraction_source, confidence, full_text,
                structured_json, processed_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                structured_doc.get("document_id"),
                meta.get("filename"),
                meta.get("file_extension"),
                analysis.get("document_type"),
                analysis.get("language"),
                1 if analysis.get("is_arabic") else 0,
                analysis.get("total_pages", 0),
                analysis.get("total_words", 0),
                meta.get("extraction_source"),
                meta.get("extraction_confidence", 0.0),
                content.get("full_text", ""),
                json.dumps(structured_doc, ensure_ascii=False),
                meta.get("processed_at"),
            ),
        )

        doc_id = structured_doc.get("document_id")

        entities = structured_doc.get("extracted_entities", {})
        for entity_type, values in entities.items():
            for value in values if isinstance(values, list) else [values]:
                if value:
                    cursor.execute(
                        """
                        INSERT INTO entities (document_id, entity_type, entity_value)
                        VALUES (?, ?, ?)
                        """,
                        (doc_id, entity_type, str(value)),
                    )

        conn.commit()
        conn.close()
        logger.info(f"Saved document: {doc_id}")
        return True

    except Exception as e:
        logger.error(f"Failed to save document: {e}")
        return False


def get_all_documents(limit: int = 50, offset: int = 0) -> list:
    """Retrieve all documents."""
    try:
        init_db()
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            """
            SELECT document_id, filename, document_type, language,
                   is_arabic, total_pages, total_words, extraction_source,
                   confidence, processed_at, created_at
            FROM documents
            ORDER BY created_at DESC
            LIMIT ? OFFSET ?
            """,
            (limit, offset),
        )

        rows = [dict(row) for row in cursor.fetchall()]
        conn.close()
        return rows

    except Exception as e:
        logger.error(f"Failed to retrieve documents: {e}")
        return []


def get_document_by_id(document_id: str) -> Optional[dict]:
    """Retrieve a specific document by ID."""
    try:
        init_db()
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            "SELECT * FROM documents WHERE document_id = ?",
            (document_id,),
        )
        row = cursor.fetchone()
        conn.close()

        if row:
            doc = dict(row)
            if doc.get("structured_json"):
                doc["structured_data"] = json.loads(doc["structured_json"])
            return doc
        return None

    except Exception as e:
        logger.error(f"Failed to get document {document_id}: {e}")
        return None


def search_documents(query: str, language: str = None) -> list:
    """Search documents by text content."""
    try:
        init_db()
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        if language:
            cursor.execute(
                """
                SELECT document_id, filename, document_type, language,
                       total_words, confidence, processed_at
                FROM documents
                WHERE full_text LIKE ? AND language = ?
                ORDER BY created_at DESC LIMIT 20
                """,
                (f"%{query}%", language),
            )
        else:
            cursor.execute(
                """
                SELECT document_id, filename, document_type, language,
                       total_words, confidence, processed_at
                FROM documents
                WHERE full_text LIKE ?
                ORDER BY created_at DESC LIMIT 20
                """,
                (f"%{query}%",),
            )

        rows = [dict(row) for row in cursor.fetchall()]
        conn.close()
        return rows

    except Exception as e:
        logger.error(f"Search failed: {e}")
        return []


def get_stats() -> dict:
    """Get overall database statistics."""
    try:
        init_db()
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM documents")
        total = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM documents WHERE is_arabic = 1")
        arabic_count = cursor.fetchone()[0]

        cursor.execute(
            "SELECT document_type, COUNT(*) as cnt FROM documents GROUP BY document_type"
        )
        by_type = dict(cursor.fetchall())

        cursor.execute("SELECT AVG(confidence) FROM documents WHERE confidence > 0")
        avg_conf = cursor.fetchone()[0] or 0.0

        cursor.execute("SELECT SUM(total_words) FROM documents")
        total_words = cursor.fetchone()[0] or 0

        conn.close()
        return {
            "total_documents": total,
            "arabic_documents": arabic_count,
            "english_documents": total - arabic_count,
            "by_document_type": by_type,
            "average_confidence": round(avg_conf, 4),
            "total_words_processed": total_words,
        }

    except Exception as e:
        logger.error(f"Stats failed: {e}")
        return {}