"""
vector_database.py
------------------
Manages a FAISS vector store — saving, loading, and updating it.

What is FAISS?
  FAISS (Facebook AI Similarity Search) is a library that lets us store
  embeddings and quickly find the most similar ones to a query embedding.
  Think of it as a very fast search index for vectors.

How we use it here:
  1. The first time documents are uploaded, we create a new FAISS index.
  2. On subsequent uploads, we load the existing index and ADD new vectors.
  3. When a user asks a question, we use FAISS to find the top-k most
     relevant chunks and pass them to the LLM as context.

Files written to disk (inside VECTOR_STORE_DIR):
  - index.faiss   : the vector data
  - index.pkl     : pickled docstore - chunk text and metadata (source file
                    names, page numbers, etc.) plus the index-to-document ID map
"""

import os
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

# ── configuration ─────────────────────────────────────────────────────────────

# Folder that holds the persisted FAISS index files: a "vector_store"
# directory one level above the directory containing this module.
# Point this somewhere else if you want the index stored in another place.
VECTOR_STORE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, "vector_store")
# Base file name handed to FAISS; it writes <INDEX_NAME>.faiss and <INDEX_NAME>.pkl.
INDEX_NAME = "index"


# ── public API ────────────────────────────────────────────────────────────────

def save_vector_store(vector_store: FAISS, directory: str = VECTOR_STORE_DIR) -> None:
    """
    Write the FAISS index to disk so it can be reloaded after a restart.

    Parameters
    ----------
    vector_store : FAISS  – the store to persist
    directory    : str    – target folder; created if it does not exist yet
    """
    # Make sure the target folder is there before FAISS writes into it.
    os.makedirs(directory, exist_ok=True)
    vector_store.save_local(folder_path=directory, index_name=INDEX_NAME)

    confirmation = f"  OK: Vector store saved to '{directory}'"
    print(confirmation)


def load_vector_store(
    embeddings: HuggingFaceEmbeddings,
    directory: str = VECTOR_STORE_DIR,
) -> FAISS | None:
    """
    Read a previously persisted FAISS index back from disk.

    Parameters
    ----------
    embeddings : HuggingFaceEmbeddings  – embedding model handed to FAISS.load_local
    directory  : str                    – folder that holds the saved index files

    Returns
    -------
    FAISS | None  – the loaded store, or None when '<INDEX_NAME>.faiss' is
                    not present in `directory` (e.g. on the first run)
    """
    faiss_path = os.path.join(directory, INDEX_NAME + ".faiss")

    if os.path.exists(faiss_path):
        print(f"  Loading existing vector store from '{directory}'...")
        # LangChain demands this explicit opt-in because loading unpickles
        # index.pkl. Only point `directory` at index files this application
        # wrote itself - never at files from an untrusted source.
        loaded_store = FAISS.load_local(
            folder_path=directory,
            embeddings=embeddings,
            index_name=INDEX_NAME,
            allow_dangerous_deserialization=True,
        )
        print("  OK: Vector store loaded.")
        return loaded_store

    # Nothing on disk yet: signal the caller to build a fresh index.
    print("  No existing vector store found - will create a new one.")
    return None


def add_documents_to_store(
    documents: list[Document],
    embeddings: HuggingFaceEmbeddings,
    directory: str = VECTOR_STORE_DIR,
) -> FAISS:
    """
    Add new documents to the vector store and persist the result.

    If a store already exists on disk, the new documents are embedded and
    added to it. If no store exists, a fresh one is created from them.

    Parameters
    ----------
    documents  : list[Document]  – chunked documents to index
    embeddings : HuggingFaceEmbeddings – the embedding model
    directory  : str             – where the index is loaded from and saved to

    Returns
    -------
    FAISS  – the updated (or newly created) vector store. When `documents`
             is empty and a store already exists, that store is returned
             unchanged and nothing is written to disk.

    Raises
    ------
    ValueError  – if `documents` is empty and no store exists yet, because
                  an index cannot be built from zero documents
    """
    existing_store = load_vector_store(embeddings, directory)

    # Guard the empty case explicitly: building or extending an index from
    # zero documents would otherwise fail deep inside the FAISS wrapper
    # with an error that says nothing about the real cause.
    if not documents:
        if existing_store is None:
            raise ValueError(
                "Cannot create a new vector store from an empty document list."
            )
        print("  No new documents to index - vector store unchanged.")
        return existing_store

    if existing_store is None:
        # First time — create a brand new FAISS index from the documents
        print("  Creating new FAISS index...")
        store = FAISS.from_documents(documents, embeddings)
    else:
        # Embed the new chunks straight into the loaded index instead of
        # building a throwaway second index just to merge it in.
        print("  Merging new documents into existing FAISS index...")
        existing_store.add_documents(documents)
        store = existing_store

    save_vector_store(store, directory)
    print(f"  OK: {len(documents)} document chunk(s) indexed.")
    return store


def get_retriever(
    vector_store: FAISS,
    k: int = 4,
):
    """
    Build a similarity-search retriever on top of the vector store.

    Parameters
    ----------
    vector_store : FAISS  – the store holding the indexed documents
    k            : int    – how many chunks to fetch for each query

    Returns
    -------
    Whatever `vector_store.as_retriever(...)` returns, configured with
    search_type="similarity" and the requested `k`.
    """
    retrieval_options = {"k": k}
    return vector_store.as_retriever(
        search_type="similarity",
        search_kwargs=retrieval_options,
    )