import os

import faiss
import numpy as np


# ==========================================================
# BUILD FAISS INDEX (Cosine Similarity Safe Version)
# ==========================================================
def build_faiss_index(embeddings):
    """
    📘 Builds a FAISS index optimized for cosine similarity
    (float32-safe, dimension-aware).

    Args:
        embeddings (list | np.ndarray): List or array of embedding vectors.
            A single 1-D vector (list or ndarray) is accepted and treated
            as a batch of one.

    Returns:
        faiss.IndexFlatIP: FAISS index for cosine similarity search.

    Raises:
        ValueError: If embeddings is None or empty.
        TypeError: If embeddings is neither a list nor an ndarray.
    """
    # 🧩 Validation
    if embeddings is None:
        raise ValueError("❌ No embeddings provided to build FAISS index.")

    # ✅ Convert to a float32 ndarray (FAISS requires float32)
    if isinstance(embeddings, np.ndarray):
        vectors = embeddings.astype("float32")
    elif isinstance(embeddings, list):
        vectors = np.array(embeddings, dtype="float32")
    else:
        raise TypeError(f"❌ Unexpected embeddings type: {type(embeddings)}")

    # ✅ Promote a single flat vector to a (1, d) batch.
    # Fix: the original only reshaped ndarray input, so a flat list
    # (e.g. [0.1, 0.2, 0.3]) crashed below on vectors.shape[1].
    if vectors.ndim == 1:
        vectors = vectors.reshape(1, -1)

    # ✅ Ensure there are embeddings to index
    if vectors.size == 0:
        raise ValueError("❌ Empty embeddings array provided.")

    # ✅ Normalize in place so inner product == cosine similarity
    faiss.normalize_L2(vectors)

    # ✅ Build FAISS index (Inner Product = Cosine Similarity)
    dim = vectors.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(vectors)

    print(f"✅ FAISS index built successfully — {index.ntotal} vectors | dim={dim} | cosine similarity mode.")
    return index


# ==========================================================
# SEARCH FAISS INDEX (with sanity checks)
# ==========================================================
def search_faiss(query_embedding, index, chunks, top_k=3):
    """
    🔍 Searches FAISS index for semantically similar chunks.

    Args:
        query_embedding (list | np.ndarray): Query vector (1D or 2D).
        index (faiss.IndexFlatIP): Built FAISS index.
        chunks (list[str]): Original document chunks.
        top_k (int): Number of results to return.

    Returns:
        list[str]: Top-matching chunks (may be fewer than top_k).

    Raises:
        ValueError: If the index is empty/uninitialized, or the query
            dimension does not match the index dimension.
    """
    if index is None or index.ntotal == 0:
        raise ValueError("❌ FAISS index is empty or not initialized.")

    # ✅ Convert query to float32 and normalize.
    # Fix: the original called query_embedding.ndim before conversion,
    # which raised AttributeError for plain-list queries. np.array always
    # copies, so normalize_L2 never mutates the caller's array.
    query_vector = np.array(query_embedding, dtype="float32")
    if query_vector.ndim == 1:
        query_vector = query_vector.reshape(1, -1)
    faiss.normalize_L2(query_vector)

    # ✅ Dimension check
    if query_vector.shape[1] != index.d:
        raise ValueError(
            f"❌ Embedding dimension mismatch: query={query_vector.shape[1]}, index={index.d}. "
            "Rebuild FAISS index with embeddings from the same model."
        )

    # 🔍 Run search
    distances, indices = index.search(query_vector, top_k)

    # FAISS pads with -1 when fewer than top_k vectors exist;
    # the bounds check filters those out.
    results = []
    for idx in indices[0]:
        if 0 <= idx < len(chunks):
            results.append(chunks[idx])

    print(f"🔍 FAISS search completed — retrieved {len(results)} chunks (top_k={top_k})")
    return results


# ==========================================================
# SAVE / LOAD INDEX (Optional Utility)
# ==========================================================
def save_faiss_index(index, path="faiss_index.bin"):
    """💾 Save FAISS index to disk."""
    faiss.write_index(index, path)
    print(f"💾 FAISS index saved to {path}")


def load_faiss_index(path="faiss_index.bin"):
    """📂 Load FAISS index from disk.

    Raises:
        FileNotFoundError: If no index file exists at *path*.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"❌ No FAISS index found at {path}")
    index = faiss.read_index(path)
    print(f"📂 FAISS index loaded from {path}")
    return index


# ==========================================================
# DEBUG / LOCAL TEST
# ==========================================================
if __name__ == "__main__":
    # Example embeddings (3 vectors, dim=3)
    sample_embeddings = np.array([
        [0.1, 0.2, 0.3],
        [0.2, 0.1, 0.4],
        [0.9, 0.8, 0.7]
    ], dtype="float32")

    query_vec = np.array([0.15, 0.18, 0.35], dtype="float32")

    # ✅ Build and search
    idx = build_faiss_index(sample_embeddings)
    results = search_faiss(query_vec, idx, ["Chunk A", "Chunk B", "Chunk C"], top_k=2)

    print("🔎 Top Results:")
    for r in results:
        print("-", r)