"""Digital ICFAI Assistant backend.

Startup flow (all at import time):
  1. Mirror PDFs from a Hugging Face dataset into a local folder.
  2. Build or load a persisted Chroma vector store over those PDFs.
  3. Expose a FastAPI app with one RAG chat endpoint backed by Ollama.
"""

import os
import shutil
from pathlib import Path

from dotenv import load_dotenv
from fastapi import FastAPI, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import HfApi, hf_hub_download
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

from llama_api import ask_ollama

load_dotenv()

BASE_DIR = Path(__file__).resolve().parent
PDF_PATH = BASE_DIR / "src/data"            # local mirror of the dataset PDFs
DB_DIR = BASE_DIR / "chroma_digital_icfai"  # persisted Chroma vector store
HF_DATASET = "Chaitu2112/ifhe-assets"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

PDF_PATH.mkdir(parents=True, exist_ok=True)


# -----------------------------------------------------------
# DOWNLOAD PDFs FROM HUGGINGFACE
# -----------------------------------------------------------
def download_pdfs(repo_id: str, local_dir: Path) -> None:
    """Mirror every PDF of the HF dataset *repo_id* into *local_dir*.

    Files that already exist locally with non-zero size are skipped, so
    repeated startups are cheap. All network errors are logged and
    swallowed so the app can still start offline against a previous mirror.
    """
    api = HfApi()
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    except Exception as e:
        print(f"⚠️ Cannot list dataset files: {e}")
        return

    pdfs = [f for f in files if f.lower().endswith(".pdf")]
    if not pdfs:
        print("⚠️ No PDFs found in dataset.")
        return

    for f in pdfs:
        local_path = local_dir / os.path.basename(f)
        # Non-zero-size check guards against a previously interrupted copy.
        if local_path.exists() and local_path.stat().st_size > 0:
            continue
        try:
            print(f"📥 Downloading: {f}")
            cached = hf_hub_download(repo_id=repo_id, filename=f, repo_type="dataset")
            shutil.copy(cached, local_path)
            print(f"✅ Saved to {local_path}")
        except Exception as e:
            print(f"⚠️ Download failed for {f}: {e}")


download_pdfs(HF_DATASET, PDF_PATH)


# -----------------------------------------------------------
# BUILD CHROMA VECTOR DB
# -----------------------------------------------------------
def build_chroma_db() -> Chroma:
    """Build a persisted Chroma store from every PDF in PDF_PATH.

    Falls back to a one-entry placeholder store when no PDFs are present,
    so the retriever is always constructible.
    """
    print("🔹 Building vector DB from PDFs.")
    documents = []
    for filename in os.listdir(PDF_PATH):
        if filename.lower().endswith(".pdf"):
            path = PDF_PATH / filename
            print(f" Loading PDF: {path}")
            loader = PyMuPDFLoader(str(path))
            documents.extend(loader.load())

    if not documents:
        print(" No PDFs found. Creating empty DB.")
        embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
        return Chroma.from_texts(
            texts=["No content available"],
            embedding=embeddings,
            persist_directory=str(DB_DIR),
        )

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=150,
        separators=["\n\n", "\n", ".", "!", "?", " "],
    )
    docs = splitter.split_documents(documents)
    print(f"🔹 Total chunks after splitting: {len(docs)}")

    embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
    vectordb = Chroma.from_documents(
        docs,
        embedding=embeddings,
        persist_directory=str(DB_DIR),
    )
    print(" Vector DB built and persisted.")
    return vectordb


# -----------------------------------------------------------
# LOAD OR CREATE CHROMA
# -----------------------------------------------------------
def load_or_create_chroma() -> Chroma:
    """Reopen the persisted store if DB_DIR is non-empty; otherwise build it."""
    if DB_DIR.exists() and any(DB_DIR.iterdir()):
        print("🔹 Loading existing Chroma DB.")
        embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
        return Chroma(
            embedding_function=embeddings,
            persist_directory=str(DB_DIR),
        )
    print("🔹 No existing DB found; building a new one...")
    return build_chroma_db()


vectordb = load_or_create_chroma()
retriever = vectordb.as_retriever(search_kwargs={"k": 6})
print("✅ Retriever is ready.")


# -----------------------------------------------------------
# FASTAPI SETUP
# -----------------------------------------------------------
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve a pre-built frontend from dist/ when present.
# NOTE(review): nesting reconstructed from mangled source — the "/" route is
# assumed to be registered whenever dist/ exists (assets mount only when
# dist/assets exists); confirm against the original layout.
DIST_DIR = BASE_DIR / "dist"
if DIST_DIR.exists():
    assets_dir = DIST_DIR / "assets"
    if assets_dir.exists():
        app.mount("/assets", StaticFiles(directory=str(assets_dir)), name="assets")

    @app.get("/", include_in_schema=False)
    async def serve_index():
        """Serve the SPA entry point."""
        return FileResponse(DIST_DIR / "index.html")
else:
    print(" dist/ folder not found — frontend not served by backend.")


# -----------------------------------------------------------
# MAIN CHAT API
# -----------------------------------------------------------
@app.post("/digital_icfai_chat", response_class=JSONResponse)
async def digital_icfai_chat_post(user_message: str = Form(...)):
    """Answer a user question via retrieval-augmented generation.

    Retrieves up to 6 relevant chunks, stuffs them into a prompt, and asks
    the Ollama-backed LLM. Always returns ``{"answer": str}`` — retriever
    and LLM failures are reported in the answer text, never raised.
    """
    query = user_message.strip()
    print(f" /digital_icfai_chat POST query: {query!r}")

    # -------------------------------------------------------
    # ROBUST RETRIEVER LOGIC
    # -------------------------------------------------------
    # Probes several retriever APIs because the available method name varies
    # across langchain versions; awaits the result only when it is awaitable.
    try:
        docs = None

        # Preferred method
        if hasattr(retriever, "get_relevant_documents"):
            maybe = retriever.get_relevant_documents(query)
            docs = await maybe if hasattr(maybe, "__await__") else maybe

        # Async alternatives
        elif hasattr(retriever, "aget_relevant_documents"):
            maybe = retriever.aget_relevant_documents(query)
            docs = await maybe if hasattr(maybe, "__await__") else maybe

        elif hasattr(retriever, "get_relevant_documents_async"):
            maybe = retriever.get_relevant_documents_async(query)
            docs = await maybe if hasattr(maybe, "__await__") else maybe

        # Fallback: use Chroma directly
        elif hasattr(vectordb, "similarity_search"):
            maybe = vectordb.similarity_search(query, k=6)
            docs = await maybe if hasattr(maybe, "__await__") else maybe

        else:
            msg = "Retriever does not support document search in this environment."
            print("❌", msg)
            return {"answer": msg}

        print(f" Retrieved {len(docs) if docs else 0} docs")
    except Exception as e:
        print(f"❌ Retriever error: {e}")
        return {"answer": f"Retriever error: {e}"}

    context = "\n\n".join([d.page_content for d in docs]) if docs else ""
    print(f" Context length: {len(context)} chars")

    prompt = f"""
You are the Digital ICFAI Assistant.
Use ONLY the context below to answer.
If the context does not contain the answer, say so politely.
Give a detailed, clear, student-friendly explanation with 4–6 lines.
Add examples wherever helpful.

Context:
{context}

Question:
{query}

Answer (clear, student-friendly):
"""

    try:
        answer = ask_ollama(prompt)
    except Exception as e:
        print(f"❌ LLM error: {e}")
        answer = f"LLM error: {e}"

    return {"answer": answer}


# -----------------------------------------------------------
# DEV SERVER
# -----------------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    port = int(os.environ.get("PORT", 7860))
    print(f"🚀 Starting IBS Assistant at http://0.0.0.0:{port}")
    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)