import os
import pickle
import sys

from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_google_genai import HarmBlockThreshold, HarmCategory
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

load_dotenv()

# --- GET ABSOLUTE PATHS ---
# Anchor all data paths to this file's directory so the script works
# regardless of the current working directory it is launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.path.join(BASE_DIR, "branham_db")
CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")


def get_rag_chain():
    """Build and return the hybrid-retrieval RAG chain.

    Combines a Chroma vector store (semantic search) with a BM25 keyword
    retriever in a weighted ensemble, then wires both into a RetrievalQA
    chain backed by a Gemini chat model with a persona prompt.

    Returns:
        RetrievalQA: a chain that answers questions from the indexed
        sermons and returns its source documents.

    Raises:
        ValueError: if GOOGLE_API_KEY is not set in the environment/.env.
        FileNotFoundError: if the Chroma DB or the pickled chunks are missing.
        RuntimeError: if the pickled chunks exist but cannot be loaded
            (the original exception is chained for debugging).
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY missing. Please set it in .env file.")

    # 1. Load Vector DB (Semantic Search)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
    if not os.path.exists(DB_PATH):
        raise FileNotFoundError(
            f"Database not found at {DB_PATH}. Run 'python ingest.py' first."
        )
    vector_db = Chroma(
        persist_directory=DB_PATH,
        embedding_function=embeddings,
        collection_name="branham_sermons",
    )
    vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})

    # 2. Load Keyword Retriever (BM25)
    # We stopped hiding the error. If it fails, we want to know WHY
    # (corruption? version mismatch?).
    if not os.path.exists(CHUNKS_PATH):
        raise FileNotFoundError(f"File not found: {CHUNKS_PATH}")
    try:
        with open(CHUNKS_PATH, "rb") as f:
            # NOTE: pickle is only safe because this file is produced locally
            # by ingest.py — never load a pickle from an untrusted source.
            chunks = pickle.load(f)
        keyword_retriever = BM25Retriever.from_documents(chunks)
        keyword_retriever.k = 4
    except Exception as e:
        # Chain the original exception ("from e") so the real traceback and
        # failure cause are preserved, not just the message text.
        raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}") from e

    # 3. Hybrid Search: blend semantic (0.6) and keyword (0.4) rankings.
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vector_retriever, keyword_retriever],
        weights=[0.6, 0.4],
    )

    # 4. Gemini Model (The Voice) — low temperature keeps answers grounded
    # in the retrieved context; safety filters relaxed to block-only-high
    # for every category.
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-pro",
        temperature=0.3,
        google_api_key=api_key,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        },
    )

    # 5. The Persona Prompt
    template = """You are answering a question based ONLY on the sermon excerpts provided below.

INSTRUCTIONS:
- Use a humble, 1950s Southern preaching dialect.
- If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."

CONTEXT MESSAGES:
{context}

USER QUESTION: {question}

BROTHER BRANHAM'S REPLY:"""
    PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=ensemble_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain