import os
import pickle
import sys
import zipfile
import shutil
from dotenv import load_dotenv

# --- 1. CLOUD DEPLOYMENT FIX (SQLITE) ---
# Chroma needs a newer sqlite3 than many cloud images ship. When the
# pysqlite3 wheel is installed, alias it in as the stdlib module before
# anything imports sqlite3; otherwise fall back to the system one.
try:
    __import__('pysqlite3')
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
except ImportError:
    pass

# --- 2. ROBUST UNZIPPER (Runs inside get_rag_chain) ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_FOLDER_NAME = "branham_db"        # persisted Chroma collection directory
DB_ZIP_NAME = "branham_db.zip"       # shipped archive of the DB folder
CHUNKS_FILE_NAME = "sermon_chunks.pkl"  # pickled documents for BM25
CHUNKS_ZIP_NAME = "sermon_chunks.zip"   # shipped archive of the pickle


def _extract_if_missing(target_path, zip_path, done_message):
    """Extract *zip_path* beside itself when *target_path* does not exist.

    Returns True when the target is already present or the archive was
    extracted; False when both target and archive are absent (caller
    decides how to warn).
    """
    if os.path.exists(target_path):
        return True
    if os.path.exists(zip_path):
        print(f"🚀 Found {os.path.basename(zip_path)}. Unzipping...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Extract into the archive's own directory so relative
            # entries land next to the zip (i.e. in BASE_DIR).
            zip_ref.extractall(os.path.dirname(zip_path) or ".")
        print(done_message)
        return True
    return False


def setup_files():
    """Ensures database and chunk files are ready.

    Checks for the Chroma DB folder and the pickled chunks file under
    BASE_DIR, extracting their zip archives when only the archives
    shipped. Prints warnings (does not raise) when neither is found.
    """
    print(f"📂 Setup: Checking files in {BASE_DIR}")

    # A. Handle Database
    if not _extract_if_missing(
        os.path.join(BASE_DIR, DB_FOLDER_NAME),
        os.path.join(BASE_DIR, DB_ZIP_NAME),
        "✅ Database unzipped.",
    ):
        print(f"⚠️ WARNING: Neither '{DB_FOLDER_NAME}' folder nor '{DB_ZIP_NAME}' found.")
        # Fallback check: Did you verify the zip name on Hugging Face?
        print(f"Files available: {os.listdir(BASE_DIR)}")

    # B. Handle Chunks
    if not _extract_if_missing(
        os.path.join(BASE_DIR, CHUNKS_FILE_NAME),
        os.path.join(BASE_DIR, CHUNKS_ZIP_NAME),
        "✅ Chunks unzipped.",
    ):
        print(f"⚠️ WARNING: '{CHUNKS_ZIP_NAME}' not found.")

# --- 3. STANDARD IMPORTS ---
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_google_genai import HarmBlockThreshold, HarmCategory

# LangChain Import Fix (Handles Version 0.2 vs 0.3)
try:
    from langchain.retrievers import EnsembleRetriever
except ImportError:
    from langchain_community.retrievers import EnsembleRetriever

from langchain_community.retrievers import BM25Retriever
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

load_dotenv()


def _load_vector_retriever(embeddings):
    """Open the persisted Chroma collection and wrap it as a top-4 retriever.

    Raises FileNotFoundError (with a directory listing for debugging) when
    the DB folder is absent — usually a failed or nested zip extraction.
    """
    db_full_path = os.path.join(BASE_DIR, DB_FOLDER_NAME)
    if not os.path.exists(db_full_path):
        # Detailed error for debugging
        raise FileNotFoundError(f"Database folder '{DB_FOLDER_NAME}' not found. Zip extraction might have failed or created a nested folder. Files in root: {os.listdir(BASE_DIR)}")
    vector_db = Chroma(
        persist_directory=db_full_path,
        embedding_function=embeddings,
        collection_name="branham_sermons",
    )
    return vector_db.as_retriever(search_kwargs={"k": 4})


def _load_keyword_retriever():
    """Build a top-4 BM25 keyword retriever from the pickled sermon chunks.

    Raises FileNotFoundError when the pickle is missing, RuntimeError when
    it exists but cannot be loaded/indexed.
    """
    chunks_full_path = os.path.join(BASE_DIR, CHUNKS_FILE_NAME)
    if not os.path.exists(chunks_full_path):
        raise FileNotFoundError(f"File not found: {CHUNKS_FILE_NAME}. Did '{CHUNKS_ZIP_NAME}' unzip correctly?")
    try:
        # SECURITY NOTE: pickle.load is only acceptable because this file
        # ships with the app; never load pickles from untrusted sources.
        with open(chunks_full_path, "rb") as f:
            chunks = pickle.load(f)
        keyword_retriever = BM25Retriever.from_documents(chunks)
        keyword_retriever.k = 4
        return keyword_retriever
    except Exception as e:
        # Chain the original cause so the real failure isn't lost.
        raise RuntimeError(f"Failed to load {CHUNKS_FILE_NAME}. Error: {e}") from e


def get_rag_chain():
    """Initializes the RAG system.

    Builds a RetrievalQA chain that answers in William Branham's voice,
    grounded only in retrieved sermon excerpts, using a hybrid retriever
    (dense vectors + BM25) over Gemini 1.5 Flash.

    Returns:
        A RetrievalQA chain; invoking it returns the answer plus source
        documents (return_source_documents=True).

    Raises:
        ValueError: when GOOGLE_API_KEY is not configured.
        FileNotFoundError: when the DB folder or chunks pickle is missing.
        RuntimeError: when the chunks pickle cannot be loaded.
    """
    # 1. Run Setup (Unzip files if needed)
    setup_files()

    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY missing. Please set it in Settings > Secrets.")

    # 2-4. Hybrid search: semantic (vector) hits weighted 60%, exact
    # keyword (BM25) hits weighted 40%.
    embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
    ensemble_retriever = EnsembleRetriever(
        retrievers=[_load_vector_retriever(embeddings), _load_keyword_retriever()],
        weights=[0.6, 0.4],
    )

    # 5. Gemini Model — low temperature keeps replies close to the sources;
    # safety thresholds are relaxed to BLOCK_ONLY_HIGH, presumably because
    # sermon text can trip the default filters (confirm if tuning).
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.3,
        google_api_key=api_key,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        },
    )

    # 6. The Persona Prompt
    template = """You are William Marion Branham. You are answering a question based ONLY on the sermon excerpts provided below. INSTRUCTIONS: - Speak in the first person ("I said," "The Lord showed me"). - Use a humble, 1950s Southern preaching dialect. - If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages." - Always refer to the Bible as the absolute authority. CONTEXT MESSAGES: {context} USER QUESTION: {question} BROTHER BRANHAM'S REPLY:"""
    PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=ensemble_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain