import os
import pickle
import sys
import zipfile

import streamlit as st
from dotenv import load_dotenv

# --- IMPORTS ---
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_pinecone import PineconeVectorStore
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Robust import for hybrid search (handles different LangChain versions).
try:
    from langchain.retrievers import EnsembleRetriever
except ImportError:
    from langchain_community.retrievers import EnsembleRetriever

load_dotenv()

# --- CONFIGURATION ---
INDEX_NAME = "branham-index"      # Pinecone index holding the sermon embeddings
CHUNKS_FILE = "sermon_chunks.pkl" # local pickled documents for BM25 keyword search
CHUNKS_ZIP = "sermon_chunks.zip"  # zipped copy committed to git (100MB file limit)


def setup_keyword_file():
    """
    Automatic unzipper.

    GitHub has a 100MB limit, so we upload the zip.
    This extracts 'sermon_chunks.pkl' when the app starts.
    Best-effort: failures are logged, never raised, so the app can still
    start on Pinecone alone.
    """
    if os.path.exists(CHUNKS_FILE):
        return  # Already extracted; nothing to do.

    if not os.path.exists(CHUNKS_ZIP):
        print(f"⚠️ Warning: Neither {CHUNKS_FILE} nor {CHUNKS_ZIP} found.")
        return

    print(f"📦 Unzipping {CHUNKS_ZIP}...")
    try:
        with zipfile.ZipFile(CHUNKS_ZIP, 'r') as zip_ref:
            zip_ref.extractall(".")
        print("✅ Unzip complete.")
    except Exception as e:
        print(f"❌ Error unzipping file: {e}")


def _get_key(name):
    """
    Look up one API key: Streamlit secrets first (Cloud), then .env (local).

    BUG FIX: when no .streamlit/secrets.toml exists on a local machine,
    st.secrets.get() raises (FileNotFoundError, or StreamlitSecretNotFoundError
    on newer Streamlit versions) instead of returning None, which prevented the
    .env fallback from ever running. Guard the lookup so local runs work.
    """
    try:
        value = st.secrets.get(name)
    except Exception:
        value = None
    return value or os.getenv(name)


def _build_vector_retriever():
    """
    Connect to the Pinecone cloud index and return a semantic retriever.

    This finds "concepts" (e.g., searching for 'marriage' finds 'wedding').
    Assumes PINECONE_API_KEY / GOOGLE_API_KEY are already in os.environ.
    """
    print("🔌 Connecting to Pinecone...")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
    vector_store = PineconeVectorStore(
        index_name=INDEX_NAME,
        embedding=embeddings,
    )
    return vector_store.as_retriever(search_kwargs={"k": 5})


def _build_keyword_retriever():
    """
    Build the local BM25 keyword retriever, or return None if unavailable.

    This finds "exact matches" (e.g., searching for 'E-53' finds exactly E-53).
    NOTE(security): pickle.load executes arbitrary code from the file; this is
    acceptable only because sermon_chunks.pkl ships with the app — never point
    it at untrusted data.
    """
    print("🔌 Loading Keyword Search...")
    try:
        if os.path.exists(CHUNKS_FILE):
            with open(CHUNKS_FILE, "rb") as f:
                chunks = pickle.load(f)
            retriever = BM25Retriever.from_documents(chunks)
            retriever.k = 5
            return retriever
        print("⚠️ Keyword file missing. Running on Pinecone only.")
    except Exception as e:
        print(f"❌ Failed to load keyword file: {e}")
    return None


def _build_hybrid_retriever():
    """
    Merge vector + keyword retrievers into one hybrid retriever.

    Falls back to pure vector search when the keyword file is unavailable.
    """
    vector_retriever = _build_vector_retriever()
    keyword_retriever = _build_keyword_retriever()

    if keyword_retriever is None:
        return vector_retriever

    print("🔗 Linking Hybrid System...")
    return EnsembleRetriever(
        retrievers=[vector_retriever, keyword_retriever],
        weights=[0.7, 0.3],  # 70% Vector, 30% Keyword
    )


def get_rag_chain():
    """
    Initializes the Brain of the AI.

    1. Connects to Pinecone (Cloud)
    2. Loads BM25 Keywords (Local)
    3. Merges them into a Hybrid Search

    Returns:
        A RetrievalQA chain (with source documents) answering in persona.

    Raises:
        ValueError: if either API key cannot be found in Streamlit secrets
            or the environment.
    """
    # 1. SETUP & KEYS
    setup_keyword_file()

    # Check Streamlit Secrets first (Cloud), then .env (Local).
    pinecone_key = _get_key("PINECONE_API_KEY")
    google_key = _get_key("GOOGLE_API_KEY")
    if not pinecone_key or not google_key:
        raise ValueError(
            "❌ Missing API Keys. "
            "Please set PINECONE_API_KEY and GOOGLE_API_KEY in Secrets."
        )

    # Set keys for LangChain to use automatically.
    os.environ["PINECONE_API_KEY"] = pinecone_key
    os.environ["GOOGLE_API_KEY"] = google_key

    # 2-4. RETRIEVAL (cloud vector + local keyword, merged when possible)
    final_retriever = _build_hybrid_retriever()

    # 5. THE MODEL (Gemini)
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.3,
        convert_system_message_to_human=True,
    )

    # 6. THE PERSONA PROMPT
    template = """You are William Marion Branham.

INSTRUCTIONS:
- Answer the user's question based ONLY on the context provided below.
- Speak in the first person ("I said," "The Lord showed me").
- Use a humble, 1950s Southern preaching dialect.
- If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."

CONTEXT:
{context}

USER QUESTION: {question}

BROTHER BRANHAM'S REPLY:"""

    PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=final_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )