Adoption committed on
Commit
27aff1d
·
verified ·
1 Parent(s): 91b2bfb

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +72 -93
src/app.py CHANGED
@@ -1,98 +1,79 @@
1
  import os
2
  import pickle
3
- import zipfile
4
  import sys
5
  import streamlit as st
6
  from dotenv import load_dotenv
7
 
8
- # --- IMPORTS ---
 
 
 
 
9
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
10
  from langchain_community.retrievers import BM25Retriever
11
  from langchain_pinecone import PineconeVectorStore
12
  from langchain_core.prompts import PromptTemplate
13
  from langchain.chains import RetrievalQA
14
-
15
- # Robust Import for Hybrid Search (Handles different LangChain versions)
16
- try:
17
-     from langchain.retrievers import EnsembleRetriever
18
- except ImportError:
19
-     from langchain_community.retrievers import EnsembleRetriever
20
 
21
  load_dotenv()
22
 
23
- # --- CONFIGURATION ---
24
- INDEX_NAME = "branham-index"
25
- CHUNKS_FILE = "sermon_chunks.pkl"
26
- CHUNKS_ZIP = "sermon_chunks.zip"
27
-
28
  def get_rag_chain():
29
-     """
30
-     Initializes the Brain of the AI.
31
-     1. Connects to Pinecone (Cloud)
32
-     2. Loads BM25 Keywords (Local)
33
-     3. Merges them into a Hybrid Search
34
-     """
35
-    
36
-     # 1. SETUP & KEYS
37
-    
38
-
39
-     # Check Streamlit Secrets first (Cloud), then .env (Local)
40
-     pinecone_key = st.secrets.get("PINECONE_API_KEY") or os.getenv("PINECONE_API_KEY")
41
-     google_key = st.secrets.get("GOOGLE_API_KEY") or os.getenv("GOOGLE_API_KEY")
42
-
43
-     if not pinecone_key or not google_key:
44
-         raise ValueError("❌ Missing API Keys. Please set PINECONE_API_KEY and GOOGLE_API_KEY in Secrets.")
45
-
46
-     # Set keys for LangChain to use automatically
47
-     os.environ["PINECONE_API_KEY"] = pinecone_key
48
-     os.environ["GOOGLE_API_KEY"] = google_key
49
-
50
-     # 2. CLOUD VECTOR SEARCH (Pinecone)
51
-     # This finds "concepts" (e.g., searching for 'marriage' finds 'wedding')
52
-     print("🔌 Connecting to Pinecone...")
53
-     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
54
-    
55
-     vector_store = PineconeVectorStore(
56
-         index_name=INDEX_NAME,
57
-         embedding=embeddings
58
-     )
59
-     vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
60
-
61
-     # 3. LOCAL KEYWORD SEARCH (BM25)
62
-     # This finds "exact matches" (e.g., searching for 'E-53' finds exactly E-53)
63
-     print("🔌 Loading Keyword Search...")
64
-     keyword_retriever = None
65
-    
66
-     try:
67
-         if os.path.exists(CHUNKS_FILE):
68
-             with open(CHUNKS_FILE, "rb") as f:
69
-                 chunks = pickle.load(f)
70
-             keyword_retriever = BM25Retriever.from_documents(chunks)
71
-             keyword_retriever.k = 5
72
-         else:
73
-             print("⚠️ Keyword file missing. Running on Pinecone only.")
74
-     except Exception as e:
75
-         print(f"❌ Failed to load keyword file: {e}")
76
-
77
-     # 4. HYBRID RETRIEVER (The Merge)
78
-     if keyword_retriever:
79
-         print("🔗 Linking Hybrid System...")
80
-         final_retriever = EnsembleRetriever(
81
-             retrievers=[vector_retriever, keyword_retriever],
82
-             weights=[0.7, 0.3] # 70% Vector, 30% Keyword
83
-         )
84
-     else:
85
-         final_retriever = vector_retriever
86
-
87
-     # 5. THE MODEL (Gemini)
88
-     llm = ChatGoogleGenerativeAI(
89
-         model="gemini-1.5-flash",
90
-         temperature=0.3,
91
-         convert_system_message_to_human=True
92
-     )
93
-
94
-     # 6. THE PERSONA PROMPT
95
-     template = """You are William Marion Branham.
96
 
97
  INSTRUCTIONS:
98
  - Answer the user's question based ONLY on the context provided below.
@@ -107,16 +88,14 @@ USER QUESTION: {question}
107
 
108
  BROTHER BRANHAM'S REPLY:"""
109
 
110
-     PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])
111
-
112
-     chain = RetrievalQA.from_chain_type(
113
-         llm=llm,
114
-         chain_type="stuff",
115
-         retriever=final_retriever,
116
-         return_source_documents=True,
117
-         chain_type_kwargs={"prompt": PROMPT}
118
-     )
119
-    
120
-     return chain
121
-
122
-
 
1
  import os
2
  import pickle
 
3
  import sys
4
  import streamlit as st
5
  from dotenv import load_dotenv
6
 
7
+ # --- 1. CONFIGURATION ---
8
+ INDEX_NAME = "branham-index"
9
+ CHUNKS_FILE = "sermon_chunks.pkl"
10
+
11
+ # --- 2. IMPORTS ---
12
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
13
  from langchain_community.retrievers import BM25Retriever
14
  from langchain_pinecone import PineconeVectorStore
15
  from langchain_core.prompts import PromptTemplate
16
  from langchain.chains import RetrievalQA
17
+ from langchain.retrievers import EnsembleRetriever
 
 
 
 
 
18
 
19
  load_dotenv()
20
 
 
 
 
 
 
21
  def get_rag_chain():
22
+ # A. Auth (Check Secrets first, then local .env)
23
+ pinecone_key = st.secrets.get("PINECONE_API_KEY") or os.getenv("PINECONE_API_KEY")
24
+ google_key = st.secrets.get("GOOGLE_API_KEY") or os.getenv("GOOGLE_API_KEY")
25
+
26
+ if not pinecone_key or not google_key:
27
+ raise ValueError("❌ Missing API Keys. Set PINECONE_API_KEY and GOOGLE_API_KEY.")
28
+
29
+ # Set environment variables for the libraries to see
30
+ os.environ["PINECONE_API_KEY"] = pinecone_key
31
+ os.environ["GOOGLE_API_KEY"] = google_key
32
+
33
+ # B. Pinecone Vector Search (Cloud)
34
+ print("🔌 Connecting to Pinecone...")
35
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
36
+
37
+ vector_store = PineconeVectorStore(
38
+ index_name=INDEX_NAME,
39
+ embedding=embeddings
40
+ )
41
+ vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
42
+
43
+ # C. Local Keyword Search (File)
44
+ print("🔌 Loading Keyword Search...")
45
+ keyword_retriever = None
46
+
47
+ try:
48
+ if os.path.exists(CHUNKS_FILE):
49
+ with open(CHUNKS_FILE, "rb") as f:
50
+ chunks = pickle.load(f)
51
+ keyword_retriever = BM25Retriever.from_documents(chunks)
52
+ keyword_retriever.k = 5
53
+ else:
54
+ print(f"⚠️ {CHUNKS_FILE} missing. Using Vector only.")
55
+ except Exception as e:
56
+ print(f" Failed to load keyword file: {e}")
57
+
58
+ # D. Hybrid Merge
59
+ if keyword_retriever:
60
+ print("🔗 Linking Hybrid System...")
61
+ final_retriever = EnsembleRetriever(
62
+ retrievers=[vector_retriever, keyword_retriever],
63
+ weights=[0.7, 0.3]
64
+ )
65
+ else:
66
+ final_retriever = vector_retriever
67
+
68
+ # E. Model
69
+ llm = ChatGoogleGenerativeAI(
70
+ model="gemini-1.5-flash",
71
+ temperature=0.3,
72
+ convert_system_message_to_human=True
73
+ )
74
+
75
+ # F. Prompt
76
+ template = """You are William Marion Branham.
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  INSTRUCTIONS:
79
  - Answer the user's question based ONLY on the context provided below.
 
88
 
89
  BROTHER BRANHAM'S REPLY:"""
90
 
91
+ PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])
92
+
93
+ chain = RetrievalQA.from_chain_type(
94
+ llm=llm,
95
+ chain_type="stuff",
96
+ retriever=final_retriever,
97
+ return_source_documents=True,
98
+ chain_type_kwargs={"prompt": PROMPT}
99
+ )
100
+
101
+ return chain