Spaces:

Adoption
/

the_seventh_handle

Sleeping

App Files Files Community

Adoption committed on Nov 27, 2025

Commit

0e8ff58

verified ·

1 Parent(s): b0bbfb5

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +37 -54

src/app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import os
 import pickle
 import sys
-import zipfile  # <--- Essential for extracting your data
 from dotenv import load_dotenv
-# --- 1. CLOUD DEPLOYMENT FIX (SQLITE) ---
-# This forces the server to use the modern SQLite version needed for ChromaDB
 try:
     __import__('pysqlite3')
     import sys
@@ -13,79 +12,66 @@ try:
 except ImportError:
     pass
-# --- 2. AUTO-UNZIPPER (RUNS ON STARTUP) ---
-# This automatically extracts your zipped data when the app wakes up
-def check_and_unzip():
-    # Unzip the Database
-    if os.path.exists("db.zip") and not os.path.exists("branham_db"):
-        print("📂 Unzipping Database (db.zip)...")
-        with zipfile.ZipFile("db.zip", 'r') as zip_ref:
-            zip_ref.extractall(".")
-        print("✅ Database unzipped.")
-    # Unzip the Chunks
-    if os.path.exists("chunks.zip") and not os.path.exists("sermon_chunks.pkl"):
-        print("📂 Unzipping Chunks (chunks.zip)...")
-        with zipfile.ZipFile("chunks.zip", 'r') as zip_ref:
-            zip_ref.extractall(".")
-        print("✅ Chunks unzipped.")
-# Execute immediately
-check_and_unzip()
-# ... previous code ...
-# --- 3. STANDARD IMPORTS ---
 from langchain_core.documents import Document
 from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
 from langchain_google_genai import HarmBlockThreshold, HarmCategory
 from langchain_community.retrievers import BM25Retriever
-# TRY/EXCEPT BLOCK FOR ENSEMBLE RETRIEVER
-# This handles different LangChain versions automatically
-try:
-    from langchain.retrievers import EnsembleRetriever
-except ImportError:
-    from langchain.retrievers.ensemble import EnsembleRetriever
 from langchain_chroma import Chroma
 from langchain.prompts import PromptTemplate
 from langchain.chains import RetrievalQA
-# ... rest of code ...
 load_dotenv()
-# --- 4. PATH SETUP ---
-# Defines where files live relative to this script
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DB_PATH = os.path.join(BASE_DIR, "branham_db")
 CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")
 def get_rag_chain():
     """Initializes the RAG system."""
-    # API Key Check
     api_key = os.getenv("GOOGLE_API_KEY")
     if not api_key:
         raise ValueError("GOOGLE_API_KEY missing. Please set it in Settings > Secrets.")
-    # A. Load Vector DB (Semantic Search)
     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
     if not os.path.exists(DB_PATH):
-        raise FileNotFoundError(f"Database folder 'branham_db' not found. Did 'db.zip' unzip correctly?")
     vector_db = Chroma(
-        persist_directory=DB_PATH,
         embedding_function=embeddings,
         collection_name="branham_sermons"
     )
     vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})
-    # B. Load Keyword Retriever (BM25)
     if not os.path.exists(CHUNKS_PATH):
-        raise FileNotFoundError(f"File not found: {CHUNKS_PATH}. Did 'chunks.zip' unzip correctly?")
     try:
         with open(CHUNKS_PATH, "rb") as f:
@@ -95,15 +81,15 @@ def get_rag_chain():
     except Exception as e:
         raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}")
-    # C. Hybrid Search
     ensemble_retriever = EnsembleRetriever(
         retrievers=[vector_retriever, keyword_retriever],
         weights=[0.6, 0.4]
     )
-    # D. Gemini Model (Using your requested 2.5-pro)
     llm = ChatGoogleGenerativeAI(
-        model="gemini-2.0-flash", # Note: 2.5-pro isn't widely public yet, falling back to 2.0-flash or 1.5-pro is safer if this fails.
         temperature=0.3,
         google_api_key=api_key,
         safety_settings={
@@ -114,13 +100,10 @@ def get_rag_chain():
         }
     )
-    # E. The Persona Prompt
-    template = """You are William Marion Branham, a humble evangelist from the 1950s.
 INSTRUCTIONS:
-- If the user asks who you are or greets you, answer warmly as Brother Branham without needing a source text.
-- For all other questions (doctrine, bible, stories), answer based ONLY on the sermon excerpts provided below.
-- Speak in the first person ("I said," "The Lord showed me").
 - Use a humble, 1950s Southern preaching dialect.
 - If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."
@@ -140,5 +123,5 @@ BROTHER BRANHAM'S REPLY:"""
         return_source_documents=True,
         chain_type_kwargs={"prompt": PROMPT}
     )
     return chain

 import os
 import pickle
 import sys
+import zipfile
 from dotenv import load_dotenv
+# --- CLOUD FIX ---
 try:
     __import__('pysqlite3')
     import sys
 except ImportError:
     pass
+# --- STANDARD IMPORTS ---
 from langchain_core.documents import Document
 from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
 from langchain_google_genai import HarmBlockThreshold, HarmCategory
 from langchain_community.retrievers import BM25Retriever
+from langchain.retrievers import EnsembleRetriever
 from langchain_chroma import Chroma
 from langchain.prompts import PromptTemplate
 from langchain.chains import RetrievalQA
 load_dotenv()
+# --- PATH SETUP ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 DB_PATH = os.path.join(BASE_DIR, "branham_db")
 CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")
+def check_and_unzip():
+    """Unzips files only if they are missing."""
+    # 1. Unzip Database
+    if os.path.exists("db.zip") and not os.path.exists("branham_db"):
+        print("📂 Unzipping Database (db.zip)...")
+        with zipfile.ZipFile("db.zip", 'r') as zip_ref:
+            zip_ref.extractall(".")
+        print("✅ Database unzipped.")
+    # 2. Unzip Chunks
+    if os.path.exists("chunks.zip") and not os.path.exists("sermon_chunks.pkl"):
+        print("📂 Unzipping Chunks (chunks.zip)...")
+        with zipfile.ZipFile("chunks.zip", 'r') as zip_ref:
+            zip_ref.extractall(".")
+        print("✅ Chunks unzipped.")
 def get_rag_chain():
     """Initializes the RAG system."""
+    # --- CRITICAL: RUN UNZIP HERE, NOT AT TOP LEVEL ---
+    check_and_unzip()
+    # --------------------------------------------------
     api_key = os.getenv("GOOGLE_API_KEY")
     if not api_key:
         raise ValueError("GOOGLE_API_KEY missing. Please set it in Settings > Secrets.")
+    # 1. Load Vector DB
     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
     if not os.path.exists(DB_PATH):
+         raise FileNotFoundError(f"Database folder 'branham_db' not found. Unzip failed.")
     vector_db = Chroma(
+        persist_directory=DB_PATH,
         embedding_function=embeddings,
         collection_name="branham_sermons"
     )
     vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})
+    # 2. Load Keyword Retriever
     if not os.path.exists(CHUNKS_PATH):
+        raise FileNotFoundError(f"File not found: {CHUNKS_PATH}")
     try:
         with open(CHUNKS_PATH, "rb") as f:
     except Exception as e:
         raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}")
+    # 3. Hybrid Search
     ensemble_retriever = EnsembleRetriever(
         retrievers=[vector_retriever, keyword_retriever],
         weights=[0.6, 0.4]
     )
+    # 4. Gemini Model
     llm = ChatGoogleGenerativeAI(
+        model="gemini-1.5-flash", # Using stable flash for speed
         temperature=0.3,
         google_api_key=api_key,
         safety_settings={
         }
     )
+    # 5. The Persona Prompt
+    template = """You are answering a question based ONLY on the sermon excerpts provided below.
 INSTRUCTIONS:
 - Use a humble, 1950s Southern preaching dialect.
 - If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."
         return_source_documents=True,
         chain_type_kwargs={"prompt": PROMPT}
     )
     return chain