Adoption committed on
Commit
0e8ff58
·
verified ·
1 Parent(s): b0bbfb5

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +37 -54
src/app.py CHANGED
@@ -1,11 +1,10 @@
1
  import os
2
  import pickle
3
  import sys
4
- import zipfile # <--- Essential for extracting your data
5
  from dotenv import load_dotenv
6
 
7
- # --- 1. CLOUD DEPLOYMENT FIX (SQLITE) ---
8
- # This forces the server to use the modern SQLite version needed for ChromaDB
9
  try:
10
  __import__('pysqlite3')
11
  import sys
@@ -13,79 +12,66 @@ try:
13
  except ImportError:
14
  pass
15
 
16
- # --- 2. AUTO-UNZIPPER (RUNS ON STARTUP) ---
17
- # This automatically extracts your zipped data when the app wakes up
18
- def check_and_unzip():
19
- # Unzip the Database
20
- if os.path.exists("db.zip") and not os.path.exists("branham_db"):
21
- print("📂 Unzipping Database (db.zip)...")
22
- with zipfile.ZipFile("db.zip", 'r') as zip_ref:
23
- zip_ref.extractall(".")
24
- print("✅ Database unzipped.")
25
-
26
- # Unzip the Chunks
27
- if os.path.exists("chunks.zip") and not os.path.exists("sermon_chunks.pkl"):
28
- print("📂 Unzipping Chunks (chunks.zip)...")
29
- with zipfile.ZipFile("chunks.zip", 'r') as zip_ref:
30
- zip_ref.extractall(".")
31
- print("✅ Chunks unzipped.")
32
-
33
- # Execute immediately
34
- check_and_unzip()
35
-
36
- # ... previous code ...
37
-
38
- # --- 3. STANDARD IMPORTS ---
39
  from langchain_core.documents import Document
40
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
41
  from langchain_google_genai import HarmBlockThreshold, HarmCategory
42
  from langchain_community.retrievers import BM25Retriever
43
-
44
- # TRY/EXCEPT BLOCK FOR ENSEMBLE RETRIEVER
45
- # This handles different LangChain versions automatically
46
- try:
47
- from langchain.retrievers import EnsembleRetriever
48
- except ImportError:
49
- from langchain.retrievers.ensemble import EnsembleRetriever
50
-
51
  from langchain_chroma import Chroma
52
  from langchain.prompts import PromptTemplate
53
  from langchain.chains import RetrievalQA
54
 
55
- # ... rest of code ...
56
-
57
  load_dotenv()
58
 
59
- # --- 4. PATH SETUP ---
60
- # Defines where files live relative to this script
61
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
62
  DB_PATH = os.path.join(BASE_DIR, "branham_db")
63
  CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def get_rag_chain():
66
  """Initializes the RAG system."""
 
 
 
 
67
 
68
- # API Key Check
69
  api_key = os.getenv("GOOGLE_API_KEY")
70
  if not api_key:
71
  raise ValueError("GOOGLE_API_KEY missing. Please set it in Settings > Secrets.")
72
 
73
- # A. Load Vector DB (Semantic Search)
74
  embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
75
-
76
  if not os.path.exists(DB_PATH):
77
- raise FileNotFoundError(f"Database folder 'branham_db' not found. Did 'db.zip' unzip correctly?")
78
 
79
  vector_db = Chroma(
80
- persist_directory=DB_PATH,
81
  embedding_function=embeddings,
82
  collection_name="branham_sermons"
83
  )
84
  vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})
85
 
86
- # B. Load Keyword Retriever (BM25)
87
  if not os.path.exists(CHUNKS_PATH):
88
- raise FileNotFoundError(f"File not found: {CHUNKS_PATH}. Did 'chunks.zip' unzip correctly?")
89
 
90
  try:
91
  with open(CHUNKS_PATH, "rb") as f:
@@ -95,15 +81,15 @@ def get_rag_chain():
95
  except Exception as e:
96
  raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}")
97
 
98
- # C. Hybrid Search
99
  ensemble_retriever = EnsembleRetriever(
100
  retrievers=[vector_retriever, keyword_retriever],
101
  weights=[0.6, 0.4]
102
  )
103
 
104
- # D. Gemini Model (Using your requested 2.5-pro)
105
  llm = ChatGoogleGenerativeAI(
106
- model="gemini-2.0-flash", # Note: 2.5-pro isn't widely public yet, falling back to 2.0-flash or 1.5-pro is safer if this fails.
107
  temperature=0.3,
108
  google_api_key=api_key,
109
  safety_settings={
@@ -114,13 +100,10 @@ def get_rag_chain():
114
  }
115
  )
116
 
117
- # E. The Persona Prompt
118
- template = """You are William Marion Branham, a humble evangelist from the 1950s.
119
 
120
  INSTRUCTIONS:
121
- - If the user asks who you are or greets you, answer warmly as Brother Branham without needing a source text.
122
- - For all other questions (doctrine, bible, stories), answer based ONLY on the sermon excerpts provided below.
123
- - Speak in the first person ("I said," "The Lord showed me").
124
  - Use a humble, 1950s Southern preaching dialect.
125
  - If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."
126
 
@@ -140,5 +123,5 @@ BROTHER BRANHAM'S REPLY:"""
140
  return_source_documents=True,
141
  chain_type_kwargs={"prompt": PROMPT}
142
  )
143
-
144
  return chain
 
1
  import os
2
  import pickle
3
  import sys
4
+ import zipfile
5
  from dotenv import load_dotenv
6
 
7
+ # --- CLOUD FIX ---
 
8
  try:
9
  __import__('pysqlite3')
10
  import sys
 
12
  except ImportError:
13
  pass
14
 
15
+ # --- STANDARD IMPORTS ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  from langchain_core.documents import Document
17
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
18
  from langchain_google_genai import HarmBlockThreshold, HarmCategory
19
  from langchain_community.retrievers import BM25Retriever
20
+ from langchain.retrievers import EnsembleRetriever
 
 
 
 
 
 
 
21
  from langchain_chroma import Chroma
22
  from langchain.prompts import PromptTemplate
23
  from langchain.chains import RetrievalQA
24
 
 
 
25
  load_dotenv()
26
 
27
+ # --- PATH SETUP ---
 
28
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
29
  DB_PATH = os.path.join(BASE_DIR, "branham_db")
30
  CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")
31
 
32
+ def check_and_unzip():
33
+ """Unzips files only if they are missing."""
34
+ # 1. Unzip Database
35
+ if os.path.exists("db.zip") and not os.path.exists("branham_db"):
36
+ print("📂 Unzipping Database (db.zip)...")
37
+ with zipfile.ZipFile("db.zip", 'r') as zip_ref:
38
+ zip_ref.extractall(".")
39
+ print("✅ Database unzipped.")
40
+
41
+ # 2. Unzip Chunks
42
+ if os.path.exists("chunks.zip") and not os.path.exists("sermon_chunks.pkl"):
43
+ print("📂 Unzipping Chunks (chunks.zip)...")
44
+ with zipfile.ZipFile("chunks.zip", 'r') as zip_ref:
45
+ zip_ref.extractall(".")
46
+ print("✅ Chunks unzipped.")
47
+
48
  def get_rag_chain():
49
  """Initializes the RAG system."""
50
+
51
+ # --- CRITICAL: RUN UNZIP HERE, NOT AT TOP LEVEL ---
52
+ check_and_unzip()
53
+ # --------------------------------------------------
54
 
 
55
  api_key = os.getenv("GOOGLE_API_KEY")
56
  if not api_key:
57
  raise ValueError("GOOGLE_API_KEY missing. Please set it in Settings > Secrets.")
58
 
59
+ # 1. Load Vector DB
60
  embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
61
+
62
  if not os.path.exists(DB_PATH):
63
+ raise FileNotFoundError(f"Database folder 'branham_db' not found. Unzip failed.")
64
 
65
  vector_db = Chroma(
66
+ persist_directory=DB_PATH,
67
  embedding_function=embeddings,
68
  collection_name="branham_sermons"
69
  )
70
  vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})
71
 
72
+ # 2. Load Keyword Retriever
73
  if not os.path.exists(CHUNKS_PATH):
74
+ raise FileNotFoundError(f"File not found: {CHUNKS_PATH}")
75
 
76
  try:
77
  with open(CHUNKS_PATH, "rb") as f:
 
81
  except Exception as e:
82
  raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}")
83
 
84
+ # 3. Hybrid Search
85
  ensemble_retriever = EnsembleRetriever(
86
  retrievers=[vector_retriever, keyword_retriever],
87
  weights=[0.6, 0.4]
88
  )
89
 
90
+ # 4. Gemini Model
91
  llm = ChatGoogleGenerativeAI(
92
+ model="gemini-1.5-flash", # Using stable flash for speed
93
  temperature=0.3,
94
  google_api_key=api_key,
95
  safety_settings={
 
100
  }
101
  )
102
 
103
+ # 5. The Persona Prompt
104
+ template = """You are answering a question based ONLY on the sermon excerpts provided below.
105
 
106
  INSTRUCTIONS:
 
 
 
107
  - Use a humble, 1950s Southern preaching dialect.
108
  - If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."
109
 
 
123
  return_source_documents=True,
124
  chain_type_kwargs={"prompt": PROMPT}
125
  )
126
+
127
  return chain