Adoption committed on
Commit
27aff1d
·
verified ·
1 Parent(s): 91b2bfb

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +72 -93
src/app.py CHANGED
@@ -1,98 +1,79 @@
1
  import os
2
  import pickle
3
- import zipfile
4
  import sys
5
  import streamlit as st
6
  from dotenv import load_dotenv
7
 
8
- # --- IMPORTS ---
 
 
 
 
9
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
10
  from langchain_community.retrievers import BM25Retriever
11
  from langchain_pinecone import PineconeVectorStore
12
  from langchain_core.prompts import PromptTemplate
13
  from langchain.chains import RetrievalQA
14
-
15
- # Robust Import for Hybrid Search (Handles different LangChain versions)
16
- try:
17
-     from langchain.retrievers import EnsembleRetriever
18
- except ImportError:
19
-     from langchain_community.retrievers import EnsembleRetriever
20
 
21
  load_dotenv()
22
 
23
- # --- CONFIGURATION ---
24
- INDEX_NAME = "branham-index"
25
- CHUNKS_FILE = "sermon_chunks.pkl"
26
- CHUNKS_ZIP = "sermon_chunks.zip"
27
-
28
  def get_rag_chain():
29
-     """
30
-     Initializes the Brain of the AI.
31
-     1. Connects to Pinecone (Cloud)
32
-     2. Loads BM25 Keywords (Local)
33
-     3. Merges them into a Hybrid Search
34
-     """
35
-    
36
-     # 1. SETUP & KEYS
37
-    
38
-
39
-     # Check Streamlit Secrets first (Cloud), then .env (Local)
40
-     pinecone_key = st.secrets.get("PINECONE_API_KEY") or os.getenv("PINECONE_API_KEY")
41
-     google_key = st.secrets.get("GOOGLE_API_KEY") or os.getenv("GOOGLE_API_KEY")
42
-
43
-     if not pinecone_key or not google_key:
44
-         raise ValueError("❌ Missing API Keys. Please set PINECONE_API_KEY and GOOGLE_API_KEY in Secrets.")
45
-
46
-     # Set keys for LangChain to use automatically
47
-     os.environ["PINECONE_API_KEY"] = pinecone_key
48
-     os.environ["GOOGLE_API_KEY"] = google_key
49
-
50
-     # 2. CLOUD VECTOR SEARCH (Pinecone)
51
-     # This finds "concepts" (e.g., searching for 'marriage' finds 'wedding')
52
-     print("🔌 Connecting to Pinecone...")
53
-     embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
54
-    
55
-     vector_store = PineconeVectorStore(
56
-         index_name=INDEX_NAME,
57
-         embedding=embeddings
58
-     )
59
-     vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
60
-
61
-     # 3. LOCAL KEYWORD SEARCH (BM25)
62
-     # This finds "exact matches" (e.g., searching for 'E-53' finds exactly E-53)
63
-     print("🔌 Loading Keyword Search...")
64
-     keyword_retriever = None
65
-    
66
-     try:
67
-         if os.path.exists(CHUNKS_FILE):
68
-             with open(CHUNKS_FILE, "rb") as f:
69
-                 chunks = pickle.load(f)
70
-             keyword_retriever = BM25Retriever.from_documents(chunks)
71
-             keyword_retriever.k = 5
72
-         else:
73
-             print("⚠️ Keyword file missing. Running on Pinecone only.")
74
-     except Exception as e:
75
-         print(f"❌ Failed to load keyword file: {e}")
76
-
77
-     # 4. HYBRID RETRIEVER (The Merge)
78
-     if keyword_retriever:
79
-         print("🔗 Linking Hybrid System...")
80
-         final_retriever = EnsembleRetriever(
81
-             retrievers=[vector_retriever, keyword_retriever],
82
-             weights=[0.7, 0.3] # 70% Vector, 30% Keyword
83
-         )
84
-     else:
85
-         final_retriever = vector_retriever
86
-
87
-     # 5. THE MODEL (Gemini)
88
-     llm = ChatGoogleGenerativeAI(
89
-         model="gemini-1.5-flash",
90
-         temperature=0.3,
91
-         convert_system_message_to_human=True
92
-     )
93
-
94
-     # 6. THE PERSONA PROMPT
95
-     template = """You are William Marion Branham.
96
 
97
  INSTRUCTIONS:
98
  - Answer the user's question based ONLY on the context provided below.
@@ -107,16 +88,14 @@ USER QUESTION: {question}
107
 
108
  BROTHER BRANHAM'S REPLY:"""
109
 
110
-     PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])
111
-
112
-     chain = RetrievalQA.from_chain_type(
113
-         llm=llm,
114
-         chain_type="stuff",
115
-         retriever=final_retriever,
116
-         return_source_documents=True,
117
-         chain_type_kwargs={"prompt": PROMPT}
118
-     )
119
-    
120
-     return chain
121
-
122
-
 
1
  import os
2
  import pickle
 
3
  import sys
4
  import streamlit as st
5
  from dotenv import load_dotenv
6
 
7
+ # --- 1. CONFIGURATION ---
8
+ INDEX_NAME = "branham-index"
9
+ CHUNKS_FILE = "sermon_chunks.pkl"
10
+
11
+ # --- 2. IMPORTS ---
12
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
13
  from langchain_community.retrievers import BM25Retriever
14
  from langchain_pinecone import PineconeVectorStore
15
  from langchain_core.prompts import PromptTemplate
16
  from langchain.chains import RetrievalQA
17
+ from langchain.retrievers import EnsembleRetriever
 
 
 
 
 
18
 
19
  load_dotenv()
20
 
 
 
 
 
 
21
  def get_rag_chain():
22
+ # A. Auth (Check Secrets first, then local .env)
23
+ pinecone_key = st.secrets.get("PINECONE_API_KEY") or os.getenv("PINECONE_API_KEY")
24
+ google_key = st.secrets.get("GOOGLE_API_KEY") or os.getenv("GOOGLE_API_KEY")
25
+
26
+ if not pinecone_key or not google_key:
27
+ raise ValueError("❌ Missing API Keys. Set PINECONE_API_KEY and GOOGLE_API_KEY.")
28
+
29
+ # Set environment variables for the libraries to see
30
+ os.environ["PINECONE_API_KEY"] = pinecone_key
31
+ os.environ["GOOGLE_API_KEY"] = google_key
32
+
33
+ # B. Pinecone Vector Search (Cloud)
34
+ print("🔌 Connecting to Pinecone...")
35
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
36
+
37
+ vector_store = PineconeVectorStore(
38
+ index_name=INDEX_NAME,
39
+ embedding=embeddings
40
+ )
41
+ vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
42
+
43
+ # C. Local Keyword Search (File)
44
+ print("🔌 Loading Keyword Search...")
45
+ keyword_retriever = None
46
+
47
+ try:
48
+ if os.path.exists(CHUNKS_FILE):
49
+ with open(CHUNKS_FILE, "rb") as f:
50
+ chunks = pickle.load(f)
51
+ keyword_retriever = BM25Retriever.from_documents(chunks)
52
+ keyword_retriever.k = 5
53
+ else:
54
+ print(f"⚠️ {CHUNKS_FILE} missing. Using Vector only.")
55
+ except Exception as e:
56
+ print(f" Failed to load keyword file: {e}")
57
+
58
+ # D. Hybrid Merge
59
+ if keyword_retriever:
60
+ print("🔗 Linking Hybrid System...")
61
+ final_retriever = EnsembleRetriever(
62
+ retrievers=[vector_retriever, keyword_retriever],
63
+ weights=[0.7, 0.3]
64
+ )
65
+ else:
66
+ final_retriever = vector_retriever
67
+
68
+ # E. Model
69
+ llm = ChatGoogleGenerativeAI(
70
+ model="gemini-1.5-flash",
71
+ temperature=0.3,
72
+ convert_system_message_to_human=True
73
+ )
74
+
75
+ # F. Prompt
76
+ template = """You are William Marion Branham.
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  INSTRUCTIONS:
79
  - Answer the user's question based ONLY on the context provided below.
 
88
 
89
  BROTHER BRANHAM'S REPLY:"""
90
 
91
+ PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])
92
+
93
+ chain = RetrievalQA.from_chain_type(
94
+ llm=llm,
95
+ chain_type="stuff",
96
+ retriever=final_retriever,
97
+ return_source_documents=True,
98
+ chain_type_kwargs={"prompt": PROMPT}
99
+ )
100
+
101
+ return chain