Adoption committed on
Commit
d8dffb8
·
verified ·
1 Parent(s): b6d7af1

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +21 -10
src/app.py CHANGED
@@ -21,37 +21,48 @@ _CACHED_RETRIEVER = None
21
  def get_retriever():
22
  global _CACHED_RETRIEVER
23
  if _CACHED_RETRIEVER is not None: return _CACHED_RETRIEVER
24
-
 
25
  pinecone_key = os.environ.get("PINECONE_API_KEY") or st.secrets.get("PINECONE_API_KEY")
26
  google_key = os.environ.get("GOOGLE_API_KEY") or st.secrets.get("GOOGLE_API_KEY")
27
-
28
- if not pinecone_key or not google_key:
29
- raise ValueError("Missing API Keys.")
30
-
31
  os.environ["PINECONE_API_KEY"] = pinecone_key
32
  os.environ["GOOGLE_API_KEY"] = google_key
33
 
 
34
  embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
35
  vector_store = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
36
- vector_retriever = vector_store.as_retriever(search_kwargs={"k": 30})
 
 
 
 
 
 
 
 
37
 
 
38
  keyword_retriever = None
39
  if os.path.exists(CHUNKS_FILE):
40
  try:
41
  with open(CHUNKS_FILE, "rb") as f:
42
  chunks = pickle.load(f)
43
  keyword_retriever = BM25Retriever.from_documents(chunks)
44
- keyword_retriever.k = 30
45
- except: pass
 
46
 
 
47
  if keyword_retriever:
48
  final_retriever = EnsembleRetriever(
49
  retrievers=[vector_retriever, keyword_retriever],
50
- weights=[0.5, 0.5]
51
  )
52
  else:
53
  final_retriever = vector_retriever
54
-
55
  _CACHED_RETRIEVER = final_retriever
56
  return final_retriever
57
 
 
21
  def get_retriever():
22
  global _CACHED_RETRIEVER
23
  if _CACHED_RETRIEVER is not None: return _CACHED_RETRIEVER
24
+
25
+ # 1. Setup Keys
26
  pinecone_key = os.environ.get("PINECONE_API_KEY") or st.secrets.get("PINECONE_API_KEY")
27
  google_key = os.environ.get("GOOGLE_API_KEY") or st.secrets.get("GOOGLE_API_KEY")
28
+
29
+ if not pinecone_key or not google_key: raise ValueError("Missing API Keys.")
 
 
30
  os.environ["PINECONE_API_KEY"] = pinecone_key
31
  os.environ["GOOGLE_API_KEY"] = google_key
32
 
33
+ # 2. Setup Vector Store (Pinecone) with SCORE THRESHOLD
34
  embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
35
  vector_store = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
36
+
37
+ # CRITICAL PROFESSIONAL FIX:
38
+ # We set a "score_threshold" of 0.5.
39
+ # This means: "If the AI is less than 50% sure, DO NOT show the result."
40
+ # This kills the "Prayer Card B" noise immediately.
41
+ vector_retriever = vector_store.as_retriever(
42
+ search_type="similarity_score_threshold",
43
+ search_kwargs={"k": 20, "score_threshold": 0.5}
44
+ )
45
 
46
+ # 3. Setup Keyword Store (BM25)
47
  keyword_retriever = None
48
  if os.path.exists(CHUNKS_FILE):
49
  try:
50
  with open(CHUNKS_FILE, "rb") as f:
51
  chunks = pickle.load(f)
52
  keyword_retriever = BM25Retriever.from_documents(chunks)
53
+ keyword_retriever.k = 20
54
+ except Exception as e:
55
+ print(f"BM25 Error: {e}")
56
 
57
+ # 4. Create Hybrid Ensemble
58
  if keyword_retriever:
59
  final_retriever = EnsembleRetriever(
60
  retrievers=[vector_retriever, keyword_retriever],
61
+ weights=[0.4, 0.6] # 40% Vector (Concepts), 60% Keyword (Precision)
62
  )
63
  else:
64
  final_retriever = vector_retriever
65
+
66
  _CACHED_RETRIEVER = final_retriever
67
  return final_retriever
68