OnlyTheTruth03 committed on
Commit
46670de
·
1 Parent(s): baad26a

Fix Faiss lazy loading and ingestion order

Browse files
Files changed (2) hide show
  1. src/rag.py +27 -7
  2. src/streamlit_app.py +10 -3
src/rag.py CHANGED
@@ -1,30 +1,49 @@
1
  import os
2
  import pickle
3
  import faiss
4
- import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
 
8
  INDEX_DIR = "src/index"
9
- TOP_K = 4
 
10
 
11
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
  if not GROQ_API_KEY:
13
  raise RuntimeError("❌ GROQ_API_KEY not set in HF Secrets")
14
 
15
  client = Groq(api_key=GROQ_API_KEY)
16
-
17
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
18
 
19
- index = faiss.read_index(f"{INDEX_DIR}/faiss.index")
20
- with open(f"{INDEX_DIR}/documents.pkl", "rb") as f:
21
- documents = pickle.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- def retrieve(query, top_k=TOP_K):
24
  q_emb = embedder.encode([query]).astype("float32")
25
  _, indices = index.search(q_emb, top_k)
 
26
  return [documents[i] for i in indices[0] if i != -1]
27
 
 
28
  def ask_llm(query, contexts):
29
  context_text = "\n\n".join(
30
  f"[{c['source']} p.{c['page']}]\n{c['text']}"
@@ -39,4 +58,5 @@ def ask_llm(query, contexts):
39
  ],
40
  temperature=0.2
41
  )
 
42
  return response.choices[0].message.content
 
1
  import os
2
  import pickle
3
  import faiss
 
4
  from sentence_transformers import SentenceTransformer
5
  from groq import Groq
6
 
7
  INDEX_DIR = "src/index"
8
+ INDEX_FILE = f"{INDEX_DIR}/faiss.index"
9
+ DOC_FILE = f"{INDEX_DIR}/documents.pkl"
10
 
11
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
  if not GROQ_API_KEY:
13
  raise RuntimeError("❌ GROQ_API_KEY not set in HF Secrets")
14
 
15
  client = Groq(api_key=GROQ_API_KEY)
 
16
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
17
 
18
+ _index = None
19
+ _documents = None
20
+
21
+
22
+ def load_index():
23
+ global _index, _documents
24
+
25
+ if _index is None or _documents is None:
26
+ if not os.path.exists(INDEX_FILE):
27
+ raise RuntimeError(
28
+ "❌ FAISS index not found. Ingestion must run first."
29
+ )
30
+
31
+ _index = faiss.read_index(INDEX_FILE)
32
+ with open(DOC_FILE, "rb") as f:
33
+ _documents = pickle.load(f)
34
+
35
+ return _index, _documents
36
+
37
+
38
+ def retrieve(query, top_k=4):
39
+ index, documents = load_index()
40
 
 
41
  q_emb = embedder.encode([query]).astype("float32")
42
  _, indices = index.search(q_emb, top_k)
43
+
44
  return [documents[i] for i in indices[0] if i != -1]
45
 
46
+
47
  def ask_llm(query, contexts):
48
  context_text = "\n\n".join(
49
  f"[{c['source']} p.{c['page']}]\n{c['text']}"
 
58
  ],
59
  temperature=0.2
60
  )
61
+
62
  return response.choices[0].message.content
src/streamlit_app.py CHANGED
@@ -1,12 +1,19 @@
1
  import os
2
  import streamlit as st
3
- from rag import retrieve, ask_llm
4
- import ingest
5
 
6
- st.set_page_config(page_title="Astrology RAG Bot", layout="wide")
7
 
 
8
  st.title("🔮 Astrology Knowledge Bot")
9
 
 
 
 
 
 
 
 
 
10
  query = st.text_input("Ask your astrology question")
11
 
12
  if query:
 
1
  import os
2
  import streamlit as st
 
 
3
 
4
+ INDEX_FILE = "src/index/faiss.index"
5
 
6
+ st.set_page_config(page_title="Astrology RAG Bot", layout="wide")
7
  st.title("🔮 Astrology Knowledge Bot")
8
 
9
+ # 🚀 Run ingestion ONCE if index is missing
10
+ if not os.path.exists(INDEX_FILE):
11
+ with st.spinner("📚 Preparing knowledge base (first run only)..."):
12
+ import ingest # runs ingestion
13
+ st.success("✅ Knowledge base ready!")
14
+
15
+ from rag import retrieve, ask_llm # SAFE to import now
16
+
17
  query = st.text_input("Ask your astrology question")
18
 
19
  if query: