OnlyTheTruth03 committed on
Commit
3298e37
·
1 Parent(s): 3bbb203

Load index

Browse files
Files changed (2) hide show
  1. src/rag.py +29 -22
  2. src/streamlit_app.py +5 -18
src/rag.py CHANGED
@@ -1,52 +1,59 @@
 
1
  import os
2
  import pickle
3
  import faiss
 
4
  from sentence_transformers import SentenceTransformer
5
  from groq import Groq
 
6
 
7
  INDEX_DIR = "src/index"
8
- INDEX_FILE = f"{INDEX_DIR}/faiss.index"
9
- DOC_FILE = f"{INDEX_DIR}/documents.pkl"
 
10
 
11
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
  if not GROQ_API_KEY:
13
- raise RuntimeError("❌ GROQ_API_KEY not set in HF Secrets")
14
 
15
  client = Groq(api_key=GROQ_API_KEY)
16
- embedder = SentenceTransformer("all-MiniLM-L6-v2")
17
-
18
- _index = None
19
- _documents = None
20
 
21
 
22
  def load_index():
23
- global _index, _documents
 
 
 
 
 
 
24
 
25
- if _index is None or _documents is None:
26
- if not os.path.exists(INDEX_FILE):
27
- raise RuntimeError(
28
- "❌ FAISS index not found. Ingestion must run first."
29
- )
30
 
31
- _index = faiss.read_index(INDEX_FILE)
32
- with open(DOC_FILE, "rb") as f:
33
- _documents = pickle.load(f)
34
 
35
- return _index, _documents
36
 
37
 
38
- def retrieve(query, top_k=4):
39
  index, documents = load_index()
40
 
41
- q_emb = embedder.encode([query]).astype("float32")
42
- _, indices = index.search(q_emb, top_k)
 
 
 
 
 
 
43
 
44
- return [documents[i] for i in indices[0] if i != -1]
45
 
46
 
47
  def ask_llm(query, contexts):
48
  context_text = "\n\n".join(
49
- f"[{c['source']} p.{c['page']}]\n{c['text']}"
50
  for c in contexts
51
  )
52
 
 
1
+ # src/rag.py
2
  import os
3
  import pickle
4
  import faiss
5
+ import numpy as np
6
  from sentence_transformers import SentenceTransformer
7
  from groq import Groq
8
+ from ingest import build_index # 👈 important
9
 
10
  INDEX_DIR = "src/index"
11
+ TOP_K = 4
12
+
13
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
14
 
15
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
16
  if not GROQ_API_KEY:
17
+ raise RuntimeError("❌ GROQ_API_KEY not set in Hugging Face Secrets")
18
 
19
  client = Groq(api_key=GROQ_API_KEY)
 
 
 
 
20
 
21
 
22
  def load_index():
23
+ index_path = f"{INDEX_DIR}/faiss.index"
24
+ docs_path = f"{INDEX_DIR}/documents.pkl"
25
+
26
+ # 🔥 AUTO INGEST IF MISSING
27
+ if not os.path.exists(index_path) or not os.path.exists(docs_path):
28
+ print("⚠️ FAISS index missing. Running ingestion...")
29
+ build_index()
30
 
31
+ index = faiss.read_index(index_path)
 
 
 
 
32
 
33
+ with open(docs_path, "rb") as f:
34
+ documents = pickle.load(f)
 
35
 
36
+ return index, documents
37
 
38
 
39
+ def retrieve(query, top_k=TOP_K):
40
  index, documents = load_index()
41
 
42
+ query_embedding = embedder.encode([query]).astype("float32")
43
+ distances, indices = index.search(query_embedding, top_k)
44
+
45
+ results = []
46
+ for idx in indices[0]:
47
+ if idx == -1:
48
+ continue
49
+ results.append(documents[idx])
50
 
51
+ return results
52
 
53
 
54
  def ask_llm(query, contexts):
55
  context_text = "\n\n".join(
56
+ f"[p.{c['page']}]\n{c['text']}"
57
  for c in contexts
58
  )
59
 
src/streamlit_app.py CHANGED
@@ -1,20 +1,11 @@
1
- import os
2
  import streamlit as st
 
3
 
4
- INDEX_FILE = "src/index/faiss.index"
 
5
 
6
- st.set_page_config(page_title="Astrology RAG Bot", layout="wide")
7
- st.title("🔮 Astrology Knowledge Bot")
8
-
9
- # 🚀 Run ingestion ONCE if index is missing
10
- if not os.path.exists(INDEX_FILE):
11
- with st.spinner("📚 Preparing knowledge base (first run only)..."):
12
- import ingest # runs ingestion
13
- st.success("✅ Knowledge base ready!")
14
-
15
- from rag import retrieve, ask_llm # SAFE to import now
16
-
17
- query = st.text_input("Ask your astrology question")
18
 
19
  if query:
20
  with st.spinner("Thinking..."):
@@ -23,7 +14,3 @@ if query:
23
 
24
  st.markdown("### 🪐 Answer")
25
  st.write(answer)
26
-
27
- st.markdown("### 📘 References")
28
- for c in contexts:
29
- st.write(f"{c['source']} — page {c['page']}")
 
1
+ # src/streamlit_app.py
2
  import streamlit as st
3
+ from rag import retrieve, ask_llm
4
 
5
+ st.set_page_config(page_title="OTT Astrology Bot", layout="wide")
6
+ st.title("🪐 OTT Astrology Assistant")
7
 
8
+ query = st.text_input("Ask your astrology question:")
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  if query:
11
  with st.spinner("Thinking..."):
 
14
 
15
  st.markdown("### 🪐 Answer")
16
  st.write(answer)