Spaces:
Sleeping
Sleeping
Commit
·
3298e37
1
Parent(s):
3bbb203
Load index
Browse files
- src/rag.py +29 -22
- src/streamlit_app.py +5 -18
src/rag.py
CHANGED
|
@@ -1,52 +1,59 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import pickle
|
| 3 |
import faiss
|
|
|
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
from groq import Groq
|
|
|
|
| 6 |
|
| 7 |
INDEX_DIR = "src/index"
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 12 |
if not GROQ_API_KEY:
|
| 13 |
-
raise RuntimeError("โ GROQ_API_KEY not set in
|
| 14 |
|
| 15 |
client = Groq(api_key=GROQ_API_KEY)
|
| 16 |
-
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 17 |
-
|
| 18 |
-
_index = None
|
| 19 |
-
_documents = None
|
| 20 |
|
| 21 |
|
| 22 |
def load_index():
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
if not os.path.exists(INDEX_FILE):
|
| 27 |
-
raise RuntimeError(
|
| 28 |
-
"โ FAISS index not found. Ingestion must run first."
|
| 29 |
-
)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
_documents = pickle.load(f)
|
| 34 |
|
| 35 |
-
return
|
| 36 |
|
| 37 |
|
| 38 |
-
def retrieve(query, top_k=
|
| 39 |
index, documents = load_index()
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
return
|
| 45 |
|
| 46 |
|
| 47 |
def ask_llm(query, contexts):
|
| 48 |
context_text = "\n\n".join(
|
| 49 |
-
f"[
|
| 50 |
for c in contexts
|
| 51 |
)
|
| 52 |
|
|
|
|
| 1 |
+
# src/rag.py
|
| 2 |
import os
|
| 3 |
import pickle
|
| 4 |
import faiss
|
| 5 |
+
import numpy as np
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
from groq import Groq
|
| 8 |
+
from ingest import build_index # ๐ important
|
| 9 |
|
| 10 |
INDEX_DIR = "src/index"
|
| 11 |
+
TOP_K = 4
|
| 12 |
+
|
| 13 |
+
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 14 |
|
| 15 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 16 |
if not GROQ_API_KEY:
|
| 17 |
+
raise RuntimeError("❌ GROQ_API_KEY not set in Hugging Face Secrets")
|
| 18 |
|
| 19 |
client = Groq(api_key=GROQ_API_KEY)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def load_index():
|
| 23 |
+
index_path = f"{INDEX_DIR}/faiss.index"
|
| 24 |
+
docs_path = f"{INDEX_DIR}/documents.pkl"
|
| 25 |
+
|
| 26 |
+
# 🔥 AUTO INGEST IF MISSING
|
| 27 |
+
if not os.path.exists(index_path) or not os.path.exists(docs_path):
|
| 28 |
+
print("⚠️ FAISS index missing. Running ingestion...")
|
| 29 |
+
build_index()
|
| 30 |
|
| 31 |
+
index = faiss.read_index(index_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
with open(docs_path, "rb") as f:
|
| 34 |
+
documents = pickle.load(f)
|
|
|
|
| 35 |
|
| 36 |
+
return index, documents
|
| 37 |
|
| 38 |
|
| 39 |
+
def retrieve(query, top_k=TOP_K):
|
| 40 |
index, documents = load_index()
|
| 41 |
|
| 42 |
+
query_embedding = embedder.encode([query]).astype("float32")
|
| 43 |
+
distances, indices = index.search(query_embedding, top_k)
|
| 44 |
+
|
| 45 |
+
results = []
|
| 46 |
+
for idx in indices[0]:
|
| 47 |
+
if idx == -1:
|
| 48 |
+
continue
|
| 49 |
+
results.append(documents[idx])
|
| 50 |
|
| 51 |
+
return results
|
| 52 |
|
| 53 |
|
| 54 |
def ask_llm(query, contexts):
|
| 55 |
context_text = "\n\n".join(
|
| 56 |
+
f"[p.{c['page']}]\n{c['text']}"
|
| 57 |
for c in contexts
|
| 58 |
)
|
| 59 |
|
src/streamlit_app.py
CHANGED
|
@@ -1,20 +1,11 @@
|
|
| 1 |
-
|
| 2 |
import streamlit as st
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
-
st.
|
| 7 |
-
st.title("🔮 Astrology Knowledge Bot")
|
| 8 |
-
|
| 9 |
-
# ๐ Run ingestion ONCE if index is missing
|
| 10 |
-
if not os.path.exists(INDEX_FILE):
|
| 11 |
-
with st.spinner("๐ Preparing knowledge base (first run only)..."):
|
| 12 |
-
import ingest # runs ingestion
|
| 13 |
-
st.success("✅ Knowledge base ready!")
|
| 14 |
-
|
| 15 |
-
from rag import retrieve, ask_llm # SAFE to import now
|
| 16 |
-
|
| 17 |
-
query = st.text_input("Ask your astrology question")
|
| 18 |
|
| 19 |
if query:
|
| 20 |
with st.spinner("Thinking..."):
|
|
@@ -23,7 +14,3 @@ if query:
|
|
| 23 |
|
| 24 |
st.markdown("### 🪐 Answer")
|
| 25 |
st.write(answer)
|
| 26 |
-
|
| 27 |
-
st.markdown("### ๐ References")
|
| 28 |
-
for c in contexts:
|
| 29 |
-
st.write(f"{c['source']} — page {c['page']}")
|
|
|
|
| 1 |
+
# src/streamlit_app.py
|
| 2 |
import streamlit as st
|
| 3 |
+
from rag import retrieve, ask_llm
|
| 4 |
|
| 5 |
+
st.set_page_config(page_title="OTT Astrology Bot", layout="wide")
|
| 6 |
+
st.title("🪐 OTT Astrology Assistant")
|
| 7 |
|
| 8 |
+
query = st.text_input("Ask your astrology question:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
if query:
|
| 11 |
with st.spinner("Thinking..."):
|
|
|
|
| 14 |
|
| 15 |
st.markdown("### 🪐 Answer")
|
| 16 |
st.write(answer)
|
|
|
|
|
|
|
|
|
|
|
|