OnlyTheTruth03 committed on
Commit
dc425e6
·
1 Parent(s): dfecce6

Move RAG modules into src and update Streamlit app

Browse files
ingest.py → src/ingest.py RENAMED
@@ -4,8 +4,8 @@ import pdfplumber
4
  from sentence_transformers import SentenceTransformer
5
  import faiss
6
 
7
- PDF_DIR = "data"
8
- IMAGE_DIR = "data/images"
9
  INDEX_DIR = "index"
10
 
11
  os.makedirs(IMAGE_DIR, exist_ok=True)
 
4
  from sentence_transformers import SentenceTransformer
5
  import faiss
6
 
7
+ PDF_DIR = "src/data"
8
+ IMAGE_DIR = "src/data/images"
9
  INDEX_DIR = "index"
10
 
11
  os.makedirs(IMAGE_DIR, exist_ok=True)
rag.py → src/rag.py RENAMED
@@ -2,22 +2,20 @@ import os
2
  import pickle
3
  import faiss
4
  import numpy as np
5
- from dotenv import load_dotenv
6
  from sentence_transformers import SentenceTransformer
7
  from groq import Groq
8
 
9
  # ---------------- CONFIG ----------------
10
- INDEX_DIR = "index"
11
  TOP_K = 4
12
 
13
- # ---------------- LOAD ENV ----------------
14
- load_dotenv()
15
 
16
- api_key = os.getenv("GROQ_API_KEY")
17
- if not api_key:
18
- raise ValueError("❌ GROQ_API_KEY not found in .env")
19
 
20
- client = Groq(api_key=api_key)
21
 
22
  # ---------------- LOAD INDEX ----------------
23
  index = faiss.read_index(f"{INDEX_DIR}/faiss.index")
@@ -25,15 +23,11 @@ index = faiss.read_index(f"{INDEX_DIR}/faiss.index")
25
  with open(f"{INDEX_DIR}/documents.pkl", "rb") as f:
26
  documents = pickle.load(f)
27
 
28
- # ---------------- EMBEDDINGS ----------------
29
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
30
 
31
-
32
- # ---------------- RETRIEVAL ----------------
33
  def retrieve(query, top_k=TOP_K):
34
- query_embedding = embedder.encode([query])
35
- query_embedding = np.array(query_embedding).astype("float32")
36
-
37
  distances, indices = index.search(query_embedding, top_k)
38
 
39
  results = []
@@ -44,7 +38,6 @@ def retrieve(query, top_k=TOP_K):
44
 
45
  return results
46
 
47
-
48
  # ---------------- LLM ----------------
49
  def ask_llm(query, contexts):
50
  context_text = "\n\n".join(
@@ -57,13 +50,7 @@ def ask_llm(query, contexts):
57
  messages=[
58
  {
59
  "role": "system",
60
- "content": """
61
- You are an astrology tutor.
62
- Explain concepts clearly and practically.
63
- If a chart or diagram from the reference material is useful,
64
- explicitly say: "Refer to the diagram below."
65
- Otherwise, do not mention diagrams.
66
- """
67
  },
68
  {
69
  "role": "user",
 
2
  import pickle
3
  import faiss
4
  import numpy as np
 
5
  from sentence_transformers import SentenceTransformer
6
  from groq import Groq
7
 
8
  # ---------------- CONFIG ----------------
9
+ INDEX_DIR = "src/index"
10
  TOP_K = 4
11
 
12
+ # ---------------- GROQ CLIENT ----------------
13
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
14
 
15
+ if not GROQ_API_KEY:
16
+ raise RuntimeError("❌ GROQ_API_KEY not set in Hugging Face Secrets")
 
17
 
18
+ client = Groq(api_key=GROQ_API_KEY)
19
 
20
  # ---------------- LOAD INDEX ----------------
21
  index = faiss.read_index(f"{INDEX_DIR}/faiss.index")
 
23
  with open(f"{INDEX_DIR}/documents.pkl", "rb") as f:
24
  documents = pickle.load(f)
25
 
 
26
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
27
 
28
+ # ---------------- RETRIEVE ----------------
 
29
  def retrieve(query, top_k=TOP_K):
30
+ query_embedding = embedder.encode([query]).astype("float32")
 
 
31
  distances, indices = index.search(query_embedding, top_k)
32
 
33
  results = []
 
38
 
39
  return results
40
 
 
41
  # ---------------- LLM ----------------
42
  def ask_llm(query, contexts):
43
  context_text = "\n\n".join(
 
50
  messages=[
51
  {
52
  "role": "system",
53
+ "content": "You are an astrology tutor. Answer clearly and practically."
 
 
 
 
 
 
54
  },
55
  {
56
  "role": "user",
src/streamlit_app.py CHANGED
@@ -1,40 +1,41 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from rag import retrieve, ask_llm
3
+ import os
4
+
5
+ st.set_page_config(
6
+ page_title="Only The Truth – Astrology Bot",
7
+ page_icon="πŸͺ",
8
+ layout="wide"
9
+ )
10
+
11
+ st.title("πŸͺ Only The Truth – Astrology Assistant")
12
+
13
+ st.markdown(
14
+ "Ask questions based on the astrology lessons and reference material."
15
+ )
16
+
17
+ # --- Input ---
18
+ query = st.text_input("Ask your question")
19
+
20
+ if query:
21
+ with st.spinner("πŸ” Searching knowledge base..."):
22
+ contexts = retrieve(query)
23
+
24
+ with st.spinner("🧠 Generating answer..."):
25
+ answer = ask_llm(query, contexts)
26
+
27
+ # --- Answer ---
28
+ st.subheader("πŸͺ Answer")
29
+ st.write(answer)
30
+
31
+ # --- References ---
32
+ if contexts:
33
+ st.subheader("πŸ“˜ References")
34
+ for c in contexts:
35
+ st.markdown(f"**{c['source']} β€” page {c['page']}**")
36
+
37
+ # Show images only if present and file exists
38
+ for img in c.get("images", []):
39
+ img_path = os.path.join("src", "data", "images", img)
40
+ if os.path.exists(img_path):
41
+ st.image(img_path, use_column_width=True)