DOMMETI committed on
Commit
aa2cf91
·
verified ·
1 Parent(s): f31b2d0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +19 -43
src/streamlit_app.py CHANGED
@@ -6,45 +6,27 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
6
 
7
  st.set_page_config(page_title="RAG Search", page_icon="🔍")
8
 
9
- # ---------------------------------------------
10
- # 1️⃣ Locate your Chroma DB zip
11
- # ---------------------------------------------
12
- possible_paths = [
13
- "chroma_db.zip",
14
- os.path.join("src", "chroma_db.zip"),
15
- os.path.join(os.path.dirname(__file__), "chroma_db.zip"),
16
- os.path.join(os.path.dirname(__file__), "..", "chroma_db.zip"),
17
- "/app/chroma_db.zip",
18
- ]
19
 
20
- ZIP_PATH = None
21
- for p in possible_paths:
22
- if os.path.exists(p):
23
- ZIP_PATH = p
24
- break
25
-
26
- if ZIP_PATH is None:
27
- st.error("❌ Could not find 'chroma_db.zip'. Please ensure it's in your repo root.")
28
- st.stop()
29
-
30
- DB_PATH = "chroma_db"
31
-
32
- # ---------------------------------------------
33
- # 2️⃣ Extract only once per app session
34
- # ---------------------------------------------
35
  if "db_ready" not in st.session_state:
36
  if not os.path.exists(DB_PATH):
37
- st.info("📦 Extracting Chroma DB for the first time...")
38
- with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
39
- zip_ref.extractall(DB_PATH)
40
- st.success("✅ Database extracted successfully!")
 
 
 
 
41
  else:
42
- st.info("✅ Chroma DB folder already exists.")
43
- st.session_state.db_ready = True # Mark as ready
44
 
45
- # ---------------------------------------------
46
- # 3️⃣ Load embeddings (CPU safe)
47
- # ---------------------------------------------
48
  @st.cache_resource(show_spinner=False)
49
  def load_embeddings():
50
  return HuggingFaceEmbeddings(
@@ -54,24 +36,19 @@ def load_embeddings():
54
 
55
  embeddings = load_embeddings()
56
 
57
- # ---------------------------------------------
58
- # 4️⃣ Load Chroma DB (cached)
59
- # ---------------------------------------------
60
  @st.cache_resource(show_spinner=False)
61
  def load_vectordb():
62
  return Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
63
 
64
  vectordb = load_vectordb()
65
 
66
- # ---------------------------------------------
67
- # 5️⃣ Query + Display Results
68
- # ---------------------------------------------
69
  query = st.text_input("Enter your query:", "What is SystemVerilog interface?")
70
 
71
  if st.button("Search"):
72
  st.write("🔎 Searching your local vector database...")
73
  results = vectordb.similarity_search(query, k=3)
74
-
75
  if results:
76
  for i, doc in enumerate(results):
77
  st.subheader(f"Result {i+1}")
@@ -79,5 +56,4 @@ if st.button("Search"):
79
  st.caption(doc.metadata)
80
  st.markdown("---")
81
  else:
82
- st.warning("⚠️ No matching results found.")
83
-
 
6
 
7
  st.set_page_config(page_title="RAG Search", page_icon="🔍")
8
 
9
+ # --- 1️⃣ Define correct paths ---
10
+ ROOT_DIR = "/app" # Hugging Face Space root
11
+ ZIP_PATH = os.path.join(ROOT_DIR, "chroma_db.zip")
12
+ DB_PATH = os.path.join(ROOT_DIR, "chroma_db")
 
 
 
 
 
 
13
 
14
+ # --- 2️⃣ Extract only once per app session ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  if "db_ready" not in st.session_state:
16
  if not os.path.exists(DB_PATH):
17
+ if os.path.exists(ZIP_PATH):
18
+ st.info("📦 Extracting Chroma DB for the first time...")
19
+ with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
20
+ zip_ref.extractall(DB_PATH)
21
+ st.success("✅ Database extracted successfully!")
22
+ else:
23
+ st.error(f"❌ Database zip not found at: {ZIP_PATH}")
24
+ st.stop()
25
  else:
26
+ st.info("✅ Chroma DB already extracted.")
27
+ st.session_state.db_ready = True # mark done
28
 
29
+ # --- 3️⃣ Load embeddings (CPU-only) ---
 
 
30
  @st.cache_resource(show_spinner=False)
31
  def load_embeddings():
32
  return HuggingFaceEmbeddings(
 
36
 
37
  embeddings = load_embeddings()
38
 
39
+ # --- 4️⃣ Load Chroma DB (cached) ---
 
 
40
  @st.cache_resource(show_spinner=False)
41
  def load_vectordb():
42
  return Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
43
 
44
  vectordb = load_vectordb()
45
 
46
+ # --- 5️⃣ Query input ---
 
 
47
  query = st.text_input("Enter your query:", "What is SystemVerilog interface?")
48
 
49
  if st.button("Search"):
50
  st.write("🔎 Searching your local vector database...")
51
  results = vectordb.similarity_search(query, k=3)
 
52
  if results:
53
  for i, doc in enumerate(results):
54
  st.subheader(f"Result {i+1}")
 
56
  st.caption(doc.metadata)
57
  st.markdown("---")
58
  else:
59
+ st.warning("⚠️ No results found.")