File size: 2,024 Bytes
2721138
c4deb04
 
30dcc56
5ec5100
7189b3c
 
 
aa2cf91
 
 
 
f31b2d0
aa2cf91
f31b2d0
 
aa2cf91
 
 
 
 
 
 
 
f31b2d0
aa2cf91
 
f31b2d0
aa2cf91
f31b2d0
 
 
 
 
 
 
 
 
aa2cf91
f31b2d0
 
 
 
 
 
aa2cf91
c4deb04
7189b3c
 
 
c4deb04
7189b3c
 
 
 
 
 
 
aa2cf91
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import shutil
import tempfile
import zipfile

import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

st.set_page_config(page_title="RAG Search", page_icon="🔍")

# --- 1️⃣ Define correct paths ---
ROOT_DIR = "/app"  # Hugging Face Space root
ZIP_PATH = os.path.join(ROOT_DIR, "chroma_db.zip")
DB_PATH = os.path.join(ROOT_DIR, "chroma_db")


def _extract_db(zip_path, dest_dir):
    """Unpack *zip_path* into *dest_dir*, creating *dest_dir* only on success.

    The archive is unpacked into a scratch directory next to *dest_dir* and
    renamed into place afterwards, so an interrupted or failed extraction
    never leaves a half-populated *dest_dir* that a later run would mistake
    for a complete database.

    Raises:
        zipfile.BadZipFile: if the archive is not a valid zip file.
        OSError: if reading, writing, or the final rename fails.
    """
    # Staging in the same parent directory keeps the final rename on a single
    # filesystem.
    staging_dir = tempfile.mkdtemp(
        prefix="chroma_db_", dir=os.path.dirname(dest_dir)
    )
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            # NOTE(review): assumes the Chroma files sit at the archive root;
            # if the zip wraps them in a top-level folder, dest_dir will hold
            # that folder instead -- confirm against the actual archive.
            zip_ref.extractall(staging_dir)
        os.replace(staging_dir, dest_dir)
    finally:
        # After a successful rename the staging path no longer exists and this
        # is a no-op; after a failure it removes the partial extraction.
        shutil.rmtree(staging_dir, ignore_errors=True)


# --- 2️⃣ Extract only once per app session ---
if "db_ready" not in st.session_state:
    if os.path.exists(DB_PATH):
        st.info("✅ Chroma DB already extracted.")
    elif not os.path.exists(ZIP_PATH):
        st.error(f"❌ Database zip not found at: {ZIP_PATH}")
        st.stop()
    else:
        st.info("📦 Extracting Chroma DB for the first time...")
        try:
            _extract_db(ZIP_PATH, DB_PATH)
        except (zipfile.BadZipFile, OSError) as exc:
            # If another session completed the same extraction first, the
            # rename fails but the database is in place and usable.
            if not os.path.isdir(DB_PATH):
                st.error(f"❌ Failed to extract database: {exc}")
                st.stop()
        st.success("✅ Database extracted successfully!")
    st.session_state.db_ready = True  # mark done

# --- 3️⃣ Load embeddings (CPU-only) ---
@st.cache_resource(show_spinner=False)
def load_embeddings():
    """Create the HuggingFace embedding model, configured to run on CPU.

    Decorated with ``st.cache_resource`` (spinner disabled) so the model
    object is reused rather than rebuilt on each script rerun.
    """
    cpu_only = {"device": "cpu"}
    embedding_model = HuggingFaceEmbeddings(
        model_kwargs=cpu_only,
        model_name="mixedbread-ai/mxbai-embed-large-v1",
    )
    return embedding_model


embeddings = load_embeddings()

# --- 4️⃣ Load Chroma DB (cached) ---
@st.cache_resource(show_spinner=False)
def load_vectordb():
    """Open the Chroma store persisted at ``DB_PATH``.

    Uses the module-level ``embeddings`` object as the embedding function.
    Decorated with ``st.cache_resource`` (spinner disabled) so the store
    object is reused rather than reopened on each script rerun.
    """
    store = Chroma(
        embedding_function=embeddings,
        persist_directory=DB_PATH,
    )
    return store


vectordb = load_vectordb()

# --- 5️⃣ Query input ---
query = st.text_input("Enter your query:", "What is SystemVerilog interface?")

if st.button("Search"):
    # Surrounding whitespace carries no meaning for the search, and a blank
    # query would otherwise still be sent to the vector store.
    cleaned_query = query.strip()
    if not cleaned_query:
        st.warning("⚠️ Please enter a query before searching.")
    else:
        st.write("🔎 Searching your local vector database...")
        # Fetch the three closest documents from the store.
        results = vectordb.similarity_search(cleaned_query, k=3)
        if results:
            for rank, doc in enumerate(results, start=1):
                st.subheader(f"Result {rank}")
                st.write(doc.page_content)
                st.caption(doc.metadata)
                st.markdown("---")
        else:
            st.warning("⚠️ No results found.")