"""Streamlit RAG search app.

Extracts a bundled Chroma vector database (shipped as chroma_db.zip in the
Hugging Face Space image) on first run, loads CPU-only embeddings, and serves
a simple similarity-search UI over the persisted store.
"""

import os
import zipfile

import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

st.set_page_config(page_title="RAG Search", page_icon="🔍")

# --- 1️⃣ Define correct paths ---
ROOT_DIR = "/app"  # Hugging Face Space root
ZIP_PATH = os.path.join(ROOT_DIR, "chroma_db.zip")
DB_PATH = os.path.join(ROOT_DIR, "chroma_db")

# --- 2️⃣ Extract only once per app session ---
if "db_ready" not in st.session_state:
    if not os.path.exists(DB_PATH):
        if os.path.exists(ZIP_PATH):
            st.info("📦 Extracting Chroma DB for the first time...")
            # NOTE(review): extractall is unsafe on untrusted archives (zip
            # path traversal). Acceptable here only because the zip ships
            # inside the app image — confirm it is never user-supplied.
            with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
                zip_ref.extractall(DB_PATH)
            st.success("✅ Database extracted successfully!")
        else:
            st.error(f"❌ Database zip not found at: {ZIP_PATH}")
            st.stop()
    else:
        st.info("✅ Chroma DB already extracted.")
    st.session_state.db_ready = True  # mark done; skip this section on reruns


# --- 3️⃣ Load embeddings (CPU-only) ---
@st.cache_resource(show_spinner=False)
def load_embeddings():
    """Return the sentence-embedding model, cached once per process.

    Forces CPU inference so the app runs on hardware without a GPU.
    """
    return HuggingFaceEmbeddings(
        model_name="mixedbread-ai/mxbai-embed-large-v1",
        model_kwargs={"device": "cpu"},
    )


embeddings = load_embeddings()


# --- 4️⃣ Load Chroma DB (cached) ---
@st.cache_resource(show_spinner=False)
def load_vectordb():
    """Open the persisted Chroma store at DB_PATH, cached once per process."""
    return Chroma(persist_directory=DB_PATH, embedding_function=embeddings)


vectordb = load_vectordb()

# --- 5️⃣ Query input ---
query = st.text_input("Enter your query:", "What is SystemVerilog interface?")

if st.button("Search"):
    if not query.strip():
        # Fix: don't hit the vector store with a blank query — it either
        # errors or returns meaningless nearest neighbours.
        st.warning("⚠️ Please enter a query first.")
    else:
        st.write("🔎 Searching your local vector database...")
        results = vectordb.similarity_search(query, k=3)
        if results:
            for i, doc in enumerate(results):
                st.subheader(f"Result {i + 1}")
                st.write(doc.page_content)
                st.caption(doc.metadata)  # source/provenance stored at index time
                st.markdown("---")
        else:
            st.warning("⚠️ No results found.")