Spaces:
Sleeping
Sleeping
import os
import shutil
import zipfile

import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# --- 1️⃣ Define correct paths ---
# Both the zipped archive and the extracted database live directly under the
# Space root directory.
ROOT_DIR = "/app"  # Hugging Face Space root
DB_PATH = os.path.join(ROOT_DIR, "chroma_db")
ZIP_PATH = os.path.join(ROOT_DIR, "chroma_db.zip")

# Page configuration is issued before any other Streamlit command.
st.set_page_config(page_title="RAG Search", page_icon="🔍")
# --- 2️⃣ Extract only once per app session ---
# The session-state flag keeps the filesystem checks (and their status banners)
# from repeating on every rerun within the same browser session.
if "db_ready" not in st.session_state:
    if os.path.exists(DB_PATH):
        st.info("✅ Chroma DB already extracted.")
    elif not os.path.exists(ZIP_PATH):
        st.error(f"❌ Database zip not found at: {ZIP_PATH}")
        st.stop()
    else:
        st.info("📦 Extracting Chroma DB for the first time...")
        # Extract into a staging directory and move it into place afterwards,
        # so an interrupted extraction never leaves a half-populated DB_PATH
        # that later runs would mistake for a complete database.
        staging_path = DB_PATH + ".partial"
        shutil.rmtree(staging_path, ignore_errors=True)  # drop stale leftovers
        try:
            with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
                zip_ref.extractall(staging_path)
            os.replace(staging_path, DB_PATH)
        except (zipfile.BadZipFile, OSError) as exc:
            shutil.rmtree(staging_path, ignore_errors=True)
            # Another session may have completed the extraction in the
            # meantime; only treat the failure as fatal if no database
            # directory exists at all.
            if not os.path.exists(DB_PATH):
                st.error(f"❌ Failed to extract database: {exc}")
                st.stop()
        else:
            st.success("✅ Database extracted successfully!")
    st.session_state.db_ready = True  # mark done
# --- 3️⃣ Load embeddings (CPU-only) ---
@st.cache_resource
def load_embeddings():
    """Return the HuggingFace embedding model, configured to run on the CPU.

    Streamlit re-executes the whole script on every user interaction, so the
    model is cached as a shared resource instead of being rebuilt on each
    rerun.
    """
    return HuggingFaceEmbeddings(
        model_name="mixedbread-ai/mxbai-embed-large-v1",
        model_kwargs={"device": "cpu"},
    )


embeddings = load_embeddings()
# --- 4️⃣ Load Chroma DB (cached) ---
@st.cache_resource
def load_vectordb():
    """Open the persisted Chroma store at ``DB_PATH`` with the shared embeddings.

    Cached as a Streamlit resource so the store is opened once and reused
    across script reruns rather than being re-created on every interaction.
    """
    return Chroma(persist_directory=DB_PATH, embedding_function=embeddings)


vectordb = load_vectordb()
# --- 5️⃣ Query input ---
query = st.text_input("Enter your query:", "What is SystemVerilog interface?")
if st.button("Search"):
    if not query.strip():
        # A blank query would still be embedded and matched against the
        # store, returning arbitrary documents; ask for real input instead.
        st.warning("⚠️ Please enter a query before searching.")
    else:
        st.write("🔎 Searching your local vector database...")
        results = vectordb.similarity_search(query, k=3)
        if not results:
            st.warning("⚠️ No results found.")
        # Show each hit with its text, its metadata and a separator rule.
        for rank, doc in enumerate(results, start=1):
            st.subheader(f"Result {rank}")
            st.write(doc.page_content)
            st.caption(doc.metadata)
            st.markdown("---")