Spaces:

Zwounds
/

LibraryRAG

Sleeping

App Files Community

Zwounds committed on Mar 31, 2025

Commit

cab221e

verified ·

1 Parent(s): b10473a

Upload app.py

Browse files

Files changed (1) hide show

app.py +144 -195

app.py CHANGED Viewed

@@ -7,46 +7,45 @@ import os
 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
 import numpy as np
-import time # Added for embedding delay/timing
-from tqdm import tqdm # Added for embedding progress
-# Import ChromaDB's helper for Sentence Transformers
-import chromadb.utils.embedding_functions as embedding_functions
-# from sentence_transformers import CrossEncoder # Keep if re-ranking might be used
-# --- Page Config (Must be first Streamlit command) ---
-st.set_page_config(layout="wide")
-# ---
 # --- Configuration ---
-DB_PATH = "./chroma_db"
-COLLECTION_NAME = "libguides_content" # Must match the embedding script
-LOCAL_EMBEDDING_MODEL = 'BAAI/bge-m3' # Local model for ChromaDB's function
 HF_GENERATION_MODEL = "google/gemma-3-27b-it" # HF model for generation
-INPUT_FILE = 'extracted_content.jsonl' # Source data for embedding
-EMBEDDING_BATCH_SIZE = 100 # Batch size for adding docs to ChromaDB
-# CROSS_ENCODER_MODEL_NAME = 'cross-encoder/ms-marco-MiniLM-L-6-v2' # Model for re-ranking (DISABLED)
-TOP_K = 10 # Number of *final* unique chunks to send to LLM
-INITIAL_N_RESULTS = 50 # Number of candidates from initial vector search
-API_RETRY_DELAY = 2 # Delay for generation API if needed
-MAX_NEW_TOKENS = 512 # Max tokens for HF text generation
 # ---
 # Setup logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stderr)
 # --- Load API Key and Initialize HF Generation Client ---
-# Wrap client initialization in a cached function to avoid re-initializing on every interaction
 @st.cache_resource
 def initialize_hf_client():
     generation_client_instance = None
     try:
         load_dotenv()
-        # Read HF_TOKEN from environment variable first (for Spaces secrets), fallback to .env
         HF_TOKEN = os.getenv('HF_TOKEN') or os.getenv('HUGGING_FACE_HUB_TOKEN')
         if not HF_TOKEN:
-            logging.error("HF_TOKEN or HUGGING_FACE_HUB_TOKEN not found in environment variables or .env file.")
-            st.error("🔴 Hugging Face Token not found. Please set it as a Space secret named HF_TOKEN or in the .env file as HUGGING_FACE_HUB_TOKEN.")
-            st.stop() # Stop execution if token is missing
         else:
             generation_client_instance = InferenceClient(model=HF_GENERATION_MODEL, token=HF_TOKEN)
             logging.info(f"Initialized HF Inference Client for generation ({HF_GENERATION_MODEL}).")
@@ -54,18 +53,16 @@ def initialize_hf_client():
     except Exception as e:
         logging.exception("Error initializing Hugging Face Inference Client for generation.")
         st.error(f"🔴 Error initializing Hugging Face Inference Client: {e}")
-        st.stop() # Stop execution on error
-    return None # Should not be reached if st.stop() works
 generation_client = initialize_hf_client()
 # ---
-# --- Embedding Function Definition (Needed for DB creation) ---
-# This part is similar to embed_and_store_local_chroma_ef.py
-# Cache the embedding function definition as well
 @st.cache_resource
-def get_embedding_function():
-    logging.info(f"Defining embedding function for model: {LOCAL_EMBEDDING_MODEL}")
     try:
          import torch
          device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -73,175 +70,134 @@ def get_embedding_function():
     except ImportError:
          device = 'cpu'
          logging.info("Torch not found, using device: cpu")
     try:
-        ef = embedding_functions.SentenceTransformerEmbeddingFunction(
-            model_name=LOCAL_EMBEDDING_MODEL,
-            device=device,
-            trust_remote_code=True
-        )
-        logging.info("Embedding function defined.")
-        return ef
     except Exception as e:
-        st.error(f"Failed to initialize embedding function ({LOCAL_EMBEDDING_MODEL}): {e}")
-        logging.exception(f"Failed to initialize embedding function: {e}")
-        return None
-# --- Function to Create and Populate DB ---
-# This integrates logic from embed_and_store_local_chroma_ef.py
-# Use a simple flag file to check if initialization was done in this session/container lifetime
-INIT_FLAG_FILE = os.path.join(DB_PATH, ".initialized")
-def initialize_database():
-    # Check if DB exists and is initialized (using flag file for ephemeral systems)
-    if os.path.exists(INIT_FLAG_FILE):
-        logging.info("Initialization flag file found. Assuming DB is ready.")
-        return True
-    # Check if DB path exists but maybe wasn't fully initialized
-    db_exists = os.path.exists(DB_PATH) and os.listdir(DB_PATH)
-    if db_exists and not os.path.exists(INIT_FLAG_FILE):
-        logging.warning("DB path exists but initialization flag not found. Re-initializing.")
-        # Optionally, could try loading collection here and return True if successful
-        # For simplicity, we'll just re-initialize fully if flag is missing
-    st.warning(f"ChromaDB not found or needs initialization at {DB_PATH}. Initializing and embedding data... This may take a while.")
-    logging.info(f"Database not found or needs initialization. Running embedding process...")
     try:
-        ef = get_embedding_function()
-        if not ef: return False # Stop if embedding function failed
-        # Load Data
-        logging.info(f"Loading data from {INPUT_FILE}...")
-        if not os.path.exists(INPUT_FILE):
-             st.error(f"Source data file '{INPUT_FILE}' not found. Cannot create database.")
-             logging.error(f"Source data file '{INPUT_FILE}' not found.")
-             return False
-        documents = []
-        metadatas = []
-        ids = []
-        with open(INPUT_FILE, 'r', encoding='utf-8') as f:
-            progress_bar = st.progress(0, text="Loading data...")
-            lines = f.readlines()
-            for i, line in enumerate(lines):
-                try:
-                    data = json.loads(line)
-                    text = data.get('text')
-                    if not text: continue
-                    documents.append(text)
-                    metadata = data.get('metadata', {})
-                    if not isinstance(metadata, dict): metadata = {}
-                    metadatas.append(metadata)
-                    ids.append(f"doc_{i}")
-                except Exception as e:
-                     logging.warning(f"Error processing line {i+1}: {e}")
-                progress_bar.progress((i + 1) / len(lines), text=f"Loading data... {i+1}/{len(lines)}")
-        progress_bar.empty()
-        logging.info(f"Loaded {len(documents)} valid documents.")
-        if not documents:
-            st.error("No valid documents loaded from source file.")
-            logging.error("No valid documents loaded.")
-            return False
-        # Setup Vector DB
-        logging.info(f"Initializing ChromaDB client at path: {DB_PATH}")
-        chroma_client = chromadb.PersistentClient(path=DB_PATH)
         try:
             chroma_client.delete_collection(name=COLLECTION_NAME)
-            logging.info(f"Deleted existing collection (if any): {COLLECTION_NAME}")
-        except Exception: pass
-        logging.info(f"Creating new collection '{COLLECTION_NAME}' with embedding function.")
         collection = chroma_client.create_collection(
             name=COLLECTION_NAME,
-            embedding_function=ef,
-            metadata={"hnsw:space": "cosine"}
         )
-        logging.info(f"Created new collection '{COLLECTION_NAME}'.")
-        # Add Documents in Batches
-        logging.info(f"Adding documents to ChromaDB (ChromaDB will embed)...")
         start_time = time.time()
-        total_added = 0
         error_count = 0
-        num_batches = (len(documents) + EMBEDDING_BATCH_SIZE - 1) // EMBEDDING_BATCH_SIZE
-        progress_bar = st.progress(0, text="Embedding documents (this takes time)...")
         for i in range(num_batches):
-            start_idx = i * EMBEDDING_BATCH_SIZE
-            end_idx = start_idx + EMBEDDING_BATCH_SIZE
-            batch_docs = documents[start_idx:end_idx]
-            batch_metadatas = metadatas[start_idx:end_idx]
-            batch_ids = ids[start_idx:end_idx]
             try:
-                collection.add(documents=batch_docs, metadatas=batch_metadatas, ids=batch_ids)
-                total_added += len(batch_ids)
             except Exception as e:
-                logging.error(f"Error adding batch starting at index {start_idx}: {e}")
                 error_count += 1
-            progress_bar.progress((i + 1) / num_batches, text=f"Embedding documents... Batch {i+1}/{num_batches}")
         progress_bar.empty()
         end_time = time.time()
-        logging.info(f"Finished adding documents process.")
-        logging.info(f"Successfully added {total_added} documents to ChromaDB.")
         if error_count > 0:
-            logging.warning(f"Encountered errors in {error_count} batches during add.")
-        logging.info(f"Document adding took {end_time - start_time:.2f} seconds.")
-        # Create flag file on success
-        os.makedirs(DB_PATH, exist_ok=True)
-        with open(INIT_FLAG_FILE, 'w') as f:
-            f.write('initialized')
-        st.success(f"Database initialized successfully with {total_added} documents.")
-        return True
-    except Exception as e:
-        st.error(f"Failed to initialize database: {e}")
-        logging.exception(f"An unexpected error occurred during database initialization: {e}")
-        return False
-# --- Caching Functions ---
-# Modified to depend on successful DB initialization
-@st.cache_resource
-def load_chromadb_collection():
-    if not initialize_database():
-         st.error("Database initialization failed. Cannot load collection.")
-         st.stop()
-    logging.info(f"Attempting to load ChromaDB collection: {COLLECTION_NAME}")
-    try:
-        _client = chromadb.PersistentClient(path=DB_PATH)
-        collection = _client.get_collection(name=COLLECTION_NAME)
-        logging.info(f"Collection '{COLLECTION_NAME}' loaded successfully.")
         return collection
     except Exception as e:
-        st.error(f"Failed to load ChromaDB collection '{COLLECTION_NAME}' after initialization attempt: {e}")
-        logging.error(f"Failed to load ChromaDB collection after initialization attempt: {e}")
-        return None
 # --- Helper Functions ---
 def query_hf_inference(prompt, client_instance=None, model_name=HF_GENERATION_MODEL):
     """Sends the prompt to the HF Inference API using the initialized client."""
     if not client_instance:
         client_instance = generation_client
     if not client_instance:
          logging.error("HF Inference client not initialized in query_hf_inference.")
          return "Error: HF Inference client failed to initialize."
     try:
-        response_text = client_instance.text_generation(
-            prompt,
-            max_new_tokens=MAX_NEW_TOKENS,
-        )
         if not response_text:
              logging.warning(f"Received empty response from HF Inference API ({model_name}) for prompt: {prompt[:100]}...")
              return "Error: Received empty response from generation model."
@@ -271,7 +227,6 @@ commencement schedule
 User Query: "{query}"
 Output:"""
     logging.info(f"Generating query variations for: {query} using {model_name}")
     try:
         response = llm_func(prompt, model_name=model_name)
@@ -304,18 +259,16 @@ Answer:"""
     return prompt
 # --- Streamlit App UI ---
-# st.set_page_config(layout="wide") # MOVED TO TOP
-st.title("📚 Ask the Library Guides (Local Embed + HF Gen)") # Updated title
-# Load resources (this now includes the initialization check)
-collection = load_chromadb_collection()
 # User input (only proceed if collection loaded)
 if collection:
     query = st.text_area("Enter your question:", height=100)
 else:
-    st.error("Application cannot proceed: Failed to load or initialize ChromaDB collection.")
-    st.stop() # Stop if collection failed to load
 # --- Routing Prompt Definition ---
 ROUTING_PROMPT_TEMPLATE = """You are a query routing assistant for a library chatbot. Your task is to classify the user's query into one of the following categories based on its intent:
@@ -387,22 +340,7 @@ if collection and st.button("Ask"):
             if route_decision == "HOURS":
                 st.info("You can find the current library hours here: [https://gc-cuny.libcal.com/hours](https://gc-cuny.libcal.com/hours)")
                 st.stop()
-            elif route_decision == "CATALOG_SEARCH":
-                catalog_url = "https://cuny-gc.primo.exlibrisgroup.com/discovery/search?vid=01CUNY_GC:CUNY_GC"
-                st.info(f"To check for specific books, journals, or articles, please search the library catalog directly here: [{catalog_url}]({catalog_url})")
-                st.stop()
-            elif route_decision == "ILL_REQUEST":
-                 ill_url = "https://ezproxy.gc.cuny.edu/login?url=https://gc-cuny.illiad.oclc.org/illiad/illiad.dll"
-                 st.info(f"For Interlibrary Loan requests or questions, please use the ILL system here: [{ill_url}]({ill_url})")
-                 st.stop()
-            elif route_decision == "ACCOUNT_INFO":
-                 account_url = "https://cuny-gc.primo.exlibrisgroup.com/discovery/account?vid=01CUNY_GC:CUNY_GC&section=overview"
-                 st.info(f"To manage your library account (renewals, fines, etc.), please log in here: [{account_url}]({account_url})")
-                 st.stop()
-            elif route_decision == "TECH_SUPPORT":
-                 support_url = "https://docs.google.com/forms/d/e/1FAIpQLSdF3a-Au-jIYRDN-mxU3MpZSANQJWFx0VEN2if01iRucIXsZA/viewform"
-                 st.info(f"To report a problem with accessing e-resources or other technical issues, please use this form: [{support_url}]({support_url})")
-                 st.stop()
             elif route_decision == "EVENTS_CALENDAR":
                 events_url = "https://gc-cuny.libcal.com/calendar?cid=15537&t=d&d=0000-00-00&cal=15537&inc=0"
                 st.info(f"You can find information about upcoming library events and workshops on the calendar here: [{events_url}]({events_url})")
@@ -417,16 +355,26 @@ if collection and st.button("Ask"):
             all_queries = [query] + query_variations
             logging.info(f"--- DIAGNOSTIC: All queries for search: {all_queries}")
-            # 2. Vector Search (ChromaDB handles query embedding internally)
             vector_results_ids = []
             context_chunks = []
             context_metadata_list = []
             try:
-                logging.info(f"Performing vector search for {len(all_queries)} queries (ChromaDB will embed)...")
-                # Query ChromaDB using query_texts - it uses the collection's embedding function
                 vector_results = collection.query(
-                    query_texts=all_queries, # Pass texts, not embeddings
                     n_results=INITIAL_N_RESULTS,
                     include=['documents', 'metadatas', 'distances']
                 )
@@ -491,7 +439,7 @@ if collection and st.button("Ask"):
                 logging.exception("Vector search/selection failed.")
                 context_chunks = []
-            # 3. Generate Final Prompt based on Route
             if route_decision == "RESEARCH_QUERY":
                 logging.info("Using RESEARCH_QUERY prompt template.")
                 final_prompt = RESEARCH_QUERY_PROMPT_TEMPLATE.format(context_str="\n\n".join(context_chunks), query=query)
@@ -499,14 +447,14 @@ if collection and st.button("Ask"):
                 logging.info("Using standard RAG prompt template.")
                 final_prompt = generate_prompt(query, context_chunks)
-            # 4. Query HF Inference API LLM
             logging.info(f"Sending final prompt to HF Inference API model: {HF_GENERATION_MODEL}...")
             answer = query_hf_inference(final_prompt)
             logging.info(f"Received answer from HF Inference API: {answer[:100]}...")
             if answer.startswith("Error:"):
                  st.error(f"Answer generation failed: {answer}")
-        # 5. Display results
         st.subheader("Answer:")
         st.markdown(answer)
@@ -527,13 +475,14 @@ if collection and st.button("Ask"):
 st.sidebar.header("How to Use")
 st.sidebar.info(
     "1. Ensure your `HUGGING_FACE_HUB_TOKEN` is correctly set as a Space secret (`HF_TOKEN`) or in the `.env` file.\n"
-    f"2. The app will automatically create/embed the database using `{LOCAL_EMBEDDING_MODEL}` on first run if needed (requires `{INPUT_FILE}` to be present).\n"
     "3. Enter your question in the text area.\n"
     "4. Click 'Ask'."
 )
 st.sidebar.header("Configuration")
-st.sidebar.markdown(f"**Embedding:** Local (`{LOCAL_EMBEDDING_MODEL}` via ChromaDB)")
 st.sidebar.markdown(f"**LLM (HF API):** `{HF_GENERATION_MODEL}`")
-st.sidebar.markdown(f"**ChromaDB Collection:** `{COLLECTION_NAME}`")
 st.sidebar.markdown(f"**Retrieval Mode:** Vector Search Only")
 st.sidebar.markdown(f"**Final Unique Chunks:** `{TOP_K}` (from initial `{INITIAL_N_RESULTS}` vector search)")

 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
 import numpy as np
+import time
+from tqdm import tqdm
+# Need datasets, pandas, sentence-transformers
+from datasets import load_dataset, DatasetDict, Dataset
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+# Keep ChromaDB embedding function import only if needed elsewhere, otherwise remove
+# import chromadb.utils.embedding_functions as embedding_functions
 # --- Configuration ---
+# DB_PATH = "./chroma_db" # No longer using persistent path for app runtime
+COLLECTION_NAME = "libguides_content"
+LOCAL_EMBEDDING_MODEL = 'BAAI/bge-m3' # Local model for QUERY embedding
 HF_GENERATION_MODEL = "google/gemma-3-27b-it" # HF model for generation
+HF_DATASET_ID = "Zwounds/Libguides_Embeddings" # Your HF Dataset ID
+PARQUET_FILENAME = "libguides_embeddings.parquet" # Filename within the dataset
+# INPUT_FILE = 'extracted_content.jsonl' # No longer needed for app runtime
+# EMBEDDING_BATCH_SIZE = 100 # Batch size for adding docs to ChromaDB (now done during load)
+ADD_BATCH_SIZE = 500 # Batch size for adding to in-memory Chroma
+TOP_K = 10
+INITIAL_N_RESULTS = 50
+API_RETRY_DELAY = 2
+MAX_NEW_TOKENS = 512
 # ---
 # Setup logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stderr)
 # --- Load API Key and Initialize HF Generation Client ---
 @st.cache_resource
 def initialize_hf_client():
     generation_client_instance = None
     try:
         load_dotenv()
         HF_TOKEN = os.getenv('HF_TOKEN') or os.getenv('HUGGING_FACE_HUB_TOKEN')
         if not HF_TOKEN:
+            logging.error("HF_TOKEN or HUGGING_FACE_HUB_TOKEN not found.")
+            st.error("🔴 Hugging Face Token not found. Please set it as a Space secret named HF_TOKEN or in the .env file.")
+            st.stop()
         else:
             generation_client_instance = InferenceClient(model=HF_GENERATION_MODEL, token=HF_TOKEN)
             logging.info(f"Initialized HF Inference Client for generation ({HF_GENERATION_MODEL}).")
     except Exception as e:
         logging.exception("Error initializing Hugging Face Inference Client for generation.")
         st.error(f"🔴 Error initializing Hugging Face Inference Client: {e}")
+        st.stop()
+    return None
 generation_client = initialize_hf_client()
 # ---
+# --- Load Local Embedding Model (for Queries) ---
 @st.cache_resource
+def load_local_embedding_model():
+    logging.info(f"Loading local embedding model for queries: {LOCAL_EMBEDDING_MODEL}")
     try:
          import torch
          device = 'cuda' if torch.cuda.is_available() else 'cpu'
     except ImportError:
          device = 'cpu'
          logging.info("Torch not found, using device: cpu")
     try:
+        model = SentenceTransformer(LOCAL_EMBEDDING_MODEL, device=device, trust_remote_code=True)
+        logging.info("Local embedding model loaded successfully.")
+        return model
     except Exception as e:
+        st.error(f"Failed to load local embedding model ({LOCAL_EMBEDDING_MODEL}): {e}")
+        logging.exception(f"Failed to load local embedding model: {e}")
+        st.stop()
+    return None
+embedding_model = load_local_embedding_model()
+# ---
+# --- Load Data from HF Dataset and Populate In-Memory ChromaDB ---
+@st.cache_resource
+def load_data_and_setup_chroma():
+    if not generation_client or not embedding_model:
+         st.error("Required clients/models not initialized. Cannot proceed.")
+         st.stop()
     try:
+        logging.info(f"Loading dataset '{HF_DATASET_ID}' from Hugging Face Hub...")
+        # Load the 'train' split of the dataset (assumed to be its default split).
+        # If the download or load fails, report the error in the UI and stop the app.
+        try:
+            dataset = load_dataset(HF_DATASET_ID, split='train') # Assuming default split is 'train'
+        except Exception as load_e:
+             logging.error(f"Failed to load dataset '{HF_DATASET_ID}': {load_e}")
+             st.error(f"Failed to load dataset '{HF_DATASET_ID}'. Check dataset ID and availability.")
+             st.stop()
+        logging.info("Converting dataset to Pandas DataFrame...")
+        df = dataset.to_pandas()
+        logging.info(f"Dataset loaded into DataFrame with shape: {df.shape}")
+        # Verify required columns
+        required_cols = ['id', 'document', 'embedding', 'metadata']
+        if not all(col in df.columns for col in required_cols):
+            st.error(f"Dataset is missing required columns. Found: {df.columns}. Required: {required_cols}")
+            logging.error(f"Dataset missing required columns. Found: {df.columns}")
+            st.stop()
+        # Ensure each embedding is a plain list of floats (after to_pandas() the values may be NumPy arrays).
+        # This may not be strictly necessary if ChromaDB accepts NumPy arrays, but converting is safer;
+        # values of any other type become None and are dropped below.
+        logging.info("Ensuring embeddings are in list format...")
+        df['embedding'] = df['embedding'].apply(lambda x: list(map(float, x)) if isinstance(x, (np.ndarray, list)) else None)
+        # Drop rows where embedding conversion failed
+        initial_rows = len(df)
+        df.dropna(subset=['embedding'], inplace=True)
+        if len(df) < initial_rows:
+            logging.warning(f"Dropped {initial_rows - len(df)} rows due to invalid embedding format.")
+        if df.empty:
+            st.error("No valid data loaded from the dataset after processing embeddings.")
+            logging.error("DataFrame empty after embedding processing.")
+            st.stop()
+        logging.info("Initializing in-memory ChromaDB client...")
+        chroma_client = chromadb.Client() # In-memory client
+        # Delete collection if it somehow exists in memory (unlikely but safe)
         try:
             chroma_client.delete_collection(name=COLLECTION_NAME)
+        except: pass
+        logging.info(f"Creating in-memory collection: {COLLECTION_NAME}")
+        # Create collection WITHOUT embedding function - we provide pre-computed ones
         collection = chroma_client.create_collection(
             name=COLLECTION_NAME,
+            metadata={"hnsw:space": "cosine"} # Or dot if BGE prefers
         )
+        logging.info(f"Adding {len(df)} documents to in-memory ChromaDB in batches of {ADD_BATCH_SIZE}...")
         start_time = time.time()
         error_count = 0
+        num_batches = (len(df) + ADD_BATCH_SIZE - 1) // ADD_BATCH_SIZE
+        progress_bar = st.progress(0, text="Loading embeddings into memory...")
         for i in range(num_batches):
+            start_idx = i * ADD_BATCH_SIZE
+            end_idx = start_idx + ADD_BATCH_SIZE
+            batch_df = df.iloc[start_idx:end_idx]
             try:
+                collection.add(
+                    ids=batch_df['id'].tolist(),
+                    embeddings=batch_df['embedding'].tolist(),
+                    documents=batch_df['document'].tolist(),
+                    metadatas=batch_df['metadata'].tolist()
+                )
             except Exception as e:
+                logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
                 error_count += 1
+            progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
         progress_bar.empty()
         end_time = time.time()
+        logging.info(f"Finished loading data into in-memory ChromaDB. Took {end_time - start_time:.2f} seconds.")
         if error_count > 0:
+            logging.warning(f"Encountered errors in {error_count} batches during add to Chroma.")
+        st.success("Embeddings loaded successfully!")
         return collection
+    except ImportError as e:
+        st.error(f"ImportError: {e}. Required libraries might be missing (datasets, pandas, pyarrow). Check requirements.txt.")
+        logging.error(f"ImportError during dataset loading/Chroma setup: {e}")
+        st.stop()
     except Exception as e:
+        st.error(f"Failed to load data and initialize ChromaDB: {e}")
+        logging.exception(f"An unexpected error occurred during data load/Chroma setup: {e}")
+        st.stop()
+    return None # Should not be reached
+# --- Load data and collection ---
+collection = load_data_and_setup_chroma()
+# ---
 # --- Helper Functions ---
 def query_hf_inference(prompt, client_instance=None, model_name=HF_GENERATION_MODEL):
     """Sends the prompt to the HF Inference API using the initialized client."""
     if not client_instance:
         client_instance = generation_client
     if not client_instance:
          logging.error("HF Inference client not initialized in query_hf_inference.")
          return "Error: HF Inference client failed to initialize."
     try:
+        response_text = client_instance.text_generation(prompt, max_new_tokens=MAX_NEW_TOKENS)
         if not response_text:
              logging.warning(f"Received empty response from HF Inference API ({model_name}) for prompt: {prompt[:100]}...")
              return "Error: Received empty response from generation model."
 User Query: "{query}"
 Output:"""
     logging.info(f"Generating query variations for: {query} using {model_name}")
     try:
         response = llm_func(prompt, model_name=model_name)
     return prompt
 # --- Streamlit App UI ---
+st.set_page_config(layout="wide")
+st.title("📚 Ask the Library Guides (Dataset Embed + HF Gen)") # Updated title
 # User input (only proceed if collection loaded)
 if collection:
     query = st.text_area("Enter your question:", height=100)
 else:
+    # Error handled during load_data_and_setup_chroma
+    st.error("Application initialization failed. Cannot proceed.")
+    st.stop()
 # --- Routing Prompt Definition ---
 ROUTING_PROMPT_TEMPLATE = """You are a query routing assistant for a library chatbot. Your task is to classify the user's query into one of the following categories based on its intent:
             if route_decision == "HOURS":
                 st.info("You can find the current library hours here: [https://gc-cuny.libcal.com/hours](https://gc-cuny.libcal.com/hours)")
                 st.stop()
+            # ... (other routes) ...
             elif route_decision == "EVENTS_CALENDAR":
                 events_url = "https://gc-cuny.libcal.com/calendar?cid=15537&t=d&d=0000-00-00&cal=15537&inc=0"
                 st.info(f"You can find information about upcoming library events and workshops on the calendar here: [{events_url}]({events_url})")
             all_queries = [query] + query_variations
             logging.info(f"--- DIAGNOSTIC: All queries for search: {all_queries}")
+            # 2. Embed Queries Locally
+            try:
+                logging.info(f"Generating query embeddings locally using {LOCAL_EMBEDDING_MODEL}...")
+                query_embeddings = embedding_model.encode(all_queries).tolist()
+                logging.info(f"Generated {len(query_embeddings)} query embeddings locally.")
+            except Exception as e:
+                st.error(f"Failed to embed query using local model: {e}")
+                logging.exception(f"Failed to embed query using local model: {e}")
+                st.stop()
+            # 3. Vector Search (using pre-computed query embeddings)
             vector_results_ids = []
             context_chunks = []
             context_metadata_list = []
             try:
+                logging.info(f"Performing vector search for {len(query_embeddings)} embeddings...")
+                # Query ChromaDB using the computed query_embeddings
                 vector_results = collection.query(
+                    query_embeddings=query_embeddings, # Pass embeddings now
                     n_results=INITIAL_N_RESULTS,
                     include=['documents', 'metadatas', 'distances']
                 )
                 logging.exception("Vector search/selection failed.")
                 context_chunks = []
+            # 4. Generate Final Prompt based on Route
             if route_decision == "RESEARCH_QUERY":
                 logging.info("Using RESEARCH_QUERY prompt template.")
                 final_prompt = RESEARCH_QUERY_PROMPT_TEMPLATE.format(context_str="\n\n".join(context_chunks), query=query)
                 logging.info("Using standard RAG prompt template.")
                 final_prompt = generate_prompt(query, context_chunks)
+            # 5. Query HF Inference API LLM
             logging.info(f"Sending final prompt to HF Inference API model: {HF_GENERATION_MODEL}...")
             answer = query_hf_inference(final_prompt)
             logging.info(f"Received answer from HF Inference API: {answer[:100]}...")
             if answer.startswith("Error:"):
                  st.error(f"Answer generation failed: {answer}")
+        # 6. Display results
         st.subheader("Answer:")
         st.markdown(answer)
 st.sidebar.header("How to Use")
 st.sidebar.info(
     "1. Ensure your `HUGGING_FACE_HUB_TOKEN` is correctly set as a Space secret (`HF_TOKEN`) or in the `.env` file.\n"
+    f"2. The app will load pre-computed embeddings from the HF Dataset (`{HF_DATASET_ID}`).\n"
+    "   (Ensure the dataset was created correctly using `export_chroma_to_parquet.py` and `upload_dataset_to_hf.py`)\n"
     "3. Enter your question in the text area.\n"
     "4. Click 'Ask'."
 )
 st.sidebar.header("Configuration")
+st.sidebar.markdown(f"**Embedding:** Pre-computed (`{LOCAL_EMBEDDING_MODEL}` loaded from HF Dataset)")
 st.sidebar.markdown(f"**LLM (HF API):** `{HF_GENERATION_MODEL}`")
+st.sidebar.markdown(f"**ChromaDB Collection:** `{COLLECTION_NAME}` (In-Memory)")
 st.sidebar.markdown(f"**Retrieval Mode:** Vector Search Only")
 st.sidebar.markdown(f"**Final Unique Chunks:** `{TOP_K}` (from initial `{INITIAL_N_RESULTS}` vector search)")