Yoma committed on
Commit
4c4bf31
·
1 Parent(s): 28bfaa9

added vector db creation in chatbot_app

Browse files
Files changed (1) hide show
  1. chatbot_app.py +35 -1
chatbot_app.py CHANGED
@@ -3,6 +3,8 @@ from retrieval_manager import RetrievalManager
3
  import llm_interface
4
  import json
5
  import logging
 
 
6
 
7
  from dotenv import load_dotenv
8
  load_dotenv() # Loads .env file automatically
@@ -11,8 +13,40 @@ load_dotenv() # Loads .env file automatically
11
  logger = logging.getLogger(__name__)
12
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # 1. Instantiate the retrieval manager
15
- retriever = RetrievalManager()
 
16
 
17
  def respond(message, chat_history):
18
  """
 
3
  import llm_interface
4
  import json
5
  import logging
6
+ import os
7
+ from vector_db_manager import run_etl_pipeline
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv() # Loads .env file automatically
 
13
  logger = logging.getLogger(__name__)
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
 
16
+ # --- Database and Model Configuration ---
17
+ DB_PATH = "./chroma_db"
18
+ EMBEDDING_MODEL = 'BAAI/bge-large-en-v1.5'
19
+ PRODUCTS_JSON_PATH = 'products.json'
20
+ REVIEWS_JSON_PATH = 'product_reviews.json'
21
+
22
+ # --- Check for and Build VectorDB if it doesn't exist ---
23
+ # This is crucial for environments like HF Spaces where the file system is ephemeral.
24
+ if not os.path.exists(DB_PATH):
25
+ logger.info(f"ChromaDB path '{DB_PATH}' not found. Running ETL pipeline to create and populate the database.")
26
+ logger.info("This may take a few moments...")
27
+
28
+ # Check if data files exist before running ETL
29
+ if not os.path.exists(PRODUCTS_JSON_PATH) or not os.path.exists(REVIEWS_JSON_PATH):
30
+ logger.error(f"FATAL: Required data files ('{PRODUCTS_JSON_PATH}' or '{REVIEWS_JSON_PATH}') not found.")
31
+ # Exit if data is missing, as the app cannot function
32
+ exit()
33
+
34
+ try:
35
+ run_etl_pipeline(
36
+ products_file=PRODUCTS_JSON_PATH,
37
+ reviews_file=REVIEWS_JSON_PATH,
38
+ db_path=DB_PATH,
39
+ model_name=EMBEDDING_MODEL
40
+ )
41
+ logger.info("ETL pipeline completed successfully.")
42
+ except Exception as e:
43
+ logger.error(f"FATAL: An error occurred during the ETL pipeline: {e}", exc_info=True)
44
+ # Exit if the ETL fails, as the app cannot function
45
+ exit()
46
+
47
  # 1. Instantiate the retrieval manager
48
+ # It will now connect to the newly created or existing database
49
+ retriever = RetrievalManager(db_path=DB_PATH, model_name=EMBEDDING_MODEL)
50
 
51
  def respond(message, chat_history):
52
  """