Rulga committed on
Commit
f58bd9b
·
1 Parent(s): 3f8e971

Refactor knowledge base management functions for improved clarity and error handling

Browse files
Files changed (2) hide show
  1. app.py +15 -5
  2. src/knowledge_base/vector_store.py +56 -25
app.py CHANGED
@@ -185,10 +185,18 @@ def respond(
185
 
186
  yield new_history, conversation_id
187
 
188
- def build_kb():
189
- """Function to create knowledge base"""
190
  try:
191
- success, message = create_vector_store()
 
 
 
 
 
 
 
 
192
  return message
193
  except Exception as e:
194
  return f"Error creating knowledge base: {str(e)}"
@@ -271,7 +279,8 @@ with gr.Blocks() as demo:
271
 
272
  with gr.Column(scale=1):
273
  gr.Markdown("### Knowledge Base Management")
274
- build_kb_btn = gr.Button("Create/Update Knowledge Base", variant="primary")
 
275
  kb_status = gr.Textbox(label="Knowledge Base Status", interactive=False)
276
 
277
  submit_btn.click(
@@ -279,7 +288,8 @@ with gr.Blocks() as demo:
279
  [msg, chatbot, conversation_id],
280
  [chatbot, conversation_id, msg]
281
  )
282
- build_kb_btn.click(build_kb, None, kb_status)
 
283
  clear_btn.click(lambda: ([], None), None, [chatbot, conversation_id])
284
 
285
  with gr.Tab("Model Settings"):
 
185
 
186
  yield new_history, conversation_id
187
 
188
+ def update_kb():
189
+ """Function to update existing knowledge base with new documents"""
190
  try:
191
+ success, message = create_vector_store(mode="update")
192
+ return message
193
+ except Exception as e:
194
+ return f"Error updating knowledge base: {str(e)}"
195
+
196
+ def rebuild_kb():
197
+ """Function to create knowledge base from scratch"""
198
+ try:
199
+ success, message = create_vector_store(mode="rebuild")
200
  return message
201
  except Exception as e:
202
  return f"Error creating knowledge base: {str(e)}"
 
279
 
280
  with gr.Column(scale=1):
281
  gr.Markdown("### Knowledge Base Management")
282
+ update_kb_btn = gr.Button("Update Knowledge Base", variant="secondary")
283
+ rebuild_kb_btn = gr.Button("Rebuild Knowledge Base", variant="primary")
284
  kb_status = gr.Textbox(label="Knowledge Base Status", interactive=False)
285
 
286
  submit_btn.click(
 
288
  [msg, chatbot, conversation_id],
289
  [chatbot, conversation_id, msg]
290
  )
291
+ update_kb_btn.click(update_kb, None, kb_status)
292
+ rebuild_kb_btn.click(rebuild_kb, None, kb_status)
293
  clear_btn.click(lambda: ([], None), None, [chatbot, conversation_id])
294
 
295
  with gr.Tab("Model Settings"):
src/knowledge_base/vector_store.py CHANGED
@@ -15,8 +15,16 @@ def get_embeddings():
15
  model_kwargs={'device': 'cpu'}
16
  )
17
 
18
- def create_vector_store():
19
- """Create vector store and upload to dataset"""
 
 
 
 
 
 
 
 
20
  # Load documents
21
  documents = load_documents()
22
 
@@ -33,32 +41,55 @@ def create_vector_store():
33
  # Initialize embeddings
34
  embeddings = get_embeddings()
35
 
36
- # Create vector store in temporary directory
37
- with tempfile.TemporaryDirectory() as temp_dir:
38
- vector_store = FAISS.from_documents(chunks, embeddings)
39
- # Save to temporary directory
40
- vector_store.save_local(folder_path=temp_dir)
41
-
42
- # Copy files to VECTOR_STORE_PATH for subsequent loading
43
- os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
44
- for file in ["index.faiss", "index.pkl"]:
45
- shutil.copy2(
46
- os.path.join(temp_dir, file),
47
- os.path.join(VECTOR_STORE_PATH, file)
48
- )
 
 
 
 
 
 
 
49
 
50
- # Upload to dataset with explicit token passing
51
- from src.knowledge_base.dataset import DatasetManager
52
- dataset = DatasetManager(token=HF_TOKEN)
53
- success, message = dataset.upload_vector_store()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- # Clean up local files after upload
56
- shutil.rmtree(VECTOR_STORE_PATH)
57
 
58
- if not success:
59
- return False, f"Error uploading to dataset: {message}"
60
-
61
- return True, f"Knowledge base created successfully! Loaded {len(documents)} documents, created {len(chunks)} chunks."
62
 
63
  def load_vector_store():
64
  """Load vector store"""
 
15
  model_kwargs={'device': 'cpu'}
16
  )
17
 
18
+ def create_vector_store(mode: str = "rebuild"):
19
+ """
20
+ Create or update vector store and upload to dataset
21
+
22
+ Args:
23
+ mode: Either "rebuild" (create from scratch) or "update" (add new documents)
24
+
25
+ Returns:
26
+ (success, message)
27
+ """
28
  # Load documents
29
  documents = load_documents()
30
 
 
41
  # Initialize embeddings
42
  embeddings = get_embeddings()
43
 
44
+ try:
45
+ if mode == "update":
46
+ # Try to load existing vector store
47
+ from src.knowledge_base.dataset import DatasetManager
48
+ dataset = DatasetManager(token=HF_TOKEN)
49
+ success, result = dataset.download_vector_store()
50
+
51
+ if success:
52
+ # Add new documents to existing store
53
+ vector_store = FAISS.load_local(
54
+ VECTOR_STORE_PATH,
55
+ embeddings,
56
+ allow_dangerous_deserialization=True
57
+ )
58
+ vector_store.add_documents(chunks)
59
+ else:
60
+ return False, "Failed to load existing vector store for update"
61
+ else:
62
+ # Create new vector store
63
+ vector_store = FAISS.from_documents(chunks, embeddings)
64
 
65
+ # Save and upload
66
+ with tempfile.TemporaryDirectory() as temp_dir:
67
+ vector_store.save_local(folder_path=temp_dir)
68
+
69
+ # Copy files to VECTOR_STORE_PATH for subsequent loading
70
+ os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
71
+ for file in ["index.faiss", "index.pkl"]:
72
+ shutil.copy2(
73
+ os.path.join(temp_dir, file),
74
+ os.path.join(VECTOR_STORE_PATH, file)
75
+ )
76
+
77
+ # Upload to dataset
78
+ from src.knowledge_base.dataset import DatasetManager
79
+ dataset = DatasetManager(token=HF_TOKEN)
80
+ success, message = dataset.upload_vector_store()
81
+
82
+ # Clean up local files
83
+ shutil.rmtree(VECTOR_STORE_PATH)
84
+
85
+ if not success:
86
+ return False, f"Error uploading to dataset: {message}"
87
 
88
+ action = "updated" if mode == "update" else "created"
89
+ return True, f"Knowledge base {action} successfully! Processed {len(documents)} documents, {len(chunks)} chunks."
90
 
91
+ except Exception as e:
92
+ return False, f"Error {mode}ing knowledge base: {str(e)}"
 
 
93
 
94
  def load_vector_store():
95
  """Load vector store"""