srilakshu012456 committed on
Commit
9cd2767
·
verified ·
1 Parent(s): 2c3d060

Update services/kb_creation.py

Browse files
Files changed (1) hide show
  1. services/kb_creation.py +8 -5
services/kb_creation.py CHANGED
@@ -1,3 +1,4 @@
 
1
 
2
  # services/kb_creation.py
3
  import os
@@ -318,17 +319,19 @@ def bm25_search(query: str, top_k: int = 50) -> List[Tuple[int, float]]:
318
  # ---------------------------- Semantic-only ----------------------------
319
  def search_knowledge_base(query: str, top_k: int = 10) -> dict:
320
  query_embedding = model.encode(query).tolist()
 
321
  res = collection.query(
322
  query_embeddings=[query_embedding],
323
  n_results=top_k,
324
- include=['documents', 'metadatas', 'distances', 'ids']
325
  )
326
  documents = (res.get("documents", [[]]) or [[]])[0]
327
  metadatas = (res.get("metadatas", [[]]) or [[]])[0]
328
  distances = (res.get("distances", [[]]) or [[]])[0]
329
- ids = (res.get("ids", [[]]) or [[]])[0]
330
 
331
- if not ids and documents:
 
 
332
  synthesized = []
333
  for i, m in enumerate(metadatas):
334
  fn = (m or {}).get("filename", "unknown")
@@ -337,7 +340,7 @@ def search_knowledge_base(query: str, top_k: int = 10) -> dict:
337
  synthesized.append(f"{fn}:{sec}:{idx}")
338
  ids = synthesized
339
 
340
- print(f"[KB] search → {len(documents)} docs (top_k={top_k}); first distance: {distances[0] if distances else 'n/a'}; ids={len(ids)}")
341
  return {
342
  "documents": documents,
343
  "metadatas": metadatas,
@@ -653,4 +656,4 @@ def reset_kb(folder_path: str) -> Dict[str, Any]:
653
  result["info"] = get_kb_runtime_info()
654
  return result
655
  except Exception as e:
656
- return {"status": "ERROR", "error": f"{e}", "info": get_kb_runtime_info()}
 
1
+ #updated
2
 
3
  # services/kb_creation.py
4
  import os
 
319
  # ---------------------------- Semantic-only ----------------------------
320
  def search_knowledge_base(query: str, top_k: int = 10) -> dict:
321
  query_embedding = model.encode(query).tolist()
322
+ # Some Chroma client versions do not support "ids" in include.
323
  res = collection.query(
324
  query_embeddings=[query_embedding],
325
  n_results=top_k,
326
+ include=['documents', 'metadatas', 'distances'] # no 'ids' here
327
  )
328
  documents = (res.get("documents", [[]]) or [[]])[0]
329
  metadatas = (res.get("metadatas", [[]]) or [[]])[0]
330
  distances = (res.get("distances", [[]]) or [[]])[0]
 
331
 
332
+ # Synthesize IDs from metadata (filename:section:chunk_index)
333
+ ids: List[str] = []
334
+ if documents:
335
  synthesized = []
336
  for i, m in enumerate(metadatas):
337
  fn = (m or {}).get("filename", "unknown")
 
340
  synthesized.append(f"{fn}:{sec}:{idx}")
341
  ids = synthesized
342
 
343
+ print(f"[KB] search → {len(documents)} docs (top_k={top_k}); first distance: {distances[0] if distances else 'n/a'}; ids synthesized={len(ids)}")
344
  return {
345
  "documents": documents,
346
  "metadatas": metadatas,
 
656
  result["info"] = get_kb_runtime_info()
657
  return result
658
  except Exception as e:
659
+ return {"status": "ERROR", "error": f"{e}", "info": get_kb_runtime_info()}