Jaita committed on
Commit
190e142
·
verified ·
1 Parent(s): 6cc0940

Update services/kb_creation.py

Browse files
Files changed (1) hide show
  1. services/kb_creation.py +4 -4
services/kb_creation.py CHANGED
@@ -76,9 +76,9 @@ def chunk_text(text, max_words=300):
76
 
77
  def ingest_documents(folder_path):
78
  """Read .docx files, chunk text, generate embeddings, and store in ChromaDB."""
79
- print(f"📂 Checking folder: {folder_path}")
80
  files = [f for f in os.listdir(folder_path) if f.endswith('.docx')]
81
- print(f"Found {len(files)} Word files: {files}")
82
 
83
  if not files:
84
  print("⚠️ No .docx files found. Please check the folder path.")
@@ -89,7 +89,7 @@ def ingest_documents(folder_path):
89
  text = extract_text_from_docx(file_path)
90
  chunks = chunk_text(text)
91
 
92
- print(f"📄 Ingesting {file} with {len(chunks)} chunks")
93
 
94
  for i, chunk in enumerate(chunks):
95
  embedding = model.encode(chunk).tolist()
@@ -107,7 +107,7 @@ def search_knowledge_base(query, top_k=3):
107
  """Search ChromaDB using semantic similarity."""
108
  query_embedding = model.encode(query).tolist()
109
  results = collection.query(query_embeddings=[query_embedding], n_results=top_k,include=['embeddings','documents', 'metadatas', 'distances'])
110
- print("results",results)
111
  return results
112
 
113
  # Example usage:
 
76
 
77
  def ingest_documents(folder_path):
78
  """Read .docx files, chunk text, generate embeddings, and store in ChromaDB."""
79
+ #print(f"📂 Checking folder: {folder_path}")
80
  files = [f for f in os.listdir(folder_path) if f.endswith('.docx')]
81
+ #print(f"Found {len(files)} Word files: {files}")
82
 
83
  if not files:
84
  print("⚠️ No .docx files found. Please check the folder path.")
 
89
  text = extract_text_from_docx(file_path)
90
  chunks = chunk_text(text)
91
 
92
+ #print(f"📄 Ingesting {file} with {len(chunks)} chunks")
93
 
94
  for i, chunk in enumerate(chunks):
95
  embedding = model.encode(chunk).tolist()
 
107
  """Search ChromaDB using semantic similarity."""
108
  query_embedding = model.encode(query).tolist()
109
  results = collection.query(query_embeddings=[query_embedding], n_results=top_k,include=['embeddings','documents', 'metadatas', 'distances'])
110
+ #print("results",results)
111
  return results
112
 
113
  # Example usage: