File size: 2,011 Bytes
0914e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import os
import sys
import uuid
# Ensure we can import from core
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.rag.store import VectorStore
def _collect_documents(base_path, min_chars=10):
    """Gather the ``.md`` / ``.txt`` files under *base_path* for ingestion.

    Walks the directory tree recursively. For every accepted file it records
    the full text, a metadata dict (``source`` = file name, ``category`` =
    name of the directory that directly contains the file) and a freshly
    generated random UUID string.

    Args:
        base_path: Directory to scan.
        min_chars: Files whose stripped content is shorter than this many
            characters are skipped as effectively empty.

    Returns:
        A ``(documents, metadatas, ids)`` tuple of three parallel lists.
    """
    documents = []
    metadatas = []
    ids = []

    # Scan every file recursively.
    for root, _, files in os.walk(base_path):
        for file in files:
            if not file.endswith((".md", ".txt")):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: one unreadable file must not abort the run.
                print(f" - ⚠️ Skipped {file}: {e}")
                continue

            # Skip empty / near-empty files.
            if len(content.strip()) < min_chars:
                continue

            # Prepare the content and its metadata.
            documents.append(content)
            metadatas.append({"source": file, "category": os.path.basename(root)})
            # NOTE(review): IDs are random per run, so re-running the script
            # presumably stores the same files again under new IDs -- confirm
            # against VectorStore.add_text before relying on re-ingestion.
            ids.append(str(uuid.uuid4()))
            print(f" - Prepared: {file}")

    return documents, metadatas, ids


def ingest_knowledge_base():
    """Load the ``knowledge_base`` folder into the vector store.

    Creates a ``VectorStore``, scans ``<project root>/knowledge_base`` (the
    project root being the parent of this script's directory) for ``.md`` and
    ``.txt`` files, and passes their contents, metadata and IDs to
    ``store.add_text`` in a single call.

    Progress and problems are reported with ``print``. The function returns
    ``None`` in every case; it returns early if the store cannot be
    initialised, if the folder is missing, or if no usable file is found.
    Errors raised by ``store.add_text`` are not caught.
    """
    # Initialize the DB.
    print("🔌 Connecting to Vector Database...")
    try:
        store = VectorStore()
    except Exception as e:
        # Script-level boundary: report any initialisation failure and stop.
        print(f"❌ Error initializing DB: {e}")
        return

    # Resolve via abspath (as the sys.path bootstrap does) so the location
    # does not depend on the current working directory.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    base_path = os.path.join(project_root, "knowledge_base")

    print(f"📂 Scanning folder: {base_path}")
    if not os.path.isdir(base_path):
        print(f"⚠️ Knowledge base folder not found at {base_path}")
        return

    documents, metadatas, ids = _collect_documents(base_path)
    if not documents:
        print("⚠️ No valid documents found to ingest.")
        return

    # Insert into the DB.
    print(f"💾 Saving {len(documents)} documents to ChromaDB...")
    store.add_text(documents, metadatas, ids)
    print("✅ Knowledge Injection Complete!")
# Run the ingestion only when this file is executed as a script.
if __name__ == "__main__":
    ingest_knowledge_base()