File size: 2,011 Bytes
0914e96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import sys
import uuid

# Ensure we can import from core
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from core.rag.store import VectorStore

# Files whose stripped content is shorter than this are treated as empty.
_MIN_CONTENT_CHARS = 10
# Only files with these extensions are ingested.
_TEXT_EXTENSIONS = (".md", ".txt")


def _collect_documents(base_path):
    """Read every ingestible text file below *base_path*.

    Returns three parallel lists ``(documents, metadatas, ids)``: the raw file
    contents, a ``{"source": <file name>, "category": <parent folder name>}``
    dict per file, and a freshly generated random UUID string per file.
    Unreadable files are reported on stdout and skipped; near-empty files are
    skipped silently.
    """
    documents, metadatas, ids = [], [], []

    # Walk the tree recursively; sorting keeps the ingestion order stable
    # across runs and platforms.
    for root, dirs, files in os.walk(base_path):
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith(_TEXT_EXTENSIONS):
                continue

            file_path = os.path.join(root, file_name)
            # Keep the try body to the I/O only, so an error elsewhere is not
            # misreported as a skipped file.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f"   - ⚠️ Skipped {file_name}: {e}")
                continue

            if len(content.strip()) < _MIN_CONTENT_CHARS:
                continue  # Skip empty files

            # Prepare the content and its metadata.
            documents.append(content)
            metadatas.append(
                {"source": file_name, "category": os.path.basename(root)}
            )
            # NOTE(review): ids are random, so running the ingestion twice
            # presumably stores every document twice -- confirm against the
            # store's add_text semantics.
            ids.append(str(uuid.uuid4()))

            print(f"   - Prepared: {file_name}")

    return documents, metadatas, ids


def ingest_knowledge_base(
    base_path: "str | None" = None,
    store: "VectorStore | None" = None,
) -> None:
    """Load the knowledge-base text files and hand them to the vector store.

    Every ``.md`` / ``.txt`` file below the knowledge-base folder is read as
    UTF-8 and passed, together with its metadata and a generated id, to
    ``store.add_text(documents, metadatas, ids)`` in a single call.

    Args:
        base_path: Folder to scan. Defaults to the ``knowledge_base`` folder
            located one directory above the directory containing this file.
        store: Object exposing ``add_text(documents, metadatas, ids)``.
            Defaults to a newly constructed ``VectorStore()``.

    Returns:
        None. Progress and problems are reported on stdout; the function
        returns early if the store cannot be created or the folder is missing.
    """
    if store is None:
        # Initialize the DB.
        print("🚀 Connecting to Vector Database...")
        try:
            store = VectorStore()
        except Exception as e:  # script-level boundary: report and stop
            print(f"❌ Error initializing DB: {e}")
            return

    if base_path is None:
        # abspath() first: a relative __file__ would otherwise collapse to ""
        # after two dirname() calls and resolve against the current directory.
        project_root = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__))
        )
        base_path = os.path.join(project_root, "knowledge_base")

    print(f"📂 Scanning folder: {base_path}")

    if not os.path.isdir(base_path):
        print(f"⚠️ Knowledge base folder not found at {base_path}")
        return

    documents, metadatas, ids = _collect_documents(base_path)

    # Push everything into the DB.
    if documents:
        print(f"💾 Saving {len(documents)} documents to ChromaDB...")
        store.add_text(documents, metadatas, ids)
        print("✅ Knowledge Injection Complete!")
    else:
        print("⚠️ No valid documents found to ingest.")

# Run the ingestion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    ingest_knowledge_base()