# Rag-ag / ingest.py
# beastLucifer's picture
# Upload 11 files
# 9806c71 verified
# raw
# history blame contribute delete
# 815 Bytes
import os
import shutil
from src.processor import build_index
# Directory that main() scans for source ".pdf" files before indexing.
DATA_DIR = "./data"
# Location handed to build_index as the vector database directory.
DB_DIR = "./chroma_db"
def main():
    """Validate the data directory and build the vector database from it.

    Prints a hint and returns early when ``DATA_DIR`` is not an existing
    directory or when it holds no file whose name ends in ``.pdf``.
    Otherwise calls ``build_index(DATA_DIR, DB_DIR)`` and reports the
    database location.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    print(f"Checking for data in {DATA_DIR}...")

    # isdir rather than exists: a regular file sitting at DATA_DIR would pass
    # an existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(DATA_DIR):
        print(f"Create a '{DATA_DIR}' directory and put your PDFs there.")
        return

    if not any(name.endswith(".pdf") for name in os.listdir(DATA_DIR)):
        print("No PDF files found in data directory.")
        return

    print("Building Vector Database... (This may take a while for large docs)")

    # Optional: clear the old DB if you want a fresh start every time.
    # if os.path.exists(DB_DIR):
    #     shutil.rmtree(DB_DIR)

    # The return value is not used here; presumably build_index writes the
    # index under DB_DIR itself -- confirm against src.processor.
    build_index(DATA_DIR, DB_DIR)
    print(f"Success! Database built at {DB_DIR}")
# Script entry point: run the ingest only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()