Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| from src.processor import build_index | |
# Directory that main() scans for ".pdf" files and hands to build_index.
DATA_DIR = "./data"
# Second argument to build_index; main() reports the database as built here.
DB_DIR = "./chroma_db"
def main():
    """Validate the PDF input directory, then build the vector database.

    Prints a hint and returns early when ``DATA_DIR`` is not a directory or
    contains no file whose name ends in ``.pdf``. Otherwise calls
    ``build_index(DATA_DIR, DB_DIR)`` and prints a success message.
    """
    print(f"Checking for data in {DATA_DIR}...")

    # isdir rather than exists: a regular file sitting at DATA_DIR would
    # otherwise pass the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(DATA_DIR):
        print(f"Create a '{DATA_DIR}' directory and put your PDFs there.")
        return

    if not any(name.endswith(".pdf") for name in os.listdir(DATA_DIR)):
        print("No PDF files found in data directory.")
        return

    print("Building Vector Database... (This may take a while for large docs)")

    # Any existing DB_DIR is left in place. For a fresh start on every run,
    # remove it before this call, e.g. shutil.rmtree(DB_DIR) when it exists.
    build_index(DATA_DIR, DB_DIR)
    print(f"Success! Database built at {DB_DIR}")
# Run the index build only when this file is executed as a script.
if __name__ == "__main__":
    main()