"""
Phase 7: Index all embeddings into the Qdrant vector database.

Input:  data/embeddings/embeddings.npy
        data/embeddings/chunk_metadata.json
Output: data/qdrant_db/ (local Qdrant database)

Run from project root:
    python run_indexing.py

To force re-index (e.g. after adding more papers):
    python run_indexing.py --recreate
"""
import sys
from src.utils.logger import setup_logger, get_logger
from src.vectorstore.indexer import VectorIndexer
# Configure logging once at import time through the project's setup_logger
# helper, then bind this module's logger, which main() uses for all output.
setup_logger()
logger = get_logger(__name__)
def main():
    """Run the vector indexer and log the statistics it returns.

    The presence of ``--recreate`` anywhere in ``sys.argv`` is forwarded to
    ``VectorIndexer.run`` as ``recreate=True``; a warning is logged first.
    Every key/value pair of the returned stats mapping is logged between
    banner lines.
    """
    banner = "=" * 60
    force_rebuild = "--recreate" in sys.argv

    logger.info(banner)
    logger.info("PHASE 7 - VECTOR DATABASE INDEXING")
    logger.info(banner)

    if force_rebuild:
        logger.warning("--recreate flag set: existing index will be deleted")

    summary = VectorIndexer().run(recreate=force_rebuild)

    logger.info(banner)
    logger.info("INDEXING COMPLETE")
    for label, value in summary.items():
        logger.info(f" {label}: {value}")
    logger.info(banner)
# Run the indexing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()