File size: 1,041 Bytes
daafb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""
Phase 7: Index all embeddings into the Qdrant vector database.

Input:  data/embeddings/embeddings.npy
        data/embeddings/chunk_metadata.json
Output: data/qdrant_db/  (local Qdrant database)

Run from project root:
    python run_indexing.py

To force re-index (e.g. after adding more papers):
    python run_indexing.py --recreate
"""

import sys
from src.utils.logger import setup_logger, get_logger
from src.vectorstore.indexer import VectorIndexer

# Logging is initialised at import time, before main() runs, and a logger is
# requested using this module's name. NOTE(review): what setup_logger()
# configures lives in src.utils.logger and is not visible here.
setup_logger()
logger = get_logger(__name__)


def main():
    """Run the Phase 7 indexing step and log the statistics it reports.

    If the literal argument ``--recreate`` appears anywhere in ``sys.argv``,
    a warning is logged and ``recreate=True`` is forwarded to
    ``VectorIndexer.run``; otherwise ``recreate=False`` is passed. Every
    key/value pair of the mapping returned by ``run`` is then logged between
    separator banners.
    """
    banner = "=" * 60

    # Plain membership test on argv: no other command-line options are parsed.
    recreate = "--recreate" in sys.argv

    logger.info(banner)
    logger.info("PHASE 7 - VECTOR DATABASE INDEXING")
    logger.info(banner)

    if recreate:
        logger.warning("--recreate flag set: existing index will be deleted")

    indexer = VectorIndexer()
    stats = indexer.run(recreate=recreate)

    logger.info(banner)
    logger.info("INDEXING COMPLETE")
    for key, value in stats.items():
        logger.info(f"  {key}: {value}")
    logger.info(banner)



# Run the indexing step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()