File size: 974 Bytes
92145af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from utils import base_utils as bu
from utils import md_to_faiss
from utils import retrieval_utils as ru

def main() -> None:
    """Run the markdown-to-FAISS build step with settings from the config file.

    Loads ``configs/config.json`` via ``bu.load_config``, pulls the embedding,
    path and splitter settings out of it, loads the retrieval model named by
    ``embeddings.model_name`` via ``ru.load_model``, and forwards everything
    to ``md_to_faiss.build_faiss_from_md``.

    Config keys read:
        embeddings.model_name, embeddings.max_files_for_debug,
        paths.input_path, paths.index_dir,
        splitter.type, splitter.chunk_size, splitter.overlap

    Raises:
        KeyError: If one of the keys above is missing (assuming
            ``bu.load_config`` returns plain nested dicts -- TODO confirm).
    """
    # NOTE(review): the config path is relative; presumably it is resolved
    # against the current working directory -- confirm in bu.load_config.
    config = bu.load_config("configs/config.json")

    # Bind each config section once instead of re-indexing `config` per key.
    embeddings_cfg = config["embeddings"]
    paths_cfg = config["paths"]
    splitter_cfg = config["splitter"]

    # Read every setting up front so a malformed config fails before the
    # model is loaded.
    model_name = embeddings_cfg["model_name"]
    max_docs = embeddings_cfg["max_files_for_debug"]
    input_path = paths_cfg["input_path"]
    index_dir = paths_cfg["index_dir"]
    splitter = splitter_cfg["type"]
    chunk_size = splitter_cfg["chunk_size"]
    chunk_overlap = splitter_cfg["overlap"]

    retrieval_model = ru.load_model(model_name)

    md_to_faiss.build_faiss_from_md(
        input_path=input_path,
        index_dir=index_dir,
        model_name=model_name,
        splitter=splitter,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        retrieval_model=retrieval_model,
        max_documents=max_docs,
    )

# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()