# File size: 974 Bytes (92145af)
from utils import base_utils as bu
from utils import md_to_faiss
from utils import retrieval_utils as ru
def main() -> None:
    """Build a FAISS index using the settings in ``configs/config.json``.

    Loads the config with ``bu.load_config``, loads the embedding model named
    by ``embeddings.model_name`` through ``ru.load_model``, and forwards the
    paths, splitter settings and loaded model to
    ``md_to_faiss.build_faiss_from_md``.

    Only the config keys that are actually consumed are read, so an unused
    entry cannot make the script fail on lookup.

    Raises:
        Whatever the config object raises for a missing key (``KeyError`` if
        ``bu.load_config`` returns plain dicts -- TODO confirm).
    """
    config = bu.load_config("configs/config.json")

    # Embedding settings.
    model_name = config["embeddings"]["model_name"]
    max_docs = config["embeddings"]["max_files_for_debug"]

    # Input and index locations.
    input_path = config["paths"]["input_path"]
    index_dir = config["paths"]["index_dir"]

    # Text splitting settings.
    splitter = config["splitter"]["type"]
    chunk_size = config["splitter"]["chunk_size"]
    chunk_overlap = config["splitter"]["overlap"]

    # The builder receives both the model name and the loaded model object.
    retrieval_model = ru.load_model(model_name)

    md_to_faiss.build_faiss_from_md(
        input_path=input_path,
        index_dir=index_dir,
        model_name=model_name,
        splitter=splitter,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        retrieval_model=retrieval_model,
        max_documents=max_docs,
    )
if __name__ == "__main__":
    # Run the build only when executed as a script, not when imported.
    main()