| from utils import base_utils as bu |
| from utils import md_to_faiss |
| from utils import retrieval_utils as ru |
|
|
def main():
    """Build a FAISS index from markdown input using settings from the config file.

    Reads ``configs/config.json`` through ``bu.load_config``, loads the
    embedding model named in the config with ``ru.load_model``, and passes
    the paths, splitter settings and model on to
    ``md_to_faiss.build_faiss_from_md``.

    Raises:
        KeyError: If one of the config keys read below is missing.
    """
    config = bu.load_config("configs/config.json")

    # Look each section up once; every key below is required.
    paths_cfg = config["paths"]
    embeddings_cfg = config["embeddings"]
    splitter_cfg = config["splitter"]

    model_name = embeddings_cfg["model_name"]
    # Config key says "max_files_for_debug"; presumably a cap on how many
    # inputs are processed -- confirm against build_faiss_from_md.
    max_docs = embeddings_cfg["max_files_for_debug"]

    # Load the model only after all config keys have been read, so a
    # malformed config fails before the model load is attempted.
    retrieval_model = ru.load_model(model_name)

    md_to_faiss.build_faiss_from_md(
        input_path=paths_cfg["input_path"],
        index_dir=paths_cfg["index_dir"],
        model_name=model_name,
        splitter=splitter_cfg["type"],
        chunk_size=splitter_cfg["chunk_size"],
        chunk_overlap=splitter_cfg["overlap"],
        retrieval_model=retrieval_model,
        max_documents=max_docs,
    )
|
|
# Run the index build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()