Spaces:
Sleeping
Sleeping
| from src.rag import CustomAgglomerativeSplitter, FaissDB | |
| import argparse | |
| from dotenv import load_dotenv | |
| import os | |
| load_dotenv() | |
def main(path_to_dataset: str, path_to_index: str):
    """Split the dataset into documents, index them, and save the index.

    The embedding model name handed to both the splitter and the index is
    read from the ``OPENAI_EMBEDDINGS_MODEL`` environment variable.

    Args:
        path_to_dataset: Path passed to the splitter's ``read_and_split``.
        path_to_index: Path passed to ``FaissDB.save_index``.
    """
    chunks = CustomAgglomerativeSplitter(
        emb_model=os.getenv("OPENAI_EMBEDDINGS_MODEL")
    ).read_and_split(path_to_dataset)

    index = FaissDB(emb_model=os.getenv("OPENAI_EMBEDDINGS_MODEL"))
    index.init_index(chunks)
    index.save_index(path_to_index)
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory string options.
    cli = argparse.ArgumentParser()
    for flag in ("--path_to_dataset", "--path_to_index"):
        cli.add_argument(flag, type=str, required=True)
    parsed = cli.parse_args()
    main(
        path_to_dataset=parsed.path_to_dataset,
        path_to_index=parsed.path_to_index,
    )