Spaces:
Runtime error
Runtime error
| import os | |
| import numpy as np | |
| import pickle | |
| from langchain.vectorstores import FAISS, Chroma, DocArrayInMemorySearch | |
| from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
| from langchain.document_loaders.csv_loader import CSVLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
def create_vector_store_index(
    file_path,
    embedding_model_repo_id="sentence-transformers/all-roberta-large-v1",
    *,
    index_path="./db/faiss_index",
    chunk_size=1024,
    chunk_overlap=128,
):
    """Build a FAISS vector index from a CSV or PDF file and save it to disk.

    A CSV file is loaded with ``CSVLoader`` and its documents are indexed
    as returned by the loader. A PDF file is loaded with ``PyPDFLoader``
    and the resulting pages are split into overlapping chunks with
    ``RecursiveCharacterTextSplitter`` before indexing.

    Args:
        file_path: Path to the input file. The text after the last ``.``
            (case-insensitive, trailing ``/`` ignored) selects the loader.
        embedding_model_repo_id: Model name passed to
            ``HuggingFaceEmbeddings`` to embed the documents.
        index_path: Location handed to ``FAISS.save_local``.
        chunk_size: ``chunk_size`` for the PDF text splitter.
        chunk_overlap: ``chunk_overlap`` for the PDF text splitter.

    Returns:
        The confirmation string ``"Vector store index is created."``.

    Raises:
        ValueError: If ``file_path`` does not end in ``.csv`` or ``.pdf``.
    """
    # Resolve the extension first so an unsupported file fails fast, before
    # any loader runs or the embedding model is instantiated.
    file_type = file_path.rsplit(".", 1)[-1].rstrip("/").lower()

    if file_type == "csv":
        print(file_path)
        documents = CSVLoader(file_path=file_path).load()
    elif file_type == "pdf":
        pages = PyPDFLoader(file_path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        documents = text_splitter.split_documents(pages)
    else:
        # Previously this path fell through and crashed later with an
        # UnboundLocalError on `documents`; report the real problem instead.
        raise ValueError(
            f"Unsupported file type for {file_path!r}: "
            "expected a .csv or .pdf file."
        )

    embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_repo_id)
    vectordb = FAISS.from_documents(documents, embedding_model)
    vectordb.save_local(index_path)
    return "Vector store index is created."