Spaces:
Runtime error
Runtime error
| import os | |
| # from dotenv import load_dotenv | |
| # load_dotenv() | |
| OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] | |
| import tempfile | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders.csv_loader import CSVLoader | |
| from langchain.document_loaders import PyPDFLoader | |
def check_file_type(file_path):
    """Classify a file by its extension.

    The extension is compared case-insensitively.

    Args:
        file_path: Path (or bare file name) whose extension is inspected.

    Returns:
        1 for a ``.pdf`` file, 2 for a ``.csv`` file, and ``None`` for any
        other extension (including no extension at all).
    """
    # Numeric codes the rest of the module switches on.
    codes_by_suffix = {'.pdf': 1, '.csv': 2}
    suffix = os.path.splitext(file_path)[1].lower()
    return codes_by_suffix.get(suffix)
def configure_retriever(uploaded_files):
    """Build an MMR retriever over the contents of the given PDF/CSV files.

    Each file is loaded with the loader matching its extension, the loaded
    documents are split into overlapping chunks, embedded with
    ``OpenAIEmbeddings`` and indexed in a FAISS vector store.

    Args:
        uploaded_files: Iterable of file paths ending in ``.pdf`` or ``.csv``
            (case-insensitive).

    Returns:
        The retriever obtained from the FAISS store with
        ``search_type="mmr"``, ``k=2`` and ``fetch_k=4``.

    Raises:
        ValueError: If a file has an unsupported extension, or if no
            documents were loaded (e.g. ``uploaded_files`` is empty).
    """
    docs = []
    for file in uploaded_files:
        check = check_file_type(file)
        if check == 1:
            loader = PyPDFLoader(file)
        elif check == 2:
            loader = CSVLoader(file)
        else:
            # Without this branch `loader` would be unbound for the first
            # file, or still point at the previous file's loader and load
            # that file a second time.
            raise ValueError(
                f"Unsupported file type for {file!r}; expected .pdf or .csv"
            )
        docs.extend(loader.load())

    if not docs:
        # Nothing to index: fail with a clear message before any embedding
        # call is made.
        raise ValueError("No documents were loaded from the uploaded files")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Create embeddings and store in vectordb
    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(splits, embeddings)

    # Define retriever
    retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4})
    print("embeddings created")
    return retriever