| import faiss | |
| import pickle | |
| import os | |
| from PyPDF2 import PdfReader | |
| import glob | |
| from pathlib import Path | |
| import re | |
| import requests | |
| from langchain.chains import LLMChain | |
| from langchain.llms import OpenAI | |
| from langchain import PromptTemplate | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import OpenAIEmbeddings | |
| import dotenv | |
| dotenv.load_dotenv() | |
class SemanticSearch:
    """Callable that returns the text of the stored documents closest to a query.

    The vector store is unpickled from disk once, at construction time. Each
    call runs ``similarity_search_with_score`` on that store and joins the
    page contents of the hits into a single string.

    NOTE(review): ``threshold`` is kept on the instance but is not applied to
    the returned scores anywhere in this class -- confirm whether score
    filtering is expected before relying on it.
    """

    def __init__(
        self,
        threshold: float,
        with_source: bool = False,
        k: int = 5,
        store_path: str = './data/store.pkl',
    ):
        """Load the pickled vector store and remember the search settings.

        Args:
            threshold: Value stored as ``self.threshold`` (currently unused
                by ``__call__``).
            with_source: When true, each hit is followed by the base name of
                its ``metadata['source']``.
            k: Number of hits requested from the store per query.
            store_path: Location of the pickled store. Defaults to the path
                that used to be hard-coded, so existing callers are unchanged.

        Raises:
            OSError: If ``store_path`` cannot be opened.
        """
        self.threshold = threshold
        self.with_source = with_source
        self.k = k
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file; only point ``store_path`` at files from a trusted source.
        with open(store_path, 'rb') as f:
            self.db = pickle.load(f)

    def __call__(self, query):
        """Search the store for ``query`` and return the joined hit texts.

        Args:
            query: Passed straight to ``similarity_search_with_score``.

        Returns:
            ``None`` when the store yields no hits; otherwise the hits' page
            contents joined by three newlines, each followed by a
            ``Source:<file name>`` trailer when ``with_source`` is set.

        Raises:
            KeyError: If ``with_source`` is set and a hit has no ``'source'``
                entry in its metadata.
        """
        hits = self.db.similarity_search_with_score(query, k=self.k)
        if not hits:
            return None

        # Each hit is a (document, score) pair; only the document is rendered.
        if not self.with_source:
            return '\n\n\n'.join(doc.page_content for doc, _score in hits)

        return '\n\n\n'.join(
            doc.page_content
            + '\n\nSource:'
            + os.path.basename(str(doc.metadata['source']))
            + '\n'
            for doc, _score in hits
        )
class ContentSearch:
    """Callable that runs a search and wraps the result in a prompt template.

    ``semantic_search`` is called with the query; ``prompt_template`` must
    offer a ``format(content=...)`` method, which receives whatever the
    search returned.
    """

    def __init__(self, semantic_search, prompt_template):
        """Store the search callable and the template used to render results."""
        self.prompt_template = prompt_template
        self.semantic_search = semantic_search

    def __call__(self, query):
        """Return the template filled with the search result for ``query``.

        When the search returns ``None`` the fixed string
        ``"No results found"`` is returned instead.
        """
        retrieved = self.semantic_search(query)
        if retrieved is not None:
            return self.prompt_template.format(content=retrieved)
        return "No results found"