OnurKerimoglu committed on
Commit
afcd22f
·
1 Parent(s): 92510cb

src.rag: introduced FAISS option for retriever, and made it default

Browse files
Files changed (1) hide show
  1. src/rag.py +14 -4
src/rag.py CHANGED
@@ -6,6 +6,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
 
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import Chroma
 
9
  from langchain_openai import ChatOpenAI
10
  # from langchain_community.llms import HuggingFaceHub
11
  from langchain_huggingface import HuggingFaceEndpoint
@@ -28,6 +29,10 @@ class RAG():
28
  # self.use_model = 'gpt-4o-mini'
29
  self.use_model = 'zephyr-7b-alpha'
30
 
 
 
 
 
31
  # Load environment variables that should contain:
32
  # - 'OPENAI_API_KEY' for OpenAI models
33
  # - 'HUGGINGFACEHUB_API_TOKEN' for HuggingFace models
@@ -71,9 +76,14 @@ class RAG():
71
 
72
  def create_retriever(self, texts, embeddings):
73
  # Create embeddings and vector store
74
- print ('Creating vectore store with Chroma')
75
- vectorstore = Chroma.from_documents(texts, embeddings)
76
- retriever = vectorstore.as_retriever(search_kwargs={"k": self.k})
 
 
 
 
 
77
  return retriever
78
 
79
  def create_llm(self):
@@ -84,7 +94,7 @@ class RAG():
84
  model_name="gpt-4o-mini",
85
  temperature=0)
86
  elif self.use_model == 'zephyr-7b-alpha':
87
- print(f'As llm, using HF model: {self.use_model}')
88
  llm = HuggingFaceEndpoint(
89
  repo_id=f"huggingfaceh4/{self.use_model}",
90
  temperature=0.1,
 
6
 
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import Chroma
9
+ from langchain_community.vectorstores import FAISS
10
  from langchain_openai import ChatOpenAI
11
  # from langchain_community.llms import HuggingFaceHub
12
  from langchain_huggingface import HuggingFaceEndpoint
 
29
  # self.use_model = 'gpt-4o-mini'
30
  self.use_model = 'zephyr-7b-alpha'
31
 
32
+ # self.use_vectordb = 'chroma'
33
+ self.use_vectordb = 'faiss'
34
+
35
+
36
  # Load environment variables that should contain:
37
  # - 'OPENAI_API_KEY' for OpenAI models
38
  # - 'HUGGINGFACEHUB_API_TOKEN' for HuggingFace models
 
76
 
77
  def create_retriever(self, texts, embeddings):
78
  # Create embeddings and vector store
79
+ if self.use_vectordb == 'chroma':
80
+ print ('Creating vectore store with Chroma')
81
+ vectorstore = Chroma.from_documents(texts, embeddings)
82
+ retriever = vectorstore.as_retriever(search_kwargs={"k": self.k})
83
+ elif self.use_vectordb == 'faiss':
84
+ print ('Creating vectore store with FAISS')
85
+ vectorstore = FAISS.from_documents(texts, embeddings)
86
+ retriever = vectorstore.as_retriever(search_kwargs={"k": self.k})
87
  return retriever
88
 
89
  def create_llm(self):
 
94
  model_name="gpt-4o-mini",
95
  temperature=0)
96
  elif self.use_model == 'zephyr-7b-alpha':
97
+ print(f'As llm, using HF-Endpint: {self.use_model}')
98
  llm = HuggingFaceEndpoint(
99
  repo_id=f"huggingfaceh4/{self.use_model}",
100
  temperature=0.1,