AmaanP314 committed on
Commit
20d0294
·
verified ·
1 Parent(s): ebb9507

add relevant content + modified prompts

Browse files
Files changed (1) hide show
  1. chatbot.py +3 -22
chatbot.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  from pydantic import Field
4
  from langchain_community.retrievers import PineconeHybridSearchRetriever
@@ -18,14 +17,9 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
18
  from langchain.chains import create_history_aware_retriever
19
  from langchain.chains.combine_documents import create_stuff_documents_chain
20
  from dotenv import load_dotenv
21
-
22
- # --- New Imports for Reranking ---
23
  from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
24
  from langchain_community.document_compressors import FlashrankRerank
25
- # It's also good practice to import Ranker from the FlashRank library directly
26
- # to avoid potential Pydantic errors
27
  from flashrank import Ranker
28
- # ----------------------------------
29
 
30
  load_dotenv()
31
 
@@ -46,8 +40,6 @@ class FixedDimensionGoogleGenerativeAIEmbeddings(GoogleGenerativeAIEmbeddings):
46
  None, description="The fixed output dimension for embeddings."
47
  )
48
 
49
- # We override the __init__ to handle the parameter and pass it to the base class.
50
- # The Field definition above will handle the validation, so we don't need a custom pop.
51
  def __init__(self, **kwargs):
52
  super().__init__(**kwargs)
53
 
@@ -61,8 +53,6 @@ class FixedDimensionGoogleGenerativeAIEmbeddings(GoogleGenerativeAIEmbeddings):
61
  kwargs['output_dimensionality'] = self.output_dimensionality
62
  return super().embed_query(text, **kwargs)
63
 
64
- # Now, you can use your new class as intended
65
- # You can pass the output_dimensionality to the constructor directly.
66
  embeddings = FixedDimensionGoogleGenerativeAIEmbeddings(
67
  google_api_key=GOOGLE_API_KEY,
68
  model=embed_model,
@@ -115,10 +105,6 @@ class CustomHybridSearchRetriever(PineconeHybridSearchRetriever):
115
  docs.append(doc)
116
  return docs
117
 
118
- # --- New Reranker Integration Section ---
119
-
120
- # 1. Update the top_k for your base retriever to fetch more documents.
121
- # We will fetch a larger set (e.g., top-50) to give the reranker more options.
122
  namespace = 'portfolio'
123
  base_retriever = CustomHybridSearchRetriever(
124
  embeddings=embeddings,
@@ -128,25 +114,20 @@ base_retriever = CustomHybridSearchRetriever(
128
  namespace=namespace
129
  )
130
 
131
- # 2. Define the FlashRank reranker (the "compressor").
132
- # We specify the top_n to return after reranking (e.g., top 5).
133
  reranker_compressor = FlashrankRerank(
134
- model=rerank_model, # The default lightweight model
135
- top_n=5 # Return the top 5 most relevant documents
136
  )
137
 
138
- # 3. Create the ContextualCompressionRetriever.
139
- # This wraps your base hybrid search retriever and applies the reranker.
140
  retriever = ContextualCompressionRetriever(
141
  base_compressor=reranker_compressor,
142
  base_retriever=base_retriever
143
  )
144
- # -----------------------------------------
145
 
146
  llm = ChatGoogleGenerativeAI(
147
  model=llm_model,
148
  google_api_key=GOOGLE_API_KEY,
149
- temperature=0.0,
150
  )
151
 
152
  store = {}
 
 
1
  import os
2
  from pydantic import Field
3
  from langchain_community.retrievers import PineconeHybridSearchRetriever
 
17
  from langchain.chains import create_history_aware_retriever
18
  from langchain.chains.combine_documents import create_stuff_documents_chain
19
  from dotenv import load_dotenv
 
 
20
  from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
21
  from langchain_community.document_compressors import FlashrankRerank
 
 
22
  from flashrank import Ranker
 
23
 
24
  load_dotenv()
25
 
 
40
  None, description="The fixed output dimension for embeddings."
41
  )
42
 
 
 
43
  def __init__(self, **kwargs):
44
  super().__init__(**kwargs)
45
 
 
53
  kwargs['output_dimensionality'] = self.output_dimensionality
54
  return super().embed_query(text, **kwargs)
55
 
 
 
56
  embeddings = FixedDimensionGoogleGenerativeAIEmbeddings(
57
  google_api_key=GOOGLE_API_KEY,
58
  model=embed_model,
 
105
  docs.append(doc)
106
  return docs
107
 
 
 
 
 
108
  namespace = 'portfolio'
109
  base_retriever = CustomHybridSearchRetriever(
110
  embeddings=embeddings,
 
114
  namespace=namespace
115
  )
116
 
 
 
117
  reranker_compressor = FlashrankRerank(
118
+ model=rerank_model,
119
+ top_n=5
120
  )
121
 
 
 
122
  retriever = ContextualCompressionRetriever(
123
  base_compressor=reranker_compressor,
124
  base_retriever=base_retriever
125
  )
 
126
 
127
  llm = ChatGoogleGenerativeAI(
128
  model=llm_model,
129
  google_api_key=GOOGLE_API_KEY,
130
+ temperature=0.5,
131
  )
132
 
133
  store = {}