Fix AttributeError: 'dict' object has no attribute 'page_content'

#18
Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -109,6 +109,7 @@ def format_source(doc):
109
 
110
  # reranker
111
  from ragatouille import RAGPretrainedModel
 
112
  reranker = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
113
 
114
 
@@ -164,19 +165,33 @@ def predict(message, history, request: gr.Request):
164
  relevant_docs = vectorstore.similarity_search(message,k=20) # retriever
165
 
166
  # Rerank using ColBERT through RAGatouille
167
- relevant_docs = reranker.rerank(
168
  query=message,
169
  documents=[doc.page_content for doc in relevant_docs],
170
  k=10
171
  )
172
 
173
- # Build context from retrieved documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  context = "\nExtracted documents:\n" + "\n".join([
175
  f"Content document {i+1}: {doc.page_content}\n\n---"
176
  for i, doc in enumerate(relevant_docs)
177
  ])
178
 
179
-
180
 
181
  # RAG tool
182
  RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
 
109
 
110
  # reranker
111
  from ragatouille import RAGPretrainedModel
112
+ from langchain.schema import Document
113
  reranker = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
114
 
115
 
 
165
  relevant_docs = vectorstore.similarity_search(message,k=20) # retriever
166
 
167
  # Rerank using ColBERT through RAGatouille
168
+ reranked_results = reranker.rerank(
169
  query=message,
170
  documents=[doc.page_content for doc in relevant_docs],
171
  k=10
172
  )
173
 
174
+
175
+ context = "\nExtracted documents:\n" + "\n".join([
176
+ f"Content document {i+1}: {doc.page_content}\n\n---"
177
+ for i, doc in enumerate(relevant_docs)
178
+ ])
179
+ # Convert back to LangChain Document objects
180
+ relevant_docs = [
181
+ Document(
182
+ page_content=doc['content'],
183
+ metadata={'rerank_score': doc['score'], 'rerank_rank': doc['rank']}
184
+ )
185
+ for doc in reranked_results
186
+ ]
187
+
188
+ # Build context from retrieved documents
189
  context = "\nExtracted documents:\n" + "\n".join([
190
  f"Content document {i+1}: {doc.page_content}\n\n---"
191
  for i, doc in enumerate(relevant_docs)
192
  ])
193
 
194
+
195
 
196
  # RAG tool
197
  RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.