Spaces:
Sleeping
Sleeping
solving AttributeError: 'dict' object has no attribute 'page_content'
#18
by RCaz - opened
app.py
CHANGED
|
@@ -109,6 +109,7 @@ def format_source(doc):
|
|
| 109 |
|
| 110 |
# reranker
|
| 111 |
from ragatouille import RAGPretrainedModel
|
|
|
|
| 112 |
reranker = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
|
| 113 |
|
| 114 |
|
|
@@ -164,19 +165,33 @@ def predict(message, history, request: gr.Request):
|
|
| 164 |
relevant_docs = vectorstore.similarity_search(message,k=20) # retriever
|
| 165 |
|
| 166 |
# Rerank using ColBERT through RAGatouille
|
| 167 |
-
relevant_docs = reranker.rerank(
|
| 168 |
query=message,
|
| 169 |
documents=[doc.page_content for doc in relevant_docs],
|
| 170 |
k=10
|
| 171 |
)
|
| 172 |
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
context = "\nExtracted documents:\n" + "\n".join([
|
| 175 |
f"Content document {i+1}: {doc.page_content}\n\n---"
|
| 176 |
for i, doc in enumerate(relevant_docs)
|
| 177 |
])
|
| 178 |
|
| 179 |
-
|
| 180 |
|
| 181 |
# RAG tool
|
| 182 |
RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
|
|
|
|
| 109 |
|
| 110 |
# reranker
|
| 111 |
from ragatouille import RAGPretrainedModel
|
| 112 |
+
from langchain.schema import Document
|
| 113 |
reranker = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
|
| 114 |
|
| 115 |
|
|
|
|
| 165 |
relevant_docs = vectorstore.similarity_search(message,k=20) # retriever
|
| 166 |
|
| 167 |
# Rerank using ColBERT through RAGatouille
|
| 168 |
+
reranked_results = reranker.rerank(
|
| 169 |
query=message,
|
| 170 |
documents=[doc.page_content for doc in relevant_docs],
|
| 171 |
k=10
|
| 172 |
)
|
| 173 |
|
| 174 |
+
|
| 175 |
+
context = "\nExtracted documents:\n" + "\n".join([
|
| 176 |
+
f"Content document {i+1}: {doc.page_content}\n\n---"
|
| 177 |
+
for i, doc in enumerate(relevant_docs)
|
| 178 |
+
])
|
| 179 |
+
# Convert back to LangChain Document objects
|
| 180 |
+
relevant_docs = [
|
| 181 |
+
Document(
|
| 182 |
+
page_content=doc['content'],
|
| 183 |
+
metadata={'rerank_score': doc['score'], 'rerank_rank': doc['rank']}
|
| 184 |
+
)
|
| 185 |
+
for doc in reranked_results
|
| 186 |
+
]
|
| 187 |
+
|
| 188 |
+
# Build context from retrieved documents
|
| 189 |
context = "\nExtracted documents:\n" + "\n".join([
|
| 190 |
f"Content document {i+1}: {doc.page_content}\n\n---"
|
| 191 |
for i, doc in enumerate(relevant_docs)
|
| 192 |
])
|
| 193 |
|
| 194 |
+
|
| 195 |
|
| 196 |
# RAG tool
|
| 197 |
RAG_PROMPT_TEMPLATE="""You will be asked information related to Rémi Cazelles's specific projects, work and education.
|