Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -159,7 +159,7 @@ def create_db(splits):
|
|
| 159 |
vectordb = FAISS.from_documents(splits, embeddings)
|
| 160 |
return vectordb
|
| 161 |
|
| 162 |
-
def rerank_documents(query, docs, top_k=…):  [old value truncated in this diff view]
|
| 163 |
pairs = [[query, doc.page_content] for doc in docs]
|
| 164 |
scores = reranker.predict(pairs)
|
| 165 |
doc_score_pairs = list(zip(docs, scores))
|
|
@@ -187,10 +187,10 @@ def retrieve_documents(query, retriever, embeddings):
|
|
| 187 |
print("No initial results found")
|
| 188 |
return []
|
| 189 |
|
| 190 |
-
reranked_results = rerank_documents(query, results, top_k=…)  [old value truncated in this diff view]
|
| 191 |
print(f"Reranked results count: {len(reranked_results)}")
|
| 192 |
|
| 193 |
-
filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.…)  [old value truncated in this diff view]
|
| 194 |
print(f"Filtered chunks count: {len(filtered_chunks)}")
|
| 195 |
|
| 196 |
if not filtered_chunks:
|
|
@@ -206,7 +206,7 @@ def retrieve_documents(query, retriever, embeddings):
|
|
| 206 |
print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
|
| 207 |
print(f"Content Preview: {doc.page_content[:100]}...\n")
|
| 208 |
|
| 209 |
-
MIN_SIMILARITY = 0.…  [old value truncated in this diff view; not literally 0.0]
|
| 210 |
filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
|
| 211 |
print(f"Final filtered results count: {len(filtered_results)}")
|
| 212 |
|
|
|
|
| 159 |
vectordb = FAISS.from_documents(splits, embeddings)
|
| 160 |
return vectordb
|
| 161 |
|
| 162 |
+
def rerank_documents(query, docs, top_k=5):
|
| 163 |
pairs = [[query, doc.page_content] for doc in docs]
|
| 164 |
scores = reranker.predict(pairs)
|
| 165 |
doc_score_pairs = list(zip(docs, scores))
|
|
|
|
| 187 |
print("No initial results found")
|
| 188 |
return []
|
| 189 |
|
| 190 |
+
reranked_results = rerank_documents(query, results, top_k=5)
|
| 191 |
print(f"Reranked results count: {len(reranked_results)}")
|
| 192 |
|
| 193 |
+
filtered_chunks = filter_relevant_chunks(query, reranked_results, embeddings, threshold=0.3)
|
| 194 |
print(f"Filtered chunks count: {len(filtered_chunks)}")
|
| 195 |
|
| 196 |
if not filtered_chunks:
|
|
|
|
| 206 |
print(f"Score: {score:.4f} | Source: {doc.metadata.get('source', 'Unknown')}")
|
| 207 |
print(f"Content Preview: {doc.page_content[:100]}...\n")
|
| 208 |
|
| 209 |
+
MIN_SIMILARITY = 0.3
|
| 210 |
filtered_results = [(doc, sim) for doc, sim in zip(filtered_chunks, similarity_scores) if sim >= MIN_SIMILARITY]
|
| 211 |
print(f"Final filtered results count: {len(filtered_results)}")
|
| 212 |
|