Prakyath01 committed on
Commit
c3bddb9
·
verified ·
1 Parent(s): 7574392

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -99,29 +99,35 @@ retriever = vectordb.as_retriever(
99
  def hybrid_search(query, top_k=5):
100
  vector_results = retriever.invoke(query)
101
 
102
- bm_scores = bm25.get_scores(query.lower().split())
103
- bm_ranked = sorted(zip(bm_scores, chunks), reverse=True)
104
- bm_results = [doc for _, doc in bm_ranked[:top_k]]
 
 
105
 
106
- unique_docs = []
 
 
107
  seen = set()
108
- for doc in vector_results + bm_results:
109
- key = (doc.metadata.get("doc_id"), doc.page_content[:60])
 
110
  if key not in seen:
111
  seen.add(key)
112
- unique_docs.append(doc)
113
 
114
- if not unique_docs:
115
  return []
116
 
117
- pairs = [(query, doc.page_content) for doc in unique_docs]
118
  scores = reranker.predict(pairs)
119
- ranked = sorted(zip(scores, unique_docs), reverse=True)[:top_k]
120
 
121
- for score, doc in ranked:
122
- doc.metadata["rerank_score"] = float(score)
 
 
 
123
 
124
- return [doc for score, doc in ranked]
125
 
126
 
127
  # ------------------ LLM ------------------ #
 
99
  def hybrid_search(query, top_k=5):
100
  vector_results = retriever.invoke(query)
101
 
102
+ tokenized_query = query.lower().split()
103
+ bm_scores = bm25.get_scores(tokenized_query)
104
+
105
+ bm_ranked = sorted(zip(bm_scores, chunks), key=lambda x: x[0], reverse=True)
106
+ bm_results = [d for _, d in bm_ranked[:top_k]]
107
 
108
+ combined = vector_results + bm_results
109
+
110
+ # remove duplicates
111
  seen = set()
112
+ unique = []
113
+ for d in combined:
114
+ key = (d.metadata.get("doc_id"), d.page_content[:80])
115
  if key not in seen:
116
  seen.add(key)
117
+ unique.append(d)
118
 
119
+ if not unique:
120
  return []
121
 
122
+ pairs = [(query, doc.page_content) for doc in unique]
123
  scores = reranker.predict(pairs)
 
124
 
125
+ ranked = sorted(zip(scores, unique), key=lambda x: x[0], reverse=True)[:top_k]
126
+ for s, doc in ranked:
127
+ doc.metadata["rerank_score"] = float(s)
128
+
129
+ return [doc for _, doc in ranked]
130
 
 
131
 
132
 
133
  # ------------------ LLM ------------------ #